summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig26
-rw-r--r--arch/x86/Kconfig.cpu1
-rw-r--r--arch/x86/Makefile6
-rw-r--r--arch/x86/boot/compressed/relocs.c7
-rw-r--r--arch/x86/boot/memory.c29
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--arch/x86/ia32/sys_ia32.c19
-rw-r--r--arch/x86/include/asm/apic.h28
-rw-r--r--arch/x86/include/asm/apicdef.h8
-rw-r--r--arch/x86/include/asm/cpufeature.h6
-rw-r--r--arch/x86/include/asm/desc.h2
-rw-r--r--arch/x86/include/asm/hardirq.h2
-rw-r--r--arch/x86/include/asm/hw_irq.h21
-rw-r--r--arch/x86/include/asm/i8259.h4
-rw-r--r--arch/x86/include/asm/io_apic.h9
-rw-r--r--arch/x86/include/asm/irq_remapping.h2
-rw-r--r--arch/x86/include/asm/irq_vectors.h1
-rw-r--r--arch/x86/include/asm/lguest_hcall.h2
-rw-r--r--arch/x86/include/asm/mce.h1
-rw-r--r--arch/x86/include/asm/mpspec.h15
-rw-r--r--arch/x86/include/asm/paravirt.h2
-rw-r--r--arch/x86/include/asm/pat.h4
-rw-r--r--arch/x86/include/asm/percpu.h10
-rw-r--r--arch/x86/include/asm/processor.h9
-rw-r--r--arch/x86/include/asm/ptrace.h7
-rw-r--r--arch/x86/include/asm/setup.h1
-rw-r--r--arch/x86/include/asm/smp.h2
-rw-r--r--arch/x86/include/asm/spinlock.h4
-rw-r--r--arch/x86/include/asm/tlbflush.h2
-rw-r--r--arch/x86/include/asm/topology.h2
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h5
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c156
-rw-r--r--arch/x86/kernel/amd_iommu_init.c16
-rw-r--r--arch/x86/kernel/apic/apic.c311
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c4
-rw-r--r--arch/x86/kernel/apic/es7000_32.c10
-rw-r--r--arch/x86/kernel/apic/io_apic.c903
-rw-r--r--arch/x86/kernel/apic/nmi.c5
-rw-r--r--arch/x86/kernel/apic/probe_64.c2
-rw-r--r--arch/x86/kernel/apic/summit_32.c7
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c36
-rw-r--r--arch/x86/kernel/bios_uv.c3
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/common.c32
-rw-r--r--arch/x86/kernel/cpu/cpu_debug.c14
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Kconfig9
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c38
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k7.c4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c57
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c6
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c35
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c10
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c6
-rw-r--r--arch/x86/kernel/cpu/proc.c2
-rw-r--r--arch/x86/kernel/e820.c11
-rw-r--r--arch/x86/kernel/entry_64.S3
-rw-r--r--arch/x86/kernel/ftrace.c2
-rw-r--r--arch/x86/kernel/hpet.c24
-rw-r--r--arch/x86/kernel/i8253.c2
-rw-r--r--arch/x86/kernel/irq.c19
-rw-r--r--arch/x86/kernel/irqinit.c (renamed from arch/x86/kernel/irqinit_32.c)149
-rw-r--r--arch/x86/kernel/irqinit_64.c177
-rw-r--r--arch/x86/kernel/kgdb.c3
-rw-r--r--arch/x86/kernel/kvmclock.c7
-rw-r--r--arch/x86/kernel/machine_kexec_32.c4
-rw-r--r--arch/x86/kernel/machine_kexec_64.c4
-rw-r--r--arch/x86/kernel/microcode_core.c2
-rw-r--r--arch/x86/kernel/mpparse.c34
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86/kernel/quirks.c2
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/setup.c18
-rw-r--r--arch/x86/kernel/setup_percpu.c4
-rw-r--r--arch/x86/kernel/smp.c20
-rw-r--r--arch/x86/kernel/smpboot.c22
-rw-r--r--arch/x86/kernel/tlb_uv.c191
-rw-r--r--arch/x86/kernel/traps.c5
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/uv_sysfs.c4
-rw-r--r--arch/x86/kernel/uv_time.c10
-rw-r--r--arch/x86/kernel/vmiclock_32.c2
-rw-r--r--arch/x86/kernel/xsave.c4
-rw-r--r--arch/x86/kvm/mmu.c5
-rw-r--r--arch/x86/kvm/svm.c8
-rw-r--r--arch/x86/kvm/x86.c20
-rw-r--r--arch/x86/lguest/Makefile1
-rw-r--r--arch/x86/lguest/boot.c37
-rw-r--r--arch/x86/mm/hugetlbpage.c6
-rw-r--r--arch/x86/mm/init.c18
-rw-r--r--arch/x86/mm/ioremap.c10
-rw-r--r--arch/x86/mm/kmmio.c2
-rw-r--r--arch/x86/mm/numa_32.c2
-rw-r--r--arch/x86/mm/numa_64.c3
-rw-r--r--arch/x86/mm/pageattr.c140
-rw-r--r--arch/x86/mm/pat.c189
-rw-r--r--arch/x86/mm/srat_32.c2
-rw-r--r--arch/x86/mm/srat_64.c7
-rw-r--r--arch/x86/oprofile/backtrace.c2
-rw-r--r--arch/x86/pci/amd_bus.c6
-rw-r--r--arch/x86/pci/common.c5
-rw-r--r--arch/x86/pci/i386.c4
-rw-r--r--arch/x86/pci/irq.c84
-rw-r--r--arch/x86/pci/mmconfig-shared.c4
-rw-r--r--arch/x86/vdso/vclock_gettime.c12
-rw-r--r--arch/x86/xen/Makefile5
-rw-r--r--arch/x86/xen/mmu.c6
-rw-r--r--arch/x86/xen/time.c7
-rw-r--r--arch/x86/xen/xen-ops.h19
112 files changed, 1656 insertions, 1599 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3e0f80a764a7..aafae3b140de 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -279,14 +279,9 @@ config SPARSE_IRQ
If you don't know what to do here, say N.
-config NUMA_MIGRATE_IRQ_DESC
- bool "Move irq desc when changing irq smp_affinity"
+config NUMA_IRQ_DESC
+ def_bool y
depends on SPARSE_IRQ && NUMA
- default n
- ---help---
- This enables moving irq_desc to cpu/node that irq will use handled.
-
- If you don't know what to do here, say N.
config X86_MPPARSE
bool "Enable MPS table" if ACPI
@@ -358,7 +353,8 @@ config X86_UV
bool "SGI Ultraviolet"
depends on X86_64
depends on X86_EXTENDED_PLATFORM
- select X86_X2APIC
+ depends on NUMA
+ depends on X86_X2APIC
---help---
This option is needed in order to support SGI Ultraviolet systems.
If you don't have one of these, you should say N here.
@@ -501,6 +497,19 @@ config PARAVIRT
over full virtualization. However, when run without a hypervisor
the kernel is theoretically slower and slightly larger.
+config PARAVIRT_SPINLOCKS
+ bool "Paravirtualization layer for spinlocks"
+ depends on PARAVIRT && SMP && EXPERIMENTAL
+ ---help---
+ Paravirtualized spinlocks allow a pvops backend to replace the
+ spinlock implementation with something virtualization-friendly
+ (for example, block the virtual CPU rather than spinning).
+
+ Unfortunately the downside is an up to 5% performance hit on
+ native kernels, with various workloads.
+
+ If you are unsure how to answer this question, answer N.
+
config PARAVIRT_CLOCK
bool
default n
@@ -668,6 +677,7 @@ config MAXSMP
config NR_CPUS
int "Maximum number of CPUs" if SMP && !MAXSMP
+ range 2 8 if SMP && X86_32 && !X86_BIGSMP
range 2 512 if SMP && !MAXSMP
default "1" if !SMP
default "4096" if MAXSMP
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 924e156a85ab..8130334329c0 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -506,6 +506,7 @@ config X86_PTRACE_BTS
bool "Branch Trace Store"
default y
depends on X86_DEBUGCTLMSR
+ depends on BROKEN
---help---
This adds a ptrace interface to the hardware's branch trace store.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index e81f0b277761..edbd0ca62067 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -138,7 +138,7 @@ endif
boot := arch/x86/boot
-BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage install
+BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage
PHONY += bzImage $(BOOT_TARGETS)
@@ -156,6 +156,10 @@ bzImage: vmlinux
$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
+PHONY += install
+install:
+ $(Q)$(MAKE) $(build)=$(boot) $@
+
PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/x86/vdso $@
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index 857e492c571e..bbeb0c3fbd90 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -504,8 +504,11 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
if (sym->st_shndx == SHN_ABS) {
continue;
}
- if (r_type == R_386_PC32) {
- /* PC relative relocations don't need to be adjusted */
+ if (r_type == R_386_NONE || r_type == R_386_PC32) {
+ /*
+ * NONE can be ignored and and PC relative
+ * relocations don't need to be adjusted.
+ */
}
else if (r_type == R_386_32) {
/* Visit relocations that need to be adjusted */
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index 5054c2ddd1a0..74b3d2ba84e9 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -17,11 +17,6 @@
#define SMAP 0x534d4150 /* ASCII "SMAP" */
-struct e820_ext_entry {
- struct e820entry std;
- u32 ext_flags;
-} __attribute__((packed));
-
static int detect_memory_e820(void)
{
int count = 0;
@@ -29,13 +24,21 @@ static int detect_memory_e820(void)
u32 size, id, edi;
u8 err;
struct e820entry *desc = boot_params.e820_map;
- static struct e820_ext_entry buf; /* static so it is zeroed */
+ static struct e820entry buf; /* static so it is zeroed */
/*
- * Set this here so that if the BIOS doesn't change this field
- * but still doesn't change %ecx, we're still okay...
+ * Note: at least one BIOS is known which assumes that the
+ * buffer pointed to by one e820 call is the same one as
+ * the previous call, and only changes modified fields. Therefore,
+ * we use a temporary buffer and copy the results entry by entry.
+ *
+ * This routine deliberately does not try to account for
+ * ACPI 3+ extended attributes. This is because there are
+ * BIOSes in the field which report zero for the valid bit for
+ * all ranges, and we don't currently make any use of the
+ * other attribute bits. Revisit this if we see the extended
+ * attribute bits deployed in a meaningful way in the future.
*/
- buf.ext_flags = 1;
do {
size = sizeof buf;
@@ -66,13 +69,7 @@ static int detect_memory_e820(void)
break;
}
- /* ACPI 3.0 added the extended flags support. If bit 0
- in the extended flags is zero, we're supposed to simply
- ignore the entry -- a backwards incompatible change! */
- if (size > 20 && !(buf.ext_flags & 1))
- continue;
-
- *desc++ = buf.std;
+ *desc++ = buf;
count++;
} while (next && count < ARRAY_SIZE(boot_params.e820_map));
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 4ba7d4ef9aac..cee1dd2e69b2 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -209,7 +209,6 @@ CONFIG_HIGH_RES_TIMERS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
CONFIG_SMP=y
CONFIG_SPARSE_IRQ=y
-# CONFIG_NUMA_MIGRATE_IRQ_DESC is not set
CONFIG_X86_MPPARSE=y
CONFIG_X86_EXTENDED_PLATFORM=y
# CONFIG_X86_VSMP is not set
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index efac92fd1efb..085a8c35f149 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -129,21 +129,12 @@ asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename,
struct stat64 __user *statbuf, int flag)
{
struct kstat stat;
- int error = -EINVAL;
+ int error;
- if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
- goto out;
-
- if (flag & AT_SYMLINK_NOFOLLOW)
- error = vfs_lstat_fd(dfd, filename, &stat);
- else
- error = vfs_stat_fd(dfd, filename, &stat);
-
- if (!error)
- error = cp_stat64(statbuf, &stat);
-
-out:
- return error;
+ error = vfs_fstatat(dfd, filename, &stat, flag);
+ if (error)
+ return error;
+ return cp_stat64(statbuf, &stat);
}
/*
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 42f2f8377422..3738438a91f5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -107,8 +107,7 @@ extern u32 native_safe_apic_wait_icr_idle(void);
extern void native_apic_icr_write(u32 low, u32 id);
extern u64 native_apic_icr_read(void);
-#define EIM_8BIT_APIC_ID 0
-#define EIM_32BIT_APIC_ID 1
+extern int x2apic_mode;
#ifdef CONFIG_X86_X2APIC
/*
@@ -166,10 +165,9 @@ static inline u64 native_x2apic_icr_read(void)
return val;
}
-extern int x2apic, x2apic_phys;
+extern int x2apic_phys;
extern void check_x2apic(void);
extern void enable_x2apic(void);
-extern void enable_IR_x2apic(void);
extern void x2apic_icr_write(u32 low, u32 id);
static inline int x2apic_enabled(void)
{
@@ -183,6 +181,8 @@ static inline int x2apic_enabled(void)
return 1;
return 0;
}
+
+#define x2apic_supported() (cpu_has_x2apic)
#else
static inline void check_x2apic(void)
{
@@ -190,28 +190,20 @@ static inline void check_x2apic(void)
static inline void enable_x2apic(void)
{
}
-static inline void enable_IR_x2apic(void)
-{
-}
static inline int x2apic_enabled(void)
{
return 0;
}
-#define x2apic 0
-
+#define x2apic_preenabled 0
+#define x2apic_supported() 0
#endif
-extern int get_physical_broadcast(void);
+extern void enable_IR_x2apic(void);
-#ifdef CONFIG_X86_X2APIC
-static inline void ack_x2APIC_irq(void)
-{
- /* Docs say use 0 for future compatibility */
- native_apic_msr_write(APIC_EOI, 0);
-}
-#endif
+extern int get_physical_broadcast(void);
+extern void apic_disable(void);
extern int lapic_get_maxlvt(void);
extern void clear_local_APIC(void);
extern void connect_bsp_APIC(void);
@@ -252,7 +244,7 @@ static inline void lapic_shutdown(void) { }
#define local_apic_timer_c2_ok 1
static inline void init_apic_mappings(void) { }
static inline void disable_local_APIC(void) { }
-
+static inline void apic_disable(void) { }
#endif /* !CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index bc9514fb3b13..7ddb36ab933b 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -22,6 +22,7 @@
# define APIC_INTEGRATED(x) (1)
#endif
#define APIC_XAPIC(x) ((x) >= 0x14)
+#define APIC_EXT_SPACE(x) ((x) & 0x80000000)
#define APIC_TASKPRI 0x80
#define APIC_TPRI_MASK 0xFFu
#define APIC_ARBPRI 0x90
@@ -116,7 +117,9 @@
#define APIC_TDR_DIV_32 0x8
#define APIC_TDR_DIV_64 0x9
#define APIC_TDR_DIV_128 0xA
-#define APIC_EILVT0 0x500
+#define APIC_EFEAT 0x400
+#define APIC_ECTRL 0x410
+#define APIC_EILVTn(n) (0x500 + 0x10 * n)
#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */
#define APIC_EILVT_NR_AMD_10H 4
#define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF)
@@ -125,9 +128,6 @@
#define APIC_EILVT_MSG_NMI 0x4
#define APIC_EILVT_MSG_EXT 0x7
#define APIC_EILVT_MASKED (1 << 16)
-#define APIC_EILVT1 0x510
-#define APIC_EILVT2 0x520
-#define APIC_EILVT3 0x530
#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
#define APIC_BASE_MSR 0x800
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bb83b1c397aa..13cc6a503a02 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -22,7 +22,7 @@
#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers */
#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Exception */
#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */
#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */
@@ -192,11 +192,11 @@ extern const char * const x86_power_flags[32];
#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
#define setup_clear_cpu_cap(bit) do { \
clear_cpu_cap(&boot_cpu_data, bit); \
- set_bit(bit, (unsigned long *)cleared_cpu_caps); \
+ set_bit(bit, (unsigned long *)cpu_caps_cleared); \
} while (0)
#define setup_force_cpu_cap(bit) do { \
set_cpu_cap(&boot_cpu_data, bit); \
- clear_bit(bit, (unsigned long *)cleared_cpu_caps); \
+ set_bit(bit, (unsigned long *)cpu_caps_set); \
} while (0)
#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 5623c50d67b2..c45f415ce315 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -37,7 +37,7 @@ extern gate_desc idt_table[];
struct gdt_page {
struct desc_struct gdt[GDT_ENTRIES];
} __attribute__((aligned(PAGE_SIZE)));
-DECLARE_PER_CPU(struct gdt_page, gdt_page);
+DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
{
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 039db6aa8e02..37555e52f980 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -26,7 +26,7 @@ typedef struct {
#endif
} ____cacheline_aligned irq_cpustat_t;
-DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
+DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
#define MAX_HARDIRQS_PER_CPU NR_VECTORS
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b762ea49bd70..a7d14bbae110 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -63,7 +63,26 @@ extern unsigned long io_apic_irqs;
extern void init_VISWS_APIC_irqs(void);
extern void setup_IO_APIC(void);
extern void disable_IO_APIC(void);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+
+struct io_apic_irq_attr {
+ int ioapic;
+ int ioapic_pin;
+ int trigger;
+ int polarity;
+};
+
+static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
+ int ioapic, int ioapic_pin,
+ int trigger, int polarity)
+{
+ irq_attr->ioapic = ioapic;
+ irq_attr->ioapic_pin = ioapic_pin;
+ irq_attr->trigger = trigger;
+ irq_attr->polarity = polarity;
+}
+
+extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin,
+ struct io_apic_irq_attr *irq_attr);
extern void setup_ioapic_dest(void);
extern void enable_IO_APIC(void);
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 1a99e6c092af..58d7091eeb1f 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -60,8 +60,4 @@ extern struct irq_chip i8259A_chip;
extern void mask_8259A(void);
extern void unmask_8259A(void);
-#ifdef CONFIG_X86_32
-extern void init_ISA_irqs(void);
-#endif
-
#endif /* _ASM_X86_I8259_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 9d826e436010..daf866ed0612 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -154,22 +154,19 @@ extern int timer_through_8259;
extern int io_apic_get_unique_id(int ioapic, int apic_id);
extern int io_apic_get_version(int ioapic);
extern int io_apic_get_redir_entries(int ioapic);
-extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
- int edge_level, int active_high_low);
#endif /* CONFIG_ACPI */
+struct io_apic_irq_attr;
+extern int io_apic_set_pci_routing(struct device *dev, int irq,
+ struct io_apic_irq_attr *irq_attr);
extern int (*ioapic_renumber_irq)(int ioapic, int irq);
extern void ioapic_init_mappings(void);
-#ifdef CONFIG_X86_64
extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
-extern void reinit_intr_remapped_IO_APIC(int intr_remapping,
- struct IO_APIC_route_entry **ioapic_entries);
-#endif
extern void probe_nr_irqs_gsi(void);
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 0396760fccb8..f275e2244505 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -1,6 +1,6 @@
#ifndef _ASM_X86_IRQ_REMAPPING_H
#define _ASM_X86_IRQ_REMAPPING_H
-#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
+#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
#endif /* _ASM_X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 3cbd79bbb47c..910b5a3d6751 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -34,6 +34,7 @@
#ifdef CONFIG_X86_32
# define SYSCALL_VECTOR 0x80
+# define IA32_SYSCALL_VECTOR 0x80
#else
# define IA32_SYSCALL_VECTOR 0x80
#endif
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index 0f4ee7148afe..faae1996487b 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -5,7 +5,6 @@
#define LHCALL_FLUSH_ASYNC 0
#define LHCALL_LGUEST_INIT 1
#define LHCALL_SHUTDOWN 2
-#define LHCALL_LOAD_GDT 3
#define LHCALL_NEW_PGTABLE 4
#define LHCALL_FLUSH_TLB 5
#define LHCALL_LOAD_IDT_ENTRY 6
@@ -17,6 +16,7 @@
#define LHCALL_SET_PMD 15
#define LHCALL_LOAD_TLS 16
#define LHCALL_NOTIFY 17
+#define LHCALL_LOAD_GDT_ENTRY 18
#define LGUEST_TRAP_ENTRY 0x1F
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 563933e06a35..4f8c199584e7 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -137,6 +137,7 @@ DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
enum mcp_flags {
MCP_TIMESTAMP = (1 << 0), /* log time stamp */
MCP_UC = (1 << 1), /* log uncorrected errors */
+ MCP_DONTLOG = (1 << 2), /* only clear, don't log */
};
extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 642fc7fc8cdc..e2a1bb6d71ea 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -61,9 +61,11 @@ extern void get_smp_config(void);
#ifdef CONFIG_X86_MPPARSE
extern void find_smp_config(void);
extern void early_reserve_e820_mpc_new(void);
+extern int enable_update_mptable;
#else
static inline void find_smp_config(void) { }
static inline void early_reserve_e820_mpc_new(void) { }
+#define enable_update_mptable 0
#endif
void __cpuinit generic_processor_info(int apicid, int version);
@@ -72,20 +74,13 @@ extern void mp_register_ioapic(int id, u32 address, u32 gsi_base);
extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
u32 gsi);
extern void mp_config_acpi_legacy_irqs(void);
-extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
+struct device;
+extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
+ int active_high_low);
extern int acpi_probe_gsi(void);
#ifdef CONFIG_X86_IO_APIC
-extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
- u32 gsi, int triggering, int polarity);
extern int mp_find_ioapic(int gsi);
extern int mp_find_ioapic_pin(int ioapic, int gsi);
-#else
-static inline int
-mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
- u32 gsi, int triggering, int polarity)
-{
- return 0;
-}
#endif
#else /* !CONFIG_ACPI: */
static inline int acpi_probe_gsi(void)
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 378e3691c08c..a53da004e08e 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1443,7 +1443,7 @@ u64 _paravirt_ident_64(u64);
#define paravirt_nop ((void *)_paravirt_nop)
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
{
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index 2cd07b9422f4..7af14e512f97 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -18,9 +18,5 @@ extern int free_memtype(u64 start, u64 end);
extern int kernel_map_sync_memtype(u64 base, unsigned long size,
unsigned long flag);
-extern void map_devmem(unsigned long pfn, unsigned long size,
- struct pgprot vma_prot);
-extern void unmap_devmem(unsigned long pfn, unsigned long size,
- struct pgprot vma_prot);
#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index aee103b26d01..02ecb30982a3 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -82,22 +82,22 @@ do { \
case 1: \
asm(op "b %1,"__percpu_arg(0) \
: "+m" (var) \
- : "ri" ((T__)val)); \
+ : "qi" ((T__)(val))); \
break; \
case 2: \
asm(op "w %1,"__percpu_arg(0) \
: "+m" (var) \
- : "ri" ((T__)val)); \
+ : "ri" ((T__)(val))); \
break; \
case 4: \
asm(op "l %1,"__percpu_arg(0) \
: "+m" (var) \
- : "ri" ((T__)val)); \
+ : "ri" ((T__)(val))); \
break; \
case 8: \
asm(op "q %1,"__percpu_arg(0) \
: "+m" (var) \
- : "re" ((T__)val)); \
+ : "re" ((T__)(val))); \
break; \
default: __bad_percpu_size(); \
} \
@@ -109,7 +109,7 @@ do { \
switch (sizeof(var)) { \
case 1: \
asm(op "b "__percpu_arg(1)",%0" \
- : "=r" (ret__) \
+ : "=q" (ret__) \
: "m" (var)); \
break; \
case 2: \
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 6384d25121ca..6469866a7410 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -135,10 +135,11 @@ extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;
extern struct tss_struct doublefault_tss;
-extern __u32 cleared_cpu_caps[NCAPINTS];
+extern __u32 cpu_caps_cleared[NCAPINTS];
+extern __u32 cpu_caps_set[NCAPINTS];
#ifdef CONFIG_SMP
-DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
+DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
#define cpu_data(cpu) per_cpu(cpu_info, cpu)
#define current_cpu_data __get_cpu_var(cpu_info)
#else
@@ -270,7 +271,7 @@ struct tss_struct {
} ____cacheline_aligned;
-DECLARE_PER_CPU(struct tss_struct, init_tss);
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss);
/*
* Save the original ist values for checking stack pointers during debugging
@@ -393,7 +394,7 @@ union irq_stack_union {
};
};
-DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
+DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union);
DECLARE_INIT_PER_CPU(irq_stack_union);
DECLARE_PER_CPU(char *, irq_stack_ptr);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index e304b66abeea..624f133943ed 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -187,14 +187,15 @@ static inline int v8086_mode(struct pt_regs *regs)
/*
* X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
- * when it traps. So regs will be the current sp.
+ * when it traps. The previous stack will be directly underneath the saved
+ * registers, and 'sp/ss' won't even have been saved. Thus the '&regs->sp'.
*
* This is valid only for kernel mode traps.
*/
-static inline unsigned long kernel_trap_sp(struct pt_regs *regs)
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
{
#ifdef CONFIG_X86_32
- return (unsigned long)regs;
+ return (unsigned long)(&regs->sp);
#else
return regs->sp;
#endif
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index bdc2ada05ae0..4093d1ed6db2 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -33,7 +33,6 @@ struct x86_quirks {
int (*setup_ioapic_ids)(void);
};
-extern void x86_quirk_pre_intr_init(void);
extern void x86_quirk_intr_init(void);
extern void x86_quirk_trap_init(void);
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 19e0d88b966d..6a84ed166aec 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -180,7 +180,7 @@ extern int safe_smp_processor_id(void);
static inline int logical_smp_processor_id(void)
{
/* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+ return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
}
#endif
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index e5e6caffec87..b7e5db876399 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -172,7 +172,7 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
}
-#ifndef CONFIG_PARAVIRT
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
{
@@ -206,7 +206,7 @@ static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
__raw_spin_lock(lock);
}
-#endif
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
{
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index d3539f998f88..16a5c84b0329 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -152,7 +152,7 @@ struct tlb_state {
struct mm_struct *active_mm;
int state;
};
-DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
static inline void reset_lazy_tlbstate(void)
{
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 892b119dba6f..f44b49abca49 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -200,7 +200,7 @@ static inline void arch_fix_phys_package_id(int num, u32 slot)
}
struct pci_bus;
-void set_pci_bus_resources_arch_default(struct pci_bus *b);
+void x86_pci_root_bus_res_quirks(struct pci_bus *b);
#ifdef CONFIG_SMP
#define mc_capable() (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids)
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index db68ac8a5ac2..2cae46c7c8a2 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -17,6 +17,11 @@
/* ========================================================================= */
/* UVH_BAU_DATA_CONFIG */
/* ========================================================================= */
+#define UVH_LB_BAU_MISC_CONTROL 0x320170UL
+#define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15
+#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16
+#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL
+/* 1011 timebase 7 (168millisec) * 3 ticks -> 500ms */
#define UVH_BAU_DATA_CONFIG 0x61680UL
#define UVH_BAU_DATA_CONFIG_32 0x0438
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 145cce75cda7..235f5927bb97 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -28,7 +28,7 @@ CFLAGS_paravirt.o := $(nostackp)
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
-obj-y += setup.o i8259.o irqinit_$(BITS).o
+obj-y += setup.o i8259.o irqinit.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
@@ -89,7 +89,8 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
obj-$(CONFIG_KVM_GUEST) += kvm.o
obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 723989d7f802..631086159c53 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -33,6 +33,7 @@
#include <linux/irq.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
+#include <linux/pci.h>
#include <asm/pgtable.h>
#include <asm/io_apic.h>
@@ -522,7 +523,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
* success: return IRQ number (>=0)
* failure: return < 0
*/
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
{
unsigned int irq;
unsigned int plat_gsi = gsi;
@@ -532,14 +533,14 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity)
* Make sure all (legacy) PCI IRQs are set as level-triggered.
*/
if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
- if (triggering == ACPI_LEVEL_SENSITIVE)
+ if (trigger == ACPI_LEVEL_SENSITIVE)
eisa_set_level_irq(gsi);
}
#endif
#ifdef CONFIG_X86_IO_APIC
if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
- plat_gsi = mp_register_gsi(gsi, triggering, polarity);
+ plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity);
}
#endif
acpi_gsi_to_irq(plat_gsi, &irq);
@@ -903,10 +904,8 @@ extern int es7000_plat;
#endif
static struct {
- int apic_id;
int gsi_base;
int gsi_end;
- DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
} mp_ioapic_routing[MAX_IO_APICS];
int mp_find_ioapic(int gsi)
@@ -986,16 +985,12 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
mp_ioapics[idx].apicid = uniq_ioapic_id(id);
-#ifdef CONFIG_X86_32
mp_ioapics[idx].apicver = io_apic_get_version(idx);
-#else
- mp_ioapics[idx].apicver = 0;
-#endif
+
/*
* Build basic GSI lookup table to facilitate gsi->io_apic lookups
* and to prevent reprogramming of IOAPIC pins (PCI GSIs).
*/
- mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
mp_ioapic_routing[idx].gsi_base = gsi_base;
mp_ioapic_routing[idx].gsi_end = gsi_base +
io_apic_get_redir_entries(idx);
@@ -1158,26 +1153,52 @@ void __init mp_config_acpi_legacy_irqs(void)
}
}
-int mp_register_gsi(u32 gsi, int triggering, int polarity)
+static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
+ int polarity)
{
+#ifdef CONFIG_X86_MPPARSE
+ struct mpc_intsrc mp_irq;
+ struct pci_dev *pdev;
+ unsigned char number;
+ unsigned int devfn;
int ioapic;
- int ioapic_pin;
-#ifdef CONFIG_X86_32
-#define MAX_GSI_NUM 4096
-#define IRQ_COMPRESSION_START 64
+ u8 pin;
- static int pci_irq = IRQ_COMPRESSION_START;
- /*
- * Mapping between Global System Interrupts, which
- * represent all possible interrupts, and IRQs
- * assigned to actual devices.
- */
- static int gsi_to_irq[MAX_GSI_NUM];
-#else
+ if (!acpi_ioapic)
+ return 0;
+ if (!dev)
+ return 0;
+ if (dev->bus != &pci_bus_type)
+ return 0;
+
+ pdev = to_pci_dev(dev);
+ number = pdev->bus->number;
+ devfn = pdev->devfn;
+ pin = pdev->pin;
+ /* print the entry should happen on mptable identically */
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+ (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
+ mp_irq.srcbus = number;
+ mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+ ioapic = mp_find_ioapic(gsi);
+ mp_irq.dstapic = mp_ioapics[ioapic].apicid;
+ mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
+
+ save_mp_irq(&mp_irq);
+#endif
+ return 0;
+}
+
+int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
+{
+ int ioapic;
+ int ioapic_pin;
+ struct io_apic_irq_attr irq_attr;
if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
return gsi;
-#endif
/* Don't set up the ACPI SCI because it's already set up */
if (acpi_gbl_FADT.sci_interrupt == gsi)
@@ -1196,93 +1217,22 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
gsi = ioapic_renumber_irq(ioapic, gsi);
#endif
- /*
- * Avoid pin reprogramming. PRTs typically include entries
- * with redundant pin->gsi mappings (but unique PCI devices);
- * we only program the IOAPIC on the first.
- */
if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
printk(KERN_ERR "Invalid reference to IOAPIC pin "
- "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+ "%d-%d\n", mp_ioapics[ioapic].apicid,
ioapic_pin);
return gsi;
}
- if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
- pr_debug("Pin %d-%d already programmed\n",
- mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
-#ifdef CONFIG_X86_32
- return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
-#else
- return gsi;
-#endif
- }
-
- set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
-#ifdef CONFIG_X86_32
- /*
- * For GSI >= 64, use IRQ compression
- */
- if ((gsi >= IRQ_COMPRESSION_START)
- && (triggering == ACPI_LEVEL_SENSITIVE)) {
- /*
- * For PCI devices assign IRQs in order, avoiding gaps
- * due to unused I/O APIC pins.
- */
- int irq = gsi;
- if (gsi < MAX_GSI_NUM) {
- /*
- * Retain the VIA chipset work-around (gsi > 15), but
- * avoid a problem where the 8254 timer (IRQ0) is setup
- * via an override (so it's not on pin 0 of the ioapic),
- * and at the same time, the pin 0 interrupt is a PCI
- * type. The gsi > 15 test could cause these two pins
- * to be shared as IRQ0, and they are not shareable.
- * So test for this condition, and if necessary, avoid
- * the pin collision.
- */
- gsi = pci_irq++;
- /*
- * Don't assign IRQ used by ACPI SCI
- */
- if (gsi == acpi_gbl_FADT.sci_interrupt)
- gsi = pci_irq++;
- gsi_to_irq[irq] = gsi;
- } else {
- printk(KERN_ERR "GSI %u is too high\n", gsi);
- return gsi;
- }
- }
-#endif
- io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
- triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
- polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
- return gsi;
-}
-int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
- u32 gsi, int triggering, int polarity)
-{
-#ifdef CONFIG_X86_MPPARSE
- struct mpc_intsrc mp_irq;
- int ioapic;
+ if (enable_update_mptable)
+ mp_config_acpi_gsi(dev, gsi, trigger, polarity);
- if (!acpi_ioapic)
- return 0;
+ set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin,
+ trigger == ACPI_EDGE_SENSITIVE ? 0 : 1,
+ polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+ io_apic_set_pci_routing(dev, gsi, &irq_attr);
- /* print the entry should happen on mptable identically */
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
- mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
- (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
- mp_irq.srcbus = number;
- mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
- ioapic = mp_find_ioapic(gsi);
- mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
- mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
-
- save_mp_irq(&mp_irq);
-#endif
- return 0;
+ return gsi;
}
/*
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 42c33cebf00f..8c0be0902dac 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -49,10 +49,10 @@
#define IVHD_DEV_EXT_SELECT 0x46
#define IVHD_DEV_EXT_SELECT_RANGE 0x47
-#define IVHD_FLAG_HT_TUN_EN 0x00
-#define IVHD_FLAG_PASSPW_EN 0x01
-#define IVHD_FLAG_RESPASSPW_EN 0x02
-#define IVHD_FLAG_ISOC_EN 0x03
+#define IVHD_FLAG_HT_TUN_EN_MASK 0x01
+#define IVHD_FLAG_PASSPW_EN_MASK 0x02
+#define IVHD_FLAG_RESPASSPW_EN_MASK 0x04
+#define IVHD_FLAG_ISOC_EN_MASK 0x08
#define IVMD_FLAG_EXCL_RANGE 0x08
#define IVMD_FLAG_UNITY_MAP 0x01
@@ -569,19 +569,19 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
* First set the recommended feature enable bits from ACPI
* into the IOMMU control registers
*/
- h->flags & IVHD_FLAG_HT_TUN_EN ?
+ h->flags & IVHD_FLAG_HT_TUN_EN_MASK ?
iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
- h->flags & IVHD_FLAG_PASSPW_EN ?
+ h->flags & IVHD_FLAG_PASSPW_EN_MASK ?
iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
- h->flags & IVHD_FLAG_RESPASSPW_EN ?
+ h->flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
- h->flags & IVHD_FLAG_ISOC_EN ?
+ h->flags & IVHD_FLAG_ISOC_EN_MASK ?
iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
iommu_feature_disable(iommu, CONTROL_ISOC_EN);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f2870920f246..a4c9cf0bf70b 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -98,6 +98,29 @@ early_param("lapic", parse_lapic);
/* Local APIC was disabled by the BIOS and enabled by the kernel */
static int enabled_via_apicbase;
+/*
+ * Handle interrupt mode configuration register (IMCR).
+ * This register controls whether the interrupt signals
+ * that reach the BSP come from the master PIC or from the
+ * local APIC. Before entering Symmetric I/O Mode, either
+ * the BIOS or the operating system must switch out of
+ * PIC Mode by changing the IMCR.
+ */
+static inline void imcr_pic_to_apic(void)
+{
+ /* select IMCR register */
+ outb(0x70, 0x22);
+ /* NMI and 8259 INTR go through APIC */
+ outb(0x01, 0x23);
+}
+
+static inline void imcr_apic_to_pic(void)
+{
+ /* select IMCR register */
+ outb(0x70, 0x22);
+ /* NMI and 8259 INTR go directly to BSP */
+ outb(0x00, 0x23);
+}
#endif
#ifdef CONFIG_X86_64
@@ -111,13 +134,19 @@ static __init int setup_apicpmtimer(char *s)
__setup("apicpmtimer", setup_apicpmtimer);
#endif
+int x2apic_mode;
#ifdef CONFIG_X86_X2APIC
-int x2apic;
/* x2apic enabled before OS handover */
static int x2apic_preenabled;
static int disable_x2apic;
static __init int setup_nox2apic(char *str)
{
+ if (x2apic_enabled()) {
+ pr_warning("Bios already enabled x2apic, "
+ "can't enforce nox2apic");
+ return 0;
+ }
+
disable_x2apic = 1;
setup_clear_cpu_cap(X86_FEATURE_X2APIC);
return 0;
@@ -209,6 +238,31 @@ static int modern_apic(void)
return lapic_get_version() >= 0x14;
}
+/*
+ * bare function to substitute write operation
+ * and it's _that_ fast :)
+ */
+static void native_apic_write_dummy(u32 reg, u32 v)
+{
+ WARN_ON_ONCE((cpu_has_apic || !disable_apic));
+}
+
+static u32 native_apic_read_dummy(u32 reg)
+{
+ WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+ return 0;
+}
+
+/*
+ * right after this call apic->write/read doesn't do anything
+ * note that there is no restore operation it works one way
+ */
+void apic_disable(void)
+{
+ apic->read = native_apic_read_dummy;
+ apic->write = native_apic_write_dummy;
+}
+
void native_apic_wait_icr_idle(void)
{
while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
@@ -348,7 +402,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
{
- unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
+ unsigned long reg = (lvt_off << 4) + APIC_EILVTn(0);
unsigned int v = (mask << 16) | (msg_type << 8) | vector;
apic_write(reg, v);
@@ -815,7 +869,7 @@ void clear_local_APIC(void)
u32 v;
/* APIC hasn't been mapped yet */
- if (!x2apic && !apic_phys)
+ if (!x2apic_mode && !apic_phys)
return;
maxlvt = lapic_get_maxlvt();
@@ -1287,7 +1341,7 @@ void check_x2apic(void)
{
if (x2apic_enabled()) {
pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
- x2apic_preenabled = x2apic = 1;
+ x2apic_preenabled = x2apic_mode = 1;
}
}
@@ -1295,7 +1349,7 @@ void enable_x2apic(void)
{
int msr, msr2;
- if (!x2apic)
+ if (!x2apic_mode)
return;
rdmsr(MSR_IA32_APICBASE, msr, msr2);
@@ -1304,6 +1358,7 @@ void enable_x2apic(void)
wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
}
}
+#endif /* CONFIG_X86_X2APIC */
void __init enable_IR_x2apic(void)
{
@@ -1312,32 +1367,21 @@ void __init enable_IR_x2apic(void)
unsigned long flags;
struct IO_APIC_route_entry **ioapic_entries = NULL;
- if (!cpu_has_x2apic)
- return;
-
- if (!x2apic_preenabled && disable_x2apic) {
- pr_info("Skipped enabling x2apic and Interrupt-remapping "
- "because of nox2apic\n");
- return;
+ ret = dmar_table_init();
+ if (ret) {
+ pr_debug("dmar_table_init() failed with %d:\n", ret);
+ goto ir_failed;
}
- if (x2apic_preenabled && disable_x2apic)
- panic("Bios already enabled x2apic, can't enforce nox2apic");
-
- if (!x2apic_preenabled && skip_ioapic_setup) {
- pr_info("Skipped enabling x2apic and Interrupt-remapping "
- "because of skipping io-apic setup\n");
- return;
+ if (!intr_remapping_supported()) {
+ pr_debug("intr-remapping not supported\n");
+ goto ir_failed;
}
- ret = dmar_table_init();
- if (ret) {
- pr_info("dmar_table_init() failed with %d:\n", ret);
- if (x2apic_preenabled)
- panic("x2apic enabled by bios. But IR enabling failed");
- else
- pr_info("Not enabling x2apic,Intr-remapping\n");
+ if (!x2apic_preenabled && skip_ioapic_setup) {
+ pr_info("Skipped enabling intr-remap because of skipping "
+ "io-apic setup\n");
return;
}
@@ -1357,19 +1401,16 @@ void __init enable_IR_x2apic(void)
mask_IO_APIC_setup(ioapic_entries);
mask_8259A();
- ret = enable_intr_remapping(EIM_32BIT_APIC_ID);
-
- if (ret && x2apic_preenabled) {
- local_irq_restore(flags);
- panic("x2apic enabled by bios. But IR enabling failed");
- }
-
+ ret = enable_intr_remapping(x2apic_supported());
if (ret)
goto end_restore;
- if (!x2apic) {
- x2apic = 1;
+ pr_info("Enabled Interrupt-remapping\n");
+
+ if (x2apic_supported() && !x2apic_mode) {
+ x2apic_mode = 1;
enable_x2apic();
+ pr_info("Enabled x2apic\n");
}
end_restore:
@@ -1378,37 +1419,34 @@ end_restore:
* IR enabling failed
*/
restore_IO_APIC_setup(ioapic_entries);
- else
- reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries);
unmask_8259A();
local_irq_restore(flags);
end:
- if (!ret) {
- if (!x2apic_preenabled)
- pr_info("Enabled x2apic and interrupt-remapping\n");
- else
- pr_info("Enabled Interrupt-remapping\n");
- } else
- pr_err("Failed to enable Interrupt-remapping and x2apic\n");
if (ioapic_entries)
free_ioapic_entries(ioapic_entries);
+
+ if (!ret)
+ return;
+
+ir_failed:
+ if (x2apic_preenabled)
+ panic("x2apic enabled by bios. But IR enabling failed");
+ else if (cpu_has_x2apic)
+ pr_info("Not enabling x2apic,Intr-remapping\n");
#else
if (!cpu_has_x2apic)
return;
if (x2apic_preenabled)
panic("x2apic enabled prior OS handover,"
- " enable CONFIG_INTR_REMAP");
-
- pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping "
- " and x2apic\n");
+ " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP");
#endif
return;
}
-#endif /* CONFIG_X86_X2APIC */
+
#ifdef CONFIG_X86_64
/*
@@ -1425,7 +1463,6 @@ static int __init detect_init_APIC(void)
}
mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
- boot_cpu_physical_apicid = 0;
return 0;
}
#else
@@ -1539,32 +1576,49 @@ void __init early_init_lapic_mapping(void)
*/
void __init init_apic_mappings(void)
{
- if (x2apic) {
+ unsigned int new_apicid;
+
+ if (x2apic_mode) {
boot_cpu_physical_apicid = read_apic_id();
return;
}
- /*
- * If no local APIC can be found then set up a fake all
- * zeroes page to simulate the local APIC and another
- * one for the IO-APIC.
- */
+ /* If no local APIC can be found return early */
if (!smp_found_config && detect_init_APIC()) {
- apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
- apic_phys = __pa(apic_phys);
- } else
+ /* lets NOP'ify apic operations */
+ pr_info("APIC: disable apic facility\n");
+ apic_disable();
+ } else {
apic_phys = mp_lapic_addr;
- set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
- apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
- APIC_BASE, apic_phys);
+ /*
+ * acpi lapic path already maps that address in
+ * acpi_register_lapic_address()
+ */
+ if (!acpi_lapic)
+ set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+
+ apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
+ APIC_BASE, apic_phys);
+ }
/*
* Fetch the APIC ID of the BSP in case we have a
* default configuration (or the MP table is broken).
*/
- if (boot_cpu_physical_apicid == -1U)
- boot_cpu_physical_apicid = read_apic_id();
+ new_apicid = read_apic_id();
+ if (boot_cpu_physical_apicid != new_apicid) {
+ boot_cpu_physical_apicid = new_apicid;
+ /*
+ * yeah -- we lie about apic_version
+ * in case if apic was disabled via boot option
+ * but it's not a problem for SMP compiled kernel
+ * since smp_sanity_check is prepared for such a case
+ * and disable smp mode
+ */
+ apic_version[new_apicid] =
+ GET_APIC_VERSION(apic_read(APIC_LVR));
+ }
}
/*
@@ -1733,8 +1787,7 @@ void __init connect_bsp_APIC(void)
*/
apic_printk(APIC_VERBOSE, "leaving PIC mode, "
"enabling APIC mode.\n");
- outb(0x70, 0x22);
- outb(0x01, 0x23);
+ imcr_pic_to_apic();
}
#endif
if (apic->enable_apic_mode)
@@ -1762,8 +1815,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
*/
apic_printk(APIC_VERBOSE, "disabling APIC mode, "
"entering PIC mode.\n");
- outb(0x70, 0x22);
- outb(0x00, 0x23);
+ imcr_apic_to_pic();
return;
}
#endif
@@ -1969,10 +2021,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
local_irq_save(flags);
disable_local_APIC();
-#ifdef CONFIG_INTR_REMAP
+
if (intr_remapping_enabled)
disable_intr_remapping();
-#endif
+
local_irq_restore(flags);
return 0;
}
@@ -1982,42 +2034,34 @@ static int lapic_resume(struct sys_device *dev)
unsigned int l, h;
unsigned long flags;
int maxlvt;
-
-#ifdef CONFIG_INTR_REMAP
- int ret;
+ int ret = 0;
struct IO_APIC_route_entry **ioapic_entries = NULL;
if (!apic_pm_state.active)
return 0;
local_irq_save(flags);
- if (x2apic) {
+ if (intr_remapping_enabled) {
ioapic_entries = alloc_ioapic_entries();
if (!ioapic_entries) {
WARN(1, "Alloc ioapic_entries in lapic resume failed.");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto restore;
}
ret = save_IO_APIC_setup(ioapic_entries);
if (ret) {
WARN(1, "Saving IO-APIC state failed: %d\n", ret);
free_ioapic_entries(ioapic_entries);
- return ret;
+ goto restore;
}
mask_IO_APIC_setup(ioapic_entries);
mask_8259A();
- enable_x2apic();
}
-#else
- if (!apic_pm_state.active)
- return 0;
- local_irq_save(flags);
- if (x2apic)
+ if (x2apic_mode)
enable_x2apic();
-#endif
-
else {
/*
* Make sure the APICBASE points to the right address
@@ -2055,21 +2099,16 @@ static int lapic_resume(struct sys_device *dev)
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
-#ifdef CONFIG_INTR_REMAP
- if (intr_remapping_enabled)
- reenable_intr_remapping(EIM_32BIT_APIC_ID);
-
- if (x2apic) {
+ if (intr_remapping_enabled) {
+ reenable_intr_remapping(x2apic_mode);
unmask_8259A();
restore_IO_APIC_setup(ioapic_entries);
free_ioapic_entries(ioapic_entries);
}
-#endif
-
+restore:
local_irq_restore(flags);
-
- return 0;
+ return ret;
}
/*
@@ -2117,31 +2156,14 @@ static void apic_pm_activate(void) { }
#endif /* CONFIG_PM */
#ifdef CONFIG_X86_64
-/*
- * apic_is_clustered_box() -- Check if we can expect good TSC
- *
- * Thus far, the major user of this is IBM's Summit2 series:
- *
- * Clustered boxes may have unsynced TSC problems if they are
- * multi-chassis. Use available data to take a good guess.
- * If in doubt, go HPET.
- */
-__cpuinit int apic_is_clustered_box(void)
+
+static int __cpuinit apic_cluster_num(void)
{
int i, clusters, zeros;
unsigned id;
u16 *bios_cpu_apicid;
DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
- /*
- * there is not this kind of box with AMD CPU yet.
- * Some AMD box with quadcore cpu and 8 sockets apicid
- * will be [4, 0x23] or [8, 0x27] could be thought to
- * vsmp box still need checking...
- */
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
- return 0;
-
bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
@@ -2177,18 +2199,67 @@ __cpuinit int apic_is_clustered_box(void)
++zeros;
}
- /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
- * not guaranteed to be synced between boards
- */
- if (is_vsmp_box() && clusters > 1)
+ return clusters;
+}
+
+static int __cpuinitdata multi_checked;
+static int __cpuinitdata multi;
+
+static int __cpuinit set_multi(const struct dmi_system_id *d)
+{
+ if (multi)
+ return 0;
+ pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
+ multi = 1;
+ return 0;
+}
+
+static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = {
+ {
+ .callback = set_multi,
+ .ident = "IBM System Summit2",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
+ },
+ },
+ {}
+};
+
+static void __cpuinit dmi_check_multi(void)
+{
+ if (multi_checked)
+ return;
+
+ dmi_check_system(multi_dmi_table);
+ multi_checked = 1;
+}
+
+/*
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ * Clustered boxes may have unsynced TSC problems if they are
+ * multi-chassis.
+ * Use DMI to check them
+ */
+__cpuinit int apic_is_clustered_box(void)
+{
+ dmi_check_multi();
+ if (multi)
return 1;
+ if (!is_vsmp_box())
+ return 0;
+
/*
- * If clusters > 2, then should be multi-chassis.
- * May have to revisit this when multi-core + hyperthreaded CPUs come
- * out, but AFAIK this will work even for them.
+ * ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+ * not guaranteed to be synced between boards
*/
- return (clusters > 2);
+ if (apic_cluster_num() > 1)
+ return 1;
+
+ return 0;
}
#endif
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 306e5e88fb6f..d0c99abc26c3 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -161,7 +161,7 @@ static int flat_apic_id_registered(void)
static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
{
- return hard_smp_processor_id() >> index_msb;
+ return initial_apic_id >> index_msb;
}
struct apic apic_flat = {
@@ -235,7 +235,7 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
* regardless of how many processors are present (x86_64 ES7000
* is an example).
*/
- if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
+ if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
printk(KERN_DEBUG "system APIC only can use physical flat");
return 1;
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 1c11b819f245..69328ac8de9c 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -145,7 +145,7 @@ es7000_rename_gsi(int ioapic, int gsi)
return gsi;
}
-static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
+static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
{
unsigned long vect = 0, psaival = 0;
@@ -254,7 +254,7 @@ static int parse_unisys_oem(char *oemptr)
}
#ifdef CONFIG_ACPI
-static int find_unisys_acpi_oem_table(unsigned long *oem_addr)
+static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
{
struct acpi_table_header *header = NULL;
struct es7000_oem_table *table;
@@ -285,7 +285,7 @@ static int find_unisys_acpi_oem_table(unsigned long *oem_addr)
return 0;
}
-static void unmap_unisys_acpi_oem_table(unsigned long oem_addr)
+static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
{
if (!oem_addr)
return;
@@ -306,7 +306,7 @@ static int es7000_check_dsdt(void)
static int es7000_acpi_ret;
/* Hook from generic ACPI tables.c */
-static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
unsigned long oem_addr = 0;
int check_dsdt;
@@ -717,7 +717,7 @@ struct apic apic_es7000_cluster = {
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
};
-struct apic apic_es7000 = {
+struct apic __refdata apic_es7000 = {
.name = "es7000",
.probe = probe_es7000,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index a2789e42e162..f712f8ff403c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -129,12 +129,9 @@ struct irq_pin_list {
struct irq_pin_list *next;
};
-static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+static struct irq_pin_list *get_one_free_irq_2_pin(int node)
{
struct irq_pin_list *pin;
- int node;
-
- node = cpu_to_node(cpu);
pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
@@ -148,9 +145,6 @@ struct irq_cfg {
unsigned move_cleanup_count;
u8 vector;
u8 move_in_progress : 1;
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
- u8 move_desc_pending : 1;
-#endif
};
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -212,12 +206,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
return cfg;
}
-static struct irq_cfg *get_one_free_irq_cfg(int cpu)
+static struct irq_cfg *get_one_free_irq_cfg(int node)
{
struct irq_cfg *cfg;
- int node;
-
- node = cpu_to_node(cpu);
cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
if (cfg) {
@@ -238,13 +229,13 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
return cfg;
}
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int node)
{
struct irq_cfg *cfg;
cfg = desc->chip_data;
if (!cfg) {
- desc->chip_data = get_one_free_irq_cfg(cpu);
+ desc->chip_data = get_one_free_irq_cfg(node);
if (!desc->chip_data) {
printk(KERN_ERR "can not alloc irq_cfg\n");
BUG_ON(1);
@@ -254,10 +245,9 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu)
return 0;
}
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-
+/* for move_irq_desc */
static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
{
struct irq_pin_list *old_entry, *head, *tail, *entry;
@@ -266,7 +256,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
if (!old_entry)
return;
- entry = get_one_free_irq_2_pin(cpu);
+ entry = get_one_free_irq_2_pin(node);
if (!entry)
return;
@@ -276,7 +266,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
tail = entry;
old_entry = old_entry->next;
while (old_entry) {
- entry = get_one_free_irq_2_pin(cpu);
+ entry = get_one_free_irq_2_pin(node);
if (!entry) {
entry = head;
while (entry) {
@@ -316,12 +306,12 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
}
void arch_init_copy_chip_data(struct irq_desc *old_desc,
- struct irq_desc *desc, int cpu)
+ struct irq_desc *desc, int node)
{
struct irq_cfg *cfg;
struct irq_cfg *old_cfg;
- cfg = get_one_free_irq_cfg(cpu);
+ cfg = get_one_free_irq_cfg(node);
if (!cfg)
return;
@@ -332,7 +322,7 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc,
memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
- init_copy_irq_2_pin(old_cfg, cfg, cpu);
+ init_copy_irq_2_pin(old_cfg, cfg, node);
}
static void free_irq_cfg(struct irq_cfg *old_cfg)
@@ -356,19 +346,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
old_desc->chip_data = NULL;
}
}
-
-static void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
- struct irq_cfg *cfg = desc->chip_data;
-
- if (!cfg->move_in_progress) {
- /* it means that domain is not changed */
- if (!cpumask_intersects(desc->affinity, mask))
- cfg->move_desc_pending = 1;
- }
-}
-#endif
+/* end for move_irq_desc */
#else
static struct irq_cfg *irq_cfg(unsigned int irq)
@@ -378,13 +356,6 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
#endif
-#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
-static inline void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-}
-#endif
-
struct io_apic {
unsigned int index;
unsigned int unused[3];
@@ -518,132 +489,18 @@ static void ioapic_mask_entry(int apic, int pin)
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-#ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
-{
- cpumask_var_t cleanup_mask;
-
- if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
- unsigned int i;
- cfg->move_cleanup_count = 0;
- for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
- cfg->move_cleanup_count++;
- for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
- apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
- } else {
- cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
- cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
- apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
- free_cpumask_var(cleanup_mask);
- }
- cfg->move_in_progress = 0;
-}
-
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
- int apic, pin;
- struct irq_pin_list *entry;
- u8 vector = cfg->vector;
-
- entry = cfg->irq_2_pin;
- for (;;) {
- unsigned int reg;
-
- if (!entry)
- break;
-
- apic = entry->apic;
- pin = entry->pin;
- /*
- * With interrupt-remapping, destination information comes
- * from interrupt-remapping table entry.
- */
- if (!irq_remapped(irq))
- io_apic_write(apic, 0x11 + pin*2, dest);
- reg = io_apic_read(apic, 0x10 + pin*2);
- reg &= ~IO_APIC_REDIR_VECTOR_MASK;
- reg |= vector;
- io_apic_modify(apic, 0x10 + pin*2, reg);
- if (!entry->next)
- break;
- entry = entry->next;
- }
-}
-
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
-/*
- * Either sets desc->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
- * leaves desc->affinity untouched.
- */
-static unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
-{
- struct irq_cfg *cfg;
- unsigned int irq;
-
- if (!cpumask_intersects(mask, cpu_online_mask))
- return BAD_APICID;
-
- irq = desc->irq;
- cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
- return BAD_APICID;
-
- /* check that before desc->addinity get updated */
- set_extra_move_desc(desc, mask);
-
- cpumask_copy(desc->affinity, mask);
-
- return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
-}
-
-static void
-set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
- struct irq_cfg *cfg;
- unsigned long flags;
- unsigned int dest;
- unsigned int irq;
-
- irq = desc->irq;
- cfg = desc->chip_data;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- dest = set_desc_affinity(desc, mask);
- if (dest != BAD_APICID) {
- /* Only the high 8 bits are valid. */
- dest = SET_APIC_LOGICAL_ID(dest);
- __target_IO_APIC_irq(irq, dest, cfg);
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void
-set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
-{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
-
- set_ioapic_affinity_irq_desc(desc, mask);
-}
-#endif /* CONFIG_SMP */
-
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
-static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
struct irq_pin_list *entry;
entry = cfg->irq_2_pin;
if (!entry) {
- entry = get_one_free_irq_2_pin(cpu);
+ entry = get_one_free_irq_2_pin(node);
if (!entry) {
printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
apic, pin);
@@ -663,7 +520,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
entry = entry->next;
}
- entry->next = get_one_free_irq_2_pin(cpu);
+ entry->next = get_one_free_irq_2_pin(node);
entry = entry->next;
entry->apic = apic;
entry->pin = pin;
@@ -672,7 +529,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
/*
* Reroute an IRQ to a different pin.
*/
-static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
+static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
int oldapic, int oldpin,
int newapic, int newpin)
{
@@ -692,7 +549,7 @@ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
/* why? call replace before add? */
if (!replaced)
- add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
+ add_pin_to_irq_node(cfg, node, newapic, newpin);
}
static inline void io_apic_modify_irq(struct irq_cfg *cfg,
@@ -850,7 +707,6 @@ static int __init ioapic_pirq_setup(char *str)
__setup("pirq=", ioapic_pirq_setup);
#endif /* CONFIG_X86_32 */
-#ifdef CONFIG_INTR_REMAP
struct IO_APIC_route_entry **alloc_ioapic_entries(void)
{
int apic;
@@ -948,20 +804,6 @@ int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
return 0;
}
-void reinit_intr_remapped_IO_APIC(int intr_remapping,
- struct IO_APIC_route_entry **ioapic_entries)
-
-{
- /*
- * for now plain restore of previous settings.
- * TBD: In the case of OS enabling interrupt-remapping,
- * IO-APIC RTE's need to be setup to point to interrupt-remapping
- * table entries. for now, do a plain restore, and wait for
- * the setup_IO_APIC_irqs() to do proper initialization.
- */
- restore_IO_APIC_setup(ioapic_entries);
-}
-
void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
{
int apic;
@@ -971,7 +813,6 @@ void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
kfree(ioapic_entries);
}
-#endif
/*
* Find the IRQ entry number of a certain pin.
@@ -1032,54 +873,6 @@ static int __init find_isa_irq_apic(int irq, int type)
return -1;
}
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
- int apic, i, best_guess = -1;
-
- apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
- bus, slot, pin);
- if (test_bit(bus, mp_bus_not_pci)) {
- apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
- return -1;
- }
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].srcbus;
-
- for (apic = 0; apic < nr_ioapics; apic++)
- if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
- mp_irqs[i].dstapic == MP_APIC_ALL)
- break;
-
- if (!test_bit(lbus, mp_bus_not_pci) &&
- !mp_irqs[i].irqtype &&
- (bus == lbus) &&
- (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
- int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
-
- if (!(apic || IO_APIC_IRQ(irq)))
- continue;
-
- if (pin == (mp_irqs[i].srcbusirq & 3))
- return irq;
- /*
- * Use the first all-but-pin matching entry as a
- * best-guess fuzzy result for broken mptables.
- */
- if (best_guess < 0)
- best_guess = irq;
- }
- }
- return best_guess;
-}
-
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
/*
* EISA Edge/Level control register, ELCR
@@ -1298,6 +1091,64 @@ static int pin_2_irq(int idx, int apic, int pin)
return irq;
}
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
+ struct io_apic_irq_attr *irq_attr)
+{
+ int apic, i, best_guess = -1;
+
+ apic_printk(APIC_DEBUG,
+ "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+ bus, slot, pin);
+ if (test_bit(bus, mp_bus_not_pci)) {
+ apic_printk(APIC_VERBOSE,
+ "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+ return -1;
+ }
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].srcbus;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+ mp_irqs[i].dstapic == MP_APIC_ALL)
+ break;
+
+ if (!test_bit(lbus, mp_bus_not_pci) &&
+ !mp_irqs[i].irqtype &&
+ (bus == lbus) &&
+ (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+ int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
+
+ if (!(apic || IO_APIC_IRQ(irq)))
+ continue;
+
+ if (pin == (mp_irqs[i].srcbusirq & 3)) {
+ set_io_apic_irq_attr(irq_attr, apic,
+ mp_irqs[i].dstirq,
+ irq_trigger(i),
+ irq_polarity(i));
+ return irq;
+ }
+ /*
+ * Use the first all-but-pin matching entry as a
+ * best-guess fuzzy result for broken mptables.
+ */
+ if (best_guess < 0) {
+ set_io_apic_irq_attr(irq_attr, apic,
+ mp_irqs[i].dstirq,
+ irq_trigger(i),
+ irq_polarity(i));
+ best_guess = irq;
+ }
+ }
+ }
+ return best_guess;
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
void lock_vector_lock(void)
{
/* Used to the online set of cpus does not change
@@ -1628,58 +1479,70 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
ioapic_write_entry(apic_id, pin, entry);
}
+static struct {
+ DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
static void __init setup_IO_APIC_irqs(void)
{
- int apic_id, pin, idx, irq;
+ int apic_id = 0, pin, idx, irq;
int notcon = 0;
struct irq_desc *desc;
struct irq_cfg *cfg;
- int cpu = boot_cpu_id;
+ int node = cpu_to_node(boot_cpu_id);
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
- for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
- for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
-
- idx = find_irq_entry(apic_id, pin, mp_INT);
- if (idx == -1) {
- if (!notcon) {
- notcon = 1;
- apic_printk(APIC_VERBOSE,
- KERN_DEBUG " %d-%d",
- mp_ioapics[apic_id].apicid, pin);
- } else
- apic_printk(APIC_VERBOSE, " %d-%d",
- mp_ioapics[apic_id].apicid, pin);
- continue;
- }
- if (notcon) {
- apic_printk(APIC_VERBOSE,
- " (apicid-pin) not connected\n");
- notcon = 0;
- }
+#ifdef CONFIG_ACPI
+ if (!acpi_disabled && acpi_ioapic) {
+ apic_id = mp_find_ioapic(0);
+ if (apic_id < 0)
+ apic_id = 0;
+ }
+#endif
- irq = pin_2_irq(idx, apic_id, pin);
+ for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+ idx = find_irq_entry(apic_id, pin, mp_INT);
+ if (idx == -1) {
+ if (!notcon) {
+ notcon = 1;
+ apic_printk(APIC_VERBOSE,
+ KERN_DEBUG " %d-%d",
+ mp_ioapics[apic_id].apicid, pin);
+ } else
+ apic_printk(APIC_VERBOSE, " %d-%d",
+ mp_ioapics[apic_id].apicid, pin);
+ continue;
+ }
+ if (notcon) {
+ apic_printk(APIC_VERBOSE,
+ " (apicid-pin) not connected\n");
+ notcon = 0;
+ }
- /*
- * Skip the timer IRQ if there's a quirk handler
- * installed and if it returns 1:
- */
- if (apic->multi_timer_check &&
- apic->multi_timer_check(apic_id, irq))
- continue;
+ irq = pin_2_irq(idx, apic_id, pin);
- desc = irq_to_desc_alloc_cpu(irq, cpu);
- if (!desc) {
- printk(KERN_INFO "can not get irq_desc for %d\n", irq);
- continue;
- }
- cfg = desc->chip_data;
- add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
+ /*
+ * Skip the timer IRQ if there's a quirk handler
+ * installed and if it returns 1:
+ */
+ if (apic->multi_timer_check &&
+ apic->multi_timer_check(apic_id, irq))
+ continue;
- setup_IO_APIC_irq(apic_id, pin, irq, desc,
- irq_trigger(idx), irq_polarity(idx));
+ desc = irq_to_desc_alloc_node(irq, node);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+ continue;
}
+ cfg = desc->chip_data;
+ add_pin_to_irq_node(cfg, node, apic_id, pin);
+ /*
+ * don't mark it in pin_programmed, so later acpi could
+ * set it correctly when irq < 16
+ */
+ setup_IO_APIC_irq(apic_id, pin, irq, desc,
+ irq_trigger(idx), irq_polarity(idx));
}
if (notcon)
@@ -1869,7 +1732,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base)
__apicdebuginit(void) print_local_APIC(void *dummy)
{
- unsigned int v, ver, maxlvt;
+ unsigned int i, v, ver, maxlvt;
u64 icr;
if (apic_verbosity == APIC_QUIET)
@@ -1957,6 +1820,18 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
v = apic_read(APIC_TDCR);
printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+
+ if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+ v = apic_read(APIC_EFEAT);
+ maxlvt = (v >> 16) & 0xff;
+ printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
+ v = apic_read(APIC_ECTRL);
+ printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
+ for (i = 0; i < maxlvt; i++) {
+ v = apic_read(APIC_EILVTn(i));
+ printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
+ }
+ }
printk("\n");
}
@@ -2005,6 +1880,11 @@ __apicdebuginit(void) print_PIC(void)
__apicdebuginit(int) print_all_ICs(void)
{
print_PIC();
+
+ /* don't print out if apic is not there */
+ if (!cpu_has_apic || disable_apic)
+ return 0;
+
print_all_local_APICs();
print_IO_APIC();
@@ -2360,6 +2240,118 @@ static int ioapic_retrigger_irq(unsigned int irq)
*/
#ifdef CONFIG_SMP
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+ cpumask_var_t cleanup_mask;
+
+ if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+ unsigned int i;
+ cfg->move_cleanup_count = 0;
+ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+ cfg->move_cleanup_count++;
+ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+ apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+ } else {
+ cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+ cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+ apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ free_cpumask_var(cleanup_mask);
+ }
+ cfg->move_in_progress = 0;
+}
+
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+ int apic, pin;
+ struct irq_pin_list *entry;
+ u8 vector = cfg->vector;
+
+ entry = cfg->irq_2_pin;
+ for (;;) {
+ unsigned int reg;
+
+ if (!entry)
+ break;
+
+ apic = entry->apic;
+ pin = entry->pin;
+ /*
+ * With interrupt-remapping, destination information comes
+ * from interrupt-remapping table entry.
+ */
+ if (!irq_remapped(irq))
+ io_apic_write(apic, 0x11 + pin*2, dest);
+ reg = io_apic_read(apic, 0x10 + pin*2);
+ reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+ reg |= vector;
+ io_apic_modify(apic, 0x10 + pin*2, reg);
+ if (!entry->next)
+ break;
+ entry = entry->next;
+ }
+}
+
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
+
+/*
+ * Either sets desc->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
+ * leaves desc->affinity untouched.
+ */
+static unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+{
+ struct irq_cfg *cfg;
+ unsigned int irq;
+
+ if (!cpumask_intersects(mask, cpu_online_mask))
+ return BAD_APICID;
+
+ irq = desc->irq;
+ cfg = desc->chip_data;
+ if (assign_irq_vector(irq, cfg, mask))
+ return BAD_APICID;
+
+ cpumask_copy(desc->affinity, mask);
+
+ return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+}
+
+static int
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+ struct irq_cfg *cfg;
+ unsigned long flags;
+ unsigned int dest;
+ unsigned int irq;
+ int ret = -1;
+
+ irq = desc->irq;
+ cfg = desc->chip_data;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ dest = set_desc_affinity(desc, mask);
+ if (dest != BAD_APICID) {
+ /* Only the high 8 bits are valid. */
+ dest = SET_APIC_LOGICAL_ID(dest);
+ __target_IO_APIC_irq(irq, dest, cfg);
+ ret = 0;
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return ret;
+}
+
+static int
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
+{
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+
+ return set_ioapic_affinity_irq_desc(desc, mask);
+}
#ifdef CONFIG_INTR_REMAP
@@ -2374,26 +2366,25 @@ static int ioapic_retrigger_irq(unsigned int irq)
* Real vector that is used for interrupting cpu will be coming from
* the interrupt-remapping table entry.
*/
-static void
+static int
migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg;
struct irte irte;
unsigned int dest;
unsigned int irq;
+ int ret = -1;
if (!cpumask_intersects(mask, cpu_online_mask))
- return;
+ return ret;
irq = desc->irq;
if (get_irte(irq, &irte))
- return;
+ return ret;
cfg = desc->chip_data;
if (assign_irq_vector(irq, cfg, mask))
- return;
-
- set_extra_move_desc(desc, mask);
+ return ret;
dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
@@ -2409,27 +2400,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
send_cleanup_vector(cfg);
cpumask_copy(desc->affinity, mask);
+
+ return 0;
}
/*
* Migrates the IRQ destination in the process context.
*/
-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
const struct cpumask *mask)
{
- migrate_ioapic_irq_desc(desc, mask);
+ return migrate_ioapic_irq_desc(desc, mask);
}
-static void set_ir_ioapic_affinity_irq(unsigned int irq,
+static int set_ir_ioapic_affinity_irq(unsigned int irq,
const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
- set_ir_ioapic_affinity_irq_desc(desc, mask);
+ return set_ir_ioapic_affinity_irq_desc(desc, mask);
}
#else
-static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
const struct cpumask *mask)
{
+ return 0;
}
#endif
@@ -2491,86 +2485,19 @@ static void irq_complete_move(struct irq_desc **descp)
struct irq_cfg *cfg = desc->chip_data;
unsigned vector, me;
- if (likely(!cfg->move_in_progress)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
- if (likely(!cfg->move_desc_pending))
- return;
-
- /* domain has not changed, but affinity did */
- me = smp_processor_id();
- if (cpumask_test_cpu(me, desc->affinity)) {
- *descp = desc = move_irq_desc(desc, me);
- /* get the new one */
- cfg = desc->chip_data;
- cfg->move_desc_pending = 0;
- }
-#endif
+ if (likely(!cfg->move_in_progress))
return;
- }
vector = ~get_irq_regs()->orig_ax;
me = smp_processor_id();
- if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
- *descp = desc = move_irq_desc(desc, me);
- /* get the new one */
- cfg = desc->chip_data;
-#endif
+ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
send_cleanup_vector(cfg);
- }
}
#else
static inline void irq_complete_move(struct irq_desc **descp) {}
#endif
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
- int apic, pin;
- struct irq_pin_list *entry;
-
- entry = cfg->irq_2_pin;
- for (;;) {
-
- if (!entry)
- break;
-
- apic = entry->apic;
- pin = entry->pin;
- io_apic_eoi(apic, pin);
- entry = entry->next;
- }
-}
-
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
-{
- struct irq_cfg *cfg;
- unsigned long flags;
- unsigned int irq;
-
- irq = desc->irq;
- cfg = desc->chip_data;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __eoi_ioapic_irq(irq, cfg);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#ifdef CONFIG_X86_X2APIC
-static void ack_x2apic_level(unsigned int irq)
-{
- struct irq_desc *desc = irq_to_desc(irq);
- ack_x2APIC_irq();
- eoi_ioapic_irq(desc);
-}
-
-static void ack_x2apic_edge(unsigned int irq)
-{
- ack_x2APIC_irq();
-}
-#endif
-
static void ack_apic_edge(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -2634,9 +2561,6 @@ static void ack_apic_level(unsigned int irq)
*/
ack_APIC_irq();
- if (irq_remapped(irq))
- eoi_ioapic_irq(desc);
-
/* Now we can move and renable the irq */
if (unlikely(do_unmask_irq)) {
/* Only migrate the irq if the ack has been received.
@@ -2683,22 +2607,50 @@ static void ack_apic_level(unsigned int irq)
}
#ifdef CONFIG_INTR_REMAP
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+ int apic, pin;
+ struct irq_pin_list *entry;
+
+ entry = cfg->irq_2_pin;
+ for (;;) {
+
+ if (!entry)
+ break;
+
+ apic = entry->apic;
+ pin = entry->pin;
+ io_apic_eoi(apic, pin);
+ entry = entry->next;
+ }
+}
+
+static void
+eoi_ioapic_irq(struct irq_desc *desc)
+{
+ struct irq_cfg *cfg;
+ unsigned long flags;
+ unsigned int irq;
+
+ irq = desc->irq;
+ cfg = desc->chip_data;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __eoi_ioapic_irq(irq, cfg);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
static void ir_ack_apic_edge(unsigned int irq)
{
-#ifdef CONFIG_X86_X2APIC
- if (x2apic_enabled())
- return ack_x2apic_edge(irq);
-#endif
- return ack_apic_edge(irq);
+ ack_APIC_irq();
}
static void ir_ack_apic_level(unsigned int irq)
{
-#ifdef CONFIG_X86_X2APIC
- if (x2apic_enabled())
- return ack_x2apic_level(irq);
-#endif
- return ack_apic_level(irq);
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ ack_APIC_irq();
+ eoi_ioapic_irq(desc);
}
#endif /* CONFIG_INTR_REMAP */
@@ -2903,7 +2855,7 @@ static inline void __init check_timer(void)
{
struct irq_desc *desc = irq_to_desc(0);
struct irq_cfg *cfg = desc->chip_data;
- int cpu = boot_cpu_id;
+ int node = cpu_to_node(boot_cpu_id);
int apic1, pin1, apic2, pin2;
unsigned long flags;
int no_pin1 = 0;
@@ -2969,7 +2921,7 @@ static inline void __init check_timer(void)
* Ok, does IRQ0 through the IOAPIC work?
*/
if (no_pin1) {
- add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
+ add_pin_to_irq_node(cfg, node, apic1, pin1);
setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
} else {
/* for edge trigger, setup_IO_APIC_irq already
@@ -3006,7 +2958,7 @@ static inline void __init check_timer(void)
/*
* legacy devices should be connected to IO APIC #0
*/
- replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
+ replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
enable_8259A_irq(0);
if (timer_irq_works()) {
@@ -3218,14 +3170,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY;
/*
* Dynamic irq allocate and deallocation
*/
-unsigned int create_irq_nr(unsigned int irq_want)
+unsigned int create_irq_nr(unsigned int irq_want, int node)
{
/* Allocate an unused irq */
unsigned int irq;
unsigned int new;
unsigned long flags;
struct irq_cfg *cfg_new = NULL;
- int cpu = boot_cpu_id;
struct irq_desc *desc_new = NULL;
irq = 0;
@@ -3234,7 +3185,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
spin_lock_irqsave(&vector_lock, flags);
for (new = irq_want; new < nr_irqs; new++) {
- desc_new = irq_to_desc_alloc_cpu(new, cpu);
+ desc_new = irq_to_desc_alloc_node(new, node);
if (!desc_new) {
printk(KERN_INFO "can not get irq_desc for %d\n", new);
continue;
@@ -3243,6 +3194,9 @@ unsigned int create_irq_nr(unsigned int irq_want)
if (cfg_new->vector != 0)
continue;
+
+ desc_new = move_irq_desc(desc_new, node);
+
if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
irq = new;
break;
@@ -3260,11 +3214,12 @@ unsigned int create_irq_nr(unsigned int irq_want)
int create_irq(void)
{
+ int node = cpu_to_node(boot_cpu_id);
unsigned int irq_want;
int irq;
irq_want = nr_irqs_gsi;
- irq = create_irq_nr(irq_want);
+ irq = create_irq_nr(irq_want, node);
if (irq == 0)
irq = -1;
@@ -3366,7 +3321,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
}
#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
@@ -3375,7 +3330,7 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
- return;
+ return -1;
cfg = desc->chip_data;
@@ -3387,13 +3342,15 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
write_msi_msg_desc(desc, &msg);
+
+ return 0;
}
#ifdef CONFIG_INTR_REMAP
/*
* Migrate the MSI irq to another cpumask. This migration is
* done in the process context using interrupt-remapping hardware.
*/
-static void
+static int
ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -3402,11 +3359,11 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
struct irte irte;
if (get_irte(irq, &irte))
- return;
+ return -1;
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
- return;
+ return -1;
irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
@@ -3423,6 +3380,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
*/
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
+
+ return 0;
}
#endif
@@ -3518,15 +3477,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
unsigned int irq_want;
struct intel_iommu *iommu = NULL;
int index = 0;
+ int node;
/* x86 doesn't support multiple MSI yet */
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
+ node = dev_to_node(&dev->dev);
irq_want = nr_irqs_gsi;
sub_handle = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = create_irq_nr(irq_want);
+ irq = create_irq_nr(irq_want, node);
if (irq == 0)
return -1;
irq_want = irq + 1;
@@ -3576,7 +3537,7 @@ void arch_teardown_msi_irq(unsigned int irq)
#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
#ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
@@ -3585,7 +3546,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
- return;
+ return -1;
cfg = desc->chip_data;
@@ -3597,6 +3558,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
dmar_msi_write(irq, &msg);
+
+ return 0;
}
#endif /* CONFIG_SMP */
@@ -3630,7 +3593,7 @@ int arch_setup_dmar_msi(unsigned int irq)
#ifdef CONFIG_HPET_TIMER
#ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
@@ -3639,7 +3602,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
- return;
+ return -1;
cfg = desc->chip_data;
@@ -3651,6 +3614,8 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
hpet_msi_write(irq, &msg);
+
+ return 0;
}
#endif /* CONFIG_SMP */
@@ -3670,12 +3635,14 @@ int arch_setup_hpet_msi(unsigned int irq)
{
int ret;
struct msi_msg msg;
+ struct irq_desc *desc = irq_to_desc(irq);
ret = msi_compose_msg(NULL, irq, &msg);
if (ret < 0)
return ret;
hpet_msi_write(irq, &msg);
+ desc->status |= IRQ_MOVE_PCNTXT;
set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
"edge");
@@ -3705,7 +3672,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
write_ht_irq_msg(irq, &msg);
}
-static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
@@ -3713,11 +3680,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
- return;
+ return -1;
cfg = desc->chip_data;
target_ht_irq(irq, dest, cfg->vector);
+
+ return 0;
}
#endif
@@ -3792,6 +3761,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
unsigned long flags;
int err;
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
cfg = irq_cfg(irq);
err = assign_irq_vector(irq, cfg, eligible_cpu);
@@ -3805,15 +3776,13 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
- entry->vector = cfg->vector;
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->polarity = 0;
- entry->trigger = 0;
- entry->mask = 0;
- entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
+ entry->vector = cfg->vector;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3831,10 +3800,10 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
struct uv_IO_APIC_route_entry *entry;
int mmr_pnode;
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
entry->mask = 1;
mmr_pnode = uv_blade_to_pnode(mmr_blade);
@@ -3898,6 +3867,71 @@ int __init arch_probe_nr_irqs(void)
}
#endif
+static int __io_apic_set_pci_routing(struct device *dev, int irq,
+ struct io_apic_irq_attr *irq_attr)
+{
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ int node;
+ int ioapic, pin;
+ int trigger, polarity;
+
+ ioapic = irq_attr->ioapic;
+ if (!IO_APIC_IRQ(irq)) {
+ apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+ ioapic);
+ return -EINVAL;
+ }
+
+ if (dev)
+ node = dev_to_node(dev);
+ else
+ node = cpu_to_node(boot_cpu_id);
+
+ desc = irq_to_desc_alloc_node(irq, node);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc %d\n", irq);
+ return 0;
+ }
+
+ pin = irq_attr->ioapic_pin;
+ trigger = irq_attr->trigger;
+ polarity = irq_attr->polarity;
+
+ /*
+ * IRQs < 16 are already in the irq_2_pin[] map
+ */
+ if (irq >= NR_IRQS_LEGACY) {
+ cfg = desc->chip_data;
+ add_pin_to_irq_node(cfg, node, ioapic, pin);
+ }
+
+ setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
+
+ return 0;
+}
+
+int io_apic_set_pci_routing(struct device *dev, int irq,
+ struct io_apic_irq_attr *irq_attr)
+{
+ int ioapic, pin;
+ /*
+ * Avoid pin reprogramming. PRTs typically include entries
+ * with redundant pin->gsi mappings (but unique PCI devices);
+ * we only program the IOAPIC on the first.
+ */
+ ioapic = irq_attr->ioapic;
+ pin = irq_attr->ioapic_pin;
+ if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
+ pr_debug("Pin %d-%d already programmed\n",
+ mp_ioapics[ioapic].apicid, pin);
+ return 0;
+ }
+ set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
+
+ return __io_apic_set_pci_routing(dev, irq, irq_attr);
+}
+
/* --------------------------------------------------------------------------
ACPI-based IOAPIC Configuration
-------------------------------------------------------------------------- */
@@ -3978,6 +4012,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
return apic_id;
}
+#endif
int __init io_apic_get_version(int ioapic)
{
@@ -3990,39 +4025,6 @@ int __init io_apic_get_version(int ioapic)
return reg_01.bits.version;
}
-#endif
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
-{
- struct irq_desc *desc;
- struct irq_cfg *cfg;
- int cpu = boot_cpu_id;
-
- if (!IO_APIC_IRQ(irq)) {
- apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
- ioapic);
- return -EINVAL;
- }
-
- desc = irq_to_desc_alloc_cpu(irq, cpu);
- if (!desc) {
- printk(KERN_INFO "can not get irq_desc %d\n", irq);
- return 0;
- }
-
- /*
- * IRQs < 16 are already in the irq_2_pin[] map
- */
- if (irq >= NR_IRQS_LEGACY) {
- cfg = desc->chip_data;
- add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
- }
-
- setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
-
- return 0;
-}
-
int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
{
@@ -4053,51 +4055,44 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
#ifdef CONFIG_SMP
void __init setup_ioapic_dest(void)
{
- int pin, ioapic, irq, irq_entry;
+ int pin, ioapic = 0, irq, irq_entry;
struct irq_desc *desc;
- struct irq_cfg *cfg;
const struct cpumask *mask;
if (skip_ioapic_setup == 1)
return;
- for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
- for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
- irq_entry = find_irq_entry(ioapic, pin, mp_INT);
- if (irq_entry == -1)
- continue;
- irq = pin_2_irq(irq_entry, ioapic, pin);
-
- /* setup_IO_APIC_irqs could fail to get vector for some device
- * when you have too many devices, because at that time only boot
- * cpu is online.
- */
- desc = irq_to_desc(irq);
- cfg = desc->chip_data;
- if (!cfg->vector) {
- setup_IO_APIC_irq(ioapic, pin, irq, desc,
- irq_trigger(irq_entry),
- irq_polarity(irq_entry));
- continue;
+#ifdef CONFIG_ACPI
+ if (!acpi_disabled && acpi_ioapic) {
+ ioapic = mp_find_ioapic(0);
+ if (ioapic < 0)
+ ioapic = 0;
+ }
+#endif
- }
+ for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+ irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+ if (irq_entry == -1)
+ continue;
+ irq = pin_2_irq(irq_entry, ioapic, pin);
- /*
- * Honour affinities which have been set in early boot
- */
- if (desc->status &
- (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
- mask = desc->affinity;
- else
- mask = apic->target_cpus();
+ desc = irq_to_desc(irq);
- if (intr_remapping_enabled)
- set_ir_ioapic_affinity_irq_desc(desc, mask);
- else
- set_ioapic_affinity_irq_desc(desc, mask);
- }
+ /*
+ * Honour affinities which have been set in early boot
+ */
+ if (desc->status &
+ (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+ mask = desc->affinity;
+ else
+ mask = apic->target_cpus();
+ if (intr_remapping_enabled)
+ set_ir_ioapic_affinity_irq_desc(desc, mask);
+ else
+ set_ioapic_affinity_irq_desc(desc, mask);
}
+
}
#endif
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index d6bd62407152..ce4fbfa315a1 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -138,7 +138,7 @@ int __init check_nmi_watchdog(void)
if (!prev_nmi_count)
goto error;
- alloc_cpumask_var(&backtrace_mask, GFP_KERNEL);
+ alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO);
printk(KERN_INFO "Testing NMI watchdog ... ");
#ifdef CONFIG_SMP
@@ -414,7 +414,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
touched = 1;
}
- if (cpumask_test_cpu(cpu, backtrace_mask)) {
+ /* We can be called before check_nmi_watchdog, hence NULL check. */
+ if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) {
static DEFINE_SPINLOCK(lock); /* Serialise the printks */
spin_lock(&lock);
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 1783652bb0e5..bc3e880f9b82 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -50,7 +50,7 @@ static struct apic *apic_probe[] __initdata = {
void __init default_setup_apic_routing(void)
{
#ifdef CONFIG_X86_X2APIC
- if (x2apic && (apic != &apic_x2apic_phys &&
+ if (x2apic_mode && (apic != &apic_x2apic_phys &&
#ifdef CONFIG_X86_UV
apic != &apic_x2apic_uv_x &&
#endif
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 9cfe1f415d81..344eee4ac0a4 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -173,13 +173,6 @@ static inline int is_WPEG(struct rio_detail *rio){
rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
}
-
-/* In clustered mode, the high nibble of APIC ID is a cluster number.
- * The low nibble is a 4-bit bitmap. */
-#define XAPIC_DEST_CPUS_SHIFT 4
-#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
-#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
-
#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
static const struct cpumask *summit_target_cpus(void)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 1248318436e8..780a733a5e7a 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -19,6 +19,7 @@
#include <linux/timer.h>
#include <linux/cpu.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
@@ -34,6 +35,17 @@ DEFINE_PER_CPU(int, x2apic_extra_bits);
static enum uv_system_type uv_system_type;
+static int early_get_nodeid(void)
+{
+ union uvh_node_id_u node_id;
+ unsigned long *mmr;
+
+ mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr));
+ node_id.v = *mmr;
+ early_iounmap(mmr, sizeof(*mmr));
+ return node_id.s.node_id;
+}
+
static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
if (!strcmp(oem_id, "SGI")) {
@@ -42,6 +54,8 @@ static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
else if (!strcmp(oem_table_id, "UVX"))
uv_system_type = UV_X2APIC;
else if (!strcmp(oem_table_id, "UVH")) {
+ __get_cpu_var(x2apic_extra_bits) =
+ early_get_nodeid() << (UV_APIC_PNODE_SHIFT - 1);
uv_system_type = UV_NON_UNIQUE_APIC;
return 1;
}
@@ -91,7 +105,7 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
cpumask_set_cpu(cpu, retmask);
}
-static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
{
#ifdef CONFIG_SMP
unsigned long val;
@@ -549,7 +563,8 @@ void __init uv_system_init(void)
unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
int max_pnode = 0;
- unsigned long mmr_base, present;
+ unsigned long mmr_base, present, paddr;
+ unsigned short pnode_mask;
map_low_mmrs();
@@ -568,15 +583,18 @@ void __init uv_system_init(void)
bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+ BUG_ON(!uv_blade_info);
get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
+ BUG_ON(!uv_node_to_blade);
memset(uv_node_to_blade, 255, bytes);
bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
+ BUG_ON(!uv_cpu_to_blade);
memset(uv_cpu_to_blade, 255, bytes);
blade = 0;
@@ -592,6 +610,7 @@ void __init uv_system_init(void)
}
}
+ pnode_mask = (1 << n_val) - 1;
node_id.v = uv_read_local_mmr(UVH_NODE_ID);
gnode_upper = (((unsigned long)node_id.s.node_id) &
~((1 << n_val) - 1)) << m_val;
@@ -615,7 +634,7 @@ void __init uv_system_init(void)
uv_cpu_hub_info(cpu)->numa_blade_id = blade;
uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
uv_cpu_hub_info(cpu)->pnode = pnode;
- uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1;
+ uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
@@ -631,6 +650,17 @@ void __init uv_system_init(void)
lcpu, blade);
}
+ /* Add blade/pnode info for nodes without cpus */
+ for_each_online_node(nid) {
+ if (uv_node_to_blade[nid] >= 0)
+ continue;
+ paddr = node_start_pfn(nid) << PAGE_SHIFT;
+ paddr = uv_soc_phys_ram_to_gpa(paddr);
+ pnode = (paddr >> m_val) & pnode_mask;
+ blade = boot_pnode_to_blade(pnode);
+ uv_node_to_blade[nid] = blade;
+ }
+
map_gru_high(max_pnode);
map_mmr_high(max_pnode);
map_config_high(max_pnode);
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index f63882728d91..63a88e1f987d 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -182,7 +182,8 @@ void uv_bios_init(void)
memcpy(&uv_systab, tab, sizeof(struct uv_systab));
iounmap(tab);
- printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision);
+ printk(KERN_INFO "EFI UV System Table Revision %d\n",
+ uv_systab.revision);
}
#else /* !CONFIG_EFI */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7e4a459daa64..728b3750a3e8 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -272,7 +272,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
int cpu = smp_processor_id();
int node;
- unsigned apicid = hard_smp_processor_id();
+ unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
node = c->phys_proc_id;
if (apicid_to_node[apicid] != NUMA_NO_NODE)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c4f667896c28..b0517aa2bd3b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -114,6 +114,13 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
+static int __init x86_xsave_setup(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+ return 1;
+}
+__setup("noxsave", x86_xsave_setup);
+
#ifdef CONFIG_X86_32
static int cachesize_override __cpuinitdata = -1;
static int disable_x86_serial_nr __cpuinitdata = 1;
@@ -292,7 +299,8 @@ static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
return NULL; /* Not found */
}
-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_set[NCAPINTS] __cpuinitdata;
void load_percpu_segment(int cpu)
{
@@ -761,6 +769,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
if (this_cpu->c_identify)
this_cpu->c_identify(c);
+ /* Clear/Set all flags overriden by options, after probe */
+ for (i = 0; i < NCAPINTS; i++) {
+ c->x86_capability[i] &= ~cpu_caps_cleared[i];
+ c->x86_capability[i] |= cpu_caps_set[i];
+ }
+
#ifdef CONFIG_X86_64
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
#endif
@@ -806,6 +820,16 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
#endif
init_hypervisor(c);
+
+ /*
+ * Clear/Set all flags overriden by options, need do it
+ * before following smp all cpus cap AND.
+ */
+ for (i = 0; i < NCAPINTS; i++) {
+ c->x86_capability[i] &= ~cpu_caps_cleared[i];
+ c->x86_capability[i] |= cpu_caps_set[i];
+ }
+
/*
* On SMP, boot_cpu_data holds the common feature set between
* all CPUs; so make sure that we indicate which features are
@@ -818,10 +842,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
}
- /* Clear all flags overriden by options */
- for (i = 0; i < NCAPINTS; i++)
- c->x86_capability[i] &= ~cleared_cpu_caps[i];
-
#ifdef CONFIG_X86_MCE
/* Init Machine Check Exception if available. */
mcheck_init(c);
@@ -1203,6 +1223,8 @@ void __cpuinit cpu_init(void)
load_TR_desc();
load_LDT(&init_mm.context);
+ t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+
#ifdef CONFIG_DOUBLEFAULT
/* Set up doublefault TSS pointer in the GDT */
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
index 46e29ab96c6a..2fc4f6bb9ca5 100644
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -588,8 +588,20 @@ static void print_apic(void *arg)
seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT));
seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT));
seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR));
-#endif /* CONFIG_X86_LOCAL_APIC */
+ if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+ unsigned int i, v, maxeilvt;
+
+ v = apic_read(APIC_EFEAT);
+ maxeilvt = (v >> 16) & 0xff;
+ seq_printf(seq, " EFEAT\t\t: %08x\n", v);
+ seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL));
+ for (i = 0; i < maxeilvt; i++) {
+ v = apic_read(APIC_EILVTn(i));
+ seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v);
+ }
+ }
+#endif /* CONFIG_X86_LOCAL_APIC */
seq_printf(seq, "\n MSR\t:\n");
}
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index 52c839875478..f138c6c389b9 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -220,11 +220,14 @@ config X86_LONGHAUL
If in doubt, say N.
config X86_E_POWERSAVER
- tristate "VIA C7 Enhanced PowerSaver"
+ tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)"
select CPU_FREQ_TABLE
- depends on X86_32
+ depends on X86_32 && EXPERIMENTAL
help
- This adds the CPUFreq driver for VIA C7 processors.
+ This adds the CPUFreq driver for VIA C7 processors. However, this driver
+ does not have any safeguards to prevent operating the CPU out of spec
+ and is thus considered dangerous. Please use the regular ACPI cpufreq
+ driver, enabled by CONFIG_X86_ACPI_CPUFREQ.
If in doubt, say N.
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index ecdb682ab516..ae9b503220ca 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -65,14 +65,18 @@ enum {
struct acpi_cpufreq_data {
struct acpi_processor_performance *acpi_data;
struct cpufreq_frequency_table *freq_table;
- unsigned int max_freq;
unsigned int resume;
unsigned int cpu_feature;
- u64 saved_aperf, saved_mperf;
};
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
+struct acpi_msr_data {
+ u64 saved_aperf, saved_mperf;
+};
+
+static DEFINE_PER_CPU(struct acpi_msr_data, msr_data);
+
DEFINE_TRACE(power_mark);
/* acpi_perf_data is a pointer to percpu data. */
@@ -86,11 +90,7 @@ static int check_est_cpu(unsigned int cpuid)
{
struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
- if (cpu->x86_vendor != X86_VENDOR_INTEL ||
- !cpu_has(cpu, X86_FEATURE_EST))
- return 0;
-
- return 1;
+ return cpu_has(cpu, X86_FEATURE_EST);
}
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
@@ -287,11 +287,11 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
return 0;
cur.aperf.whole = readin.aperf.whole -
- per_cpu(drv_data, cpu)->saved_aperf;
+ per_cpu(msr_data, cpu).saved_aperf;
cur.mperf.whole = readin.mperf.whole -
- per_cpu(drv_data, cpu)->saved_mperf;
- per_cpu(drv_data, cpu)->saved_aperf = readin.aperf.whole;
- per_cpu(drv_data, cpu)->saved_mperf = readin.mperf.whole;
+ per_cpu(msr_data, cpu).saved_mperf;
+ per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole;
+ per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole;
#ifdef __i386__
/*
@@ -335,7 +335,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
#endif
- retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100;
+ retval = (policy->cpuinfo.max_freq * perf_percent) / 100;
return retval;
}
@@ -546,7 +546,7 @@ static int __init acpi_cpufreq_early_init(void)
return -ENOMEM;
}
for_each_possible_cpu(i) {
- if (!alloc_cpumask_var_node(
+ if (!zalloc_cpumask_var_node(
&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
GFP_KERNEL, cpu_to_node(i))) {
@@ -688,16 +688,11 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
policy->cpuinfo.transition_latency > 20 * 1000) {
- static int print_once;
policy->cpuinfo.transition_latency = 20 * 1000;
- if (!print_once) {
- print_once = 1;
- printk(KERN_INFO "Capping off P-state tranision latency"
- " at 20 uS\n");
- }
+ printk_once(KERN_INFO
+ "P-state transition latency capped at 20 uS\n");
}
- data->max_freq = perf->states[0].core_frequency * 1000;
/* table init */
for (i = 0; i < perf->state_count; i++) {
if (i > 0 && perf->states[i].core_frequency >=
@@ -716,6 +711,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (result)
goto err_freqfree;
+ if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
+ printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");
+
switch (perf->control_register.space_id) {
case ACPI_ADR_SPACE_SYSTEM_IO:
/* Current speed is unknown and not detectable by IO port */
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 6ac55bd341ae..869615193720 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -168,6 +168,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
case 0x0E: /* Core */
case 0x0F: /* Core Duo */
case 0x16: /* Celeron Core */
+ case 0x1C: /* Atom */
p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE);
case 0x0D: /* Pentium M (Dothan) */
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 3c28ccd49742..d47c775eb0ab 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -168,10 +168,12 @@ static int check_powernow(void)
return 1;
}
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
static void invalidate_entry(unsigned int entry)
{
powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
}
+#endif
static int get_ranges(unsigned char *pst)
{
@@ -320,7 +322,7 @@ static int powernow_acpi_init(void)
goto err0;
}
- if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
+ if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
GFP_KERNEL)) {
retval = -ENOMEM;
goto err05;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 4709ead2db52..cf52215d9eb1 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -649,6 +649,20 @@ static void print_basics(struct powernow_k8_data *data)
data->batps);
}
+static u32 freq_from_fid_did(u32 fid, u32 did)
+{
+ u32 mhz = 0;
+
+ if (boot_cpu_data.x86 == 0x10)
+ mhz = (100 * (fid + 0x10)) >> did;
+ else if (boot_cpu_data.x86 == 0x11)
+ mhz = (100 * (fid + 8)) >> did;
+ else
+ BUG();
+
+ return mhz * 1000;
+}
+
static int fill_powernow_table(struct powernow_k8_data *data,
struct pst_s *pst, u8 maxvid)
{
@@ -821,7 +835,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
{
struct cpufreq_frequency_table *powernow_table;
int ret_val = -ENODEV;
- acpi_integer space_id;
+ acpi_integer control, status;
if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
dprintk("register performance failed: bad ACPI data\n");
@@ -834,12 +848,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
goto err_out;
}
- space_id = data->acpi_data.control_register.space_id;
- if ((space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
- (space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+ control = data->acpi_data.control_register.space_id;
+ status = data->acpi_data.status_register.space_id;
+
+ if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+ (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
dprintk("Invalid control/status registers (%x - %x)\n",
- data->acpi_data.control_register.space_id,
- space_id);
+ control, status);
goto err_out;
}
@@ -872,7 +887,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
/* notify BIOS that we exist */
acpi_processor_notify_smm(THIS_MODULE);
- if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
+ if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
printk(KERN_ERR PFX
"unable to alloc powernow_k8_data cpumask\n");
ret_val = -ENOMEM;
@@ -923,8 +938,13 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
powernow_table[i].index = index;
- powernow_table[i].frequency =
- data->acpi_data.states[i].core_frequency * 1000;
+ /* Frequency may be rounded for these */
+ if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) {
+ powernow_table[i].frequency =
+ freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
+ } else
+ powernow_table[i].frequency =
+ data->acpi_data.states[i].core_frequency * 1000;
}
return 0;
}
@@ -1215,13 +1235,16 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
return cpufreq_frequency_table_verify(pol, data->powernow_table);
}
+static const char ACPI_PSS_BIOS_BUG_MSG[] =
+ KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
+ KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n";
+
/* per CPU init entry point to the driver */
static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
{
struct powernow_k8_data *data;
cpumask_t oldmask;
int rc;
- static int print_once;
if (!cpu_online(pol->cpu))
return -ENODEV;
@@ -1244,19 +1267,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
* an UP version, and is deprecated by AMD.
*/
if (num_online_cpus() != 1) {
- /*
- * Replace this one with print_once as soon as such a
- * thing gets introduced
- */
- if (!print_once) {
- WARN_ONCE(1, KERN_ERR FW_BUG PFX "Your BIOS "
- "does not provide ACPI _PSS objects "
- "in a way that Linux understands. "
- "Please report this to the Linux ACPI"
- " maintainers and complain to your "
- "BIOS vendor.\n");
- print_once++;
- }
+ printk_once(ACPI_PSS_BIOS_BUG_MSG);
goto err_out;
}
if (pol->cpu != 0) {
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index c9f1fdc02830..55c831ed71ce 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -471,7 +471,7 @@ static int centrino_target (struct cpufreq_policy *policy,
if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL)))
return -ENOMEM;
- if (unlikely(!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
+ if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
free_cpumask_var(saved_mask);
return -ENOMEM;
}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 7437fa133c02..daed39ba2614 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -229,12 +229,12 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
}
#endif
-static void __cpuinit srat_detect_node(void)
+static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
unsigned node;
int cpu = smp_processor_id();
- int apicid = hard_smp_processor_id();
+ int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
/* Don't do the funky fallback heuristics the AMD version employs
for now. */
@@ -400,7 +400,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
}
/* Work around errata */
- srat_detect_node();
+ srat_detect_node(c);
if (cpu_has(c, X86_FEATURE_VMX))
detect_vmx_virtcap(c);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 863f89568b1a..09dd1d414fc3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -239,9 +239,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
*/
-
- mce_log(&m);
- add_taint(TAINT_MACHINE_CHECK);
+ if (!(flags & MCP_DONTLOG)) {
+ mce_log(&m);
+ add_taint(TAINT_MACHINE_CHECK);
+ }
/*
* Clear state for this bank.
@@ -452,13 +453,14 @@ void mce_log_therm_throt_event(__u64 status)
*/
static int check_interval = 5 * 60; /* 5 minutes */
-static int next_interval; /* in jiffies */
+static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
static void mcheck_timer(unsigned long);
static DEFINE_PER_CPU(struct timer_list, mce_timer);
static void mcheck_timer(unsigned long data)
{
struct timer_list *t = &per_cpu(mce_timer, data);
+ int *n;
WARN_ON(smp_processor_id() != data);
@@ -470,14 +472,14 @@ static void mcheck_timer(unsigned long data)
* Alert userspace if needed. If we logged an MCE, reduce the
* polling interval, otherwise increase the polling interval.
*/
+ n = &__get_cpu_var(next_interval);
if (mce_notify_user()) {
- next_interval = max(next_interval/2, HZ/100);
+ *n = max(*n/2, HZ/100);
} else {
- next_interval = min(next_interval * 2,
- (int)round_jiffies_relative(check_interval*HZ));
+ *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
}
- t->expires = jiffies + next_interval;
+ t->expires = jiffies + *n;
add_timer(t);
}
@@ -584,7 +586,7 @@ static void mce_init(void *dummy)
* Log the machine checks left over from the previous reset.
*/
bitmap_fill(all_banks, MAX_NR_BANKS);
- machine_check_poll(MCP_UC, &all_banks);
+ machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
set_in_cr4(X86_CR4_MCE);
@@ -632,14 +634,13 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
static void mce_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
+ int *n = &__get_cpu_var(next_interval);
- /* data race harmless because everyone sets to the same value */
- if (!next_interval)
- next_interval = check_interval * HZ;
- if (!next_interval)
+ *n = check_interval * HZ;
+ if (!*n)
return;
setup_timer(t, mcheck_timer, smp_processor_id());
- t->expires = round_jiffies(jiffies + next_interval);
+ t->expires = round_jiffies(jiffies + *n);
add_timer(t);
}
@@ -907,7 +908,6 @@ static void mce_cpu_restart(void *data)
/* Reinit MCEs after user configuration changes */
static void mce_restart(void)
{
- next_interval = check_interval * HZ;
on_each_cpu(mce_cpu_restart, NULL, 1);
}
@@ -1110,7 +1110,8 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- t->expires = round_jiffies(jiffies + next_interval);
+ t->expires = round_jiffies(jiffies +
+ __get_cpu_var(next_interval));
add_timer_on(t, cpu);
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
break;
@@ -1162,7 +1163,7 @@ static __init int mce_init_device(void)
if (!mce_available(&boot_cpu_data))
return -EIO;
- alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
+ zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
err = mce_init_banks();
if (err)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index d6b72df89d69..cef3ee30744b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -151,10 +151,11 @@ static void print_update(char *type, int *hdr, int num)
static void cmci_discover(int banks, int boot)
{
unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
+ unsigned long flags;
int hdr = 0;
int i;
- spin_lock(&cmci_discover_lock);
+ spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
u64 val;
@@ -184,7 +185,7 @@ static void cmci_discover(int banks, int boot)
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
}
}
- spin_unlock(&cmci_discover_lock);
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
if (hdr)
printk(KERN_CONT "\n");
}
@@ -211,13 +212,14 @@ void cmci_recheck(void)
*/
void cmci_clear(void)
{
+ unsigned long flags;
int i;
int banks;
u64 val;
if (!cmci_supported(&banks))
return;
- spin_lock(&cmci_discover_lock);
+ spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
continue;
@@ -227,7 +229,7 @@ void cmci_clear(void)
wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
__clear_bit(i, __get_cpu_var(mce_banks_owned));
}
- spin_unlock(&cmci_discover_lock);
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
/*
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 0b776c09aff3..d21d4fb161f7 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -275,7 +275,11 @@ static void __init print_mtrr_state(void)
}
printk(KERN_DEBUG "MTRR variable ranges %sabled:\n",
mtrr_state.enabled & 2 ? "en" : "dis");
- high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4;
+ if (size_or_mask & 0xffffffffUL)
+ high_width = ffs(size_or_mask & 0xffffffffUL) - 1;
+ else
+ high_width = ffs(size_or_mask>>32) + 32 - 1;
+ high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4;
for (i = 0; i < num_var_ranges; ++i) {
if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n",
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index f93047fed791..d5e30397246b 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -14,7 +14,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
if (c->x86_max_cores * smp_num_siblings > 1) {
seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
seq_printf(m, "siblings\t: %d\n",
- cpumask_weight(cpu_sibling_mask(cpu)));
+ cpumask_weight(cpu_core_mask(cpu)));
seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
seq_printf(m, "apicid\t\t: %d\n", c->apicid);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index ef2c3563357d..006281302925 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1074,12 +1074,13 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
u64 addr;
u64 start;
- start = startt;
- while (size < sizet && (start + 1))
+ for (start = startt; ; start += size) {
start = find_e820_area_size(start, &size, align);
-
- if (size < sizet)
- return 0;
+ if (!(start + 1))
+ return 0;
+ if (size >= sizet)
+ break;
+ }
#ifdef CONFIG_X86_32
if (start >= MAXMEM)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a331ec38af9e..38946c6e8433 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1410,7 +1410,10 @@ ENTRY(paranoid_exit)
paranoid_swapgs:
TRACE_IRQS_IRETQ 0
SWAPGS_UNSAFE_STACK
+ RESTORE_ALL 8
+ jmp irq_return
paranoid_restore:
+ TRACE_IRQS_IRETQ 0
RESTORE_ALL 8
jmp irq_return
paranoid_userspace:
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 18dfa30795c9..b79c5533c421 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -442,7 +442,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
_ASM_EXTABLE(1b, 4b)
_ASM_EXTABLE(2b, 4b)
- : [old] "=r" (old), [faulted] "=r" (faulted)
+ : [old] "=&r" (old), [faulted] "=r" (faulted)
: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
: "memory"
);
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 648b3a2a3a44..81408b93f887 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -236,6 +236,10 @@ static void hpet_stop_counter(void)
unsigned long cfg = hpet_readl(HPET_CFG);
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
+}
+
+static void hpet_reset_counter(void)
+{
hpet_writel(0, HPET_COUNTER);
hpet_writel(0, HPET_COUNTER + 4);
}
@@ -250,6 +254,7 @@ static void hpet_start_counter(void)
static void hpet_restart_counter(void)
{
hpet_stop_counter();
+ hpet_reset_counter();
hpet_start_counter();
}
@@ -309,7 +314,7 @@ static int hpet_setup_msi_irq(unsigned int irq);
static void hpet_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt, int timer)
{
- unsigned long cfg;
+ unsigned long cfg, cmp, now;
uint64_t delta;
switch (mode) {
@@ -317,12 +322,23 @@ static void hpet_set_mode(enum clock_event_mode mode,
hpet_stop_counter();
delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
delta >>= evt->shift;
+ now = hpet_readl(HPET_COUNTER);
+ cmp = now + (unsigned long) delta;
cfg = hpet_readl(HPET_Tn_CFG(timer));
/* Make sure we use edge triggered interrupts */
cfg &= ~HPET_TN_LEVEL;
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
HPET_TN_SETVAL | HPET_TN_32BIT;
hpet_writel(cfg, HPET_Tn_CFG(timer));
+ hpet_writel(cmp, HPET_Tn_CMP(timer));
+ udelay(1);
+ /*
+ * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL
+ * cleared) to T0_CMP to set the period. The HPET_TN_SETVAL
+ * bit is automatically cleared after the first write.
+ * (See AMD-8111 HyperTransport I/O Hub Data Sheet,
+ * Publication # 24674)
+ */
hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
hpet_start_counter();
hpet_print_config();
@@ -722,7 +738,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
/*
* Clock source related code
*/
-static cycle_t read_hpet(void)
+static cycle_t read_hpet(struct clocksource *cs)
{
return (cycle_t)hpet_readl(HPET_COUNTER);
}
@@ -756,7 +772,7 @@ static int hpet_clocksource_register(void)
hpet_restart_counter();
/* Verify whether hpet counter works */
- t1 = read_hpet();
+ t1 = hpet_readl(HPET_COUNTER);
rdtscll(start);
/*
@@ -770,7 +786,7 @@ static int hpet_clocksource_register(void)
rdtscll(now);
} while ((now - start) < 200000UL);
- if (t1 == read_hpet()) {
+ if (t1 == hpet_readl(HPET_COUNTER)) {
printk(KERN_WARNING
"HPET counter not counting. HPET disabled\n");
return -ENODEV;
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 3475440baa54..c2e0bb0890d4 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -129,7 +129,7 @@ void __init setup_pit_timer(void)
* to just read by itself. So use jiffies to emulate a free
* running counter:
*/
-static cycle_t pit_read(void)
+static cycle_t pit_read(struct clocksource *cs)
{
static int old_count;
static u32 old_jifs;
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index c3fe010d74c8..c1739ac29708 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -24,9 +24,9 @@ void (*generic_interrupt_extension)(void) = NULL;
*/
void ack_bad_irq(unsigned int irq)
{
- printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
+ if (printk_ratelimit())
+ pr_err("unexpected IRQ trap at vector %02x\n", irq);
-#ifdef CONFIG_X86_LOCAL_APIC
/*
* Currently unexpected vectors happen only on SMP and APIC.
* We _must_ ack these because every local APIC has only N
@@ -36,9 +36,7 @@ void ack_bad_irq(unsigned int irq)
* completely.
* But only ack when the APIC is enabled -AK
*/
- if (cpu_has_apic)
- ack_APIC_irq();
-#endif
+ ack_APIC_irq();
}
#define irq_stats(x) (&per_cpu(irq_stat, x))
@@ -178,7 +176,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->irq_thermal_count;
# ifdef CONFIG_X86_64
sum += irq_stats(cpu)->irq_threshold_count;
-#endif
+# endif
#endif
return sum;
}
@@ -213,14 +211,11 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
irq = __get_cpu_var(vector_irq)[vector];
if (!handle_irq(irq, regs)) {
-#ifdef CONFIG_X86_64
- if (!disable_apic)
- ack_APIC_irq();
-#endif
+ ack_APIC_irq();
if (printk_ratelimit())
- printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n",
- __func__, smp_processor_id(), vector, irq);
+ pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n",
+ __func__, smp_processor_id(), vector, irq);
}
irq_exit();
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit.c
index 368b0a8836f9..2e08b10ad51a 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit.c
@@ -1,20 +1,25 @@
+#include <linux/linkage.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/ioport.h>
#include <linux/interrupt.h>
+#include <linux/timex.h>
#include <linux/slab.h>
#include <linux/random.h>
+#include <linux/kprobes.h>
#include <linux/init.h>
#include <linux/kernel_stat.h>
#include <linux/sysdev.h>
#include <linux/bitops.h>
+#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <asm/atomic.h>
#include <asm/system.h>
#include <asm/timer.h>
+#include <asm/hw_irq.h>
#include <asm/pgtable.h>
#include <asm/desc.h>
#include <asm/apic.h>
@@ -22,7 +27,23 @@
#include <asm/i8259.h>
#include <asm/traps.h>
+/*
+ * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
+ * (these are usually mapped to vectors 0x30-0x3f)
+ */
+
+/*
+ * The IO-APIC gives us many more interrupt sources. Most of these
+ * are unused but an SMP system is supposed to have enough memory ...
+ * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
+ * across the spectrum, so we really want to be prepared to get all
+ * of these. Plus, more powerful systems might have more than 64
+ * IO-APIC registers.
+ *
+ * (these are usually mapped into the 0x30-0xff vector range)
+ */
+#ifdef CONFIG_X86_32
/*
* Note that on a 486, we don't want to do a SIGFPE on an irq13
* as the irq is unreliable, and exception 16 works correctly
@@ -52,30 +73,7 @@ static struct irqaction fpu_irq = {
.handler = math_error_irq,
.name = "fpu",
};
-
-void __init init_ISA_irqs(void)
-{
- int i;
-
-#ifdef CONFIG_X86_LOCAL_APIC
- init_bsp_APIC();
#endif
- init_8259A(0);
-
- /*
- * 16 old-style INTA-cycle interrupts:
- */
- for (i = 0; i < NR_IRQS_LEGACY; i++) {
- struct irq_desc *desc = irq_to_desc(i);
-
- desc->status = IRQ_DISABLED;
- desc->action = NULL;
- desc->depth = 1;
-
- set_irq_chip_and_handler_name(i, &i8259A_chip,
- handle_level_irq, "XT");
- }
-}
/*
* IRQ2 is cascade interrupt to second interrupt controller
@@ -118,29 +116,37 @@ int vector_used_by_percpu_irq(unsigned int vector)
return 0;
}
-/* Overridden in paravirt.c */
-void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-
-void __init native_init_IRQ(void)
+static void __init init_ISA_irqs(void)
{
int i;
- /* Execute any quirks before the call gates are initialised: */
- x86_quirk_pre_intr_init();
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+ init_bsp_APIC();
+#endif
+ init_8259A(0);
/*
- * Cover the whole vector space, no vector can escape
- * us. (some of these will be overridden and become
- * 'special' SMP interrupts)
+ * 16 old-style INTA-cycle interrupts:
*/
- for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
- /* SYSCALL_VECTOR was reserved in trap_init. */
- if (i != SYSCALL_VECTOR)
- set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+ for (i = 0; i < NR_IRQS_LEGACY; i++) {
+ struct irq_desc *desc = irq_to_desc(i);
+
+ desc->status = IRQ_DISABLED;
+ desc->action = NULL;
+ desc->depth = 1;
+
+ set_irq_chip_and_handler_name(i, &i8259A_chip,
+ handle_level_irq, "XT");
}
+}
+/* Overridden in paravirt.c */
+void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
+static void __init smp_intr_init(void)
+{
+#ifdef CONFIG_SMP
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
/*
* The reschedule interrupt is a CPU-to-CPU reschedule-helper
* IPI, driven by wakeup.
@@ -160,16 +166,27 @@ void __init native_init_IRQ(void)
/* IPI for generic function call */
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
- /* IPI for single call function */
+ /* IPI for generic single function call */
alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
- call_function_single_interrupt);
+ call_function_single_interrupt);
/* Low priority IPI to cleanup after moving an irq */
set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
#endif
+#endif /* CONFIG_SMP */
+}
+
+static void __init apic_intr_init(void)
+{
+ smp_intr_init();
+
+#ifdef CONFIG_X86_64
+ alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+ alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
+#endif
-#ifdef CONFIG_X86_LOCAL_APIC
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
/* self generated IPI for local APIC timer */
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
@@ -179,16 +196,67 @@ void __init native_init_IRQ(void)
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+ /* Performance monitoring interrupts: */
+# ifdef CONFIG_PERF_COUNTERS
+ alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
+ alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
+# endif
+
#endif
+#ifdef CONFIG_X86_32
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
/* thermal monitor LVT interrupt */
alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
#endif
+#endif
+}
+
+/**
+ * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
+ *
+ * Description:
+ * Perform any necessary interrupt initialisation prior to setting up
+ * the "ordinary" interrupt call gates. For legacy reasons, the ISA
+ * interrupts should be initialised here if the machine emulates a PC
+ * in any way.
+ **/
+static void __init x86_quirk_pre_intr_init(void)
+{
+#ifdef CONFIG_X86_32
+ if (x86_quirks->arch_pre_intr_init) {
+ if (x86_quirks->arch_pre_intr_init())
+ return;
+ }
+#endif
+ init_ISA_irqs();
+}
+
+void __init native_init_IRQ(void)
+{
+ int i;
+
+ /* Execute any quirks before the call gates are initialised: */
+ x86_quirk_pre_intr_init();
+
+ apic_intr_init();
+
+ /*
+ * Cover the whole vector space, no vector can escape
+ * us. (some of these will be overridden and become
+ * 'special' SMP interrupts)
+ */
+ for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+ /* IA32_SYSCALL_VECTOR could be used in trap_init already. */
+ if (!test_bit(i, used_vectors))
+ set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+ }
if (!acpi_ioapic)
setup_irq(2, &irq2);
+#ifdef CONFIG_X86_32
/*
* Call quirks after call gates are initialised (usually add in
* the architecture specific gates):
@@ -203,4 +271,5 @@ void __init native_init_IRQ(void)
setup_irq(FPU_IRQ, &fpu_irq);
irq_ctx_init(smp_processor_id());
+#endif
}
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
deleted file mode 100644
index 8cd10537fd46..000000000000
--- a/arch/x86/kernel/irqinit_64.c
+++ /dev/null
@@ -1,177 +0,0 @@
-#include <linux/linkage.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/init.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/bitops.h>
-#include <linux/acpi.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <asm/hw_irq.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-#include <asm/apic.h>
-#include <asm/i8259.h>
-
-/*
- * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
- * (these are usually mapped to vectors 0x30-0x3f)
- */
-
-/*
- * The IO-APIC gives us many more interrupt sources. Most of these
- * are unused but an SMP system is supposed to have enough memory ...
- * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
- * across the spectrum, so we really want to be prepared to get all
- * of these. Plus, more powerful systems might have more than 64
- * IO-APIC registers.
- *
- * (these are usually mapped into the 0x30-0xff vector range)
- */
-
-/*
- * IRQ2 is cascade interrupt to second interrupt controller
- */
-
-static struct irqaction irq2 = {
- .handler = no_action,
- .name = "cascade",
-};
-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
- [0 ... IRQ0_VECTOR - 1] = -1,
- [IRQ0_VECTOR] = 0,
- [IRQ1_VECTOR] = 1,
- [IRQ2_VECTOR] = 2,
- [IRQ3_VECTOR] = 3,
- [IRQ4_VECTOR] = 4,
- [IRQ5_VECTOR] = 5,
- [IRQ6_VECTOR] = 6,
- [IRQ7_VECTOR] = 7,
- [IRQ8_VECTOR] = 8,
- [IRQ9_VECTOR] = 9,
- [IRQ10_VECTOR] = 10,
- [IRQ11_VECTOR] = 11,
- [IRQ12_VECTOR] = 12,
- [IRQ13_VECTOR] = 13,
- [IRQ14_VECTOR] = 14,
- [IRQ15_VECTOR] = 15,
- [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
-};
-
-int vector_used_by_percpu_irq(unsigned int vector)
-{
- int cpu;
-
- for_each_online_cpu(cpu) {
- if (per_cpu(vector_irq, cpu)[vector] != -1)
- return 1;
- }
-
- return 0;
-}
-
-static void __init init_ISA_irqs(void)
-{
- int i;
-
- init_bsp_APIC();
- init_8259A(0);
-
- for (i = 0; i < NR_IRQS_LEGACY; i++) {
- struct irq_desc *desc = irq_to_desc(i);
-
- desc->status = IRQ_DISABLED;
- desc->action = NULL;
- desc->depth = 1;
-
- /*
- * 16 old-style INTA-cycle interrupts:
- */
- set_irq_chip_and_handler_name(i, &i8259A_chip,
- handle_level_irq, "XT");
- }
-}
-
-void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-
-static void __init smp_intr_init(void)
-{
-#ifdef CONFIG_SMP
- /*
- * The reschedule interrupt is a CPU-to-CPU reschedule-helper
- * IPI, driven by wakeup.
- */
- alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
- /* IPIs for invalidation */
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
-
- /* IPI for generic function call */
- alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-
- /* IPI for generic single function call */
- alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
- call_function_single_interrupt);
-
- /* Low priority IPI to cleanup after moving an irq */
- set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
- set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
-#endif
-}
-
-static void __init apic_intr_init(void)
-{
- smp_intr_init();
-
- alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
- alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
-
- /* self generated IPI for local APIC timer */
- alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
- /* generic IPI for platform specific use */
- alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
-
- /* IPI vectors for APIC spurious and error interrupts */
- alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
- alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-}
-
-void __init native_init_IRQ(void)
-{
- int i;
-
- init_ISA_irqs();
- /*
- * Cover the whole vector space, no vector can escape
- * us. (some of these will be overridden and become
- * 'special' SMP interrupts)
- */
- for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
- int vector = FIRST_EXTERNAL_VECTOR + i;
- if (vector != IA32_SYSCALL_VECTOR)
- set_intr_gate(vector, interrupt[i]);
- }
-
- apic_intr_init();
-
- if (!acpi_ioapic)
- setup_irq(2, &irq2);
-}
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index eedfaebe1063..b1f4dffb919e 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -88,6 +88,7 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs[GDB_SS] = __KERNEL_DS;
gdb_regs[GDB_FS] = 0xFFFF;
gdb_regs[GDB_GS] = 0xFFFF;
+ gdb_regs[GDB_SP] = (int)&regs->sp;
#else
gdb_regs[GDB_R8] = regs->r8;
gdb_regs[GDB_R9] = regs->r9;
@@ -100,8 +101,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs32[GDB_PS] = regs->flags;
gdb_regs32[GDB_CS] = regs->cs;
gdb_regs32[GDB_SS] = regs->ss;
-#endif
gdb_regs[GDB_SP] = regs->sp;
+#endif
}
/**
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 137f2e8132df..223af43f1526 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -77,6 +77,11 @@ static cycle_t kvm_clock_read(void)
return ret;
}
+static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
+{
+ return kvm_clock_read();
+}
+
/*
* If we don't do that, there is the possibility that the guest
* will calibrate under heavy load - thus, getting a lower lpj -
@@ -107,7 +112,7 @@ static void kvm_get_preset_lpj(void)
static struct clocksource kvm_clock = {
.name = "kvm-clock",
- .read = kvm_clock_read,
+ .read = kvm_clock_get_cycles,
.rating = 400,
.mask = CLOCKSOURCE_MASK(64),
.mult = 1 << KVM_SCALE,
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index e7368c1da01d..c1c429d00130 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -194,7 +194,7 @@ void machine_kexec(struct kimage *image)
unsigned int preserve_context);
#ifdef CONFIG_KEXEC_JUMP
- if (kexec_image->preserve_context)
+ if (image->preserve_context)
save_processor_state();
#endif
@@ -253,7 +253,7 @@ void machine_kexec(struct kimage *image)
image->preserve_context);
#ifdef CONFIG_KEXEC_JUMP
- if (kexec_image->preserve_context)
+ if (image->preserve_context)
restore_processor_state();
#endif
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 89cea4d44679..84c3bf209e98 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -274,7 +274,7 @@ void machine_kexec(struct kimage *image)
int save_ftrace_enabled;
#ifdef CONFIG_KEXEC_JUMP
- if (kexec_image->preserve_context)
+ if (image->preserve_context)
save_processor_state();
#endif
@@ -333,7 +333,7 @@ void machine_kexec(struct kimage *image)
image->preserve_context);
#ifdef CONFIG_KEXEC_JUMP
- if (kexec_image->preserve_context)
+ if (image->preserve_context)
restore_processor_state();
#endif
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 2e0eb4140951..98c470c069d1 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -380,8 +380,6 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
return err;
err = microcode_init_cpu(cpu);
- if (err)
- sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
return err;
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 70fd7e414c15..651c93b28862 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -17,6 +17,7 @@
#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/smp.h>
+#include <linux/pci.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
@@ -870,24 +871,17 @@ static
inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
#endif /* CONFIG_X86_IO_APIC */
-static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length,
- int count)
+static int
+check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
{
- if (!mpc_new_phys) {
- pr_info("No spare slots, try to append...take your risk, "
- "new mpc_length %x\n", count);
- } else {
- if (count <= mpc_new_length)
- pr_info("No spare slots, try to append..., "
- "new mpc_length %x\n", count);
- else {
- pr_err("mpc_new_length %lx is too small\n",
- mpc_new_length);
- return -1;
- }
+ int ret = 0;
+
+ if (!mpc_new_phys || count <= mpc_new_length) {
+ WARN(1, "update_mptable: No spare slots (length: %x)\n", count);
+ return -1;
}
- return 0;
+ return ret;
}
static int __init replace_intsrc_all(struct mpc_table *mpc,
@@ -946,7 +940,7 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
} else {
struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
count += sizeof(struct mpc_intsrc);
- if (!check_slot(mpc_new_phys, mpc_new_length, count))
+ if (check_slot(mpc_new_phys, mpc_new_length, count) < 0)
goto out;
assign_to_mpc_intsrc(&mp_irqs[i], m);
mpc->length = count;
@@ -963,11 +957,14 @@ out:
return 0;
}
-static int __initdata enable_update_mptable;
+int enable_update_mptable;
static int __init update_mptable_setup(char *str)
{
enable_update_mptable = 1;
+#ifdef CONFIG_PCI
+ pci_routeirq = 1;
+#endif
return 0;
}
early_param("update_mptable", update_mptable_setup);
@@ -980,6 +977,9 @@ static int __initdata alloc_mptable;
static int __init parse_alloc_mptable_opt(char *p)
{
enable_update_mptable = 1;
+#ifdef CONFIG_PCI
+ pci_routeirq = 1;
+#endif
alloc_mptable = 1;
if (!p)
return 0;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 8e45f4464880..9faf43bea336 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -134,7 +134,9 @@ static void *get_call_destination(u8 type)
.pv_irq_ops = pv_irq_ops,
.pv_apic_ops = pv_apic_ops,
.pv_mmu_ops = pv_mmu_ops,
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
.pv_lock_ops = pv_lock_ops,
+#endif
};
return *((void **)&tmpl + type);
}
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 34f12e9996ed..221a3853e268 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -50,7 +50,7 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
}
-struct dma_map_ops swiotlb_dma_ops = {
+static struct dma_map_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
.alloc_coherent = x86_swiotlb_alloc_coherent,
.free_coherent = swiotlb_free_coherent,
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index e95022e4f5d5..7563b31b4f03 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -261,8 +261,6 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
{
if (hpet_force_user)
old_ich_force_enable_hpet(dev);
- else
- hpet_print_force_info();
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1,
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 1340dad417f4..667188e0b5a0 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -232,6 +232,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"),
},
},
+ { /* Handle problems with rebooting on Sony VGN-Z540N */
+ .callback = set_bios_reboot,
+ .ident = "Sony VGN-Z540N",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"),
+ },
+ },
{ }
};
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2b093451aec9..7791eef95b91 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1003,24 +1003,6 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_32
/**
- * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
- *
- * Description:
- * Perform any necessary interrupt initialisation prior to setting up
- * the "ordinary" interrupt call gates. For legacy reasons, the ISA
- * interrupts should be initialised here if the machine emulates a PC
- * in any way.
- **/
-void __init x86_quirk_pre_intr_init(void)
-{
- if (x86_quirks->arch_pre_intr_init) {
- if (x86_quirks->arch_pre_intr_init())
- return;
- }
- init_ISA_irqs();
-}
-
-/**
* x86_quirk_intr_init - post gate setup interrupt initialisation
*
* Description:
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 3a97a4cf1872..8f0e13be36b3 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -160,8 +160,10 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
/*
* If large page isn't supported, there's no benefit in doing
* this. Also, on non-NUMA, embedding is better.
+ *
+ * NOTE: disabled for now.
*/
- if (!cpu_has_pse || !pcpu_need_numa())
+ if (true || !cpu_has_pse || !pcpu_need_numa())
return -EINVAL;
/*
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 13f33ea8ccaa..f6db48c405b8 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -193,19 +193,19 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
}
struct smp_ops smp_ops = {
- .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
- .smp_prepare_cpus = native_smp_prepare_cpus,
- .smp_cpus_done = native_smp_cpus_done,
+ .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
+ .smp_prepare_cpus = native_smp_prepare_cpus,
+ .smp_cpus_done = native_smp_cpus_done,
- .smp_send_stop = native_smp_send_stop,
- .smp_send_reschedule = native_smp_send_reschedule,
+ .smp_send_stop = native_smp_send_stop,
+ .smp_send_reschedule = native_smp_send_reschedule,
- .cpu_up = native_cpu_up,
- .cpu_die = native_cpu_die,
- .cpu_disable = native_cpu_disable,
- .play_dead = native_play_dead,
+ .cpu_up = native_cpu_up,
+ .cpu_die = native_cpu_die,
+ .cpu_disable = native_cpu_disable,
+ .play_dead = native_play_dead,
- .send_call_func_ipi = native_send_call_func_ipi,
+ .send_call_func_ipi = native_send_call_func_ipi,
.send_call_func_single_ipi = native_send_call_func_single_ipi,
};
EXPORT_SYMBOL_GPL(smp_ops);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 58d24ef917d8..7c80007ea5f7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -504,7 +504,7 @@ void __inquire_remote_apic(int apicid)
* INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
* won't ... remember to clear down the APIC, etc later.
*/
-int __devinit
+int __cpuinit
wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
@@ -538,7 +538,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
return (send_status | accept_status);
}
-int __devinit
+static int __cpuinit
wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
@@ -822,10 +822,12 @@ do_rest:
/* mark "stuck" area as not stuck */
*((volatile unsigned long *)trampoline_base) = 0;
- /*
- * Cleanup possible dangling ends...
- */
- smpboot_restore_warm_reset_vector();
+ if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
+ /*
+ * Cleanup possible dangling ends...
+ */
+ smpboot_restore_warm_reset_vector();
+ }
return boot_error;
}
@@ -990,10 +992,12 @@ static int __init smp_sanity_check(unsigned max_cpus)
*/
if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
!cpu_has_apic) {
- printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
- boot_cpu_physical_apicid);
- printk(KERN_ERR "... forcing use of dummy APIC emulation."
+ if (!disable_apic) {
+ pr_err("BIOS bug, local APIC #%d not detected!...\n",
+ boot_cpu_physical_apicid);
+ pr_err("... forcing use of dummy APIC emulation."
"(tell your hw vendor)\n");
+ }
smpboot_clear_io_apic();
arch_disable_smp_support();
return -1;
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index deb5ebb32c3b..8c7b03b0cfcb 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -25,6 +25,8 @@ static int uv_bau_retry_limit __read_mostly;
/* position of pnode (which is nasid>>1): */
static int uv_nshift __read_mostly;
+/* base pnode in this partition */
+static int uv_partition_base_pnode __read_mostly;
static unsigned long uv_mmask __read_mostly;
@@ -32,6 +34,34 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
static DEFINE_PER_CPU(struct bau_control, bau_control);
/*
+ * Determine the first node on a blade.
+ */
+static int __init blade_to_first_node(int blade)
+{
+ int node, b;
+
+ for_each_online_node(node) {
+ b = uv_node_to_blade_id(node);
+ if (blade == b)
+ return node;
+ }
+ return -1; /* shouldn't happen */
+}
+
+/*
+ * Determine the apicid of the first cpu on a blade.
+ */
+static int __init blade_to_first_apicid(int blade)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu)
+ if (blade == uv_cpu_to_blade_id(cpu))
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ return -1;
+}
+
+/*
* Free a software acknowledge hardware resource by clearing its Pending
* bit. This will return a reply to the sender.
* If the message has timed out, a reply has already been sent by the
@@ -67,7 +97,7 @@ static void uv_bau_process_message(struct bau_payload_queue_entry *msg,
msp = __get_cpu_var(bau_control).msg_statuses + msg_slot;
cpu = uv_blade_processor_id();
msg->number_of_cpus =
- uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
+ uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
this_cpu_mask = 1UL << cpu;
if (msp->seen_by.bits & this_cpu_mask)
return;
@@ -215,14 +245,14 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
* Returns @flush_mask if some remote flushing remains to be done. The
* mask will have some bits still set.
*/
-const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
+const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode,
struct bau_desc *bau_desc,
struct cpumask *flush_mask)
{
int completion_status = 0;
int right_shift;
int tries = 0;
- int blade;
+ int pnode;
int bit;
unsigned long mmr_offset;
unsigned long index;
@@ -265,8 +295,8 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
* use the IPI method of shootdown on them.
*/
for_each_cpu(bit, flush_mask) {
- blade = uv_cpu_to_blade_id(bit);
- if (blade == this_blade)
+ pnode = uv_cpu_to_pnode(bit);
+ if (pnode == this_pnode)
continue;
cpumask_clear_cpu(bit, flush_mask);
}
@@ -309,16 +339,16 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask);
int i;
int bit;
- int blade;
+ int pnode;
int uv_cpu;
- int this_blade;
+ int this_pnode;
int locals = 0;
struct bau_desc *bau_desc;
cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
uv_cpu = uv_blade_processor_id();
- this_blade = uv_numa_blade_id();
+ this_pnode = uv_hub_info->pnode;
bau_desc = __get_cpu_var(bau_control).descriptor_base;
bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
@@ -326,13 +356,14 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
i = 0;
for_each_cpu(bit, flush_mask) {
- blade = uv_cpu_to_blade_id(bit);
- BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
- if (blade == this_blade) {
+ pnode = uv_cpu_to_pnode(bit);
+ BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1));
+ if (pnode == this_pnode) {
locals++;
continue;
}
- bau_node_set(blade, &bau_desc->distribution);
+ bau_node_set(pnode - uv_partition_base_pnode,
+ &bau_desc->distribution);
i++;
}
if (i == 0) {
@@ -350,7 +381,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
bau_desc->payload.address = va;
bau_desc->payload.sending_cpu = cpu;
- return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
+ return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask);
}
/*
@@ -418,24 +449,58 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
set_irq_regs(old_regs);
}
+/*
+ * uv_enable_timeouts
+ *
+ * Each target blade (i.e. blades that have cpu's) needs to have
+ * shootdown message timeouts enabled. The timeout does not cause
+ * an interrupt, but causes an error message to be returned to
+ * the sender.
+ */
static void uv_enable_timeouts(void)
{
- int i;
int blade;
- int last_blade;
+ int nblades;
int pnode;
- int cur_cpu = 0;
- unsigned long apicid;
+ unsigned long mmr_image;
- last_blade = -1;
- for_each_online_node(i) {
- blade = uv_node_to_blade_id(i);
- if (blade == last_blade)
+ nblades = uv_num_possible_blades();
+
+ for (blade = 0; blade < nblades; blade++) {
+ if (!uv_blade_nr_possible_cpus(blade))
continue;
- last_blade = blade;
- apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+
pnode = uv_blade_to_pnode(blade);
- cur_cpu += uv_blade_nr_possible_cpus(i);
+ mmr_image =
+ uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
+ /*
+ * Set the timeout period and then lock it in, in three
+ * steps; captures and locks in the period.
+ *
+ * To program the period, the SOFT_ACK_MODE must be off.
+ */
+ mmr_image &= ~((unsigned long)1 <<
+ UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
+ uv_write_global_mmr64
+ (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+ /*
+ * Set the 4-bit period.
+ */
+ mmr_image &= ~((unsigned long)0xf <<
+ UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
+ mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
+ UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
+ uv_write_global_mmr64
+ (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+ /*
+ * Subsequent reversals of the timebase bit (3) cause an
+ * immediate timeout of one or all INTD resources as
+ * indicated in bits 2:0 (7 causes all of them to timeout).
+ */
+ mmr_image |= ((unsigned long)1 <<
+ UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
+ uv_write_global_mmr64
+ (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
}
}
@@ -482,8 +547,7 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
stat->requestee, stat->onetlb, stat->alltlb,
stat->s_retry, stat->d_retry, stat->ptc_i);
seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n",
- uv_read_global_mmr64(uv_blade_to_pnode
- (uv_cpu_to_blade_id(cpu)),
+ uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
stat->sflush, stat->dflush,
stat->retriesok, stat->nomsg,
@@ -617,16 +681,18 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node)
* finish the initialization of the per-blade control structures
*/
static void __init
-uv_table_bases_finish(int blade, int node, int cur_cpu,
+uv_table_bases_finish(int blade,
struct bau_control *bau_tablesp,
struct bau_desc *adp)
{
struct bau_control *bcp;
- int i;
+ int cpu;
- for (i = cur_cpu; i < cur_cpu + uv_blade_nr_possible_cpus(blade); i++) {
- bcp = (struct bau_control *)&per_cpu(bau_control, i);
+ for_each_present_cpu(cpu) {
+ if (blade != uv_cpu_to_blade_id(cpu))
+ continue;
+ bcp = (struct bau_control *)&per_cpu(bau_control, cpu);
bcp->bau_msg_head = bau_tablesp->va_queue_first;
bcp->va_queue_first = bau_tablesp->va_queue_first;
bcp->va_queue_last = bau_tablesp->va_queue_last;
@@ -649,11 +715,10 @@ uv_activation_descriptor_init(int node, int pnode)
struct bau_desc *adp;
struct bau_desc *ad2;
- adp = (struct bau_desc *)
- kmalloc_node(16384, GFP_KERNEL, node);
+ adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node);
BUG_ON(!adp);
- pa = __pa((unsigned long)adp);
+ pa = uv_gpa(adp); /* need the real nasid*/
n = pa >> uv_nshift;
m = pa & uv_mmask;
@@ -667,8 +732,12 @@ uv_activation_descriptor_init(int node, int pnode)
for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) {
memset(ad2, 0, sizeof(struct bau_desc));
ad2->header.sw_ack_flag = 1;
- ad2->header.base_dest_nodeid =
- uv_blade_to_pnode(uv_cpu_to_blade_id(0));
+ /*
+ * base_dest_nodeid is the first node in the partition, so
+ * the bit map will indicate partition-relative node numbers.
+ * note that base_dest_nodeid is actually a nasid.
+ */
+ ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
ad2->header.command = UV_NET_ENDPOINT_INTD;
ad2->header.int_both = 1;
/*
@@ -686,6 +755,8 @@ static struct bau_payload_queue_entry * __init
uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
{
struct bau_payload_queue_entry *pqp;
+ unsigned long pa;
+ int pn;
char *cp;
pqp = (struct bau_payload_queue_entry *) kmalloc_node(
@@ -696,10 +767,14 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
cp = (char *)pqp + 31;
pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
bau_tablesp->va_queue_first = pqp;
+ /*
+ * need the pnode of where the memory was really allocated
+ */
+ pa = uv_gpa(pqp);
+ pn = pa >> uv_nshift;
uv_write_global_mmr64(pnode,
UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
- ((unsigned long)pnode <<
- UV_PAYLOADQ_PNODE_SHIFT) |
+ ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
uv_physnodeaddr(pqp));
uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
uv_physnodeaddr(pqp));
@@ -715,8 +790,9 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
/*
* Initialization of each UV blade's structures
*/
-static int __init uv_init_blade(int blade, int node, int cur_cpu)
+static int __init uv_init_blade(int blade)
{
+ int node;
int pnode;
unsigned long pa;
unsigned long apicid;
@@ -724,16 +800,17 @@ static int __init uv_init_blade(int blade, int node, int cur_cpu)
struct bau_payload_queue_entry *pqp;
struct bau_control *bau_tablesp;
+ node = blade_to_first_node(blade);
bau_tablesp = uv_table_bases_init(blade, node);
pnode = uv_blade_to_pnode(blade);
adp = uv_activation_descriptor_init(node, pnode);
pqp = uv_payload_queue_init(node, pnode, bau_tablesp);
- uv_table_bases_finish(blade, node, cur_cpu, bau_tablesp, adp);
+ uv_table_bases_finish(blade, bau_tablesp, adp);
/*
* the below initialization can't be in firmware because the
* messaging IRQ will be determined by the OS
*/
- apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+ apicid = blade_to_first_apicid(blade);
pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
if ((pa & 0xff) != UV_BAU_MESSAGE) {
uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@@ -748,44 +825,34 @@ static int __init uv_init_blade(int blade, int node, int cur_cpu)
static int __init uv_bau_init(void)
{
int blade;
- int node;
int nblades;
- int last_blade;
int cur_cpu;
if (!is_uv_system())
return 0;
for_each_possible_cpu(cur_cpu)
- alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
+ zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
GFP_KERNEL, cpu_to_node(cur_cpu));
uv_bau_retry_limit = 1;
uv_nshift = uv_hub_info->n_val;
uv_mmask = (1UL << uv_hub_info->n_val) - 1;
- nblades = 0;
- last_blade = -1;
- cur_cpu = 0;
- for_each_online_node(node) {
- blade = uv_node_to_blade_id(node);
- if (blade == last_blade)
- continue;
- last_blade = blade;
- nblades++;
- }
+ nblades = uv_num_possible_blades();
+
uv_bau_table_bases = (struct bau_control **)
kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
BUG_ON(!uv_bau_table_bases);
- last_blade = -1;
- for_each_online_node(node) {
- blade = uv_node_to_blade_id(node);
- if (blade == last_blade)
- continue;
- last_blade = blade;
- uv_init_blade(blade, node, cur_cpu);
- cur_cpu += uv_blade_nr_possible_cpus(blade);
- }
+ uv_partition_base_pnode = 0x7fffffff;
+ for (blade = 0; blade < nblades; blade++)
+ if (uv_blade_nr_possible_cpus(blade) &&
+ (uv_blade_to_pnode(blade) < uv_partition_base_pnode))
+ uv_partition_base_pnode = uv_blade_to_pnode(blade);
+ for (blade = 0; blade < nblades; blade++)
+ if (uv_blade_nr_possible_cpus(blade))
+ uv_init_blade(blade);
+
alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
uv_enable_timeouts();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a1d288327ff0..2310700faca5 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -969,11 +969,8 @@ void __init trap_init(void)
for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
set_bit(i, used_vectors);
-#ifdef CONFIG_X86_64
set_bit(IA32_SYSCALL_VECTOR, used_vectors);
-#else
- set_bit(SYSCALL_VECTOR, used_vectors);
-#endif
+
/*
* Should be a barrier for any external CPU state:
*/
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 7a567ebe6361..d57de05dc430 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -699,7 +699,7 @@ static struct clocksource clocksource_tsc;
* code, which is necessary to support wrapping clocksources like pm
* timer.
*/
-static cycle_t read_tsc(void)
+static cycle_t read_tsc(struct clocksource *cs)
{
cycle_t ret = (cycle_t)get_cycles();
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
index 67f9b9dbf800..36afb98675a4 100644
--- a/arch/x86/kernel/uv_sysfs.c
+++ b/arch/x86/kernel/uv_sysfs.c
@@ -21,6 +21,7 @@
#include <linux/sysdev.h>
#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
struct kobject *sgi_uv_kobj;
@@ -47,6 +48,9 @@ static int __init sgi_uv_sysfs_init(void)
{
unsigned long ret;
+ if (!is_uv_system())
+ return -ENODEV;
+
if (!sgi_uv_kobj)
sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
if (!sgi_uv_kobj) {
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c
index 2ffb6c53326e..583f11d5c480 100644
--- a/arch/x86/kernel/uv_time.c
+++ b/arch/x86/kernel/uv_time.c
@@ -29,7 +29,7 @@
#define RTC_NAME "sgi_rtc"
-static cycle_t uv_read_rtc(void);
+static cycle_t uv_read_rtc(struct clocksource *cs);
static int uv_rtc_next_event(unsigned long, struct clock_event_device *);
static void uv_rtc_timer_setup(enum clock_event_mode,
struct clock_event_device *);
@@ -123,7 +123,7 @@ static int uv_setup_intr(int cpu, u64 expires)
/* Initialize comparator value */
uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
- return (expires < uv_read_rtc() && !uv_intr_pending(pnode));
+ return (expires < uv_read_rtc(NULL) && !uv_intr_pending(pnode));
}
/*
@@ -256,7 +256,7 @@ static int uv_rtc_unset_timer(int cpu)
spin_lock_irqsave(&head->lock, flags);
- if (head->next_cpu == bcpu && uv_read_rtc() >= *t)
+ if (head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t)
rc = 1;
*t = ULLONG_MAX;
@@ -278,7 +278,7 @@ static int uv_rtc_unset_timer(int cpu)
/*
* Read the RTC.
*/
-static cycle_t uv_read_rtc(void)
+static cycle_t uv_read_rtc(struct clocksource *cs)
{
return (cycle_t)uv_read_local_mmr(UVH_RTC);
}
@@ -291,7 +291,7 @@ static int uv_rtc_next_event(unsigned long delta,
{
int ced_cpu = cpumask_first(ced->cpumask);
- return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc());
+ return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL));
}
/*
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index d303369a7bad..2b3eb82efeeb 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -283,7 +283,7 @@ void __devinit vmi_time_ap_init(void)
/** vmi clocksource */
static struct clocksource clocksource_vmi;
-static cycle_t read_real_cycles(void)
+static cycle_t read_real_cycles(struct clocksource *cs)
{
cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
return max(ret, clocksource_vmi.cycle_last);
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 0a5b04aa98f1..c5ee17e8c6d9 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -89,7 +89,7 @@ int save_i387_xstate(void __user *buf)
if (!used_math())
return 0;
- clear_used_math(); /* trigger finit */
+
if (task_thread_info(tsk)->status & TS_USEDFPU) {
/*
* Start with clearing the user buffer. This will present a
@@ -114,6 +114,8 @@ int save_i387_xstate(void __user *buf)
return -1;
}
+ clear_used_math(); /* trigger finit */
+
if (task_thread_info(tsk)->status & TS_XSAVE) {
struct _fpstate __user *fx = buf;
struct _xstate __user *x = buf;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2a36f7f7c4c7..32cf11e5728a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1248,7 +1248,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
sp->gfn = gfn;
sp->role = role;
- sp->global = role.cr4_pge;
+ sp->global = 0;
hlist_add_head(&sp->hash_link, bucket);
if (!direct) {
if (rmap_write_protect(vcpu->kvm, gfn))
@@ -2897,8 +2897,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
{
- kvm_x86_ops->tlb_flush(vcpu);
- set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
+ kvm_set_cr3(vcpu, vcpu->arch.cr3);
return 1;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1821c2078199..1f8510c51d6e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -411,7 +411,6 @@ static __init int svm_hardware_setup(void)
iopm_va = page_address(iopm_pages);
memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
- clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */
iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
if (boot_cpu_has(X86_FEATURE_NX))
@@ -796,6 +795,11 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
+ /* AMD's VMCB does not have an explicit unusable field, so emulate it
+ * for cross vendor migration purposes by "not present"
+ */
+ var->unusable = !var->present || (var->type == 0);
+
switch (seg) {
case VCPU_SREG_CS:
/*
@@ -827,8 +831,6 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
var->type |= 0x1;
break;
}
-
- var->unusable = !var->present;
}
static int svm_get_cpl(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8ca100a9ecac..3944e917e794 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -338,6 +338,9 @@ EXPORT_SYMBOL_GPL(kvm_lmsw);
void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
+ unsigned long old_cr4 = vcpu->arch.cr4;
+ unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
+
if (cr4 & CR4_RESERVED_BITS) {
printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
kvm_inject_gp(vcpu, 0);
@@ -351,7 +354,8 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
kvm_inject_gp(vcpu, 0);
return;
}
- } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
+ } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
+ && ((cr4 ^ old_cr4) & pdptr_bits)
&& !load_pdptrs(vcpu, vcpu->arch.cr3)) {
printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
kvm_inject_gp(vcpu, 0);
@@ -1121,9 +1125,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
static int is_efer_nx(void)
{
- u64 efer;
+ unsigned long long efer = 0;
- rdmsrl(MSR_EFER, efer);
+ rdmsrl_safe(MSR_EFER, &efer);
return efer & EFER_NX;
}
@@ -1259,7 +1263,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) |
bit(X86_FEATURE_SYSCALL) |
- (bit(X86_FEATURE_NX) && is_efer_nx()) |
+ (is_efer_nx() ? bit(X86_FEATURE_NX) : 0) |
#ifdef CONFIG_X86_64
bit(X86_FEATURE_LM) |
#endif
@@ -2775,6 +2779,9 @@ out:
void kvm_arch_exit(void)
{
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
kvm_x86_ops = NULL;
kvm_mmu_module_exit();
}
@@ -4159,6 +4166,11 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
+ if (vcpu->arch.time_page) {
+ kvm_release_page_dirty(vcpu->arch.time_page);
+ vcpu->arch.time_page = NULL;
+ }
+
kvm_x86_ops->vcpu_free(vcpu);
}
diff --git a/arch/x86/lguest/Makefile b/arch/x86/lguest/Makefile
index 27f0c9ed7f60..94e0e54056a9 100644
--- a/arch/x86/lguest/Makefile
+++ b/arch/x86/lguest/Makefile
@@ -1 +1,2 @@
obj-y := i386_head.o boot.o
+CFLAGS_boot.o := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index e94a11e42f98..ef4205c1a7a5 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -67,6 +67,7 @@
#include <asm/mce.h>
#include <asm/io.h>
#include <asm/i387.h>
+#include <asm/stackprotector.h>
#include <asm/reboot.h> /* for struct machine_ops */
/*G:010 Welcome to the Guest!
@@ -273,15 +274,15 @@ static void lguest_load_idt(const struct desc_ptr *desc)
* controls the entire thing and the Guest asks it to make changes using the
* LOAD_GDT hypercall.
*
- * This is the opposite of the IDT code where we have a LOAD_IDT_ENTRY
- * hypercall and use that repeatedly to load a new IDT. I don't think it
- * really matters, but wouldn't it be nice if they were the same? Wouldn't
- * it be even better if you were the one to send the patch to fix it?
+ * This is the exactly like the IDT code.
*/
static void lguest_load_gdt(const struct desc_ptr *desc)
{
- BUG_ON((desc->size + 1) / 8 != GDT_ENTRIES);
- kvm_hypercall2(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES);
+ unsigned int i;
+ struct desc_struct *gdt = (void *)desc->address;
+
+ for (i = 0; i < (desc->size+1)/8; i++)
+ kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b);
}
/* For a single GDT entry which changes, we do the lazy thing: alter our GDT,
@@ -291,7 +292,9 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
const void *desc, int type)
{
native_write_gdt_entry(dt, entrynum, desc, type);
- kvm_hypercall2(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES);
+ /* Tell Host about this new entry. */
+ kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, entrynum,
+ dt[entrynum].a, dt[entrynum].b);
}
/* OK, I lied. There are three "thread local storage" GDT entries which change
@@ -634,7 +637,7 @@ static void __init lguest_init_IRQ(void)
void lguest_setup_irq(unsigned int irq)
{
- irq_to_desc_alloc_cpu(irq, 0);
+ irq_to_desc_alloc_node(irq, 0);
set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
handle_level_irq, "level");
}
@@ -661,7 +664,7 @@ static unsigned long lguest_tsc_khz(void)
/* If we can't use the TSC, the kernel falls back to our lower-priority
* "lguest_clock", where we read the time value given to us by the Host. */
-static cycle_t lguest_clock_read(void)
+static cycle_t lguest_clock_read(struct clocksource *cs)
{
unsigned long sec, nsec;
@@ -1086,13 +1089,21 @@ __init void lguest_init(void)
* lguest_init() where the rest of the fairly chaotic boot setup
* occurs. */
+ /* The stack protector is a weird thing where gcc places a canary
+ * value on the stack and then checks it on return. This file is
+ * compiled with -fno-stack-protector it, so we got this far without
+ * problems. The value of the canary is kept at offset 20 from the
+ * %gs register, so we need to set that up before calling C functions
+ * in other files. */
+ setup_stack_canary_segment(0);
+ /* We could just call load_stack_canary_segment(), but we might as
+ * call switch_to_new_gdt() which loads the whole table and sets up
+ * the per-cpu segment descriptor register %fs as well. */
+ switch_to_new_gdt(0);
+
/* As described in head_32.S, we map the first 128M of memory. */
max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
- /* Load the %fs segment register (the per-cpu segment register) with
- * the normal data segment to get through booting. */
- asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");
-
/* The Host<->Guest Switcher lives at the top of our address space, and
* the Host told us how big it is when we made LGUEST_INIT hypercall:
* it put the answer in lguest_data.reserve_mem */
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 8f307d914c2e..f46c340727b8 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -26,12 +26,16 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
unsigned long sbase = saddr & PUD_MASK;
unsigned long s_end = sbase + PUD_SIZE;
+ /* Allow segments to share if only one is marked locked */
+ unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
+ unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
+
/*
* match the virtual addresses, permission and the alignment of the
* page table page.
*/
if (pmd_index(addr) != pmd_index(saddr) ||
- vma->vm_flags != svma->vm_flags ||
+ vm_flags != svm_flags ||
sbase < svma->vm_start || svma->vm_end < s_end)
return 0;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index fd3da1dda1c9..ae4f7b5d7104 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -7,6 +7,7 @@
#include <asm/page.h>
#include <asm/page_types.h>
#include <asm/sections.h>
+#include <asm/setup.h>
#include <asm/system.h>
#include <asm/tlbflush.h>
@@ -304,8 +305,23 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
#endif
#ifdef CONFIG_X86_64
- if (!after_bootmem)
+ if (!after_bootmem && !start) {
+ pud_t *pud;
+ pmd_t *pmd;
+
mmu_cr4_features = read_cr4();
+
+ /*
+ * _brk_end cannot change anymore, but it and _end may be
+ * located on different 2M pages. cleanup_highmap(), however,
+ * can only consider _end when it runs, so destroy any
+ * mappings beyond _brk_end here.
+ */
+ pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
+ pmd = pmd_offset(pud, _brk_end - 1);
+ while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
+ pmd_clear(pmd);
+ }
#endif
__flush_tlb_all();
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 09daebfdb11c..8a450930834f 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -280,15 +280,16 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
return NULL;
area->phys_addr = phys_addr;
vaddr = (unsigned long) area->addr;
- if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
+
+ if (kernel_map_sync_memtype(phys_addr, size, prot_val)) {
free_memtype(phys_addr, phys_addr + size);
free_vm_area(area);
return NULL;
}
- if (ioremap_change_attr(vaddr, size, prot_val) < 0) {
+ if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
free_memtype(phys_addr, phys_addr + size);
- vunmap(area->addr);
+ free_vm_area(area);
return NULL;
}
@@ -374,7 +375,8 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
* - UC_MINUS for non-WB-able memory with no other conflicting mappings
* - Inherit from confliting mappings otherwise
*/
- err = reserve_memtype(phys_addr, phys_addr + size, -1, &flags);
+ err = reserve_memtype(phys_addr, phys_addr + size,
+ _PAGE_CACHE_WB, &flags);
if (err < 0)
return NULL;
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 4f115e00486b..50dc802a1c46 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -87,7 +87,7 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
{
struct kmmio_probe *p;
list_for_each_entry_rcu(p, &kmmio_probes, list) {
- if (addr >= p->addr && addr <= (p->addr + p->len))
+ if (addr >= p->addr && addr < (p->addr + p->len))
return p;
}
return NULL;
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 3daefa04ace5..d2530062fe00 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -257,7 +257,7 @@ void resume_map_numa_kva(pgd_t *pgd_base)
}
#endif
-static unsigned long calculate_numa_remap_pages(void)
+static __init unsigned long calculate_numa_remap_pages(void)
{
int nid;
unsigned long size, reserve_pages = 0;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index d73aaa892371..2d05a12029dc 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -188,6 +188,9 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
int nid;
+ if (!end)
+ return;
+
start = roundup(start, ZONE_ALIGN);
printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d71e1b636ce6..e17efed088c5 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -153,7 +153,7 @@ static void __cpa_flush_all(void *arg)
*/
__flush_tlb_all();
- if (cache && boot_cpu_data.x86_model >= 4)
+ if (cache && boot_cpu_data.x86 >= 4)
wbinvd();
}
@@ -208,20 +208,15 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
int in_flags, struct page **pages)
{
unsigned int i, level;
+ unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
BUG_ON(irqs_disabled());
- on_each_cpu(__cpa_flush_range, NULL, 1);
+ on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
- if (!cache)
+ if (!cache || do_wbinvd)
return;
- /* 4M threshold */
- if (numpages >= 1024) {
- if (boot_cpu_data.x86_model >= 4)
- wbinvd();
- return;
- }
/*
* We only need to flush on one CPU,
* clflush is a MESI-coherent instruction that
@@ -945,71 +940,94 @@ int _set_memory_uc(unsigned long addr, int numpages)
int set_memory_uc(unsigned long addr, int numpages)
{
+ int ret;
+
/*
* for now UC MINUS. see comments in ioremap_nocache()
*/
- if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
- _PAGE_CACHE_UC_MINUS, NULL))
- return -EINVAL;
+ ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+ _PAGE_CACHE_UC_MINUS, NULL);
+ if (ret)
+ goto out_err;
+
+ ret = _set_memory_uc(addr, numpages);
+ if (ret)
+ goto out_free;
- return _set_memory_uc(addr, numpages);
+ return 0;
+
+out_free:
+ free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+out_err:
+ return ret;
}
EXPORT_SYMBOL(set_memory_uc);
int set_memory_array_uc(unsigned long *addr, int addrinarray)
{
- unsigned long start;
- unsigned long end;
- int i;
+ int i, j;
+ int ret;
+
/*
* for now UC MINUS. see comments in ioremap_nocache()
*/
for (i = 0; i < addrinarray; i++) {
- start = __pa(addr[i]);
- for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
- if (end != __pa(addr[i + 1]))
- break;
- i++;
- }
- if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
- goto out;
+ ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE,
+ _PAGE_CACHE_UC_MINUS, NULL);
+ if (ret)
+ goto out_free;
}
- return change_page_attr_set(addr, addrinarray,
+ ret = change_page_attr_set(addr, addrinarray,
__pgprot(_PAGE_CACHE_UC_MINUS), 1);
-out:
- for (i = 0; i < addrinarray; i++) {
- unsigned long tmp = __pa(addr[i]);
-
- if (tmp == start)
- break;
- for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
- if (end != __pa(addr[i + 1]))
- break;
- i++;
- }
- free_memtype(tmp, end);
- }
- return -EINVAL;
+ if (ret)
+ goto out_free;
+
+ return 0;
+
+out_free:
+ for (j = 0; j < i; j++)
+ free_memtype(__pa(addr[j]), __pa(addr[j]) + PAGE_SIZE);
+
+ return ret;
}
EXPORT_SYMBOL(set_memory_array_uc);
int _set_memory_wc(unsigned long addr, int numpages)
{
- return change_page_attr_set(&addr, numpages,
+ int ret;
+ ret = change_page_attr_set(&addr, numpages,
+ __pgprot(_PAGE_CACHE_UC_MINUS), 0);
+
+ if (!ret) {
+ ret = change_page_attr_set(&addr, numpages,
__pgprot(_PAGE_CACHE_WC), 0);
+ }
+ return ret;
}
int set_memory_wc(unsigned long addr, int numpages)
{
+ int ret;
+
if (!pat_enabled)
return set_memory_uc(addr, numpages);
- if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
- _PAGE_CACHE_WC, NULL))
- return -EINVAL;
+ ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+ _PAGE_CACHE_WC, NULL);
+ if (ret)
+ goto out_err;
- return _set_memory_wc(addr, numpages);
+ ret = _set_memory_wc(addr, numpages);
+ if (ret)
+ goto out_free;
+
+ return 0;
+
+out_free:
+ free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+out_err:
+ return ret;
}
EXPORT_SYMBOL(set_memory_wc);
@@ -1021,29 +1039,31 @@ int _set_memory_wb(unsigned long addr, int numpages)
int set_memory_wb(unsigned long addr, int numpages)
{
- free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+ int ret;
+
+ ret = _set_memory_wb(addr, numpages);
+ if (ret)
+ return ret;
- return _set_memory_wb(addr, numpages);
+ free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+ return 0;
}
EXPORT_SYMBOL(set_memory_wb);
int set_memory_array_wb(unsigned long *addr, int addrinarray)
{
int i;
+ int ret;
- for (i = 0; i < addrinarray; i++) {
- unsigned long start = __pa(addr[i]);
- unsigned long end;
-
- for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
- if (end != __pa(addr[i + 1]))
- break;
- i++;
- }
- free_memtype(start, end);
- }
- return change_page_attr_clear(addr, addrinarray,
+ ret = change_page_attr_clear(addr, addrinarray,
__pgprot(_PAGE_CACHE_MASK), 1);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < addrinarray; i++)
+ free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE);
+
+ return 0;
}
EXPORT_SYMBOL(set_memory_array_wb);
@@ -1136,6 +1156,8 @@ int set_pages_array_wb(struct page **pages, int addrinarray)
retval = cpa_clear_pages_array(pages, addrinarray,
__pgprot(_PAGE_CACHE_MASK));
+ if (retval)
+ return retval;
for (i = 0; i < addrinarray; i++) {
start = (unsigned long)page_address(pages[i]);
@@ -1143,7 +1165,7 @@ int set_pages_array_wb(struct page **pages, int addrinarray)
free_memtype(start, end);
}
- return retval;
+ return 0;
}
EXPORT_SYMBOL(set_pages_array_wb);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index c009a241d562..e6718bb28065 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -182,10 +182,10 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
u8 mtrr_type;
mtrr_type = mtrr_type_lookup(start, end);
- if (mtrr_type == MTRR_TYPE_UNCACHABLE)
- return _PAGE_CACHE_UC;
- if (mtrr_type == MTRR_TYPE_WRCOMB)
- return _PAGE_CACHE_WC;
+ if (mtrr_type != MTRR_TYPE_WRBACK)
+ return _PAGE_CACHE_UC_MINUS;
+
+ return _PAGE_CACHE_WB;
}
return req_type;
@@ -352,23 +352,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
return 0;
}
- if (req_type == -1) {
- /*
- * Call mtrr_lookup to get the type hint. This is an
- * optimization for /dev/mem mmap'ers into WB memory (BIOS
- * tools and ACPI tools). Use WB request for WB memory and use
- * UC_MINUS otherwise.
- */
- u8 mtrr_type = mtrr_type_lookup(start, end);
-
- if (mtrr_type == MTRR_TYPE_WRBACK)
- actual_type = _PAGE_CACHE_WB;
- else
- actual_type = _PAGE_CACHE_UC_MINUS;
- } else {
- actual_type = pat_x_mtrr_type(start, end,
- req_type & _PAGE_CACHE_MASK);
- }
+ /*
+ * Call mtrr_lookup to get the type hint. This is an
+ * optimization for /dev/mem mmap'ers into WB memory (BIOS
+ * tools and ACPI tools). Use WB request for WB memory and use
+ * UC_MINUS otherwise.
+ */
+ actual_type = pat_x_mtrr_type(start, end, req_type & _PAGE_CACHE_MASK);
if (new_type)
*new_type = actual_type;
@@ -546,9 +536,7 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t *vma_prot)
{
- u64 offset = ((u64) pfn) << PAGE_SHIFT;
- unsigned long flags = -1;
- int retval;
+ unsigned long flags = _PAGE_CACHE_WB;
if (!range_is_allowed(pfn, size))
return 0;
@@ -576,64 +564,11 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
}
#endif
- /*
- * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot.
- *
- * Without O_SYNC, we want to get
- * - WB for WB-able memory and no other conflicting mappings
- * - UC_MINUS for non-WB-able memory with no other conflicting mappings
- * - Inherit from confliting mappings otherwise
- */
- if (flags != -1) {
- retval = reserve_memtype(offset, offset + size, flags, NULL);
- } else {
- retval = reserve_memtype(offset, offset + size, -1, &flags);
- }
-
- if (retval < 0)
- return 0;
-
- if (((pfn < max_low_pfn_mapped) ||
- (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) &&
- ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
- free_memtype(offset, offset + size);
- printk(KERN_INFO
- "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
- current->comm, current->pid,
- cattr_name(flags),
- offset, (unsigned long long)(offset + size));
- return 0;
- }
-
*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
flags);
return 1;
}
-void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
-{
- unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
- u64 addr = (u64)pfn << PAGE_SHIFT;
- unsigned long flags;
-
- reserve_memtype(addr, addr + size, want_flags, &flags);
- if (flags != want_flags) {
- printk(KERN_INFO
- "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n",
- current->comm, current->pid,
- cattr_name(want_flags),
- addr, (unsigned long long)(addr + size),
- cattr_name(flags));
- }
-}
-
-void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
-{
- u64 addr = (u64)pfn << PAGE_SHIFT;
-
- free_memtype(addr, addr + size);
-}
-
/*
* Change the memory type for the physial address range in kernel identity
* mapping space if that range is a part of identity map.
@@ -671,8 +606,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
{
int is_ram = 0;
int ret;
- unsigned long flags;
unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
+ unsigned long flags = want_flags;
is_ram = pat_pagerange_is_ram(paddr, paddr + size);
@@ -734,29 +669,28 @@ static void free_pfn_range(u64 paddr, unsigned long size)
*
* If the vma has a linear pfn mapping for the entire range, we get the prot
* from pte and reserve the entire vma range with single reserve_pfn_range call.
- * Otherwise, we reserve the entire vma range, my ging through the PTEs page
- * by page to get physical address and protection.
*/
int track_pfn_vma_copy(struct vm_area_struct *vma)
{
- int retval = 0;
- unsigned long i, j;
resource_size_t paddr;
unsigned long prot;
- unsigned long vma_start = vma->vm_start;
- unsigned long vma_end = vma->vm_end;
- unsigned long vma_size = vma_end - vma_start;
+ unsigned long vma_size = vma->vm_end - vma->vm_start;
pgprot_t pgprot;
if (!pat_enabled)
return 0;
+ /*
+ * For now, only handle remap_pfn_range() vmas where
+ * is_linear_pfn_mapping() == TRUE. Handling of
+ * vm_insert_pfn() is TBD.
+ */
if (is_linear_pfn_mapping(vma)) {
/*
* reserve the whole chunk covered by vma. We need the
* starting address and protection from pte.
*/
- if (follow_phys(vma, vma_start, 0, &prot, &paddr)) {
+ if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
WARN_ON_ONCE(1);
return -EINVAL;
}
@@ -764,28 +698,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
}
- /* reserve entire vma page by page, using pfn and prot from pte */
- for (i = 0; i < vma_size; i += PAGE_SIZE) {
- if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
- continue;
-
- pgprot = __pgprot(prot);
- retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1);
- if (retval)
- goto cleanup_ret;
- }
return 0;
-
-cleanup_ret:
- /* Reserve error: Cleanup partial reservation and return error */
- for (j = 0; j < i; j += PAGE_SIZE) {
- if (follow_phys(vma, vma_start + j, 0, &prot, &paddr))
- continue;
-
- free_pfn_range(paddr, PAGE_SIZE);
- }
-
- return retval;
}
/*
@@ -795,50 +708,28 @@ cleanup_ret:
* prot is passed in as a parameter for the new mapping. If the vma has a
* linear pfn mapping for the entire range reserve the entire vma range with
* single reserve_pfn_range call.
- * Otherwise, we look t the pfn and size and reserve only the specified range
- * page by page.
- *
- * Note that this function can be called with caller trying to map only a
- * subrange/page inside the vma.
*/
int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long size)
{
- int retval = 0;
- unsigned long i, j;
- resource_size_t base_paddr;
resource_size_t paddr;
- unsigned long vma_start = vma->vm_start;
- unsigned long vma_end = vma->vm_end;
- unsigned long vma_size = vma_end - vma_start;
+ unsigned long vma_size = vma->vm_end - vma->vm_start;
if (!pat_enabled)
return 0;
+ /*
+ * For now, only handle remap_pfn_range() vmas where
+ * is_linear_pfn_mapping() == TRUE. Handling of
+ * vm_insert_pfn() is TBD.
+ */
if (is_linear_pfn_mapping(vma)) {
/* reserve the whole chunk starting from vm_pgoff */
paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
return reserve_pfn_range(paddr, vma_size, prot, 0);
}
- /* reserve page by page using pfn and size */
- base_paddr = (resource_size_t)pfn << PAGE_SHIFT;
- for (i = 0; i < size; i += PAGE_SIZE) {
- paddr = base_paddr + i;
- retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0);
- if (retval)
- goto cleanup_ret;
- }
return 0;
-
-cleanup_ret:
- /* Reserve error: Cleanup partial reservation and return error */
- for (j = 0; j < i; j += PAGE_SIZE) {
- paddr = base_paddr + j;
- free_pfn_range(paddr, PAGE_SIZE);
- }
-
- return retval;
}
/*
@@ -849,39 +740,23 @@ cleanup_ret:
void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size)
{
- unsigned long i;
resource_size_t paddr;
- unsigned long prot;
- unsigned long vma_start = vma->vm_start;
- unsigned long vma_end = vma->vm_end;
- unsigned long vma_size = vma_end - vma_start;
+ unsigned long vma_size = vma->vm_end - vma->vm_start;
if (!pat_enabled)
return;
+ /*
+ * For now, only handle remap_pfn_range() vmas where
+ * is_linear_pfn_mapping() == TRUE. Handling of
+ * vm_insert_pfn() is TBD.
+ */
if (is_linear_pfn_mapping(vma)) {
/* free the whole chunk starting from vm_pgoff */
paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
free_pfn_range(paddr, vma_size);
return;
}
-
- if (size != 0 && size != vma_size) {
- /* free page by page, using pfn and size */
- paddr = (resource_size_t)pfn << PAGE_SHIFT;
- for (i = 0; i < size; i += PAGE_SIZE) {
- paddr = paddr + i;
- free_pfn_range(paddr, PAGE_SIZE);
- }
- } else {
- /* free entire vma, page by page, using the pfn from pte */
- for (i = 0; i < vma_size; i += PAGE_SIZE) {
- if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
- continue;
-
- free_pfn_range(paddr, PAGE_SIZE);
- }
- }
}
pgprot_t pgprot_writecombine(pgprot_t prot)
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 16ae70fc57e7..29a0e37114f8 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -216,7 +216,7 @@ int __init get_memcfg_from_srat(void)
if (num_memory_chunks == 0) {
printk(KERN_WARNING
- "could not finy any ACPI SRAT memory areas.\n");
+ "could not find any ACPI SRAT memory areas.\n");
goto out_fail;
}
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index c7d272b8574c..01765955baaf 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -28,6 +28,7 @@ int acpi_numa __initdata;
static struct acpi_table_slit *acpi_slit;
static nodemask_t nodes_parsed __initdata;
+static nodemask_t cpu_nodes_parsed __initdata;
static struct bootnode nodes[MAX_NUMNODES] __initdata;
static struct bootnode nodes_add[MAX_NUMNODES];
static int found_add_area __initdata;
@@ -141,6 +142,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
apic_id = pa->apic_id;
apicid_to_node[apic_id] = node;
+ node_set(node, cpu_nodes_parsed);
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
pxm, apic_id, node);
@@ -174,6 +176,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
else
apic_id = pa->apic_id;
apicid_to_node[apic_id] = node;
+ node_set(node, cpu_nodes_parsed);
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
pxm, apic_id, node);
@@ -358,6 +361,7 @@ static void __init unparse_node(int node)
{
int i;
node_clear(node, nodes_parsed);
+ node_clear(node, cpu_nodes_parsed);
for (i = 0; i < MAX_LOCAL_APIC; i++) {
if (apicid_to_node[i] == node)
apicid_to_node[i] = NUMA_NO_NODE;
@@ -402,7 +406,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
return -1;
}
- node_possible_map = nodes_parsed;
+ /* Account for nodes with cpus and no memory */
+ nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
/* Finally register nodes */
for_each_node_mask(i, node_possible_map)
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 04df67f8a7ba..044897be021f 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -76,9 +76,9 @@ void
x86_backtrace(struct pt_regs * const regs, unsigned int depth)
{
struct frame_head *head = (struct frame_head *)frame_pointer(regs);
- unsigned long stack = kernel_trap_sp(regs);
if (!user_mode_vm(regs)) {
+ unsigned long stack = kernel_stack_pointer(regs);
if (depth)
dump_trace(NULL, regs, (unsigned long *)stack, 0,
&backtrace_ops, &depth);
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 9bb09823b362..f893d6a6e803 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -94,12 +94,16 @@ struct pci_root_info {
static int pci_root_num;
static struct pci_root_info pci_root_info[PCI_ROOT_NR];
-void set_pci_bus_resources_arch_default(struct pci_bus *b)
+void x86_pci_root_bus_res_quirks(struct pci_bus *b)
{
int i;
int j;
struct pci_root_info *info;
+ /* don't go for it if _CRS is used */
+ if (pci_probe & PCI_USE__CRS)
+ return;
+
/* if only one root bus, don't need to anything */
if (pci_root_num < 2)
return;
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 8c362b96b644..2202b6257b82 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -147,10 +147,13 @@ static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
* are examined.
*/
-void __devinit pcibios_fixup_bus(struct pci_bus *b)
+void __devinit pcibios_fixup_bus(struct pci_bus *b)
{
struct pci_dev *dev;
+ /* root bus? */
+ if (!b->parent)
+ x86_pci_root_bus_res_quirks(b);
pci_read_bridge_bases(b);
list_for_each_entry(dev, &b->devices, bus_list)
pcibios_fixup_device_resources(dev);
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index f1817f71e009..a85bef20a3b9 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -238,6 +238,10 @@ void __init pcibios_resource_survey(void)
*/
fs_initcall(pcibios_assign_resources);
+void __weak x86_pci_root_bus_res_quirks(struct pci_bus *b)
+{
+}
+
/*
* If we set up a device for bus mastering, we need to check the latency
* timer as certain crappy BIOSes forget to set it properly.
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index fecbce6e7d7c..0696d506c4ad 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -889,6 +889,9 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
return 0;
}
+ if (io_apic_assign_pci_irqs)
+ return 0;
+
/* Find IRQ routing entry */
if (!pirq_table)
@@ -1039,56 +1042,15 @@ static void __init pcibios_fixup_irqs(void)
pirq_penalty[dev->irq]++;
}
+ if (io_apic_assign_pci_irqs)
+ return;
+
dev = NULL;
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
if (!pin)
continue;
-#ifdef CONFIG_X86_IO_APIC
- /*
- * Recalculate IRQ numbers if we use the I/O APIC.
- */
- if (io_apic_assign_pci_irqs) {
- int irq;
-
- /*
- * interrupt pins are numbered starting from 1
- */
- irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
- PCI_SLOT(dev->devfn), pin - 1);
- /*
- * Busses behind bridges are typically not listed in the
- * MP-table. In this case we have to look up the IRQ
- * based on the parent bus, parent slot, and pin number.
- * The SMP code detects such bridged busses itself so we
- * should get into this branch reliably.
- */
- if (irq < 0 && dev->bus->parent) {
- /* go back to the bridge */
- struct pci_dev *bridge = dev->bus->self;
- int bus;
-
- pin = pci_swizzle_interrupt_pin(dev, pin);
- bus = bridge->bus->number;
- irq = IO_APIC_get_PCI_irq_vector(bus,
- PCI_SLOT(bridge->devfn), pin - 1);
- if (irq >= 0)
- dev_warn(&dev->dev,
- "using bridge %s INT %c to "
- "get IRQ %d\n",
- pci_name(bridge),
- 'A' + pin - 1, irq);
- }
- if (irq >= 0) {
- dev_info(&dev->dev,
- "PCI->APIC IRQ transform: INT %c "
- "-> IRQ %d\n",
- 'A' + pin - 1, irq);
- dev->irq = irq;
- }
- }
-#endif
/*
* Still no IRQ? Try to lookup one...
*/
@@ -1183,6 +1145,19 @@ int __init pcibios_irq_init(void)
pcibios_enable_irq = pirq_enable_irq;
pcibios_fixup_irqs();
+
+ if (io_apic_assign_pci_irqs && pci_routeirq) {
+ struct pci_dev *dev = NULL;
+ /*
+ * PCI IRQ routing is set up by pci_enable_device(), but we
+ * also do it here in case there are still broken drivers that
+ * don't use pci_enable_device().
+ */
+ printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
+ for_each_pci_dev(dev)
+ pirq_enable_irq(dev);
+ }
+
return 0;
}
@@ -1213,16 +1188,23 @@ void pcibios_penalize_isa_irq(int irq, int active)
static int pirq_enable_irq(struct pci_dev *dev)
{
u8 pin;
- struct pci_dev *temp_dev;
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
- if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+ if (pin && !pcibios_lookup_irq(dev, 1)) {
char *msg = "";
+ if (!io_apic_assign_pci_irqs && dev->irq)
+ return 0;
+
if (io_apic_assign_pci_irqs) {
+#ifdef CONFIG_X86_IO_APIC
+ struct pci_dev *temp_dev;
int irq;
+ struct io_apic_irq_attr irq_attr;
- irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin - 1);
+ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
+ PCI_SLOT(dev->devfn),
+ pin - 1, &irq_attr);
/*
* Busses behind bridges are typically not listed in the MP-table.
* In this case we have to look up the IRQ based on the parent bus,
@@ -1235,7 +1217,8 @@ static int pirq_enable_irq(struct pci_dev *dev)
pin = pci_swizzle_interrupt_pin(dev, pin);
irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
- PCI_SLOT(bridge->devfn), pin - 1);
+ PCI_SLOT(bridge->devfn),
+ pin - 1, &irq_attr);
if (irq >= 0)
dev_warn(&dev->dev, "using bridge %s "
"INT %c to get IRQ %d\n",
@@ -1245,12 +1228,15 @@ static int pirq_enable_irq(struct pci_dev *dev)
}
dev = temp_dev;
if (irq >= 0) {
+ io_apic_set_pci_routing(&dev->dev, irq,
+ &irq_attr);
+ dev->irq = irq;
dev_info(&dev->dev, "PCI->APIC IRQ transform: "
"INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
- dev->irq = irq;
return 0;
} else
msg = "; probably buggy MP table";
+#endif
} else if (pci_probe & PCI_BIOS_IRQ_SCAN)
msg = "";
else
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 905bb526b133..8766b0e216c5 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -418,7 +418,7 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used)
struct resource mcfg_res;
mcfg_res.start = start;
- mcfg_res.end = end;
+ mcfg_res.end = end - 1;
mcfg_res.flags = 0;
acpi_get_devices("PNP0C01", find_mboard_resource, &mcfg_res, NULL);
@@ -439,7 +439,7 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved,
u64 old_size = size;
int valid = 0;
- while (!is_reserved(addr, addr + size - 1, E820_RESERVED)) {
+ while (!is_reserved(addr, addr + size, E820_RESERVED)) {
size >>= 1;
if (size < (16UL<<20))
break;
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index d9d35824c56f..6a40b78b46aa 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -104,11 +104,13 @@ notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
long ret;
if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
- BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
- offsetof(struct timespec, tv_nsec) ||
- sizeof(*tv) != sizeof(struct timespec));
- do_realtime((struct timespec *)tv);
- tv->tv_usec /= 1000;
+ if (likely(tv != NULL)) {
+ BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
+ offsetof(struct timespec, tv_nsec) ||
+ sizeof(*tv) != sizeof(struct timespec));
+ do_realtime((struct timespec *)tv);
+ tv->tv_usec /= 1000;
+ }
if (unlikely(tz != NULL)) {
/* Avoid memcpy. Some old compilers fail to inline it */
tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3b767d03fd6a..172438f86a02 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -9,5 +9,6 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
time.o xen-asm.o xen-asm_$(BITS).o \
grant-table.o suspend.o
-obj-$(CONFIG_SMP) += smp.o spinlock.o
-obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o \ No newline at end of file
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
+obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 9842b1212407..fba55b1a4021 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -42,6 +42,7 @@
#include <linux/highmem.h>
#include <linux/debugfs.h>
#include <linux/bug.h>
+#include <linux/module.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -1794,6 +1795,11 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
+ reserve_early(__pa(xen_start_info->pt_base),
+ __pa(xen_start_info->pt_base +
+ xen_start_info->nr_pt_frames * PAGE_SIZE),
+ "XEN PAGETABLES");
+
return swapper_pg_dir;
}
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 14f240623497..0a5aa44299a5 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -213,6 +213,11 @@ cycle_t xen_clocksource_read(void)
return ret;
}
+static cycle_t xen_clocksource_get_cycles(struct clocksource *cs)
+{
+ return xen_clocksource_read();
+}
+
static void xen_read_wallclock(struct timespec *ts)
{
struct shared_info *s = HYPERVISOR_shared_info;
@@ -241,7 +246,7 @@ int xen_set_wallclock(unsigned long now)
static struct clocksource xen_clocksource __read_mostly = {
.name = "xen",
.rating = 400,
- .read = xen_clocksource_read,
+ .read = xen_clocksource_get_cycles,
.mask = ~0,
.mult = 1<<XEN_SHIFT, /* time directly in nanoseconds */
.shift = XEN_SHIFT,
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 20139464943c..ca6596b05d53 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -62,15 +62,26 @@ void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP
void xen_smp_init(void);
-void __init xen_init_spinlocks(void);
-__cpuinit void xen_init_lock_cpu(int cpu);
-void xen_uninit_lock_cpu(int cpu);
-
extern cpumask_var_t xen_cpu_initialized_map;
#else
static inline void xen_smp_init(void) {}
#endif
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init xen_init_spinlocks(void);
+__cpuinit void xen_init_lock_cpu(int cpu);
+void xen_uninit_lock_cpu(int cpu);
+#else
+static inline void xen_init_spinlocks(void)
+{
+}
+static inline void xen_init_lock_cpu(int cpu)
+{
+}
+static inline void xen_uninit_lock_cpu(int cpu)
+{
+}
+#endif
/* Declare an asm function, along with symbols needed to make it
inlineable */