diff options
Diffstat (limited to 'arch/x86')
37 files changed, 1854 insertions, 178 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a12dbb2b93f3..f70e3e3a9fa7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -537,9 +537,6 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT Calgary anyway, pass 'iommu=calgary' on the kernel command line. If unsure, say Y. -config IOMMU_HELPER - def_bool (CALGARY_IOMMU || GART_IOMMU) - # need this always selected by IOMMU for the VIA workaround config SWIOTLB bool @@ -550,6 +547,8 @@ config SWIOTLB access 32-bits of memory can be used on systems with more than 3 GB of memory. If unsure, say Y. +config IOMMU_HELPER + def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB) config NR_CPUS int "Maximum number of CPUs (2-255)" @@ -1505,6 +1504,10 @@ config PCI_GODIRECT config PCI_GOANY bool "Any" +config PCI_GOOLPC + bool "OLPC" + depends on OLPC + endchoice config PCI_BIOS @@ -1514,12 +1517,17 @@ config PCI_BIOS # x86-64 doesn't support PCI BIOS access from long mode so always go direct. config PCI_DIRECT def_bool y - depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY) || X86_VISWS) + depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC) || X86_VISWS) config PCI_MMCONFIG def_bool y depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY) +config PCI_OLPC + bool + depends on PCI && PCI_GOOLPC + default y + config PCI_DOMAINS def_bool y depends on PCI @@ -1639,6 +1647,13 @@ config GEODE_MFGPT_TIMER MFGPTs have a better resolution and max interval than the generic PIT, and are suitable for use as high-res timers. +config OLPC + bool "One Laptop Per Child support" + default n + help + Add support for detecting the unique features of the OLPC + XO hardware. + endif # X86_32 config K8_NB diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c index d84a48ece785..03399d64013b 100644 --- a/arch/x86/boot/edd.c +++ b/arch/x86/boot/edd.c @@ -126,17 +126,25 @@ void query_edd(void) { char eddarg[8]; int do_mbr = 1; +#ifdef CONFIG_EDD_OFF + int do_edd = 0; +#else int do_edd = 1; +#endif int be_quiet; int devno; struct edd_info ei, *edp; u32 *mbrptr; if (cmdline_find_option("edd", eddarg, sizeof eddarg) > 0) { - if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) + if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) { + do_edd = 1; do_mbr = 0; + } else if (!strcmp(eddarg, "off")) do_edd = 0; + else if (!strcmp(eddarg, "on")) + do_edd = 1; } be_quiet = cmdline_find_option_bool("quiet"); diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index bbed3a26ce55..cb3856a18c85 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -128,7 +128,7 @@ asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask) current->state = TASK_INTERRUPTIBLE; schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); return -ERESTARTNOHAND; } diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index fa19c3819540..30d54ed27e55 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -91,6 +91,8 @@ endif obj-$(CONFIG_SCx200) += scx200.o scx200-y += scx200_32.o +obj-$(CONFIG_OLPC) += olpc.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) @@ -101,4 +103,6 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o + + obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o endif diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 977ed5cdeaa3..c49ebcc6c41e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -771,6 +771,32 @@ static void __init acpi_register_lapic_address(unsigned long address) boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); } +static int __init early_acpi_parse_madt_lapic_addr_ovr(void) +{ + int count; + + if (!cpu_has_apic) + return -ENODEV; + + /* + * Note that the LAPIC address is obtained from the MADT (32-bit value) + * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). + */ + + count = + acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, + acpi_parse_lapic_addr_ovr, 0); + if (count < 0) { + printk(KERN_ERR PREFIX + "Error parsing LAPIC address override entry\n"); + return count; + } + + acpi_register_lapic_address(acpi_lapic_addr); + + return count; +} + static int __init acpi_parse_madt_lapic_entries(void) { int count; @@ -901,6 +927,33 @@ static inline int acpi_parse_madt_ioapic_entries(void) } #endif /* !CONFIG_X86_IO_APIC */ +static void __init early_acpi_process_madt(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + int error; + + if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { + + /* + * Parse MADT LAPIC entries + */ + error = early_acpi_parse_madt_lapic_addr_ovr(); + if (!error) { + acpi_lapic = 1; + smp_found_config = 1; + } + if (error == -EINVAL) { + /* + * Dell Precision Workstation 410, 610 come here. + */ + printk(KERN_ERR PREFIX + "Invalid BIOS MADT, disabling ACPI\n"); + disable_acpi(); + } + } +#endif +} + static void __init acpi_process_madt(void) { #ifdef CONFIG_X86_LOCAL_APIC @@ -1233,6 +1286,23 @@ int __init acpi_boot_table_init(void) return 0; } +int __init early_acpi_boot_init(void) +{ + /* + * If acpi_disabled, bail out + * One exception: acpi=ht continues far enough to enumerate LAPICs + */ + if (acpi_disabled && !acpi_ht) + return 1; + + /* + * Process the Multiple APIC Description Table (MADT), if present + */ + early_acpi_process_madt(); + + return 0; +} + int __init acpi_boot_init(void) { /* diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 670c3c311289..92588083950f 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -9,6 +9,7 @@ #include <linux/signal.h> #include <linux/personality.h> #include <linux/suspend.h> +#include <linux/kbuild.h> #include <asm/ucontext.h> #include "sigframe.h" #include <asm/pgtable.h> @@ -23,14 +24,6 @@ #include <linux/lguest.h> #include "../../../drivers/lguest/lg.h" -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - -#define OFFSET(sym, str, mem) \ - DEFINE(sym, offsetof(struct str, mem)); - /* workaround for a warning with -Wmissing-prototypes */ void foo(void); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 494e1e096ee6..f126c05d6170 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -10,6 +10,7 @@ #include <linux/errno.h> #include <linux/hardirq.h> #include <linux/suspend.h> +#include <linux/kbuild.h> #include <asm/pda.h> #include <asm/processor.h> #include <asm/segment.h> @@ -17,14 +18,6 @@ #include <asm/ia32.h> #include <asm/bootparam.h> -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - -#define OFFSET(sym, str, mem) \ - DEFINE(sym, offsetof(struct str, mem)) - #define __NO_STUBS 1 #undef __SYSCALL #undef _ASM_X86_64_UNISTD_H_ diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8db8f73503b3..b0c8208df9fa 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -601,6 +601,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { policy->cpus = perf->shared_cpu_map; } + policy->related_cpus = perf->shared_cpu_map; #ifdef CONFIG_SMP dmi_check_system(sw_any_bug_dmi_table); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 353efe4f5017..5d241ce94a44 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -90,7 +90,7 @@ u8 mtrr_type_lookup(u64 start, u64 end) * Look of multiple ranges matching this address and pick type * as per MTRR precedence */ - if (!mtrr_state.enabled & 2) { + if (!(mtrr_state.enabled & 2)) { return mtrr_state.def_type; } diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 1960f1985e5e..84c480bb3715 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -424,7 +424,7 @@ static int __init mtrr_if_init(void) return -ENODEV; proc_root_mtrr = - proc_create("mtrr", S_IWUSR | S_IRUGO, &proc_root, &mtrr_fops); + proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops); if (proc_root_mtrr) proc_root_mtrr->owner = THIS_MODULE; diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 645ee5e32a27..124480c0008d 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -100,7 +100,7 @@ void __init free_early(unsigned long start, unsigned long end) for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) ; - memcpy(&early_res[i], &early_res[i + 1], + memmove(&early_res[i], &early_res[i + 1], (j - 1 - i) * sizeof(struct early_res)); early_res[j - 1].end = 0; diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 00bda7bcda63..147352df28b9 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -190,8 +190,6 @@ void irq_ctx_exit(int cpu) hardirq_ctx[cpu] = NULL; } -extern asmlinkage void __do_softirq(void); - asmlinkage void do_softirq(void) { unsigned long flags; diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c new file mode 100644 index 000000000000..edc5fbfe85c0 --- /dev/null +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -0,0 +1,243 @@ +/* + * AMD Family 10h mmconfig enablement + */ + +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/pci.h> +#include <linux/dmi.h> +#include <asm/pci-direct.h> +#include <linux/sort.h> +#include <asm/io.h> +#include <asm/msr.h> +#include <asm/acpi.h> + +#include "../pci/pci.h" + +struct pci_hostbridge_probe { + u32 bus; + u32 slot; + u32 vendor; + u32 device; +}; + +static u64 __cpuinitdata fam10h_pci_mmconf_base; +static int __cpuinitdata fam10h_pci_mmconf_base_status; + +static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, + { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, +}; + +struct range { + u64 start; + u64 end; +}; + +static int __cpuinit cmp_range(const void *x1, const void *x2) +{ + const struct range *r1 = x1; + const struct range *r2 = x2; + int start1, start2; + + start1 = r1->start >> 32; + start2 = r2->start >> 32; + + return start1 - start2; +} + +/*[47:0] */ +/* need to avoid (0xfd<<32) and (0xfe<<32), ht used space */ +#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32) +#define BASE_VALID(b) ((b != (0xfdULL << 32)) && (b != (0xfeULL << 32))) +static void __cpuinit get_fam10h_pci_mmconf_base(void) +{ + int i; + unsigned bus; + unsigned slot; + int found; + + u64 val; + u32 address; + u64 tom2; + u64 base = FAM10H_PCI_MMCONF_BASE; + + int hi_mmio_num; + struct range range[8]; + + /* only try to get setting from BSP */ + /* -1 or 1 */ + if (fam10h_pci_mmconf_base_status) + return; + + if (!early_pci_allowed()) + goto fail; + + found = 0; + for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { + u32 id; + u16 device; + u16 vendor; + + bus = pci_probes[i].bus; + slot = pci_probes[i].slot; + id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); + + vendor = id & 0xffff; + device = (id>>16) & 0xffff; + if (pci_probes[i].vendor == vendor && + pci_probes[i].device == device) { + found = 1; + break; + } + } + + if (!found) + goto fail; + + /* SYS_CFG */ + address = MSR_K8_SYSCFG; + rdmsrl(address, val); + + /* TOP_MEM2 is not enabled? */ + if (!(val & (1<<21))) { + tom2 = 0; + } else { + /* TOP_MEM2 */ + address = MSR_K8_TOP_MEM2; + rdmsrl(address, val); + tom2 = val & (0xffffULL<<32); + } + + if (base <= tom2) + base = tom2 + (1ULL<<32); + + /* + * need to check if the range is in the high mmio range that is + * above 4G + */ + hi_mmio_num = 0; + for (i = 0; i < 8; i++) { + u32 reg; + u64 start; + u64 end; + reg = read_pci_config(bus, slot, 1, 0x80 + (i << 3)); + if (!(reg & 3)) + continue; + + start = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ + reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); + end = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ + + if (!end) + continue; + + range[hi_mmio_num].start = start; + range[hi_mmio_num].end = end; + hi_mmio_num++; + } + + if (!hi_mmio_num) + goto out; + + /* sort the range */ + sort(range, hi_mmio_num, sizeof(struct range), cmp_range, NULL); + + if (range[hi_mmio_num - 1].end < base) + goto out; + if (range[0].start > base) + goto out; + + /* need to find one window */ + base = range[0].start - (1ULL << 32); + if ((base > tom2) && BASE_VALID(base)) + goto out; + base = range[hi_mmio_num - 1].end + (1ULL << 32); + if ((base > tom2) && BASE_VALID(base)) + goto out; + /* need to find window between ranges */ + if (hi_mmio_num > 1) + for (i = 0; i < hi_mmio_num - 1; i++) { + if (range[i + 1].start > (range[i].end + (1ULL << 32))) { + base = range[i].end + (1ULL << 32); + if ((base > tom2) && BASE_VALID(base)) + goto out; + } + } + +fail: + fam10h_pci_mmconf_base_status = -1; + return; +out: + fam10h_pci_mmconf_base = base; + fam10h_pci_mmconf_base_status = 1; +} + +void __cpuinit fam10h_check_enable_mmcfg(void) +{ + u64 val; + u32 address; + + if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF)) + return; + + address = MSR_FAM10H_MMIO_CONF_BASE; + rdmsrl(address, val); + + /* try to make sure that AP's setting is identical to BSP setting */ + if (val & FAM10H_MMIO_CONF_ENABLE) { + unsigned busnbits; + busnbits = (val >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & + FAM10H_MMIO_CONF_BUSRANGE_MASK; + + /* only trust the one handle 256 buses, if acpi=off */ + if (!acpi_pci_disabled || busnbits >= 8) { + u64 base; + base = val & (0xffffULL << 32); + if (fam10h_pci_mmconf_base_status <= 0) { + fam10h_pci_mmconf_base = base; + fam10h_pci_mmconf_base_status = 1; + return; + } else if (fam10h_pci_mmconf_base == base) + return; + } + } + + /* + * if it is not enabled, try to enable it and assume only one segment + * with 256 buses + */ + get_fam10h_pci_mmconf_base(); + if (fam10h_pci_mmconf_base_status <= 0) + return; + + printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n"); + val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) | + (FAM10H_MMIO_CONF_BUSRANGE_MASK<<FAM10H_MMIO_CONF_BUSRANGE_SHIFT)); + val |= fam10h_pci_mmconf_base | (8 << FAM10H_MMIO_CONF_BUSRANGE_SHIFT) | + FAM10H_MMIO_CONF_ENABLE; + wrmsrl(address, val); +} + +static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d) +{ + pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF; + return 0; +} + +static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { + { + .callback = set_check_enable_amd_mmconf, + .ident = "Sun Microsystems Machine", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Sun Microsystems"), + }, + }, + {} +}; + +void __init check_enable_amd_mmconf_dmi(void) +{ + dmi_check_system(mmconf_dmi_table); +} diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c new file mode 100644 index 000000000000..3e6672274807 --- /dev/null +++ b/arch/x86/kernel/olpc.c @@ -0,0 +1,260 @@ +/* + * Support for the OLPC DCON and OLPC EC access + * + * Copyright © 2006 Advanced Micro Devices, Inc. + * Copyright © 2007-2008 Andres Salomon <dilinger@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/spinlock.h> +#include <linux/io.h> +#include <linux/string.h> +#include <asm/geode.h> +#include <asm/olpc.h> + +#ifdef CONFIG_OPEN_FIRMWARE +#include <asm/ofw.h> +#endif + +struct olpc_platform_t olpc_platform_info; +EXPORT_SYMBOL_GPL(olpc_platform_info); + +static DEFINE_SPINLOCK(ec_lock); + +/* what the timeout *should* be (in ms) */ +#define EC_BASE_TIMEOUT 20 + +/* the timeout that bugs in the EC might force us to actually use */ +static int ec_timeout = EC_BASE_TIMEOUT; + +static int __init olpc_ec_timeout_set(char *str) +{ + if (get_option(&str, &ec_timeout) != 1) { + ec_timeout = EC_BASE_TIMEOUT; + printk(KERN_ERR "olpc-ec: invalid argument to " + "'olpc_ec_timeout=', ignoring!\n"); + } + printk(KERN_DEBUG "olpc-ec: using %d ms delay for EC commands.\n", + ec_timeout); + return 1; +} +__setup("olpc_ec_timeout=", olpc_ec_timeout_set); + +/* + * These {i,o}bf_status functions return whether the buffers are full or not. + */ + +static inline unsigned int ibf_status(unsigned int port) +{ + return !!(inb(port) & 0x02); +} + +static inline unsigned int obf_status(unsigned int port) +{ + return inb(port) & 0x01; +} + +#define wait_on_ibf(p, d) __wait_on_ibf(__LINE__, (p), (d)) +static int __wait_on_ibf(unsigned int line, unsigned int port, int desired) +{ + unsigned int timeo; + int state = ibf_status(port); + + for (timeo = ec_timeout; state != desired && timeo; timeo--) { + mdelay(1); + state = ibf_status(port); + } + + if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) && + timeo < (ec_timeout - EC_BASE_TIMEOUT)) { + printk(KERN_WARNING "olpc-ec: %d: waited %u ms for IBF!\n", + line, ec_timeout - timeo); + } + + return !(state == desired); +} + +#define wait_on_obf(p, d) __wait_on_obf(__LINE__, (p), (d)) +static int __wait_on_obf(unsigned int line, unsigned int port, int desired) +{ + unsigned int timeo; + int state = obf_status(port); + + for (timeo = ec_timeout; state != desired && timeo; timeo--) { + mdelay(1); + state = obf_status(port); + } + + if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) && + timeo < (ec_timeout - EC_BASE_TIMEOUT)) { + printk(KERN_WARNING "olpc-ec: %d: waited %u ms for OBF!\n", + line, ec_timeout - timeo); + } + + return !(state == desired); +} + +/* + * This allows the kernel to run Embedded Controller commands. The EC is + * documented at <http://wiki.laptop.org/go/Embedded_controller>, and the + * available EC commands are here: + * <http://wiki.laptop.org/go/Ec_specification>. Unfortunately, while + * OpenFirmware's source is available, the EC's is not. + */ +int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen, + unsigned char *outbuf, size_t outlen) +{ + unsigned long flags; + int ret = -EIO; + int i; + + spin_lock_irqsave(&ec_lock, flags); + + /* Clear OBF */ + for (i = 0; i < 10 && (obf_status(0x6c) == 1); i++) + inb(0x68); + if (i == 10) { + printk(KERN_ERR "olpc-ec: timeout while attempting to " + "clear OBF flag!\n"); + goto err; + } + + if (wait_on_ibf(0x6c, 0)) { + printk(KERN_ERR "olpc-ec: timeout waiting for EC to " + "quiesce!\n"); + goto err; + } + +restart: + /* + * Note that if we time out during any IBF checks, that's a failure; + * we have to return. There's no way for the kernel to clear that. + * + * If we time out during an OBF check, we can restart the command; + * reissuing it will clear the OBF flag, and we should be alright. + * The OBF flag will sometimes misbehave due to what we believe + * is a hardware quirk.. + */ + printk(KERN_DEBUG "olpc-ec: running cmd 0x%x\n", cmd); + outb(cmd, 0x6c); + + if (wait_on_ibf(0x6c, 0)) { + printk(KERN_ERR "olpc-ec: timeout waiting for EC to read " + "command!\n"); + goto err; + } + + if (inbuf && inlen) { + /* write data to EC */ + for (i = 0; i < inlen; i++) { + if (wait_on_ibf(0x6c, 0)) { + printk(KERN_ERR "olpc-ec: timeout waiting for" + " EC accept data!\n"); + goto err; + } + printk(KERN_DEBUG "olpc-ec: sending cmd arg 0x%x\n", + inbuf[i]); + outb(inbuf[i], 0x68); + } + } + if (outbuf && outlen) { + /* read data from EC */ + for (i = 0; i < outlen; i++) { + if (wait_on_obf(0x6c, 1)) { + printk(KERN_ERR "olpc-ec: timeout waiting for" + " EC to provide data!\n"); + goto restart; + } + outbuf[i] = inb(0x68); + printk(KERN_DEBUG "olpc-ec: received 0x%x\n", + outbuf[i]); + } + } + + ret = 0; +err: + spin_unlock_irqrestore(&ec_lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(olpc_ec_cmd); + +#ifdef CONFIG_OPEN_FIRMWARE +static void __init platform_detect(void) +{ + size_t propsize; + u32 rev; + + if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4, + &propsize) || propsize != 4) { + printk(KERN_ERR "ofw: getprop call failed!\n"); + rev = 0; + } + olpc_platform_info.boardrev = be32_to_cpu(rev); +} +#else +static void __init platform_detect(void) +{ + /* stopgap until OFW support is added to the kernel */ + olpc_platform_info.boardrev = be32_to_cpu(0xc2); +} +#endif + +static int __init olpc_init(void) +{ + unsigned char *romsig; + + /* The ioremap check is dangerous; limit what we run it on */ + if (!is_geode() || geode_has_vsa2()) + return 0; + + spin_lock_init(&ec_lock); + + romsig = ioremap(0xffffffc0, 16); + if (!romsig) + return 0; + + if (strncmp(romsig, "CL1 Q", 7)) + goto unmap; + if (strncmp(romsig+6, romsig+13, 3)) { + printk(KERN_INFO "OLPC BIOS signature looks invalid. " + "Assuming not OLPC\n"); + goto unmap; + } + + printk(KERN_INFO "OLPC board with OpenFirmware %.16s\n", romsig); + olpc_platform_info.flags |= OLPC_F_PRESENT; + + /* get the platform revision */ + platform_detect(); + + /* assume B1 and above models always have a DCON */ + if (olpc_board_at_least(olpc_board(0xb1))) + olpc_platform_info.flags |= OLPC_F_DCON; + + /* get the EC revision */ + olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, + (unsigned char *) &olpc_platform_info.ecver, 1); + + /* check to see if the VSA exists */ + if (geode_has_vsa2()) + olpc_platform_info.flags |= OLPC_F_VSA; + + printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", + ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", + olpc_platform_info.boardrev >> 4, + olpc_platform_info.ecver); + +unmap: + iounmap(romsig); + return 0; +} + +postcore_initcall(olpc_init); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index a94fb959a87a..22c14e21c97c 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -29,6 +29,7 @@ #include <linux/crash_dump.h> #include <linux/root_dev.h> #include <linux/pci.h> +#include <asm/pci-direct.h> #include <linux/efi.h> #include <linux/acpi.h> #include <linux/kallsyms.h> @@ -40,6 +41,7 @@ #include <linux/dmi.h> #include <linux/dma-mapping.h> #include <linux/ctype.h> +#include <linux/sort.h> #include <linux/uaccess.h> #include <linux/init_ohci1394_dma.h> #include <linux/kvm_para.h> @@ -288,6 +290,18 @@ static void __init parse_setup_data(void) } } +#ifdef CONFIG_PCI_MMCONFIG +extern void __cpuinit fam10h_check_enable_mmcfg(void); +extern void __init check_enable_amd_mmconf_dmi(void); +#else +void __cpuinit fam10h_check_enable_mmcfg(void) +{ +} +void __init check_enable_amd_mmconf_dmi(void) +{ +} +#endif + /* * setup_arch - architecture-specific boot-time initializations * @@ -515,6 +529,9 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; #endif #endif + + /* do this before identify_cpu for boot cpu */ + check_enable_amd_mmconf_dmi(); } static int __cpuinit get_model_name(struct cpuinfo_x86 *c) @@ -767,6 +784,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + if (c->x86 == 0x10) + fam10h_check_enable_mmcfg(); + if (amd_apic_timer_broken()) disable_apic_timer = 1; diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 8e05e7f7bd40..d92373630963 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -57,7 +57,7 @@ sys_sigsuspend(int history0, int history1, old_sigset_t mask) current->state = TASK_INTERRUPTIBLE; schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); return -ERESTARTNOHAND; } @@ -593,7 +593,7 @@ static void do_signal(struct pt_regs *regs) if (!user_mode(regs)) return; - if (test_thread_flag(TIF_RESTORE_SIGMASK)) + if (current_thread_info()->status & TS_RESTORE_SIGMASK) oldset = ¤t->saved_sigmask; else oldset = ¤t->blocked; @@ -612,13 +612,12 @@ static void do_signal(struct pt_regs *regs) /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { /* - * a signal was successfully delivered; the saved + * A signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, * and will be restored by sigreturn, so we can simply - * clear the TIF_RESTORE_SIGMASK flag + * clear the TS_RESTORE_SIGMASK flag. */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; } return; } @@ -645,8 +644,8 @@ static void do_signal(struct pt_regs *regs) * If there's no signal to deliver, we just put the saved sigmask * back. */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); + if (current_thread_info()->status & TS_RESTORE_SIGMASK) { + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } } @@ -665,7 +664,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) } /* deal with pending signal delivery */ - if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) + if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); if (thread_info_flags & _TIF_HRTICK_RESCHED) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ccb2a4560c2d..e53b267662e7 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -427,7 +427,7 @@ static void do_signal(struct pt_regs *regs) if (!user_mode(regs)) return; - if (test_thread_flag(TIF_RESTORE_SIGMASK)) + if (current_thread_info()->status & TS_RESTORE_SIGMASK) oldset = ¤t->saved_sigmask; else oldset = ¤t->blocked; @@ -444,11 +444,13 @@ static void do_signal(struct pt_regs *regs) /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { - /* a signal was successfully delivered; the saved + /* + * A signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, * and will be restored by sigreturn, so we can simply - * clear the TIF_RESTORE_SIGMASK flag */ - clear_thread_flag(TIF_RESTORE_SIGMASK); + * clear the TS_RESTORE_SIGMASK flag. + */ + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; } return; } @@ -476,8 +478,8 @@ static void do_signal(struct pt_regs *regs) * If there's no signal to deliver, we just put the saved sigmask * back. */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); + if (current_thread_info()->status & TS_RESTORE_SIGMASK) { + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } } @@ -498,7 +500,7 @@ void do_notify_resume(struct pt_regs *regs, void *unused, #endif /* CONFIG_X86_MCE */ /* deal with pending signal delivery */ - if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) + if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); if (thread_info_flags & _TIF_HRTICK_RESCHED) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 04c662ba18f1..84241a256dc8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1149,14 +1149,10 @@ static int __init smp_sanity_check(unsigned max_cpus) "forcing use of dummy APIC emulation.\n"); smpboot_clear_io_apic(); #ifdef CONFIG_X86_32 - if (nmi_watchdog == NMI_LOCAL_APIC) { - printk(KERN_INFO "activating minimal APIC for" - "NMI watchdog use.\n"); - connect_bsp_APIC(); - setup_local_APIC(); - end_local_APIC_setup(); - } + connect_bsp_APIC(); #endif + setup_local_APIC(); + end_local_APIC_setup(); return -1; } diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 1a89e93f3f1c..2ff21f398934 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -115,7 +115,6 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -extern void (*late_time_init)(void); /* Duplicate of time_init() below, with hpet_enable part added */ void __init hpet_time_init(void) { diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index caf2a26f5cfd..ba8c0b75ab0a 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -133,7 +133,7 @@ int is_vsmp_box(void) } } #else -static int __init detect_vsmp_box(void) +static void __init detect_vsmp_box(void) { } int is_vsmp_box(void) diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 86808e666f9c..1f476e477844 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -13,12 +13,15 @@ #include <linux/nodemask.h> #include <asm/io.h> #include <linux/pci_ids.h> +#include <linux/acpi.h> #include <asm/types.h> #include <asm/mmzone.h> #include <asm/proto.h> #include <asm/e820.h> #include <asm/pci-direct.h> #include <asm/numa.h> +#include <asm/mpspec.h> +#include <asm/apic.h> static __init int find_northbridge(void) { @@ -44,6 +47,30 @@ static __init int find_northbridge(void) return -1; } +static __init void early_get_boot_cpu_id(void) +{ + /* + * need to get boot_cpu_id so can use that to create apicid_to_node + * in k8_scan_nodes() + */ + /* + * Find possible boot-time SMP configuration: + */ + early_find_smp_config(); +#ifdef CONFIG_ACPI + /* + * Read APIC information from ACPI tables. + */ + early_acpi_boot_init(); +#endif + /* + * get boot-time SMP configuration: + */ + if (smp_found_config) + early_get_smp_config(); + early_init_lapic_mapping(); +} + int __init k8_scan_nodes(unsigned long start, unsigned long end) { unsigned long prevbase; @@ -56,6 +83,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) unsigned cores; unsigned bits; int j; + unsigned apicid_base; if (!early_pci_allowed()) return -1; @@ -174,11 +202,19 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) /* use the coreid bits from early_identify_cpu */ bits = boot_cpu_data.x86_coreid_bits; cores = (1<<bits); + apicid_base = 0; + /* need to get boot_cpu_id early for system with apicid lifting */ + early_get_boot_cpu_id(); + if (boot_cpu_physical_apicid > 0) { + printk(KERN_INFO "BSP APIC ID: %02x\n", + boot_cpu_physical_apicid); + apicid_base = boot_cpu_physical_apicid; + } for (i = 0; i < 8; i++) { if (nodes[i].start != nodes[i].end) { nodeid = nodeids[i]; - for (j = 0; j < cores; j++) + for (j = apicid_base; j < cores + apicid_base; j++) apicid_to_node[(nodeid << bits) + j] = i; setup_node_bootmem(i, nodes[i].start, nodes[i].end); } diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 index cdd6828b5abb..2a1516efb542 100644 --- a/arch/x86/pci/Makefile_32 +++ b/arch/x86/pci/Makefile_32 @@ -3,6 +3,7 @@ obj-y := i386.o init.o obj-$(CONFIG_PCI_BIOS) += pcbios.o obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_32.o direct.o mmconfig-shared.o obj-$(CONFIG_PCI_DIRECT) += direct.o +obj-$(CONFIG_PCI_OLPC) += olpc.o pci-y := fixup.o pci-$(CONFIG_ACPI) += acpi.o @@ -10,5 +11,6 @@ pci-y += legacy.o irq.o pci-$(CONFIG_X86_VISWS) := visws.o fixup.o pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o +pci-$(CONFIG_NUMA) += mp_bus_to_node.o obj-y += $(pci-y) common.o early.o diff --git a/arch/x86/pci/Makefile_64 b/arch/x86/pci/Makefile_64 index 7d8c467bf143..8fbd19832cf6 100644 --- a/arch/x86/pci/Makefile_64 +++ b/arch/x86/pci/Makefile_64 @@ -13,5 +13,5 @@ obj-y += legacy.o irq.o common.o early.o # mmconfig has a 64bit special obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_64.o direct.o mmconfig-shared.o -obj-$(CONFIG_NUMA) += k8-bus_64.o +obj-y += k8-bus_64.o diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 2664cb3fc96c..1a9c0c6a1a18 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -191,7 +191,10 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do { struct pci_bus *bus; struct pci_sysdata *sd; + int node; +#ifdef CONFIG_ACPI_NUMA int pxm; +#endif dmi_check_system(acpi_pciprobe_dmi_table); @@ -201,6 +204,17 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do return NULL; } + node = -1; +#ifdef CONFIG_ACPI_NUMA + pxm = acpi_get_pxm(device->handle); + if (pxm >= 0) + node = pxm_to_node(pxm); + if (node != -1) + set_mp_bus_to_node(busnum, node); + else + node = get_mp_bus_to_node(busnum); +#endif + /* Allocate per-root-bus (not per bus) arch-specific data. * TODO: leak; this memory is never freed. * It's arguable whether it's worth the trouble to care. @@ -212,13 +226,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do } sd->domain = domain; - sd->node = -1; - - pxm = acpi_get_pxm(device->handle); -#ifdef CONFIG_ACPI_NUMA - if (pxm >= 0) - sd->node = pxm_to_node(pxm); -#endif + sd->node = node; /* * Maybe the desired pci bus has been already scanned. In such case * it is unnecessary to scan the pci bus with the given domain,busnum. @@ -238,9 +246,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do kfree(sd); #ifdef CONFIG_ACPI_NUMA - if (bus != NULL) { + if (bus) { if (pxm >= 0) { - printk("bus %d -> pxm %d -> node %d\n", + printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n", busnum, pxm, pxm_to_node(pxm)); } } @@ -248,7 +256,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do if (bus && (pci_probe & PCI_USE__CRS)) get_current_resources(device, busnum, domain, bus); - return bus; } diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 75fcc29ecf52..2a4d751818b7 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -342,9 +342,14 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) return NULL; } + sd->node = get_mp_bus_to_node(busnum); + printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); + bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); + if (!bus) + kfree(sd); - return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); + return bus; } extern u8 pci_cache_line_size; @@ -420,6 +425,10 @@ char * __devinit pcibios_setup(char *str) pci_probe &= ~PCI_PROBE_MMCONF; return NULL; } + else if (!strcmp(str, "check_enable_amd_mmconf")) { + pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF; + return NULL; + } #endif else if (!strcmp(str, "noacpi")) { acpi_noirq_set(); @@ -480,7 +489,7 @@ void pcibios_disable_device (struct pci_dev *dev) pcibios_disable_irq(dev); } -struct pci_bus *__devinit pci_scan_bus_with_sysdata(int busno) +struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) { struct pci_bus *bus = NULL; struct pci_sysdata *sd; @@ -495,10 +504,15 @@ struct pci_bus *__devinit pci_scan_bus_with_sysdata(int busno) printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno); return NULL; } - sd->node = -1; - bus = pci_scan_bus(busno, &pci_root_ops, sd); + sd->node = node; + bus = pci_scan_bus(busno, ops, sd); if (!bus) kfree(sd); return bus; } + +struct pci_bus *pci_scan_bus_with_sysdata(int busno) +{ + return pci_scan_bus_on_node(busno, &pci_root_ops, -1); +} diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 42f3e4cad179..21d1e0e0d535 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -258,7 +258,8 @@ void __init pci_direct_init(int type) { if (type == 0) return; - printk(KERN_INFO "PCI: Using configuration type %d\n", type); + printk(KERN_INFO "PCI: Using configuration type %d for base access\n", + type); if (type == 1) raw_pci_ops = &pci_direct_conf1; else @@ -275,8 +276,10 @@ int __init pci_direct_probe(void) if (!region) goto type2; - if (pci_check_type1()) + if (pci_check_type1()) { + raw_pci_ops = &pci_direct_conf1; return 1; + } release_resource(region); type2: @@ -290,7 +293,6 @@ int __init pci_direct_probe(void) goto fail2; if (pci_check_type2()) { - printk(KERN_INFO "PCI: Using configuration type 2\n"); raw_pci_ops = &pci_direct_conf2; return 2; } diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index a5ef5f551373..b60b2abd480c 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -493,3 +493,20 @@ static void __devinit pci_siemens_interrupt_controller(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, pci_siemens_interrupt_controller); + +/* + * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config + * have 4096 bytes. Even if the device is capable, that doesn't mean we can + * access it. Maybe we don't have a way to generate extended config space + * accesses. So check it + */ +static void fam10h_pci_cfg_space_size(struct pci_dev *dev) +{ + dev->cfg_size = pci_cfg_space_size_ext(dev, 0); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size); diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index 3de9f9ba2da6..dd30c6076b5d 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -6,16 +6,17 @@ in the right sequence from here. */ static __init int pci_access_init(void) { - int type __maybe_unused = 0; - #ifdef CONFIG_PCI_DIRECT + int type = 0; + type = pci_direct_probe(); #endif -#ifdef CONFIG_PCI_MMCONFIG - pci_mmcfg_init(type); + + pci_mmcfg_early_init(); + +#ifdef CONFIG_PCI_OLPC + pci_olpc_init(); #endif - if (raw_pci_ops) - return 0; #ifdef CONFIG_PCI_BIOS pci_pcbios_init(); #endif @@ -28,7 +29,7 @@ static __init int pci_access_init(void) #ifdef CONFIG_PCI_DIRECT pci_direct_init(type); #endif - if (!raw_pci_ops) + if (!raw_pci_ops && !raw_pci_ext_ops) printk(KERN_ERR "PCI: Fatal: No config space access function found\n"); diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 579745ca6b66..0908fca901bf 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -136,9 +136,11 @@ static void __init pirq_peer_trick(void) busmap[e->bus] = 1; } for(i = 1; i < 256; i++) { + int node; if (!busmap[i] || pci_find_bus(0, i)) continue; - if (pci_scan_bus_with_sysdata(i)) + node = get_mp_bus_to_node(i); + if (pci_scan_bus_on_node(i, &pci_root_ops, node)) printk(KERN_INFO "PCI: Discovered primary peer " "bus %02x [IRQ]\n", i); } diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c index 9cc813e29706..ab6d4b18a88f 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/k8-bus_64.c @@ -1,83 +1,536 @@ #include <linux/init.h> #include <linux/pci.h> +#include <asm/pci-direct.h> #include <asm/mpspec.h> #include <linux/cpumask.h> +#include <linux/topology.h> /* * This discovers the pcibus <-> node mapping on AMD K8. - * - * RED-PEN need to call this again on PCI hotplug - * RED-PEN empty cpus get reported wrong + * also get peer root bus resource for io,mmio */ -#define NODE_ID_REGISTER 0x60 -#define NODE_ID(dword) (dword & 0x07) -#define LDT_BUS_NUMBER_REGISTER_0 0x94 -#define LDT_BUS_NUMBER_REGISTER_1 0xB4 -#define LDT_BUS_NUMBER_REGISTER_2 0xD4 -#define NR_LDT_BUS_NUMBER_REGISTERS 3 -#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF) -#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) -#define PCI_DEVICE_ID_K8HTCONFIG 0x1100 + +/* + * sub bus (transparent) will use entres from 3 to store extra from root, + * so need to make sure have enought slot there, increase PCI_BUS_NUM_RESOURCES? + */ +#define RES_NUM 16 +struct pci_root_info { + char name[12]; + unsigned int res_num; + struct resource res[RES_NUM]; + int bus_min; + int bus_max; + int node; + int link; +}; + +/* 4 at this time, it may become to 32 */ +#define PCI_ROOT_NR 4 +static int pci_root_num; +static struct pci_root_info pci_root_info[PCI_ROOT_NR]; + +#ifdef CONFIG_NUMA + +#define BUS_NR 256 + +static int mp_bus_to_node[BUS_NR]; + +void set_mp_bus_to_node(int busnum, int node) +{ + if (busnum >= 0 && busnum < BUS_NR) + mp_bus_to_node[busnum] = node; +} + +int get_mp_bus_to_node(int busnum) +{ + int node = -1; + + if (busnum < 0 || busnum > (BUS_NR - 1)) + return node; + + node = mp_bus_to_node[busnum]; + + /* + * let numa_node_id to decide it later in dma_alloc_pages + * if there is no ram on that node + */ + if (node != -1 && !node_online(node)) + node = -1; + + return node; +} +#endif + +void set_pci_bus_resources_arch_default(struct pci_bus *b) +{ + int i; + int j; + struct pci_root_info *info; + + /* if only one root bus, don't need to anything */ + if (pci_root_num < 2) + return; + + for (i = 0; i < pci_root_num; i++) { + if (pci_root_info[i].bus_min == b->number) + break; + } + + if (i == pci_root_num) + return; + + info = &pci_root_info[i]; + for (j = 0; j < info->res_num; j++) { + struct resource *res; + struct resource *root; + + res = &info->res[j]; + b->resource[j] = res; + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + insert_resource(root, res); + } +} + +#define RANGE_NUM 16 + +struct res_range { + size_t start; + size_t end; +}; + +static void __init update_range(struct res_range *range, size_t start, + size_t end) +{ + int i; + int j; + + for (j = 0; j < RANGE_NUM; j++) { + if (!range[j].end) + continue; + + if (start <= range[j].start && end >= range[j].end) { + range[j].start = 0; + range[j].end = 0; + continue; + } + + if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) { + range[j].start = end + 1; + continue; + } + + + if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) { + range[j].end = start - 1; + continue; + } + + if (start > range[j].start && end < range[j].end) { + /* find the new spare */ + for (i = 0; i < RANGE_NUM; i++) { + if (range[i].end == 0) + break; + } + if (i < RANGE_NUM) { + range[i].end = range[j].end; + range[i].start = end + 1; + } else { + printk(KERN_ERR "run of slot in ranges\n"); + } + range[j].end = start - 1; + continue; + } + } +} + +static void __init update_res(struct pci_root_info *info, size_t start, + size_t end, unsigned long flags, int merge) +{ + int i; + struct resource *res; + + if (!merge) + goto addit; + + /* try to merge it with old one */ + for (i = 0; i < info->res_num; i++) { + size_t final_start, final_end; + size_t common_start, common_end; + + res = &info->res[i]; + if (res->flags != flags) + continue; + + common_start = max((size_t)res->start, start); + common_end = min((size_t)res->end, end); + if (common_start > common_end + 1) + continue; + + final_start = min((size_t)res->start, start); + final_end = max((size_t)res->end, end); + + res->start = final_start; + res->end = final_end; + return; + } + +addit: + + /* need to add that */ + if (info->res_num >= RES_NUM) + return; + + res = &info->res[info->res_num]; + res->name = info->name; + res->flags = flags; + res->start = start; + res->end = end; + res->child = NULL; + info->res_num++; +} + +struct pci_hostbridge_probe { + u32 bus; + u32 slot; + u32 vendor; + u32 device; +}; + +static struct pci_hostbridge_probe pci_probes[] __initdata = { + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1100 }, + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, + { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, +}; + +static u64 __initdata fam10h_mmconf_start; +static u64 __initdata fam10h_mmconf_end; +static void __init get_pci_mmcfg_amd_fam10h_range(void) +{ + u32 address; + u64 base, msr; + unsigned segn_busn_bits; + + /* assume all cpus from fam10h have mmconf */ + if (boot_cpu_data.x86 < 0x10) + return; + + address = MSR_FAM10H_MMIO_CONF_BASE; + rdmsrl(address, msr); + + /* mmconfig is not enable */ + if (!(msr & FAM10H_MMIO_CONF_ENABLE)) + return; + + base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT); + + segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & + FAM10H_MMIO_CONF_BUSRANGE_MASK; + + fam10h_mmconf_start = base; + fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; +} /** - * fill_mp_bus_to_cpumask() + * early_fill_mp_bus_to_node() + * called before pcibios_scan_root and pci_scan_bus * fills the mp_bus_to_cpumask array based according to the LDT Bus Number * Registers found in the K8 northbridge */ -__init static int -fill_mp_bus_to_cpumask(void) +static int __init early_fill_mp_bus_info(void) { - struct pci_dev *nb_dev = NULL; - int i, j; - u32 ldtbus, nid; - static int lbnr[3] = { - LDT_BUS_NUMBER_REGISTER_0, - LDT_BUS_NUMBER_REGISTER_1, - LDT_BUS_NUMBER_REGISTER_2 - }; - - while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, - PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) { - pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid); - - for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { - pci_read_config_dword(nb_dev, lbnr[i], &ldtbus); - /* - * if there are no busses hanging off of the current - * ldt link then both the secondary and subordinate - * bus number fields are set to 0. - * - * RED-PEN - * This is slightly broken because it assumes - * HT node IDs == Linux node ids, which is not always - * true. However it is probably mostly true. - */ - if (!(SECONDARY_LDT_BUS_NUMBER(ldtbus) == 0 - && SUBORDINATE_LDT_BUS_NUMBER(ldtbus) == 0)) { - for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus); - j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus); - j++) { - struct pci_bus *bus; - struct pci_sysdata *sd; - - long node = NODE_ID(nid); - /* Algorithm a bit dumb, but - it shouldn't matter here */ - bus = pci_find_bus(0, j); - if (!bus) - continue; - if (!node_online(node)) - node = 0; - - sd = bus->sysdata; - sd->node = node; - } + int i; + int j; + unsigned bus; + unsigned slot; + int found; + int node; + int link; + int def_node; + int def_link; + struct pci_root_info *info; + u32 reg; + struct resource *res; + size_t start; + size_t end; + struct res_range range[RANGE_NUM]; + u64 val; + u32 address; + +#ifdef CONFIG_NUMA + for (i = 0; i < BUS_NR; i++) + mp_bus_to_node[i] = -1; +#endif + + if (!early_pci_allowed()) + return -1; + + found = 0; + for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { + u32 id; + u16 device; + u16 vendor; + + bus = pci_probes[i].bus; + slot = pci_probes[i].slot; + id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); + + vendor = id & 0xffff; + device = (id>>16) & 0xffff; + if (pci_probes[i].vendor == vendor && + pci_probes[i].device == device) { + found = 1; + break; + } + } + + if (!found) + return 0; + + pci_root_num = 0; + for (i = 0; i < 4; i++) { + int min_bus; + int max_bus; + reg = read_pci_config(bus, slot, 1, 0xe0 + (i << 2)); + + /* Check if that register is enabled for bus range */ + if ((reg & 7) != 3) + continue; + + min_bus = (reg >> 16) & 0xff; + max_bus = (reg >> 24) & 0xff; + node = (reg >> 4) & 0x07; +#ifdef CONFIG_NUMA + for (j = min_bus; j <= max_bus; j++) + mp_bus_to_node[j] = (unsigned char) node; +#endif + link = (reg >> 8) & 0x03; + + info = &pci_root_info[pci_root_num]; + info->bus_min = min_bus; + info->bus_max = max_bus; + info->node = node; + info->link = link; + sprintf(info->name, "PCI Bus #%02x", min_bus); + pci_root_num++; + } + + /* get the default node and link for left over res */ + reg = read_pci_config(bus, slot, 0, 0x60); + def_node = (reg >> 8) & 0x07; + reg = read_pci_config(bus, slot, 0, 0x64); + def_link = (reg >> 8) & 0x03; + + memset(range, 0, sizeof(range)); + range[0].end = 0xffff; + /* io port resource */ + for (i = 0; i < 4; i++) { + reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); + if (!(reg & 3)) + continue; + + start = reg & 0xfff000; + reg = read_pci_config(bus, slot, 1, 0xc4 + (i << 3)); + node = reg & 0x07; + link = (reg >> 4) & 0x03; + end = (reg & 0xfff000) | 0xfff; + + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == node && info->link == link) + break; + } + if (j == pci_root_num) + continue; /* not found */ + + info = &pci_root_info[j]; + printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", + node, link, (u64)start, (u64)end); + + /* kernel only handle 16 bit only */ + if (end > 0xffff) + end = 0xffff; + update_res(info, start, end, IORESOURCE_IO, 1); + update_range(range, start, end); + } + /* add left over io port range to def node/link, [0, 0xffff] */ + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == def_node && info->link == def_link) + break; + } + if (j < pci_root_num) { + info = &pci_root_info[j]; + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + update_res(info, range[i].start, range[i].end, + IORESOURCE_IO, 1); + } + } + + memset(range, 0, sizeof(range)); + /* 0xfd00000000-0xffffffffff for HT */ + range[0].end = (0xfdULL<<32) - 1; + + /* need to take out [0, TOM) for RAM*/ + address = MSR_K8_TOP_MEM1; + rdmsrl(address, val); + end = (val & 0xffffff8000000ULL); + printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); + if (end < (1ULL<<32)) + update_range(range, 0, end - 1); + + /* get mmconfig */ + get_pci_mmcfg_amd_fam10h_range(); + /* need to take out mmconf range */ + if (fam10h_mmconf_end) { + printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); + update_range(range, fam10h_mmconf_start, fam10h_mmconf_end); + } + + /* mmio resource */ + for (i = 0; i < 8; i++) { + reg = read_pci_config(bus, slot, 1, 0x80 + (i << 3)); + if (!(reg & 3)) + continue; + + start = reg & 0xffffff00; /* 39:16 on 31:8*/ + start <<= 8; + reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); + node = reg & 0x07; + link = (reg >> 4) & 0x03; + end = (reg & 0xffffff00); + end <<= 8; + end |= 0xffff; + + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == node && info->link == link) + break; + } + if (j == pci_root_num) + continue; /* not found */ + + info = &pci_root_info[j]; + + printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", + node, link, (u64)start, (u64)end); + /* + * some sick allocation would have range overlap with fam10h + * mmconf range, so need to update start and end. + */ + if (fam10h_mmconf_end) { + int changed = 0; + u64 endx = 0; + if (start >= fam10h_mmconf_start && + start <= fam10h_mmconf_end) { + start = fam10h_mmconf_end + 1; + changed = 1; + } + + if (end >= fam10h_mmconf_start && + end <= fam10h_mmconf_end) { + end = fam10h_mmconf_start - 1; + changed = 1; + } + + if (start < fam10h_mmconf_start && + end > fam10h_mmconf_end) { + /* we got a hole */ + endx = fam10h_mmconf_start - 1; + update_res(info, start, endx, IORESOURCE_MEM, 0); + update_range(range, start, endx); + printk(KERN_CONT " ==> [%llx, %llx]", (u64)start, endx); + start = fam10h_mmconf_end + 1; + changed = 1; + } + if (changed) { + if (start <= end) { + printk(KERN_CONT " %s [%llx, %llx]", endx?"and":"==>", (u64)start, (u64)end); + } else { + printk(KERN_CONT "%s\n", endx?"":" ==> none"); + continue; + } } } + + update_res(info, start, end, IORESOURCE_MEM, 1); + update_range(range, start, end); + printk(KERN_CONT "\n"); + } + + /* need to take out [4G, TOM2) for RAM*/ + /* SYS_CFG */ + address = MSR_K8_SYSCFG; + rdmsrl(address, val); + /* TOP_MEM2 is enabled? */ + if (val & (1<<21)) { + /* TOP_MEM2 */ + address = MSR_K8_TOP_MEM2; + rdmsrl(address, val); + end = (val & 0xffffff8000000ULL); + printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); + update_range(range, 1ULL<<32, end - 1); + } + + /* + * add left over mmio range to def node/link ? + * that is tricky, just record range in from start_min to 4G + */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == def_node && info->link == def_link) + break; + } + if (j < pci_root_num) { + info = &pci_root_info[j]; + + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + update_res(info, range[i].start, range[i].end, + IORESOURCE_MEM, 1); + } + } + +#ifdef CONFIG_NUMA + for (i = 0; i < BUS_NR; i++) { + node = mp_bus_to_node[i]; + if (node >= 0) + printk(KERN_DEBUG "bus: %02x to node: %02x\n", i, node); + } +#endif + + for (i = 0; i < pci_root_num; i++) { + int res_num; + int busnum; + + info = &pci_root_info[i]; + res_num = info->res_num; + busnum = info->bus_min; + printk(KERN_DEBUG "bus: [%02x,%02x] on node %x link %x\n", + info->bus_min, info->bus_max, info->node, info->link); + for (j = 0; j < res_num; j++) { + res = &info->res[j]; + printk(KERN_DEBUG "bus: %02x index %x %s: [%llx, %llx]\n", + busnum, j, + (res->flags & IORESOURCE_IO)?"io port":"mmio", + res->start, res->end); + } } return 0; } -fs_initcall(fill_mp_bus_to_cpumask); +postcore_initcall(early_fill_mp_bus_info); diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index e041ced0ce13..a67921ce60af 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -12,6 +12,7 @@ static void __devinit pcibios_fixup_peer_bridges(void) { int n, devfn; + long node; if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) return; @@ -21,12 +22,13 @@ static void __devinit pcibios_fixup_peer_bridges(void) u32 l; if (pci_find_bus(0, n)) continue; + node = get_mp_bus_to_node(n); for (devfn = 0; devfn < 256; devfn += 8) { if (!raw_pci_read(0, n, devfn, PCI_VENDOR_ID, 2, &l) && l != 0x0000 && l != 0xffff) { DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l); printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); - pci_scan_bus_with_sysdata(n); + pci_scan_bus_on_node(n, &pci_root_ops, node); break; } } diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 8d54df4dfaad..0cfebecf2a8f 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -28,7 +28,7 @@ static int __initdata pci_mmcfg_resources_inserted; static const char __init *pci_mmcfg_e7520(void) { u32 win; - pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win); + raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0xce, 2, &win); win = win & 0xf000; if(win == 0x0000 || win == 0xf000) @@ -53,7 +53,7 @@ static const char __init *pci_mmcfg_intel_945(void) pci_mmcfg_config_num = 1; - pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar); + raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0x48, 4, &pciexbar); /* Enable bit */ if (!(pciexbar & 1)) @@ -100,33 +100,102 @@ static const char __init *pci_mmcfg_intel_945(void) return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; } +static const char __init *pci_mmcfg_amd_fam10h(void) +{ + u32 low, high, address; + u64 base, msr; + int i; + unsigned segnbits = 0, busnbits; + + if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF)) + return NULL; + + address = MSR_FAM10H_MMIO_CONF_BASE; + if (rdmsr_safe(address, &low, &high)) + return NULL; + + msr = high; + msr <<= 32; + msr |= low; + + /* mmconfig is not enable */ + if (!(msr & FAM10H_MMIO_CONF_ENABLE)) + return NULL; + + base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT); + + busnbits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & + FAM10H_MMIO_CONF_BUSRANGE_MASK; + + /* + * only handle bus 0 ? + * need to skip it + */ + if (!busnbits) + return NULL; + + if (busnbits > 8) { + segnbits = busnbits - 8; + busnbits = 8; + } + + pci_mmcfg_config_num = (1 << segnbits); + pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]) * + pci_mmcfg_config_num, GFP_KERNEL); + if (!pci_mmcfg_config) + return NULL; + + for (i = 0; i < (1 << segnbits); i++) { + pci_mmcfg_config[i].address = base + (1<<28) * i; + pci_mmcfg_config[i].pci_segment = i; + pci_mmcfg_config[i].start_bus_number = 0; + pci_mmcfg_config[i].end_bus_number = (1 << busnbits) - 1; + } + + return "AMD Family 10h NB"; +} + struct pci_mmcfg_hostbridge_probe { + u32 bus; + u32 devfn; u32 vendor; u32 device; const char *(*probe)(void); }; static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = { - { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, pci_mmcfg_e7520 }, - { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82945G_HB, pci_mmcfg_intel_945 }, + { 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_E7520_MCH, pci_mmcfg_e7520 }, + { 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82945G_HB, pci_mmcfg_intel_945 }, + { 0, PCI_DEVFN(0x18, 0), PCI_VENDOR_ID_AMD, + 0x1200, pci_mmcfg_amd_fam10h }, + { 0xff, PCI_DEVFN(0, 0), PCI_VENDOR_ID_AMD, + 0x1200, pci_mmcfg_amd_fam10h }, }; static int __init pci_mmcfg_check_hostbridge(void) { u32 l; + u32 bus, devfn; u16 vendor, device; int i; const char *name; - pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0, 4, &l); - vendor = l & 0xffff; - device = (l >> 16) & 0xffff; + if (!raw_pci_ops) + return 0; pci_mmcfg_config_num = 0; pci_mmcfg_config = NULL; name = NULL; for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) { + bus = pci_mmcfg_probes[i].bus; + devfn = pci_mmcfg_probes[i].devfn; + raw_pci_ops->read(0, bus, devfn, 0, 4, &l); + vendor = l & 0xffff; + device = (l >> 16) & 0xffff; + if (pci_mmcfg_probes[i].vendor == vendor && pci_mmcfg_probes[i].device == device) name = pci_mmcfg_probes[i].probe(); @@ -173,9 +242,78 @@ static void __init pci_mmcfg_insert_resources(unsigned long resource_flags) pci_mmcfg_resources_inserted = 1; } -static void __init pci_mmcfg_reject_broken(int type) +static acpi_status __init check_mcfg_resource(struct acpi_resource *res, + void *data) +{ + struct resource *mcfg_res = data; + struct acpi_resource_address64 address; + acpi_status status; + + if (res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) { + struct acpi_resource_fixed_memory32 *fixmem32 = + &res->data.fixed_memory32; + if (!fixmem32) + return AE_OK; + if ((mcfg_res->start >= fixmem32->address) && + (mcfg_res->end < (fixmem32->address + + fixmem32->address_length))) { + mcfg_res->flags = 1; + return AE_CTRL_TERMINATE; + } + } + if ((res->type != ACPI_RESOURCE_TYPE_ADDRESS32) && + (res->type != ACPI_RESOURCE_TYPE_ADDRESS64)) + return AE_OK; + + status = acpi_resource_to_address64(res, &address); + if (ACPI_FAILURE(status) || + (address.address_length <= 0) || + (address.resource_type != ACPI_MEMORY_RANGE)) + return AE_OK; + + if ((mcfg_res->start >= address.minimum) && + (mcfg_res->end < (address.minimum + address.address_length))) { + mcfg_res->flags = 1; + return AE_CTRL_TERMINATE; + } + return AE_OK; +} + +static acpi_status __init find_mboard_resource(acpi_handle handle, u32 lvl, + void *context, void **rv) +{ + struct resource *mcfg_res = context; + + acpi_walk_resources(handle, METHOD_NAME__CRS, + check_mcfg_resource, context); + + if (mcfg_res->flags) + return AE_CTRL_TERMINATE; + + return AE_OK; +} + +static int __init is_acpi_reserved(unsigned long start, unsigned long end) +{ + struct resource mcfg_res; + + mcfg_res.start = start; + mcfg_res.end = end; + mcfg_res.flags = 0; + + acpi_get_devices("PNP0C01", find_mboard_resource, &mcfg_res, NULL); + + if (!mcfg_res.flags) + acpi_get_devices("PNP0C02", find_mboard_resource, &mcfg_res, + NULL); + + return mcfg_res.flags; +} + +static void __init pci_mmcfg_reject_broken(int early) { typeof(pci_mmcfg_config[0]) *cfg; + int i; if ((pci_mmcfg_config_num == 0) || (pci_mmcfg_config == NULL) || @@ -184,51 +322,80 @@ static void __init pci_mmcfg_reject_broken(int type) cfg = &pci_mmcfg_config[0]; - /* - * Handle more broken MCFG tables on Asus etc. - * They only contain a single entry for bus 0-0. - */ - if (pci_mmcfg_config_num == 1 && - cfg->pci_segment == 0 && - (cfg->start_bus_number | cfg->end_bus_number) == 0) { - printk(KERN_ERR "PCI: start and end of bus number is 0. " - "Rejected as broken MCFG.\n"); - goto reject; + for (i = 0; i < pci_mmcfg_config_num; i++) { + int valid = 0; + u32 size = (cfg->end_bus_number + 1) << 20; + cfg = &pci_mmcfg_config[i]; + printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " + "segment %hu buses %u - %u\n", + i, (unsigned long)cfg->address, cfg->pci_segment, + (unsigned int)cfg->start_bus_number, + (unsigned int)cfg->end_bus_number); + + if (!early && + is_acpi_reserved(cfg->address, cfg->address + size - 1)) { + printk(KERN_NOTICE "PCI: MCFG area at %Lx reserved " + "in ACPI motherboard resources\n", + cfg->address); + valid = 1; + } + + if (valid) + continue; + + if (!early) + printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" + " reserved in ACPI motherboard resources\n", + cfg->address); + /* Don't try to do this check unless configuration + type 1 is available. how about type 2 ?*/ + if (raw_pci_ops && e820_all_mapped(cfg->address, + cfg->address + size - 1, + E820_RESERVED)) { + printk(KERN_NOTICE + "PCI: MCFG area at %Lx reserved in E820\n", + cfg->address); + valid = 1; + } + + if (!valid) + goto reject; } - /* - * Only do this check when type 1 works. If it doesn't work - * assume we run on a Mac and always use MCFG - */ - if (type == 1 && !e820_all_mapped(cfg->address, - cfg->address + MMCONFIG_APER_MIN, - E820_RESERVED)) { - printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" - " E820-reserved\n", cfg->address); - goto reject; - } return; reject: printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); + pci_mmcfg_arch_free(); kfree(pci_mmcfg_config); pci_mmcfg_config = NULL; pci_mmcfg_config_num = 0; } -void __init pci_mmcfg_init(int type) -{ - int known_bridge = 0; +static int __initdata known_bridge; +void __init __pci_mmcfg_init(int early) +{ + /* MMCONFIG disabled */ if ((pci_probe & PCI_PROBE_MMCONF) == 0) return; - if (type == 1 && pci_mmcfg_check_hostbridge()) - known_bridge = 1; + /* MMCONFIG already enabled */ + if (!early && !(pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF)) + return; + + /* for late to exit */ + if (known_bridge) + return; + + if (early) { + if (pci_mmcfg_check_hostbridge()) + known_bridge = 1; + } if (!known_bridge) { acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); - pci_mmcfg_reject_broken(type); + pci_mmcfg_reject_broken(early); } if ((pci_mmcfg_config_num == 0) || @@ -249,6 +416,16 @@ void __init pci_mmcfg_init(int type) } } +void __init pci_mmcfg_early_init(void) +{ + __pci_mmcfg_init(1); +} + +void __init pci_mmcfg_late_init(void) +{ + __pci_mmcfg_init(0); +} + static int __init pci_mmcfg_late_insert_resources(void) { /* diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 081816ada057..f3c761dce695 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -136,3 +136,7 @@ int __init pci_mmcfg_arch_init(void) raw_pci_ext_ops = &pci_mmcfg; return 1; } + +void __init pci_mmcfg_arch_free(void) +{ +} diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 9207fd49233c..a1994163c99d 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -127,7 +127,7 @@ static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg) int __init pci_mmcfg_arch_init(void) { int i; - pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * + pci_mmcfg_virt = kzalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); if (pci_mmcfg_virt == NULL) { printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n"); @@ -141,9 +141,29 @@ int __init pci_mmcfg_arch_init(void) printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " "segment %d\n", pci_mmcfg_config[i].pci_segment); + pci_mmcfg_arch_free(); return 0; } } raw_pci_ext_ops = &pci_mmcfg; return 1; } + +void __init pci_mmcfg_arch_free(void) +{ + int i; + + if (pci_mmcfg_virt == NULL) + return; + + for (i = 0; i < pci_mmcfg_config_num; ++i) { + if (pci_mmcfg_virt[i].virt) { + iounmap(pci_mmcfg_virt[i].virt); + pci_mmcfg_virt[i].virt = NULL; + pci_mmcfg_virt[i].cfg = NULL; + } + } + + kfree(pci_mmcfg_virt); + pci_mmcfg_virt = NULL; +} diff --git a/arch/x86/pci/mp_bus_to_node.c b/arch/x86/pci/mp_bus_to_node.c new file mode 100644 index 000000000000..022943999b84 --- /dev/null +++ b/arch/x86/pci/mp_bus_to_node.c @@ -0,0 +1,23 @@ +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/topology.h> + +#define BUS_NR 256 + +static unsigned char mp_bus_to_node[BUS_NR]; + +void set_mp_bus_to_node(int busnum, int node) +{ + if (busnum >= 0 && busnum < BUS_NR) + mp_bus_to_node[busnum] = (unsigned char) node; +} + +int get_mp_bus_to_node(int busnum) +{ + int node; + + if (busnum < 0 || busnum > (BUS_NR - 1)) + return 0; + node = mp_bus_to_node[busnum]; + return node; +} diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c new file mode 100644 index 000000000000..5e7636558c02 --- /dev/null +++ b/arch/x86/pci/olpc.c @@ -0,0 +1,313 @@ +/* + * Low-level PCI config space access for OLPC systems who lack the VSA + * PCI virtualization software. + * + * Copyright © 2006 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * The AMD Geode chipset (ie: GX2 processor, cs5536 I/O companion device) + * has some I/O functions (display, southbridge, sound, USB HCIs, etc) + * that more or less behave like PCI devices, but the hardware doesn't + * directly implement the PCI configuration space headers. AMD provides + * "VSA" (Virtual System Architecture) software that emulates PCI config + * space for these devices, by trapping I/O accesses to PCI config register + * (CF8/CFC) and running some code in System Management Mode interrupt state. + * On the OLPC platform, we don't want to use that VSA code because + * (a) it slows down suspend/resume, and (b) recompiling it requires special + * compilers that are hard to get. So instead of letting the complex VSA + * code simulate the PCI config registers for the on-chip devices, we + * just simulate them the easy way, by inserting the code into the + * pci_write_config and pci_read_config path. Most of the config registers + * are read-only anyway, so the bulk of the simulation is just table lookup. + */ + +#include <linux/pci.h> +#include <linux/init.h> +#include <asm/olpc.h> +#include <asm/geode.h> +#include "pci.h" + +/* + * In the tables below, the first two line (8 longwords) are the + * size masks that are used when the higher level PCI code determines + * the size of the region by writing ~0 to a base address register + * and reading back the result. + * + * The following lines are the values that are read during normal + * PCI config access cycles, i.e. not after just having written + * ~0 to a base address register. + */ + +static const uint32_t lxnb_hdr[] = { /* dev 1 function 0 - devfn = 8 */ + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x281022, 0x2200005, 0x6000021, 0x80f808, /* AMD Vendor ID */ + 0x0, 0x0, 0x0, 0x0, /* No virtual registers, hence no BAR */ + 0x0, 0x0, 0x0, 0x28100b, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t gxnb_hdr[] = { /* dev 1 function 0 - devfn = 8 */ + 0xfffffffd, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x28100b, 0x2200005, 0x6000021, 0x80f808, /* NSC Vendor ID */ + 0xac1d, 0x0, 0x0, 0x0, /* I/O BAR - base of virtual registers */ + 0x0, 0x0, 0x0, 0x28100b, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t lxfb_hdr[] = { /* dev 1 function 1 - devfn = 9 */ + 0xff000008, 0xffffc000, 0xffffc000, 0xffffc000, + 0xffffc000, 0x0, 0x0, 0x0, + + 0x20811022, 0x2200003, 0x3000000, 0x0, /* AMD Vendor ID */ + 0xfd000000, 0xfe000000, 0xfe004000, 0xfe008000, /* FB, GP, VG, DF */ + 0xfe00c000, 0x0, 0x0, 0x30100b, /* VIP */ + 0x0, 0x0, 0x0, 0x10e, /* INTA, IRQ14 for graphics accel */ + 0x0, 0x0, 0x0, 0x0, + 0x3d0, 0x3c0, 0xa0000, 0x0, /* VG IO, VG IO, EGA FB, MONO FB */ + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t gxfb_hdr[] = { /* dev 1 function 1 - devfn = 9 */ + 0xff800008, 0xffffc000, 0xffffc000, 0xffffc000, + 0x0, 0x0, 0x0, 0x0, + + 0x30100b, 0x2200003, 0x3000000, 0x0, /* NSC Vendor ID */ + 0xfd000000, 0xfe000000, 0xfe004000, 0xfe008000, /* FB, GP, VG, DF */ + 0x0, 0x0, 0x0, 0x30100b, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x3d0, 0x3c0, 0xa0000, 0x0, /* VG IO, VG IO, EGA FB, MONO FB */ + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t aes_hdr[] = { /* dev 1 function 2 - devfn = 0xa */ + 0xffffc000, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x20821022, 0x2a00006, 0x10100000, 0x8, /* NSC Vendor ID */ + 0xfe010000, 0x0, 0x0, 0x0, /* AES registers */ + 0x0, 0x0, 0x0, 0x20821022, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + + +static const uint32_t isa_hdr[] = { /* dev f function 0 - devfn = 78 */ + 0xfffffff9, 0xffffff01, 0xffffffc1, 0xffffffe1, + 0xffffff81, 0xffffffc1, 0x0, 0x0, + + 0x20901022, 0x2a00049, 0x6010003, 0x802000, + 0x18b1, 0x1001, 0x1801, 0x1881, /* SMB-8 GPIO-256 MFGPT-64 IRQ-32 */ + 0x1401, 0x1841, 0x0, 0x20901022, /* PMS-128 ACPI-64 */ + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0xaa5b, /* IRQ steering */ + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t ac97_hdr[] = { /* dev f function 3 - devfn = 7b */ + 0xffffff81, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x20931022, 0x2a00041, 0x4010001, 0x0, + 0x1481, 0x0, 0x0, 0x0, /* I/O BAR-128 */ + 0x0, 0x0, 0x0, 0x20931022, + 0x0, 0x0, 0x0, 0x205, /* IntB, IRQ5 */ + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t ohci_hdr[] = { /* dev f function 4 - devfn = 7c */ + 0xfffff000, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x20941022, 0x2300006, 0xc031002, 0x0, + 0xfe01a000, 0x0, 0x0, 0x0, /* MEMBAR-1000 */ + 0x0, 0x0, 0x0, 0x20941022, + 0x0, 0x40, 0x0, 0x40a, /* CapPtr INT-D, IRQA */ + 0xc8020001, 0x0, 0x0, 0x0, /* Capabilities - 40 is R/O, + 44 is mask 8103 (power control) */ + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t ehci_hdr[] = { /* dev f function 4 - devfn = 7d */ + 0xfffff000, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x20951022, 0x2300006, 0xc032002, 0x0, + 0xfe01b000, 0x0, 0x0, 0x0, /* MEMBAR-1000 */ + 0x0, 0x0, 0x0, 0x20951022, + 0x0, 0x40, 0x0, 0x40a, /* CapPtr INT-D, IRQA */ + 0xc8020001, 0x0, 0x0, 0x0, /* Capabilities - 40 is R/O, 44 is + mask 8103 (power control) */ +#if 0 + 0x1, 0x40080000, 0x0, 0x0, /* EECP - see EHCI spec section 2.1.7 */ +#endif + 0x01000001, 0x0, 0x0, 0x0, /* EECP - see EHCI spec section 2.1.7 */ + 0x2020, 0x0, 0x0, 0x0, /* (EHCI page 8) 60 SBRN (R/O), + 61 FLADJ (R/W), PORTWAKECAP */ +}; + +static uint32_t ff_loc = ~0; +static uint32_t zero_loc; +static int bar_probing; /* Set after a write of ~0 to a BAR */ +static int is_lx; + +#define NB_SLOT 0x1 /* Northbridge - GX chip - Device 1 */ +#define SB_SLOT 0xf /* Southbridge - CS5536 chip - Device F */ + +static int is_simulated(unsigned int bus, unsigned int devfn) +{ + return (!bus && ((PCI_SLOT(devfn) == NB_SLOT) || + (PCI_SLOT(devfn) == SB_SLOT))); +} + +static uint32_t *hdr_addr(const uint32_t *hdr, int reg) +{ + uint32_t addr; + + /* + * This is a little bit tricky. The header maps consist of + * 0x20 bytes of size masks, followed by 0x70 bytes of header data. + * In the normal case, when not probing a BAR's size, we want + * to access the header data, so we add 0x20 to the reg offset, + * thus skipping the size mask area. + * In the BAR probing case, we want to access the size mask for + * the BAR, so we subtract 0x10 (the config header offset for + * BAR0), and don't skip the size mask area. + */ + + addr = (uint32_t)hdr + reg + (bar_probing ? -0x10 : 0x20); + + bar_probing = 0; + return (uint32_t *)addr; +} + +static int pci_olpc_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, uint32_t *value) +{ + uint32_t *addr; + + /* Use the hardware mechanism for non-simulated devices */ + if (!is_simulated(bus, devfn)) + return pci_direct_conf1.read(seg, bus, devfn, reg, len, value); + + /* + * No device has config registers past 0x70, so we save table space + * by not storing entries for the nonexistent registers + */ + if (reg >= 0x70) + addr = &zero_loc; + else { + switch (devfn) { + case 0x8: + addr = hdr_addr(is_lx ? lxnb_hdr : gxnb_hdr, reg); + break; + case 0x9: + addr = hdr_addr(is_lx ? lxfb_hdr : gxfb_hdr, reg); + break; + case 0xa: + addr = is_lx ? hdr_addr(aes_hdr, reg) : &ff_loc; + break; + case 0x78: + addr = hdr_addr(isa_hdr, reg); + break; + case 0x7b: + addr = hdr_addr(ac97_hdr, reg); + break; + case 0x7c: + addr = hdr_addr(ohci_hdr, reg); + break; + case 0x7d: + addr = hdr_addr(ehci_hdr, reg); + break; + default: + addr = &ff_loc; + break; + } + } + switch (len) { + case 1: + *value = *(uint8_t *)addr; + break; + case 2: + *value = *(uint16_t *)addr; + break; + case 4: + *value = *addr; + break; + default: + BUG(); + } + + return 0; +} + +static int pci_olpc_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, uint32_t value) +{ + /* Use the hardware mechanism for non-simulated devices */ + if (!is_simulated(bus, devfn)) + return pci_direct_conf1.write(seg, bus, devfn, reg, len, value); + + /* XXX we may want to extend this to simulate EHCI power management */ + + /* + * Mostly we just discard writes, but if the write is a size probe + * (i.e. writing ~0 to a BAR), we remember it and arrange to return + * the appropriate size mask on the next read. This is cheating + * to some extent, because it depends on the fact that the next + * access after such a write will always be a read to the same BAR. + */ + + if ((reg >= 0x10) && (reg < 0x2c)) { + /* write is to a BAR */ + if (value == ~0) + bar_probing = 1; + } else { + /* + * No warning on writes to ROM BAR, CMD, LATENCY_TIMER, + * CACHE_LINE_SIZE, or PM registers. + */ + if ((reg != PCI_ROM_ADDRESS) && (reg != PCI_COMMAND_MASTER) && + (reg != PCI_LATENCY_TIMER) && + (reg != PCI_CACHE_LINE_SIZE) && (reg != 0x44)) + printk(KERN_WARNING "OLPC PCI: Config write to devfn" + " %x reg %x value %x\n", devfn, reg, value); + } + + return 0; +} + +static struct pci_raw_ops pci_olpc_conf = { + .read = pci_olpc_read, + .write = pci_olpc_write, +}; + +void __init pci_olpc_init(void) +{ + if (!machine_is_olpc() || olpc_has_vsa()) + return; + + printk(KERN_INFO "PCI: Using configuration type OLPC\n"); + raw_pci_ops = &pci_olpc_conf; + is_lx = is_geode_lx(); +} diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index c4bddaeff619..c58805a92db5 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -26,6 +26,7 @@ #define PCI_ASSIGN_ALL_BUSSES 0x4000 #define PCI_CAN_SKIP_ISA_ALIGN 0x8000 #define PCI_USE__CRS 0x10000 +#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 extern unsigned int pci_probe; extern unsigned long pirq_table_addr; @@ -97,11 +98,12 @@ extern struct pci_raw_ops pci_direct_conf1; extern int pci_direct_probe(void); extern void pci_direct_init(int type); extern void pci_pcbios_init(void); -extern void pci_mmcfg_init(int type); +extern void pci_olpc_init(void); /* pci-mmconfig.c */ extern int __init pci_mmcfg_arch_init(void); +extern void __init pci_mmcfg_arch_free(void); /* * AMD Fam10h CPUs are buggy, and cannot access MMIO config space |