diff options
Diffstat (limited to 'arch/x86/kernel')
33 files changed, 376 insertions, 401 deletions
| diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 62445ba2f8a8..7338ef2218bc 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -41,13 +41,13 @@ obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o  obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o  obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o  obj-y			+= bootflag.o e820.o -obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o +obj-y			+= pci-dma.o quirks.o topology.o kdebugfs.o  obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o  obj-y			+= tsc.o io_delay.o rtc.o  obj-y			+= pci-iommu_table.o  obj-y			+= resource.o -obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o +obj-y				+= trampoline.o trampoline_$(BITS).o  obj-y				+= process.o  obj-y				+= i387.o xsave.o  obj-y				+= ptrace.o @@ -55,10 +55,12 @@ obj-$(CONFIG_X86_32)		+= tls.o  obj-$(CONFIG_IA32_EMULATION)	+= tls.o  obj-y				+= step.o  obj-$(CONFIG_INTEL_TXT)		+= tboot.o +obj-$(CONFIG_ISA_DMA_API)	+= i8237.o  obj-$(CONFIG_STACKTRACE)	+= stacktrace.o  obj-y				+= cpu/  obj-y				+= acpi/  obj-y				+= reboot.o +obj-$(CONFIG_X86_32)		+= reboot_32.o  obj-$(CONFIG_MCA)		+= mca_32.o  obj-$(CONFIG_X86_MSR)		+= msr.o  obj-$(CONFIG_X86_CPUID)		+= cpuid.o @@ -69,7 +71,6 @@ obj-$(CONFIG_SMP)		+= smp.o  obj-$(CONFIG_SMP)		+= smpboot.o  obj-$(CONFIG_SMP)		+= tsc_sync.o  obj-$(CONFIG_SMP)		+= setup_percpu.o -obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline_$(BITS).o  obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o  obj-y				+= apic/  obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index 28595d6df47c..ead21b663117 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S @@ -6,11 +6,17 @@  #include <asm/page_types.h>  #include <asm/pgtable_types.h>  #include <asm/processor-flags.h> +#include "wakeup.h"  	.code16 -	.section ".header", "a" +	.section ".jump", "ax" +	.globl	_start +_start: +	cli +	jmp	wakeup_code  /* This should match the structure in wakeup.h */ +		.section ".header", "a"  		.globl	wakeup_header  wakeup_header:  video_mode:	.short	0	/* Video mode number */ @@ -30,14 +36,11 @@ wakeup_jmp:	.byte	0xea	/* ljmpw */  wakeup_jmp_off:	.word	3f  wakeup_jmp_seg:	.word	0  wakeup_gdt:	.quad	0, 0, 0 -signature:	.long	0x51ee1111 +signature:	.long	WAKEUP_HEADER_SIGNATURE  	.text -	.globl	_start  	.code16  wakeup_code: -_start: -	cli  	cld  	/* Apparently some dimwit BIOS programmers don't know how to @@ -77,12 +80,12 @@ _start:  	/* Check header signature... */  	movl	signature, %eax -	cmpl	$0x51ee1111, %eax +	cmpl	$WAKEUP_HEADER_SIGNATURE, %eax  	jne	bogus_real_magic  	/* Check we really have everything... */  	movl	end_signature, %eax -	cmpl	$0x65a22c82, %eax +	cmpl	$WAKEUP_END_SIGNATURE, %eax  	jne	bogus_real_magic  	/* Call the C code */ @@ -147,3 +150,7 @@ wakeup_heap:  wakeup_stack:  	.space	2048  wakeup_stack_end: + +	.section ".signature","a" +end_signature: +	.long	WAKEUP_END_SIGNATURE diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h index 69d38d0b2b64..e1828c07e79c 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.h +++ b/arch/x86/kernel/acpi/realmode/wakeup.h @@ -35,7 +35,8 @@ struct wakeup_header {  extern struct wakeup_header wakeup_header;  #endif -#define HEADER_OFFSET 0x3f00 -#define WAKEUP_SIZE   0x4000 +#define WAKEUP_HEADER_OFFSET	8 +#define WAKEUP_HEADER_SIGNATURE 0x51ee1111 +#define WAKEUP_END_SIGNATURE	0x65a22c82  #endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S index 060fff8f5c5b..d4f8010a5b1b 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S @@ -13,9 +13,19 @@ ENTRY(_start)  SECTIONS  {  	. = 0; +	.jump	: { +		*(.jump) +	} = 0x90909090 + +	. = WAKEUP_HEADER_OFFSET; +	.header : { +		*(.header) +	} + +	. = ALIGN(16);  	.text : {  		 *(.text*) -	} +	} = 0x90909090  	. = ALIGN(16);  	.rodata : { @@ -33,11 +43,6 @@ SECTIONS  		 *(.data*)  	} -	.signature : { -		end_signature = .; -		LONG(0x65a22c82) -	} -  	. = ALIGN(16);  	.bss :	{  		__bss_start = .; @@ -45,20 +50,13 @@ SECTIONS  		__bss_end = .;  	} -	. = HEADER_OFFSET; -	.header : { -		*(.header) +	.signature : { +		*(.signature)  	} -	. = ALIGN(16);  	_end = .;  	/DISCARD/ : {  		*(.note*)  	} - -	/* -	 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: -	 */ -	. = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!");  } diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 68d1537b8c81..ff93bc1b09c3 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -18,37 +18,28 @@  #include "realmode/wakeup.h"  #include "sleep.h" -unsigned long acpi_wakeup_address;  unsigned long acpi_realmode_flags; -/* address in low memory of the wakeup routine. */ -static unsigned long acpi_realmode; -  #if defined(CONFIG_SMP) && defined(CONFIG_64BIT)  static char temp_stack[4096];  #endif  /** - * acpi_save_state_mem - save kernel state + * acpi_suspend_lowlevel - save kernel state   *   * Create an identity mapped page table and copy the wakeup routine to   * low memory. - * - * Note that this is too late to change acpi_wakeup_address.   */ -int acpi_save_state_mem(void) +int acpi_suspend_lowlevel(void)  {  	struct wakeup_header *header; +	/* address in low memory of the wakeup routine. */ +	char *acpi_realmode; -	if (!acpi_realmode) { -		printk(KERN_ERR "Could not allocate memory during boot, " -		       "S3 disabled\n"); -		return -ENOMEM; -	} -	memcpy((void *)acpi_realmode, &wakeup_code_start, WAKEUP_SIZE); +	acpi_realmode = TRAMPOLINE_SYM(acpi_wakeup_code); -	header = (struct wakeup_header *)(acpi_realmode + HEADER_OFFSET); -	if (header->signature != 0x51ee1111) { +	header = (struct wakeup_header *)(acpi_realmode + WAKEUP_HEADER_OFFSET); +	if (header->signature != WAKEUP_HEADER_SIGNATURE) {  		printk(KERN_ERR "wakeup header does not match\n");  		return -EINVAL;  	} @@ -68,9 +59,7 @@ int acpi_save_state_mem(void)  	/* GDT[0]: GDT self-pointer */  	header->wakeup_gdt[0] =  		(u64)(sizeof(header->wakeup_gdt) - 1) + -		((u64)(acpi_wakeup_address + -			((char *)&header->wakeup_gdt - (char *)acpi_realmode)) -				<< 16); +		((u64)__pa(&header->wakeup_gdt) << 16);  	/* GDT[1]: big real mode-like code segment */  	header->wakeup_gdt[1] =  		GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff); @@ -96,7 +85,7 @@ int acpi_save_state_mem(void)  	header->pmode_cr3 = (u32)__pa(&initial_page_table);  	saved_magic = 0x12345678;  #else /* CONFIG_64BIT */ -	header->trampoline_segment = setup_trampoline() >> 4; +	header->trampoline_segment = trampoline_address() >> 4;  #ifdef CONFIG_SMP  	stack_start = (unsigned long)temp_stack + sizeof(temp_stack);  	early_gdt_descr.address = @@ -107,56 +96,10 @@ int acpi_save_state_mem(void)         saved_magic = 0x123456789abcdef0L;  #endif /* CONFIG_64BIT */ +	do_suspend_lowlevel();  	return 0;  } -/* - * acpi_restore_state - undo effects of acpi_save_state_mem - */ -void acpi_restore_state_mem(void) -{ -} - - -/** - * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation - * - * We allocate a page from the first 1MB of memory for the wakeup - * routine for when we come back from a sleep state. The - * runtime allocator allows specification of <16MB pages, but not - * <1MB pages. - */ -void __init acpi_reserve_wakeup_memory(void) -{ -	phys_addr_t mem; - -	if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { -		printk(KERN_ERR -		       "ACPI: Wakeup code way too big, S3 disabled.\n"); -		return; -	} - -	mem = memblock_find_in_range(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE); - -	if (mem == MEMBLOCK_ERROR) { -		printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); -		return; -	} -	acpi_realmode = (unsigned long) phys_to_virt(mem); -	acpi_wakeup_address = mem; -	memblock_x86_reserve_range(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP"); -} - -int __init acpi_configure_wakeup_memory(void) -{ -	if (acpi_realmode) -		set_memory_x(acpi_realmode, WAKEUP_SIZE >> PAGE_SHIFT); - -	return 0; -} -arch_initcall(acpi_configure_wakeup_memory); - -  static int __init acpi_sleep_setup(char *str)  {  	while ((str != NULL) && (*str != '\0')) { diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index adbcbaa6f1df..416d4be13fef 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h @@ -4,13 +4,12 @@  #include <asm/trampoline.h> -extern char wakeup_code_start, wakeup_code_end; -  extern unsigned long saved_video_mode;  extern long saved_magic;  extern int wakeup_pmode_return; -extern char swsusp_pg_dir[PAGE_SIZE];  extern unsigned long acpi_copy_wakeup_routine(unsigned long);  extern void wakeup_long64(void); + +extern void do_suspend_lowlevel(void); diff --git a/arch/x86/kernel/acpi/wakeup_rm.S b/arch/x86/kernel/acpi/wakeup_rm.S index 6ff3b5730575..63b8ab524f2c 100644 --- a/arch/x86/kernel/acpi/wakeup_rm.S +++ b/arch/x86/kernel/acpi/wakeup_rm.S @@ -2,9 +2,11 @@   * Wrapper script for the realmode binary as a transport object   * before copying to low memory.   */ -	.section ".rodata","a" -	.globl	wakeup_code_start, wakeup_code_end -wakeup_code_start: +#include <asm/page_types.h> + +	.section ".x86_trampoline","a" +	.balign PAGE_SIZE +	.globl	acpi_wakeup_code +acpi_wakeup_code:  	.incbin	"arch/x86/kernel/acpi/realmode/wakeup.bin" -wakeup_code_end: -	.size	wakeup_code_start, .-wakeup_code_start +	.size	acpi_wakeup_code, .-acpi_wakeup_code diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 65634190ffd6..6801959a8b2a 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -15,7 +15,7 @@ static u32 *flush_words;  const struct pci_device_id amd_nb_misc_ids[] = {  	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },  	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, -	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, +	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },  	{}  };  EXPORT_SYMBOL(amd_nb_misc_ids); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f15c6f76071c..180ca240e03c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3983,7 +3983,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)  static __init int bad_ioapic(unsigned long address)  {  	if (nr_ioapics >= MAX_IO_APICS) { -		printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded " +		printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded "  		       "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);  		return 1;  	} diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index a10e516dd78d..0b4be431c620 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -227,6 +227,7 @@  #include <linux/suspend.h>  #include <linux/kthread.h>  #include <linux/jiffies.h> +#include <linux/acpi.h>  #include <asm/system.h>  #include <asm/uaccess.h> @@ -975,20 +976,10 @@ recalc:  static void apm_power_off(void)  { -	unsigned char po_bios_call[] = { -		0xb8, 0x00, 0x10,	/* movw  $0x1000,ax  */ -		0x8e, 0xd0,		/* movw  ax,ss       */ -		0xbc, 0x00, 0xf0,	/* movw  $0xf000,sp  */ -		0xb8, 0x07, 0x53,	/* movw  $0x5307,ax  */ -		0xbb, 0x01, 0x00,	/* movw  $0x0001,bx  */ -		0xb9, 0x03, 0x00,	/* movw  $0x0003,cx  */ -		0xcd, 0x15		/* int   $0x15       */ -	}; -  	/* Some bioses don't like being called from CPU != 0 */  	if (apm_info.realmode_power_off) {  		set_cpus_allowed_ptr(current, cpumask_of(0)); -		machine_real_restart(po_bios_call, sizeof(po_bios_call)); +		machine_real_restart(MRR_APM);  	} else {  		(void)set_system_power_state(APM_STATE_OFF);  	} @@ -2331,12 +2322,11 @@ static int __init apm_init(void)  		apm_info.disabled = 1;  		return -ENODEV;  	} -	if (pm_flags & PM_ACPI) { +	if (!acpi_disabled) {  		printk(KERN_NOTICE "apm: overridden by ACPI.\n");  		apm_info.disabled = 1;  		return -ENODEV;  	} -	pm_flags |= PM_APM;  	/*  	 * Set up the long jump entry point to the APM BIOS, which is called @@ -2428,7 +2418,6 @@ static void __exit apm_exit(void)  		kthread_stop(kapmd_task);  		kapmd_task = NULL;  	} -	pm_flags &= ~PM_APM;  }  module_init(apm_init); diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index 4a5a42b842ad..755a31e0f5b0 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c @@ -315,8 +315,6 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle)  	input.count = 4;  	input.pointer = in_params; -	input.count = 4; -	input.pointer = in_params;  	in_params[0].type               = ACPI_TYPE_BUFFER;  	in_params[0].buffer.length      = 16;  	in_params[0].buffer.pointer     = OSC_UUID; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index b41f7da4555b..2368e38327b3 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -630,8 +630,7 @@ static void print_basics(struct powernow_k8_data *data)  					data->powernow_table[j].frequency/1000);  			} else {  				printk(KERN_INFO PFX -					"   %d : fid 0x%x (%d MHz), vid 0x%x\n", -					j, +					"fid 0x%x (%d MHz), vid 0x%x\n",  					data->powernow_table[j].index & 0xff,  					data->powernow_table[j].frequency/1000,  					data->powernow_table[j].index >> 8); diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 8209472b27a5..83930deec3c6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -106,24 +106,34 @@ int apei_write_mce(struct mce *m)  ssize_t apei_read_mce(struct mce *m, u64 *record_id)  {  	struct cper_mce_record rcd; -	ssize_t len; - -	len = erst_read_next(&rcd.hdr, sizeof(rcd)); -	if (len <= 0) -		return len; -	/* Can not skip other records in storage via ERST unless clear them */ -	else if (len != sizeof(rcd) || -		 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) { -		if (printk_ratelimit()) -			pr_warning( -			"MCE-APEI: Can not skip the unknown record in ERST"); -		return -EIO; -	} - +	int rc, pos; + +	rc = erst_get_record_id_begin(&pos); +	if (rc) +		return rc; +retry: +	rc = erst_get_record_id_next(&pos, record_id); +	if (rc) +		goto out; +	/* no more record */ +	if (*record_id == APEI_ERST_INVALID_RECORD_ID) +		goto out; +	rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd)); +	/* someone else has cleared the record, try next one */ +	if (rc == -ENOENT) +		goto retry; +	else if (rc < 0) +		goto out; +	/* try to skip other type records in storage */ +	else if (rc != sizeof(rcd) || +		 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) +		goto retry;  	memcpy(m, &rcd.mce, sizeof(*m)); -	*record_id = rcd.hdr.record_id; +	rc = sizeof(*m); +out: +	erst_get_record_id_end(); -	return sizeof(*m); +	return rc;  }  /* Check whether there is record in ERST */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 30612764cd3b..87eab4a27dfc 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -178,8 +178,6 @@ struct cpu_hw_events {   */  #define INTEL_UEVENT_CONSTRAINT(c, n)	\  	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) -#define PEBS_EVENT_CONSTRAINT(c, n)	\ -	INTEL_UEVENT_CONSTRAINT(c, n)  #define EVENT_CONSTRAINT_END		\  	EVENT_CONSTRAINT(0, 0, 0) diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index b95c66ae4a2a..bab491b8ee25 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -362,87 +362,69 @@ static int intel_pmu_drain_bts_buffer(void)   * PEBS   */  static struct event_constraint intel_core2_pebs_event_constraints[] = { -	PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ -	PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ -	PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ -	PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ -	INTEL_EVENT_CONSTRAINT(0xcb, 0x1),  /* MEM_LOAD_RETIRED.* */ +	INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ +	INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ +	INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ +	INTEL_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ +	INTEL_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */  	EVENT_CONSTRAINT_END  };  static struct event_constraint intel_atom_pebs_event_constraints[] = { -	PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ -	PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ -	INTEL_EVENT_CONSTRAINT(0xcb, 0x1),  /* MEM_LOAD_RETIRED.* */ +	INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ +	INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ +	INTEL_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */  	EVENT_CONSTRAINT_END  };  static struct event_constraint intel_nehalem_pebs_event_constraints[] = { -	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),  /* MEM_INST_RETIRED.* */ -	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),  /* MEM_UNCORE_RETIRED.* */ -	PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ -	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),  /* INST_RETIRED.ANY */ -	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),  /* UOPS_RETIRED.* */ -	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),  /* BR_INST_RETIRED.* */ -	PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ -	INTEL_EVENT_CONSTRAINT(0xc7, 0xf),  /* SSEX_UOPS_RETIRED.* */ -	PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ -	INTEL_EVENT_CONSTRAINT(0xcb, 0xf),  /* MEM_LOAD_RETIRED.* */ -	INTEL_EVENT_CONSTRAINT(0xf7, 0xf),  /* FP_ASSIST.* */ +	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */ +	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */ +	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ +	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */ +	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */ +	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */ +	INTEL_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ +	INTEL_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */ +	INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ +	INTEL_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */ +	INTEL_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */  	EVENT_CONSTRAINT_END  };  static struct event_constraint intel_westmere_pebs_event_constraints[] = { -	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),  /* MEM_INST_RETIRED.* */ -	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),  /* MEM_UNCORE_RETIRED.* */ -	PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ -	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),  /* INSTR_RETIRED.* */ -	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),  /* UOPS_RETIRED.* */ - -	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),  /* BR_INST_RETIRED.* */ -	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),  /* BR_MISP_RETIRED.* */ -	INTEL_EVENT_CONSTRAINT(0xc7, 0xf),  /* SSEX_UOPS_RETIRED.* */ -	PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ -	INTEL_EVENT_CONSTRAINT(0xcb, 0xf),  /* MEM_LOAD_RETIRED.* */ -	INTEL_EVENT_CONSTRAINT(0xf7, 0xf),  /* FP_ASSIST.* */ +	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */ +	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */ +	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ +	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */ +	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */ +	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */ +	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */ +	INTEL_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */ +	INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ +	INTEL_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */ +	INTEL_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */  	EVENT_CONSTRAINT_END  };  static struct event_constraint intel_snb_pebs_events[] = { -	PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ -	PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ -	PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ -	PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */ -	PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */ -	PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */ -	PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */ -	PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */ -	PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */ -	PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */ -	PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */ -	PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ -	PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */ -	PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */ -	PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */ -	PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ -	PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */ -	PEBS_EVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ -	PEBS_EVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ -	PEBS_EVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ -	PEBS_EVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */ -	PEBS_EVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */ -	PEBS_EVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ -	PEBS_EVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ -	PEBS_EVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ -	PEBS_EVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */ -	PEBS_EVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */ -	PEBS_EVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.LLC_HIT */ -	PEBS_EVENT_CONSTRAINT(0x40d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */ -	PEBS_EVENT_CONSTRAINT(0x01d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */ -	PEBS_EVENT_CONSTRAINT(0x02d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */ -	PEBS_EVENT_CONSTRAINT(0x04d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM */ -	PEBS_EVENT_CONSTRAINT(0x08d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE */ -	PEBS_EVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ +	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ +	INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ +	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ +	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */ +	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */ +	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */ +	INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ +	INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ +	INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ +	INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */ +	INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */ +	INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ +	INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ +	INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ +	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */ +	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ +	INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */  	EVENT_CONSTRAINT_END  }; diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index d5cd13945d5a..642f75a68cd5 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c @@ -14,9 +14,6 @@  static void *kdump_buf_page; -/* Stores the physical address of elf header of crash image. */ -unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; -  static inline bool is_crashed_pfn_valid(unsigned long pfn)  {  #ifndef CONFIG_X86_PAE diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 994828899e09..afa64adb75ee 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -10,9 +10,6 @@  #include <linux/uaccess.h>  #include <linux/io.h> -/* Stores the physical address of elf header of crash image. */ -unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; -  /**   * copy_oldmem_page - copy one page from "oldmem"   * @pfn: page frame number to be copied diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 999e2793590b..81ac6c78c01c 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -322,16 +322,6 @@ void die(const char *str, struct pt_regs *regs, long err)  	oops_end(flags, regs, sig);  } -static int __init oops_setup(char *s) -{ -	if (!s) -		return -EINVAL; -	if (!strcmp(s, "panic")) -		panic_on_oops = 1; -	return 0; -} -early_param("oops", oops_setup); -  static int __init kstack_setup(char *s)  {  	if (!s) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index cdf5bfd9d4d5..3e2ef8425316 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -11,6 +11,7 @@  #include <linux/kernel.h>  #include <linux/types.h>  #include <linux/init.h> +#include <linux/crash_dump.h>  #include <linux/bootmem.h>  #include <linux/pfn.h>  #include <linux/suspend.h> diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index fa41f7298c84..5c1a91974918 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1414,7 +1414,7 @@ ENTRY(async_page_fault)  	pushl_cfi $do_async_page_fault  	jmp error_code  	CFI_ENDPROC -END(apf_page_fault) +END(async_page_fault)  #endif  /* diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 7f138b3c3c52..d6d6bb361931 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -34,15 +34,6 @@ void __init i386_start_kernel(void)  {  	memblock_init(); -#ifdef CONFIG_X86_TRAMPOLINE -	/* -	 * But first pinch a few for the stack/trampoline stuff -	 * FIXME: Don't need the extra page at 4K, but need to fix -	 * trampoline before removing it. (see the GDT stuff) -	 */ -	memblock_x86_reserve_range(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); -#endif -  	memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");  #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 239046bd447f..e11e39478a49 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -136,10 +136,9 @@ ident_complete:  	/* Fixup phys_base */  	addq	%rbp, phys_base(%rip) -#ifdef CONFIG_X86_TRAMPOLINE +	/* Fixup trampoline */  	addq	%rbp, trampoline_level4_pgt + 0(%rip)  	addq	%rbp, trampoline_level4_pgt + (511*8)(%rip) -#endif  	/* Due to ENTRY(), sometimes the empty space gets filled with  	 * zeros. Better take a jmp than relying on empty space being diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8dc44662394b..33c07b0b122e 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -493,7 +493,7 @@ static void __init kvm_smp_prepare_boot_cpu(void)  	native_smp_prepare_boot_cpu();  } -static void kvm_guest_cpu_online(void *dummy) +static void __cpuinit kvm_guest_cpu_online(void *dummy)  {  	kvm_guest_cpu_init();  } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index bd387e8f73b4..6c9dd922ac0d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -501,6 +501,10 @@ void set_personality_64bit(void)  	/* Make sure to be in 64bit mode */  	clear_thread_flag(TIF_IA32); +	/* Ensure the corresponding mm is not marked. */ +	if (current->mm) +		current->mm->context.ia32_compat = 0; +  	/* TBD: overwrites user setup. Should have two bits.  	   But 64bit processes have always behaved this way,  	   so it's not too bad. The main problem is just that @@ -516,6 +520,10 @@ void set_personality_ia32(void)  	set_thread_flag(TIF_IA32);  	current->personality |= force_personality32; +	/* Mark the associated mm as containing 32-bit tasks. */ +	if (current->mm) +		current->mm->context.ia32_compat = 1; +  	/* Prepare the first "return" to user space */  	current_thread_info()->status |= TS_COMPAT;  } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 715037caeb43..d3ce37edb54d 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -303,68 +303,16 @@ static int __init reboot_init(void)  }  core_initcall(reboot_init); -/* The following code and data reboots the machine by switching to real -   mode and jumping to the BIOS reset entry point, as if the CPU has -   really been reset.  The previous version asked the keyboard -   controller to pulse the CPU reset line, which is more thorough, but -   doesn't work with at least one type of 486 motherboard.  It is easy -   to stop this code working; hence the copious comments. */ -static const unsigned long long -real_mode_gdt_entries [3] = -{ -	0x0000000000000000ULL,	/* Null descriptor */ -	0x00009b000000ffffULL,	/* 16-bit real-mode 64k code at 0x00000000 */ -	0x000093000100ffffULL	/* 16-bit real-mode 64k data at 0x00000100 */ -}; +extern const unsigned char machine_real_restart_asm[]; +extern const u64 machine_real_restart_gdt[3]; -static const struct desc_ptr -real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, -real_mode_idt = { 0x3ff, 0 }; - -/* This is 16-bit protected mode code to disable paging and the cache, -   switch to real mode and jump to the BIOS reset code. - -   The instruction that switches to real mode by writing to CR0 must be -   followed immediately by a far jump instruction, which set CS to a -   valid value for real mode, and flushes the prefetch queue to avoid -   running instructions that have already been decoded in protected -   mode. - -   Clears all the flags except ET, especially PG (paging), PE -   (protected-mode enable) and TS (task switch for coprocessor state -   save).  Flushes the TLB after paging has been disabled.  Sets CD and -   NW, to disable the cache on a 486, and invalidates the cache.  This -   is more like the state of a 486 after reset.  I don't know if -   something else should be done for other chips. - -   More could be done here to set up the registers as if a CPU reset had -   occurred; hopefully real BIOSs don't assume much. */ -static const unsigned char real_mode_switch [] = -{ -	0x66, 0x0f, 0x20, 0xc0,			/*    movl  %cr0,%eax        */ -	0x66, 0x83, 0xe0, 0x11,			/*    andl  $0x00000011,%eax */ -	0x66, 0x0d, 0x00, 0x00, 0x00, 0x60,	/*    orl   $0x60000000,%eax */ -	0x66, 0x0f, 0x22, 0xc0,			/*    movl  %eax,%cr0        */ -	0x66, 0x0f, 0x22, 0xd8,			/*    movl  %eax,%cr3        */ -	0x66, 0x0f, 0x20, 0xc3,			/*    movl  %cr0,%ebx        */ -	0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60,	/*    andl  $0x60000000,%ebx */ -	0x74, 0x02,				/*    jz    f                */ -	0x0f, 0x09,				/*    wbinvd                 */ -	0x24, 0x10,				/* f: andb  $0x10,al         */ -	0x66, 0x0f, 0x22, 0xc0			/*    movl  %eax,%cr0        */ -}; -static const unsigned char jump_to_bios [] = +void machine_real_restart(unsigned int type)  { -	0xea, 0x00, 0x00, 0xff, 0xff		/*    ljmp  $0xffff,$0x0000  */ -}; +	void *restart_va; +	unsigned long restart_pa; +	void (*restart_lowmem)(unsigned int); +	u64 *lowmem_gdt; -/* - * Switch to real mode and then execute the code - * specified by the code and length parameters. - * We assume that length will aways be less that 100! - */ -void machine_real_restart(const unsigned char *code, int length) -{  	local_irq_disable();  	/* Write zero to CMOS register number 0x0f, which the BIOS POST @@ -392,41 +340,23 @@ void machine_real_restart(const unsigned char *code, int length)  	   too. */  	*((unsigned short *)0x472) = reboot_mode; -	/* For the switch to real mode, copy some code to low memory.  It has -	   to be in the first 64k because it is running in 16-bit mode, and it -	   has to have the same physical and virtual address, because it turns -	   off paging.  Copy it near the end of the first page, out of the way -	   of BIOS variables. */ -	memcpy((void *)(0x1000 - sizeof(real_mode_switch) - 100), -		real_mode_switch, sizeof (real_mode_switch)); -	memcpy((void *)(0x1000 - 100), code, length); - -	/* Set up the IDT for real mode. */ -	load_idt(&real_mode_idt); - -	/* Set up a GDT from which we can load segment descriptors for real -	   mode.  The GDT is not used in real mode; it is just needed here to -	   prepare the descriptors. */ -	load_gdt(&real_mode_gdt); - -	/* Load the data segment registers, and thus the descriptors ready for -	   real mode.  The base address of each segment is 0x100, 16 times the -	   selector value being loaded here.  This is so that the segment -	   registers don't have to be reloaded after switching to real mode: -	   the values are consistent for real mode operation already. */ -	__asm__ __volatile__ ("movl $0x0010,%%eax\n" -				"\tmovl %%eax,%%ds\n" -				"\tmovl %%eax,%%es\n" -				"\tmovl %%eax,%%fs\n" -				"\tmovl %%eax,%%gs\n" -				"\tmovl %%eax,%%ss" : : : "eax"); - -	/* Jump to the 16-bit code that we copied earlier.  It disables paging -	   and the cache, switches to real mode, and jumps to the BIOS reset -	   entry point. */ -	__asm__ __volatile__ ("ljmp $0x0008,%0" -				: -				: "i" ((void *)(0x1000 - sizeof (real_mode_switch) - 100))); +	/* Patch the GDT in the low memory trampoline */ +	lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt); + +	restart_va = TRAMPOLINE_SYM(machine_real_restart_asm); +	restart_pa = virt_to_phys(restart_va); +	restart_lowmem = (void (*)(unsigned int))restart_pa; + +	/* GDT[0]: GDT self-pointer */ +	lowmem_gdt[0] = +		(u64)(sizeof(machine_real_restart_gdt) - 1) + +		((u64)virt_to_phys(lowmem_gdt) << 16); +	/* GDT[1]: 64K real mode code segment */ +	lowmem_gdt[1] = +		GDT_ENTRY(0x009b, restart_pa, 0xffff); + +	/* Jump to the identity-mapped low memory code */ +	restart_lowmem(type);  }  #ifdef CONFIG_APM_MODULE  EXPORT_SYMBOL(machine_real_restart); @@ -581,7 +511,7 @@ static void native_machine_emergency_restart(void)  #ifdef CONFIG_X86_32  		case BOOT_BIOS: -			machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); +			machine_real_restart(MRR_BIOS);  			reboot_type = BOOT_KBD;  			break; diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/kernel/reboot_32.S new file mode 100644 index 000000000000..29092b38d816 --- /dev/null +++ b/arch/x86/kernel/reboot_32.S @@ -0,0 +1,135 @@ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/segment.h> +#include <asm/page_types.h> + +/* + * The following code and data reboots the machine by switching to real + * mode and jumping to the BIOS reset entry point, as if the CPU has + * really been reset.  The previous version asked the keyboard + * controller to pulse the CPU reset line, which is more thorough, but + * doesn't work with at least one type of 486 motherboard.  It is easy + * to stop this code working; hence the copious comments. + * + * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax. + */ +	.section ".x86_trampoline","a" +	.balign 16 +	.code32 +ENTRY(machine_real_restart_asm) +r_base = . +	/* Get our own relocated address */ +	call	1f +1:	popl	%ebx +	subl	$1b, %ebx + +	/* Compute the equivalent real-mode segment */ +	movl	%ebx, %ecx +	shrl	$4, %ecx +	 +	/* Patch post-real-mode segment jump */ +	movw	dispatch_table(%ebx,%eax,2),%ax +	movw	%ax, 101f(%ebx) +	movw	%cx, 102f(%ebx) + +	/* Set up the IDT for real mode. */ +	lidtl	machine_real_restart_idt(%ebx) + +	/* +	 * Set up a GDT from which we can load segment descriptors for real +	 * mode.  The GDT is not used in real mode; it is just needed here to +	 * prepare the descriptors. +	 */ +	lgdtl	machine_real_restart_gdt(%ebx) + +	/* +	 * Load the data segment registers with 16-bit compatible values +	 */ +	movl	$16, %ecx +	movl	%ecx, %ds +	movl	%ecx, %es +	movl	%ecx, %fs +	movl	%ecx, %gs +	movl	%ecx, %ss +	ljmpl	$8, $1f - r_base + +/* + * This is 16-bit protected mode code to disable paging and the cache, + * switch to real mode and jump to the BIOS reset code. + * + * The instruction that switches to real mode by writing to CR0 must be + * followed immediately by a far jump instruction, which set CS to a + * valid value for real mode, and flushes the prefetch queue to avoid + * running instructions that have already been decoded in protected + * mode. + * + * Clears all the flags except ET, especially PG (paging), PE + * (protected-mode enable) and TS (task switch for coprocessor state + * save).  Flushes the TLB after paging has been disabled.  Sets CD and + * NW, to disable the cache on a 486, and invalidates the cache.  This + * is more like the state of a 486 after reset.  I don't know if + * something else should be done for other chips. + * + * More could be done here to set up the registers as if a CPU reset had + * occurred; hopefully real BIOSs don't assume much.  This is not the + * actual BIOS entry point, anyway (that is at 0xfffffff0). + * + * Most of this work is probably excessive, but it is what is tested. + */ +	.code16 +1: +	xorl	%ecx, %ecx +	movl	%cr0, %eax +	andl	$0x00000011, %eax +	orl	$0x60000000, %eax +	movl	%eax, %cr0 +	movl	%ecx, %cr3 +	movl	%cr0, %edx +	andl	$0x60000000, %edx	/* If no cache bits -> no wbinvd */ +	jz	2f +	wbinvd +2: +	andb	$0x10, %al +	movl	%eax, %cr0 +	.byte	0xea			/* ljmpw */ +101:	.word	0			/* Offset */ +102:	.word	0			/* Segment */ + +bios: +	ljmpw	$0xf000, $0xfff0 + +apm: +	movw	$0x1000, %ax +	movw	%ax, %ss +	movw	$0xf000, %sp +	movw	$0x5307, %ax +	movw	$0x0001, %bx +	movw	$0x0003, %cx +	int	$0x15 + +END(machine_real_restart_asm) + +	.balign 16 +	/* These must match <asm/reboot.h */ +dispatch_table: +	.word	bios - r_base +	.word	apm - r_base +END(dispatch_table) + +	.balign 16 +machine_real_restart_idt: +	.word	0xffff		/* Length - real mode default value */ +	.long	0		/* Base - real mode default value */ +END(machine_real_restart_idt) + +	.balign 16 +ENTRY(machine_real_restart_gdt) +	.quad	0		/* Self-pointer, filled in by PM code */ +	.quad	0		/* 16-bit code segment, filled in by PM code */ +	/* +	 * 16-bit data segment with the selector value 16 = 0x10 and +	 * base value 0x100; since this is consistent with real mode +	 * semantics we don't have to reload the segments once CR0.PE = 0. +	 */ +	.quad	GDT_ENTRY(0x0093, 0x100, 0xffff) +END(machine_real_restart_gdt) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4a52a5f9afcb..5a0484a95ad6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -619,28 +619,6 @@ void __init reserve_standard_io_resources(void)  } -/* - * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by - * is_kdump_kernel() to determine if we are booting after a panic. Hence - * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE. - */ - -#ifdef CONFIG_CRASH_DUMP -/* elfcorehdr= specifies the location of elf core header - * stored by the crashed kernel. This option will be passed - * by kexec loader to the capture kernel. - */ -static int __init setup_elfcorehdr(char *arg) -{ -	char *end; -	if (!arg) -		return -EINVAL; -	elfcorehdr_addr = memparse(arg, &end); -	return end > arg ? 0 : -EINVAL; -} -early_param("elfcorehdr", setup_elfcorehdr); -#endif -  static __init void reserve_ibft_region(void)  {  	unsigned long addr, size = 0; @@ -944,15 +922,8 @@ void __init setup_arch(char **cmdline_p)  	printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",  			max_pfn_mapped<<PAGE_SHIFT); -	reserve_trampoline_memory(); +	setup_trampolines(); -#ifdef CONFIG_ACPI_SLEEP -	/* -	 * Reserve low memory region for sleep support. -	 * even before init_memory_mapping -	 */ -	acpi_reserve_wakeup_memory(); -#endif  	init_gbpages();  	/* max_pfn_mapped is updated here */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e9efdfd51c8d..c2871d3c71b6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -711,7 +711,7 @@ do_rest:  	stack_start  = c_idle.idle->thread.sp;  	/* start_ip had better be page-aligned! */ -	start_ip = setup_trampoline(); +	start_ip = trampoline_address();  	/* So we see what's up */  	announce_cpu(cpu, apicid); @@ -721,6 +721,8 @@ do_rest:  	 * the targeted processor.  	 */ +	printk(KERN_DEBUG "smpboot cpu %d: start_ip = %lx\n", cpu, start_ip); +  	atomic_set(&init_deasserted, 0);  	if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { @@ -774,8 +776,8 @@ do_rest:  			pr_debug("CPU%d: has booted.\n", cpu);  		else {  			boot_error = 1; -			if (*((volatile unsigned char *)trampoline_base) -					== 0xA5) +			if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) +			    == 0xA5A5A5A5)  				/* trampoline started but...? */  				pr_err("CPU%d: Stuck ??\n", cpu);  			else @@ -801,7 +803,7 @@ do_rest:  	}  	/* mark "stuck" area as not stuck */ -	*((volatile unsigned long *)trampoline_base) = 0; +	*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) = 0;  	if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {  		/* diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 5f181742e8f9..abce34d5c79d 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -343,3 +343,4 @@ ENTRY(sys_call_table)  	.long sys_name_to_handle_at  	.long sys_open_by_handle_at  	.long sys_clock_adjtime +	.long sys_syncfs diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index a375616d77f7..a91ae7709b49 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -2,39 +2,41 @@  #include <linux/memblock.h>  #include <asm/trampoline.h> +#include <asm/cacheflush.h>  #include <asm/pgtable.h> -#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) -#define __trampinit -#define __trampinitdata -#else -#define __trampinit __cpuinit -#define __trampinitdata __cpuinitdata -#endif +unsigned char *x86_trampoline_base; -/* ready for x86_64 and x86 */ -unsigned char *__trampinitdata trampoline_base; - -void __init reserve_trampoline_memory(void) +void __init setup_trampolines(void)  {  	phys_addr_t mem; +	size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start);  	/* Has to be in very low memory so we can execute real-mode AP code. */ -	mem = memblock_find_in_range(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE); +	mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);  	if (mem == MEMBLOCK_ERROR)  		panic("Cannot allocate trampoline\n"); -	trampoline_base = __va(mem); -	memblock_x86_reserve_range(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE"); +	x86_trampoline_base = __va(mem); +	memblock_x86_reserve_range(mem, mem + size, "TRAMPOLINE"); + +	printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", +	       x86_trampoline_base, (unsigned long long)mem, size); + +	memcpy(x86_trampoline_base, x86_trampoline_start, size);  }  /* - * Currently trivial. Write the real->protected mode - * bootstrap into the page concerned. The caller - * has made sure it's suitably aligned. + * setup_trampolines() gets called very early, to guarantee the + * availability of low memory.  This is before the proper kernel page + * tables are set up, so we cannot set page permissions in that + * function.  Thus, we use an arch_initcall instead.   */ -unsigned long __trampinit setup_trampoline(void) +static int __init configure_trampolines(void)  { -	memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); -	return virt_to_phys(trampoline_base); +	size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start); + +	set_memory_x((unsigned long)x86_trampoline_base, size >> PAGE_SHIFT); +	return 0;  } +arch_initcall(configure_trampolines); diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index 8508237e8e43..451c0a7ef7fd 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S @@ -32,9 +32,11 @@  #include <asm/segment.h>  #include <asm/page_types.h> -/* We can free up trampoline after bootup if cpu hotplug is not supported. */ -__CPUINITRODATA -.code16 +#ifdef CONFIG_SMP + +	.section ".x86_trampoline","a" +	.balign PAGE_SIZE +	.code16  ENTRY(trampoline_data)  r_base = . @@ -44,7 +46,7 @@ r_base = .  	cli			# We should be safe anyway -	movl	$0xA5A5A5A5, trampoline_data - r_base +	movl	$0xA5A5A5A5, trampoline_status - r_base  				# write marker for master knows we're running  	/* GDT tables in non default location kernel can be beyond 16MB and @@ -72,5 +74,10 @@ boot_idt_descr:  	.word	0				# idt limit = 0  	.long	0				# idt base = 0L +ENTRY(trampoline_status) +	.long	0 +  .globl trampoline_end  trampoline_end: + +#endif /* CONFIG_SMP */ diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 075d130efcf9..09ff51799e96 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -32,13 +32,9 @@  #include <asm/segment.h>  #include <asm/processor-flags.h> -#ifdef CONFIG_ACPI_SLEEP -.section .rodata, "a", @progbits -#else -/* We can free up the trampoline after bootup if cpu hotplug is not supported. */ -__CPUINITRODATA -#endif -.code16 +	.section ".x86_trampoline","a" +	.balign PAGE_SIZE +	.code16  ENTRY(trampoline_data)  r_base = . @@ -50,7 +46,7 @@ r_base = .  	mov	%ax, %ss -	movl	$0xA5A5A5A5, trampoline_data - r_base +	movl	$0xA5A5A5A5, trampoline_status - r_base  				# write marker for master knows we're running  					# Setup stack @@ -64,10 +60,13 @@ r_base = .  	movzx	%ax, %esi		# Find the 32bit trampoline location  	shll	$4, %esi -					# Fixup the vectors -	addl	%esi, startup_32_vector - r_base -	addl	%esi, startup_64_vector - r_base -	addl	%esi, tgdt + 2 - r_base	# Fixup the gdt pointer +					# Fixup the absolute vectors +	leal	(startup_32 - r_base)(%esi), %eax +	movl	%eax, startup_32_vector - r_base +	leal	(startup_64 - r_base)(%esi), %eax +	movl	%eax, startup_64_vector - r_base +	leal	(tgdt - r_base)(%esi), %eax +	movl	%eax, (tgdt + 2 - r_base)  	/*  	 * GDT tables in non default location kernel can be beyond 16MB and @@ -129,6 +128,7 @@ no_longmode:  	jmp no_longmode  #include "verify_cpu.S" +	.balign 4  	# Careful these need to be in the same 64K segment as the above;  tidt:  	.word	0			# idt limit = 0 @@ -156,6 +156,10 @@ startup_64_vector:  	.long	startup_64 - r_base  	.word	__KERNEL_CS, 0 +	.balign 4 +ENTRY(trampoline_status) +	.long	0 +  trampoline_stack:  	.org 0x1000  trampoline_stack_end: diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0381e1f3baed..624a2016198e 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -231,7 +231,7 @@ SECTIONS  	 * output PHDR, so the next output section - .init.text - should  	 * start another segment - init.  	 */ -	PERCPU_VADDR(0, :percpu) +	PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)  #endif  	INIT_TEXT_SECTION(PAGE_SIZE) @@ -241,6 +241,18 @@ SECTIONS  	INIT_DATA_SECTION(16) +	/* +	 * Code and data for a variety of lowlevel trampolines, to be +	 * copied into base memory (< 1 MiB) during initialization. +	 * Since it is copied early, the main copy can be discarded +	 * afterwards. +	 */ +	 .x86_trampoline : AT(ADDR(.x86_trampoline) - LOAD_OFFSET) { +		x86_trampoline_start = .; +		*(.x86_trampoline) +		x86_trampoline_end = .; +	} +  	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {  		__x86_cpu_dev_start = .;  		*(.x86_cpu_dev.init) @@ -292,6 +304,7 @@ SECTIONS  		*(.iommu_table)  		__iommu_table_end = .;  	} +  	. = ALIGN(8);  	/*  	 * .exit.text is discard at runtime, not link time, to deal with @@ -306,7 +319,7 @@ SECTIONS  	}  #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) -	PERCPU(PAGE_SIZE) +	PERCPU(INTERNODE_CACHE_BYTES, PAGE_SIZE)  #endif  	. = ALIGN(PAGE_SIZE); | 
