diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/lguest.h | 7 | ||||
-rw-r--r-- | arch/x86/include/asm/lguest_hcall.h | 15 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_32_types.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/tlbflush.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/acpi/sleep.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/apm_32.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/asm-offsets_32.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_counter.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/module.c (renamed from arch/x86/kernel/module_64.c) | 82 | ||||
-rw-r--r-- | arch/x86/kernel/module_32.c | 152 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 15 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/x86/lguest/Kconfig | 1 | ||||
-rw-r--r-- | arch/x86/lguest/boot.c | 158 | ||||
-rw-r--r-- | arch/x86/lguest/i386_head.S | 60 | ||||
-rw-r--r-- | arch/x86/mm/init_32.c | 10 | ||||
-rw-r--r-- | arch/x86/power/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/power/cpu.c (renamed from arch/x86/power/cpu_64.c) | 165 | ||||
-rw-r--r-- | arch/x86/power/cpu_32.c | 148 |
21 files changed, 415 insertions, 436 deletions
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index 1caf57628b9c..313389cd50d2 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h @@ -17,8 +17,13 @@ /* Pages for switcher itself, then two pages per cpu */ #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids) -/* We map at -4M for ease of mapping into the guest (one PTE page). */ +/* We map at -4M (-2M when PAE is activated) for ease of mapping + * into the guest (one PTE page). */ +#ifdef CONFIG_X86_PAE +#define SWITCHER_ADDR 0xFFE00000 +#else #define SWITCHER_ADDR 0xFFC00000 +#endif /* Found in switcher.S */ extern unsigned long default_idt_entries[]; diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index faae1996487b..d31c4a684078 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h @@ -12,11 +12,13 @@ #define LHCALL_TS 8 #define LHCALL_SET_CLOCKEVENT 9 #define LHCALL_HALT 10 +#define LHCALL_SET_PMD 13 #define LHCALL_SET_PTE 14 -#define LHCALL_SET_PMD 15 +#define LHCALL_SET_PGD 15 #define LHCALL_LOAD_TLS 16 #define LHCALL_NOTIFY 17 #define LHCALL_LOAD_GDT_ENTRY 18 +#define LHCALL_SEND_INTERRUPTS 19 #define LGUEST_TRAP_ENTRY 0x1F @@ -32,10 +34,10 @@ * operations? There are two ways: the direct way is to make a "hypercall", * to make requests of the Host Itself. * - * We use the KVM hypercall mechanism. Eighteen hypercalls are + * We use the KVM hypercall mechanism. Seventeen hypercalls are * available: the hypercall number is put in the %eax register, and the - * arguments (when required) are placed in %ebx, %ecx and %edx. If a return - * value makes sense, it's returned in %eax. + * arguments (when required) are placed in %ebx, %ecx, %edx and %esi. + * If a return value makes sense, it's returned in %eax. * * Grossly invalid calls result in Sudden Death at the hands of the vengeful * Host, rather than returning failure. This reflects Winston Churchill's @@ -47,8 +49,9 @@ #define LHCALL_RING_SIZE 64 struct hcall_args { - /* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */ - unsigned long arg0, arg1, arg2, arg3; + /* These map directly onto eax, ebx, ecx, edx and esi + * in struct lguest_regs */ + unsigned long arg0, arg1, arg2, arg3, arg4; }; #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 2733fad45f98..5e67c1532314 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -46,6 +46,10 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ # define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) #endif +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END +#define MODULES_LEN (MODULES_VADDR - MODULES_END) + #define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) #endif /* _ASM_X86_PGTABLE_32_DEFS_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index a5ecc9c33e92..7f3eba08e7de 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -172,6 +172,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_all(); } -extern void zap_low_mappings(void); +extern void zap_low_mappings(bool early); #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4f78bd682125..f3477bb84566 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -73,7 +73,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_MODULES) += module_$(BITS).o +obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o obj-$(CONFIG_KGDB) += kgdb.o diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 7c243a2c5115..ca93638ba430 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -104,7 +104,7 @@ int acpi_save_state_mem(void) initial_gs = per_cpu_offset(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; - saved_magic = 0x123456789abcdef0; + saved_magic = 0x123456789abcdef0L; #endif /* CONFIG_64BIT */ return 0; diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 49e0939bac42..79302e9a33a4 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1233,9 +1233,9 @@ static int suspend(int vetoable) int err; struct apm_user *as; - device_suspend(PMSG_SUSPEND); + dpm_suspend_start(PMSG_SUSPEND); - device_power_down(PMSG_SUSPEND); + dpm_suspend_noirq(PMSG_SUSPEND); local_irq_disable(); sysdev_suspend(PMSG_SUSPEND); @@ -1259,9 +1259,9 @@ static int suspend(int vetoable) sysdev_resume(); local_irq_enable(); - device_power_up(PMSG_RESUME); + dpm_resume_noirq(PMSG_RESUME); - device_resume(PMSG_RESUME); + dpm_resume_end(PMSG_RESUME); queue_event(APM_NORMAL_RESUME, NULL); spin_lock(&user_list_lock); for (as = user_list; as != NULL; as = as->next) { @@ -1277,7 +1277,7 @@ static void standby(void) { int err; - device_power_down(PMSG_SUSPEND); + dpm_suspend_noirq(PMSG_SUSPEND); local_irq_disable(); sysdev_suspend(PMSG_SUSPEND); @@ -1291,7 +1291,7 @@ static void standby(void) sysdev_resume(); local_irq_enable(); - device_power_up(PMSG_RESUME); + dpm_resume_noirq(PMSG_RESUME); } static apm_event_t get_event(void) @@ -1376,7 +1376,7 @@ static void check_events(void) ignore_bounce = 1; if ((event != APM_NORMAL_RESUME) || (ignore_normal_resume == 0)) { - device_resume(PMSG_RESUME); + dpm_resume_end(PMSG_RESUME); queue_event(event, NULL); } ignore_normal_resume = 0; diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 1a830cbd7015..dfdbf6403895 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -126,6 +126,7 @@ void foo(void) #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) BLANK(); OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); + OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending); OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir); BLANK(); diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 895c82e78455..275bc142cd5d 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -968,6 +968,13 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) if (!x86_pmu.num_counters_fixed) return -1; + /* + * Quirk, IA32_FIXED_CTRs do not work on current Atom processors: + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86_model == 28) + return -1; + event = hwc->config & ARCH_PERFMON_EVENT_MASK; if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module.c index c23880b90b5c..89f386f044e4 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module.c @@ -1,6 +1,5 @@ -/* Kernel module help for x86-64 +/* Kernel module help for x86. Copyright (C) 2001 Rusty Russell. - Copyright (C) 2002,2003 Andi Kleen, SuSE Labs. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,23 +21,18 @@ #include <linux/fs.h> #include <linux/string.h> #include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/slab.h> #include <linux/bug.h> +#include <linux/mm.h> #include <asm/system.h> #include <asm/page.h> #include <asm/pgtable.h> +#if 0 +#define DEBUGP printk +#else #define DEBUGP(fmt...) - -#ifndef CONFIG_UML -void module_free(struct module *mod, void *module_region) -{ - vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ -} +#endif void *module_alloc(unsigned long size) { @@ -54,9 +48,15 @@ void *module_alloc(unsigned long size) if (!area) return NULL; - return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC); + return __vmalloc_area(area, GFP_KERNEL | __GFP_HIGHMEM, + PAGE_KERNEL_EXEC); +} + +/* Free memory returned from module_alloc */ +void module_free(struct module *mod, void *module_region) +{ + vfree(module_region); } -#endif /* We don't need anything special. */ int module_frob_arch_sections(Elf_Ehdr *hdr, @@ -67,6 +67,58 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, return 0; } +#ifdef CONFIG_X86_32 +int apply_relocate(Elf32_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + unsigned int i; + Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; + Elf32_Sym *sym; + uint32_t *location; + + DEBUGP("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + /* This is where to make the change */ + location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rel[i].r_offset; + /* This is the symbol it is referring to. Note that all + undefined symbols have been resolved. */ + sym = (Elf32_Sym *)sechdrs[symindex].sh_addr + + ELF32_R_SYM(rel[i].r_info); + + switch (ELF32_R_TYPE(rel[i].r_info)) { + case R_386_32: + /* We add the value into the location given */ + *location += sym->st_value; + break; + case R_386_PC32: + /* Add the value, subtract its postition */ + *location += sym->st_value - (uint32_t)location; + break; + default: + printk(KERN_ERR "module %s: Unknown relocation: %u\n", + me->name, ELF32_R_TYPE(rel[i].r_info)); + return -ENOEXEC; + } + } + return 0; +} + +int apply_relocate_add(Elf32_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", + me->name); + return -ENOEXEC; +} +#else /*X86_64*/ int apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, @@ -147,6 +199,8 @@ int apply_relocate(Elf_Shdr *sechdrs, return -ENOSYS; } +#endif + int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c deleted file mode 100644 index 0edd819050e7..000000000000 --- a/arch/x86/kernel/module_32.c +++ /dev/null @@ -1,152 +0,0 @@ -/* Kernel module help for i386. - Copyright (C) 2001 Rusty Russell. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -#include <linux/moduleloader.h> -#include <linux/elf.h> -#include <linux/vmalloc.h> -#include <linux/fs.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/bug.h> - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt...) -#endif - -void *module_alloc(unsigned long size) -{ - if (size == 0) - return NULL; - return vmalloc_exec(size); -} - - -/* Free memory returned from module_alloc */ -void module_free(struct module *mod, void *module_region) -{ - vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ -} - -/* We don't need anything special. */ -int module_frob_arch_sections(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - char *secstrings, - struct module *mod) -{ - return 0; -} - -int apply_relocate(Elf32_Shdr *sechdrs, - const char *strtab, - unsigned int symindex, - unsigned int relsec, - struct module *me) -{ - unsigned int i; - Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; - Elf32_Sym *sym; - uint32_t *location; - - DEBUGP("Applying relocate section %u to %u\n", relsec, - sechdrs[relsec].sh_info); - for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { - /* This is where to make the change */ - location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr - + rel[i].r_offset; - /* This is the symbol it is referring to. Note that all - undefined symbols have been resolved. */ - sym = (Elf32_Sym *)sechdrs[symindex].sh_addr - + ELF32_R_SYM(rel[i].r_info); - - switch (ELF32_R_TYPE(rel[i].r_info)) { - case R_386_32: - /* We add the value into the location given */ - *location += sym->st_value; - break; - case R_386_PC32: - /* Add the value, subtract its postition */ - *location += sym->st_value - (uint32_t)location; - break; - default: - printk(KERN_ERR "module %s: Unknown relocation: %u\n", - me->name, ELF32_R_TYPE(rel[i].r_info)); - return -ENOEXEC; - } - } - return 0; -} - -int apply_relocate_add(Elf32_Shdr *sechdrs, - const char *strtab, - unsigned int symindex, - unsigned int relsec, - struct module *me) -{ - printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", - me->name); - return -ENOEXEC; -} - -int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) -{ - const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, - *para = NULL; - char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - if (!strcmp(".text", secstrings + s->sh_name)) - text = s; - if (!strcmp(".altinstructions", secstrings + s->sh_name)) - alt = s; - if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks = s; - if (!strcmp(".parainstructions", secstrings + s->sh_name)) - para = s; - } - - if (alt) { - /* patch .altinstructions */ - void *aseg = (void *)alt->sh_addr; - apply_alternatives(aseg, aseg + alt->sh_size); - } - if (locks && text) { - void *lseg = (void *)locks->sh_addr; - void *tseg = (void *)text->sh_addr; - alternatives_smp_module_add(me, me->name, - lseg, lseg + locks->sh_size, - tseg, tseg + text->sh_size); - } - - if (para) { - void *pseg = (void *)para->sh_addr; - apply_paravirt(pseg, pseg + para->sh_size); - } - - return module_bug_finalize(hdr, sechdrs, me); -} - -void module_arch_cleanup(struct module *mod) -{ - alternatives_smp_module_del(mod); - module_bug_cleanup(mod); -} diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d1c636bf31a7..be5ae80f897f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -301,15 +301,13 @@ static void __init reserve_brk(void) #ifdef CONFIG_BLK_DEV_INITRD -#ifdef CONFIG_X86_32 - #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) static void __init relocate_initrd(void) { u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; - u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; + u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; u64 ramdisk_here; unsigned long slop, clen, mapaddr; char *p, *q; @@ -365,14 +363,13 @@ static void __init relocate_initrd(void) ramdisk_image, ramdisk_image + ramdisk_size - 1, ramdisk_here, ramdisk_here + ramdisk_size - 1); } -#endif static void __init reserve_initrd(void) { u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; u64 ramdisk_end = ramdisk_image + ramdisk_size; - u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; + u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; if (!boot_params.hdr.type_of_loader || !ramdisk_image || !ramdisk_size) @@ -402,14 +399,8 @@ static void __init reserve_initrd(void) return; } -#ifdef CONFIG_X86_32 relocate_initrd(); -#else - printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08llx > 0x%08llx)\ndisabling initrd\n", - ramdisk_end, end_of_lowmem); - initrd_start = 0; -#endif + free_early(ramdisk_image, ramdisk_end); } #else diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7c80007ea5f7..2fecda69ee64 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -873,7 +873,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) err = do_boot_cpu(apicid, cpu); - zap_low_mappings(); + zap_low_mappings(false); low_mappings = 0; #else err = do_boot_cpu(apicid, cpu); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 4c85b2e2bb65..367e87882041 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -108,6 +108,8 @@ SECTIONS /* Data */ . = ALIGN(PAGE_SIZE); .data : AT(ADDR(.data) - LOAD_OFFSET) { + /* Start of data section */ + _sdata = .; DATA_DATA CONSTRUCTORS diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig index 8dab8f7844d3..38718041efc3 100644 --- a/arch/x86/lguest/Kconfig +++ b/arch/x86/lguest/Kconfig @@ -2,7 +2,6 @@ config LGUEST_GUEST bool "Lguest guest support" select PARAVIRT depends on X86_32 - depends on !X86_PAE select VIRTIO select VIRTIO_RING select VIRTIO_CONSOLE diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 4e0c26559395..7bc65f0f62c4 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -87,7 +87,7 @@ struct lguest_data lguest_data = { /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a * ring buffer of stored hypercalls which the Host will run though next time we - * do a normal hypercall. Each entry in the ring has 4 slots for the hypercall + * do a normal hypercall. Each entry in the ring has 5 slots for the hypercall * arguments, and a "hcall_status" word which is 0 if the call is ready to go, * and 255 once the Host has finished with it. * @@ -96,7 +96,8 @@ struct lguest_data lguest_data = { * effect of causing the Host to run all the stored calls in the ring buffer * which empties it for next time! */ static void async_hcall(unsigned long call, unsigned long arg1, - unsigned long arg2, unsigned long arg3) + unsigned long arg2, unsigned long arg3, + unsigned long arg4) { /* Note: This code assumes we're uniprocessor. */ static unsigned int next_call; @@ -108,12 +109,13 @@ static void async_hcall(unsigned long call, unsigned long arg1, local_irq_save(flags); if (lguest_data.hcall_status[next_call] != 0xFF) { /* Table full, so do normal hcall which will flush table. */ - kvm_hypercall3(call, arg1, arg2, arg3); + kvm_hypercall4(call, arg1, arg2, arg3, arg4); } else { lguest_data.hcalls[next_call].arg0 = call; lguest_data.hcalls[next_call].arg1 = arg1; lguest_data.hcalls[next_call].arg2 = arg2; lguest_data.hcalls[next_call].arg3 = arg3; + lguest_data.hcalls[next_call].arg4 = arg4; /* Arguments must all be written before we mark it to go */ wmb(); lguest_data.hcall_status[next_call] = 0; @@ -141,7 +143,7 @@ static void lazy_hcall1(unsigned long call, if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) kvm_hypercall1(call, arg1); else - async_hcall(call, arg1, 0, 0); + async_hcall(call, arg1, 0, 0, 0); } static void lazy_hcall2(unsigned long call, @@ -151,7 +153,7 @@ static void lazy_hcall2(unsigned long call, if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) kvm_hypercall2(call, arg1, arg2); else - async_hcall(call, arg1, arg2, 0); + async_hcall(call, arg1, arg2, 0, 0); } static void lazy_hcall3(unsigned long call, @@ -162,9 +164,23 @@ static void lazy_hcall3(unsigned long call, if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) kvm_hypercall3(call, arg1, arg2, arg3); else - async_hcall(call, arg1, arg2, arg3); + async_hcall(call, arg1, arg2, arg3, 0); } +#ifdef CONFIG_X86_PAE +static void lazy_hcall4(unsigned long call, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4) +{ + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) + kvm_hypercall4(call, arg1, arg2, arg3, arg4); + else + async_hcall(call, arg1, arg2, arg3, arg4); +} +#endif + /* When lazy mode is turned off reset the per-cpu lazy mode variable and then * issue the do-nothing hypercall to flush any stored calls. */ static void lguest_leave_lazy_mmu_mode(void) @@ -179,7 +195,7 @@ static void lguest_end_context_switch(struct task_struct *next) paravirt_end_context_switch(next); } -/*G:033 +/*G:032 * After that diversion we return to our first native-instruction * replacements: four functions for interrupt control. * @@ -199,30 +215,28 @@ static unsigned long save_fl(void) { return lguest_data.irq_enabled; } -PV_CALLEE_SAVE_REGS_THUNK(save_fl); - -/* restore_flags() just sets the flags back to the value given. */ -static void restore_fl(unsigned long flags) -{ - lguest_data.irq_enabled = flags; -} -PV_CALLEE_SAVE_REGS_THUNK(restore_fl); /* Interrupts go off... */ static void irq_disable(void) { lguest_data.irq_enabled = 0; } + +/* Let's pause a moment. Remember how I said these are called so often? + * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to + * break some rules. In particular, these functions are assumed to save their + * own registers if they need to: normal C functions assume they can trash the + * eax register. To use normal C functions, we use + * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the + * C function, then restores it. */ +PV_CALLEE_SAVE_REGS_THUNK(save_fl); PV_CALLEE_SAVE_REGS_THUNK(irq_disable); +/*:*/ -/* Interrupts go on... */ -static void irq_enable(void) -{ - lguest_data.irq_enabled = X86_EFLAGS_IF; -} -PV_CALLEE_SAVE_REGS_THUNK(irq_enable); +/* These are in i386_head.S */ +extern void lg_irq_enable(void); +extern void lg_restore_fl(unsigned long flags); -/*:*/ /*M:003 Note that we don't check for outstanding interrupts when we re-enable * them (or when we unmask an interrupt). This seems to work for the moment, * since interrupts are rare and we'll just get the interrupt on the next timer @@ -368,8 +382,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, case 1: /* Basic feature request. */ /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ *cx &= 0x00002201; - /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */ - *dx &= 0x07808111; + /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE. */ + *dx &= 0x07808151; /* The Host can do a nice optimization if it knows that the * kernel mappings (addresses above 0xC0000000 or whatever * PAGE_OFFSET is set to) haven't changed. But Linux calls @@ -388,6 +402,11 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, if (*ax > 0x80000008) *ax = 0x80000008; break; + case 0x80000001: + /* Here we should fix nx cap depending on host. */ + /* For this version of PAE, we just clear NX bit. */ + *dx &= ~(1 << 20); + break; } } @@ -521,25 +540,52 @@ static void lguest_write_cr4(unsigned long val) static void lguest_pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { +#ifdef CONFIG_X86_PAE + lazy_hcall4(LHCALL_SET_PTE, __pa(mm->pgd), addr, + ptep->pte_low, ptep->pte_high); +#else lazy_hcall3(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low); +#endif } static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { - *ptep = pteval; + native_set_pte(ptep, pteval); lguest_pte_update(mm, addr, ptep); } -/* The Guest calls this to set a top-level entry. Again, we set the entry then - * tell the Host which top-level page we changed, and the index of the entry we - * changed. */ +/* The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd + * to set a middle-level entry when PAE is activated. + * Again, we set the entry then tell the Host which page we changed, + * and the index of the entry we changed. */ +#ifdef CONFIG_X86_PAE +static void lguest_set_pud(pud_t *pudp, pud_t pudval) +{ + native_set_pud(pudp, pudval); + + /* 32 bytes aligned pdpt address and the index. */ + lazy_hcall2(LHCALL_SET_PGD, __pa(pudp) & 0xFFFFFFE0, + (__pa(pudp) & 0x1F) / sizeof(pud_t)); +} + static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) { - *pmdp = pmdval; + native_set_pmd(pmdp, pmdval); lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK, - (__pa(pmdp) & (PAGE_SIZE - 1)) / 4); + (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t)); } +#else + +/* The Guest calls lguest_set_pmd to set a top-level entry when PAE is not + * activated. */ +static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + native_set_pmd(pmdp, pmdval); + lazy_hcall2(LHCALL_SET_PGD, __pa(pmdp) & PAGE_MASK, + (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t)); +} +#endif /* There are a couple of legacy places where the kernel sets a PTE, but we * don't know the top level any more. This is useless for us, since we don't @@ -552,11 +598,31 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) * which brings boot back to 0.25 seconds. */ static void lguest_set_pte(pte_t *ptep, pte_t pteval) { - *ptep = pteval; + native_set_pte(ptep, pteval); + if (cr3_changed) + lazy_hcall1(LHCALL_FLUSH_TLB, 1); +} + +#ifdef CONFIG_X86_PAE +static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + native_set_pte_atomic(ptep, pte); if (cr3_changed) lazy_hcall1(LHCALL_FLUSH_TLB, 1); } +void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + native_pte_clear(mm, addr, ptep); + lguest_pte_update(mm, addr, ptep); +} + +void lguest_pmd_clear(pmd_t *pmdp) +{ + lguest_set_pmd(pmdp, __pmd(0)); +} +#endif + /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on * native page table operations. On native hardware you can set a new page * table entry whenever you want, but if you want to remove one you have to do @@ -628,13 +694,12 @@ static void __init lguest_init_IRQ(void) { unsigned int i; - for (i = 0; i < LGUEST_IRQS; i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; + for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { /* Some systems map "vectors" to interrupts weirdly. Lguest has * a straightforward 1 to 1 mapping, so force that here. */ - __get_cpu_var(vector_irq)[vector] = i; - if (vector != SYSCALL_VECTOR) - set_intr_gate(vector, interrupt[i]); + __get_cpu_var(vector_irq)[i] = i - FIRST_EXTERNAL_VECTOR; + if (i != SYSCALL_VECTOR) + set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); } /* This call is required to set up for 4k stacks, where we have * separate stacks for hard and soft interrupts. */ @@ -973,10 +1038,10 @@ static void lguest_restart(char *reason) * * Our current solution is to allow the paravirt back end to optionally patch * over the indirect calls to replace them with something more efficient. We - * patch the four most commonly called functions: disable interrupts, enable - * interrupts, restore interrupts and save interrupts. We usually have 6 or 10 - * bytes to patch into: the Guest versions of these operations are small enough - * that we can fit comfortably. + * patch two of the simplest of the most commonly called functions: disable + * interrupts and save interrupts. We usually have 6 or 10 bytes to patch + * into: the Guest versions of these operations are small enough that we can + * fit comfortably. * * First we need assembly templates of each of the patchable Guest operations, * and these are in i386_head.S. */ @@ -987,8 +1052,6 @@ static const struct lguest_insns const char *start, *end; } lguest_insns[] = { [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli }, - [PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti }, - [PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf }, [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, }; @@ -1026,6 +1089,7 @@ __init void lguest_init(void) pv_info.name = "lguest"; pv_info.paravirt_enabled = 1; pv_info.kernel_rpl = 1; + pv_info.shared_kernel_pmd = 1; /* We set up all the lguest overrides for sensitive operations. These * are detailed with the operations themselves. */ @@ -1033,9 +1097,9 @@ __init void lguest_init(void) /* interrupt-related operations */ pv_irq_ops.init_IRQ = lguest_init_IRQ; pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); - pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl); + pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); - pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable); + pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable); pv_irq_ops.safe_halt = lguest_safe_halt; /* init-time operations */ @@ -1071,6 +1135,12 @@ __init void lguest_init(void) pv_mmu_ops.set_pte = lguest_set_pte; pv_mmu_ops.set_pte_at = lguest_set_pte_at; pv_mmu_ops.set_pmd = lguest_set_pmd; +#ifdef CONFIG_X86_PAE + pv_mmu_ops.set_pte_atomic = lguest_set_pte_atomic; + pv_mmu_ops.pte_clear = lguest_pte_clear; + pv_mmu_ops.pmd_clear = lguest_pmd_clear; + pv_mmu_ops.set_pud = lguest_set_pud; +#endif pv_mmu_ops.read_cr2 = lguest_read_cr2; pv_mmu_ops.read_cr3 = lguest_read_cr3; pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index f79541989471..a9c8cfe61cd4 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -46,10 +46,64 @@ ENTRY(lguest_entry) .globl lgstart_##name; .globl lgend_##name LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) -LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled) -LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled) LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) -/*:*/ + +/*G:033 But using those wrappers is inefficient (we'll see why that doesn't + * matter for save_fl and irq_disable later). If we write our routines + * carefully in assembler, we can avoid clobbering any registers and avoid + * jumping through the wrapper functions. + * + * I skipped over our first piece of assembler, but this one is worth studying + * in a bit more detail so I'll describe in easy stages. First, the routine + * to enable interrupts: */ +ENTRY(lg_irq_enable) + /* The reverse of irq_disable, this sets lguest_data.irq_enabled to + * X86_EFLAGS_IF (ie. "Interrupts enabled"). */ + movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled + /* But now we need to check if the Host wants to know: there might have + * been interrupts waiting to be delivered, in which case it will have + * set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we + * jump to send_interrupts, otherwise we're done. */ + testl $0, lguest_data+LGUEST_DATA_irq_pending + jnz send_interrupts + /* One cool thing about x86 is that you can do many things without using + * a register. In this case, the normal path hasn't needed to save or + * restore any registers at all! */ + ret +send_interrupts: + /* OK, now we need a register: eax is used for the hypercall number, + * which is LHCALL_SEND_INTERRUPTS. + * + * We used not to bother with this pending detection at all, which was + * much simpler. Sooner or later the Host would realize it had to + * send us an interrupt. But that turns out to make performance 7 + * times worse on a simple tcp benchmark. So now we do this the hard + * way. */ + pushl %eax + movl $LHCALL_SEND_INTERRUPTS, %eax + /* This is a vmcall instruction (same thing that KVM uses). Older + * assembler versions might not know the "vmcall" instruction, so we + * create one manually here. */ + .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ + popl %eax + ret + +/* Finally, the "popf" or "restore flags" routine. The %eax register holds the + * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're + * enabling interrupts again, if it's 0 we're leaving them off. */ +ENTRY(lg_restore_fl) + /* This is just "lguest_data.irq_enabled = flags;" */ + movl %eax, lguest_data+LGUEST_DATA_irq_enabled + /* Now, if the %eax value has enabled interrupts and + * lguest_data.irq_pending is set, we want to tell the Host so it can + * deliver any outstanding interrupts. Fortunately, both values will + * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl" + * instruction will AND them together for us. If both are set, we + * jump to send_interrupts. */ + testl lguest_data+LGUEST_DATA_irq_pending, %eax + jnz send_interrupts + /* Again, the normal path has used no extra registers. Clever, huh? */ + ret /* These demark the EIP range where host should never deliver interrupts. */ .global lguest_noirq_start diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 949708d7a481..9ff3c0816d15 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -564,7 +564,7 @@ static inline void save_pg_dir(void) } #endif /* !CONFIG_ACPI_SLEEP */ -void zap_low_mappings(void) +void zap_low_mappings(bool early) { int i; @@ -581,7 +581,11 @@ void zap_low_mappings(void) set_pgd(swapper_pg_dir+i, __pgd(0)); #endif } - flush_tlb_all(); + + if (early) + __flush_tlb(); + else + flush_tlb_all(); } pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); @@ -956,7 +960,7 @@ void __init mem_init(void) test_wp_bit(); save_pg_dir(); - zap_low_mappings(); + zap_low_mappings(true); } #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile index 58b32db33125..de2abbd07544 100644 --- a/arch/x86/power/Makefile +++ b/arch/x86/power/Makefile @@ -3,5 +3,5 @@ nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_cpu_$(BITS).o := $(nostackp) -obj-$(CONFIG_PM_SLEEP) += cpu_$(BITS).o +obj-$(CONFIG_PM_SLEEP) += cpu.o obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu.c index 5343540f2607..d277ef1eea51 100644 --- a/arch/x86/power/cpu_64.c +++ b/arch/x86/power/cpu.c @@ -1,5 +1,5 @@ /* - * Suspend and hibernation support for x86-64 + * Suspend support specific for i386/x86-64. * * Distribute under GPLv2 * @@ -8,18 +8,28 @@ * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> */ -#include <linux/smp.h> #include <linux/suspend.h> -#include <asm/proto.h> -#include <asm/page.h> +#include <linux/smp.h> + #include <asm/pgtable.h> +#include <asm/proto.h> #include <asm/mtrr.h> +#include <asm/page.h> +#include <asm/mce.h> #include <asm/xcr.h> #include <asm/suspend.h> -static void fix_processor_context(void); +#ifdef CONFIG_X86_32 +static struct saved_context saved_context; +unsigned long saved_context_ebx; +unsigned long saved_context_esp, saved_context_ebp; +unsigned long saved_context_esi, saved_context_edi; +unsigned long saved_context_eflags; +#else +/* CONFIG_X86_64 */ struct saved_context saved_context; +#endif /** * __save_processor_state - save CPU registers before creating a @@ -38,19 +48,35 @@ struct saved_context saved_context; */ static void __save_processor_state(struct saved_context *ctxt) { +#ifdef CONFIG_X86_32 + mtrr_save_fixed_ranges(NULL); +#endif kernel_fpu_begin(); /* * descriptor tables */ +#ifdef CONFIG_X86_32 + store_gdt(&ctxt->gdt); + store_idt(&ctxt->idt); +#else +/* CONFIG_X86_64 */ store_gdt((struct desc_ptr *)&ctxt->gdt_limit); store_idt((struct desc_ptr *)&ctxt->idt_limit); +#endif store_tr(ctxt->tr); /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ /* * segment registers */ +#ifdef CONFIG_X86_32 + savesegment(es, ctxt->es); + savesegment(fs, ctxt->fs); + savesegment(gs, ctxt->gs); + savesegment(ss, ctxt->ss); +#else +/* CONFIG_X86_64 */ asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds)); asm volatile ("movw %%es, %0" : "=m" (ctxt->es)); asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs)); @@ -62,30 +88,87 @@ static void __save_processor_state(struct saved_context *ctxt) rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); mtrr_save_fixed_ranges(NULL); + rdmsrl(MSR_EFER, ctxt->efer); +#endif + /* * control registers */ - rdmsrl(MSR_EFER, ctxt->efer); ctxt->cr0 = read_cr0(); ctxt->cr2 = read_cr2(); ctxt->cr3 = read_cr3(); +#ifdef CONFIG_X86_32 + ctxt->cr4 = read_cr4_safe(); +#else +/* CONFIG_X86_64 */ ctxt->cr4 = read_cr4(); ctxt->cr8 = read_cr8(); +#endif } +/* Needed by apm.c */ void save_processor_state(void) { __save_processor_state(&saved_context); } +#ifdef CONFIG_X86_32 +EXPORT_SYMBOL(save_processor_state); +#endif static void do_fpu_end(void) { /* - * Restore FPU regs if necessary + * Restore FPU regs if necessary. */ kernel_fpu_end(); } +static void fix_processor_context(void) +{ + int cpu = smp_processor_id(); + struct tss_struct *t = &per_cpu(init_tss, cpu); + + set_tss_desc(cpu, t); /* + * This just modifies memory; should not be + * necessary. But... This is necessary, because + * 386 hardware has concept of busy TSS or some + * similar stupidity. + */ + +#ifdef CONFIG_X86_64 + get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; + + syscall_init(); /* This sets MSR_*STAR and related */ +#endif + load_TR_desc(); /* This does ltr */ + load_LDT(¤t->active_mm->context); /* This does lldt */ + + /* + * Now maybe reload the debug registers + */ + if (current->thread.debugreg7) { +#ifdef CONFIG_X86_32 + set_debugreg(current->thread.debugreg0, 0); + set_debugreg(current->thread.debugreg1, 1); + set_debugreg(current->thread.debugreg2, 2); + set_debugreg(current->thread.debugreg3, 3); + /* no 4 and 5 */ + set_debugreg(current->thread.debugreg6, 6); + set_debugreg(current->thread.debugreg7, 7); +#else + /* CONFIG_X86_64 */ + loaddebug(¤t->thread, 0); + loaddebug(¤t->thread, 1); + loaddebug(¤t->thread, 2); + loaddebug(¤t->thread, 3); + /* no 4 and 5 */ + loaddebug(¤t->thread, 6); + loaddebug(¤t->thread, 7); +#endif + } + +} + /** * __restore_processor_state - restore the contents of CPU registers saved * by __save_processor_state() @@ -96,9 +179,16 @@ static void __restore_processor_state(struct saved_context *ctxt) /* * control registers */ + /* cr4 was introduced in the Pentium CPU */ +#ifdef CONFIG_X86_32 + if (ctxt->cr4) + write_cr4(ctxt->cr4); +#else +/* CONFIG X86_64 */ wrmsrl(MSR_EFER, ctxt->efer); write_cr8(ctxt->cr8); write_cr4(ctxt->cr4); +#endif write_cr3(ctxt->cr3); write_cr2(ctxt->cr2); write_cr0(ctxt->cr0); @@ -107,13 +197,31 @@ static void __restore_processor_state(struct saved_context *ctxt) * now restore the descriptor tables to their proper values * ltr is done i fix_processor_context(). */ +#ifdef CONFIG_X86_32 + load_gdt(&ctxt->gdt); + load_idt(&ctxt->idt); +#else +/* CONFIG_X86_64 */ load_gdt((const struct desc_ptr *)&ctxt->gdt_limit); load_idt((const struct desc_ptr *)&ctxt->idt_limit); - +#endif /* * segment registers */ +#ifdef CONFIG_X86_32 + loadsegment(es, ctxt->es); + loadsegment(fs, ctxt->fs); + loadsegment(gs, ctxt->gs); + loadsegment(ss, ctxt->ss); + + /* + * sysenter MSRs + */ + if (boot_cpu_has(X86_FEATURE_SEP)) + enable_sep_cpu(); +#else +/* CONFIG_X86_64 */ asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds)); asm volatile ("movw %0, %%es" :: "r" (ctxt->es)); asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs)); @@ -123,6 +231,7 @@ static void __restore_processor_state(struct saved_context *ctxt) wrmsrl(MSR_FS_BASE, ctxt->fs_base); wrmsrl(MSR_GS_BASE, ctxt->gs_base); wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); +#endif /* * restore XCR0 for xsave capable cpu's. @@ -134,41 +243,17 @@ static void __restore_processor_state(struct saved_context *ctxt) do_fpu_end(); mtrr_ap_init(); + +#ifdef CONFIG_X86_32 + mcheck_init(&boot_cpu_data); +#endif } +/* Needed by apm.c */ void restore_processor_state(void) { __restore_processor_state(&saved_context); } - -static void fix_processor_context(void) -{ - int cpu = smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); - - /* - * This just modifies memory; should not be necessary. But... This - * is necessary, because 386 hardware has concept of busy TSS or some - * similar stupidity. - */ - set_tss_desc(cpu, t); - - get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; - - syscall_init(); /* This sets MSR_*STAR and related */ - load_TR_desc(); /* This does ltr */ - load_LDT(¤t->active_mm->context); /* This does lldt */ - - /* - * Now maybe reload the debug registers - */ - if (current->thread.debugreg7){ - loaddebug(¤t->thread, 0); - loaddebug(¤t->thread, 1); - loaddebug(¤t->thread, 2); - loaddebug(¤t->thread, 3); - /* no 4 and 5 */ - loaddebug(¤t->thread, 6); - loaddebug(¤t->thread, 7); - } -} +#ifdef CONFIG_X86_32 +EXPORT_SYMBOL(restore_processor_state); +#endif diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c deleted file mode 100644 index ce702c5b3a2c..000000000000 --- a/arch/x86/power/cpu_32.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Suspend support specific for i386. - * - * Distribute under GPLv2 - * - * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> - * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> - */ - -#include <linux/module.h> -#include <linux/suspend.h> -#include <asm/mtrr.h> -#include <asm/mce.h> -#include <asm/xcr.h> -#include <asm/suspend.h> - -static struct saved_context saved_context; - -unsigned long saved_context_ebx; -unsigned long saved_context_esp, saved_context_ebp; -unsigned long saved_context_esi, saved_context_edi; -unsigned long saved_context_eflags; - -static void __save_processor_state(struct saved_context *ctxt) -{ - mtrr_save_fixed_ranges(NULL); - kernel_fpu_begin(); - - /* - * descriptor tables - */ - store_gdt(&ctxt->gdt); - store_idt(&ctxt->idt); - store_tr(ctxt->tr); - - /* - * segment registers - */ - savesegment(es, ctxt->es); - savesegment(fs, ctxt->fs); - savesegment(gs, ctxt->gs); - savesegment(ss, ctxt->ss); - - /* - * control registers - */ - ctxt->cr0 = read_cr0(); - ctxt->cr2 = read_cr2(); - ctxt->cr3 = read_cr3(); - ctxt->cr4 = read_cr4_safe(); -} - -/* Needed by apm.c */ -void save_processor_state(void) -{ - __save_processor_state(&saved_context); -} -EXPORT_SYMBOL(save_processor_state); - -static void do_fpu_end(void) -{ - /* - * Restore FPU regs if necessary. - */ - kernel_fpu_end(); -} - -static void fix_processor_context(void) -{ - int cpu = smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); - - set_tss_desc(cpu, t); /* - * This just modifies memory; should not be - * necessary. But... This is necessary, because - * 386 hardware has concept of busy TSS or some - * similar stupidity. - */ - - load_TR_desc(); /* This does ltr */ - load_LDT(¤t->active_mm->context); /* This does lldt */ - - /* - * Now maybe reload the debug registers - */ - if (current->thread.debugreg7) { - set_debugreg(current->thread.debugreg0, 0); - set_debugreg(current->thread.debugreg1, 1); - set_debugreg(current->thread.debugreg2, 2); - set_debugreg(current->thread.debugreg3, 3); - /* no 4 and 5 */ - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); - } - -} - -static void __restore_processor_state(struct saved_context *ctxt) -{ - /* - * control registers - */ - /* cr4 was introduced in the Pentium CPU */ - if (ctxt->cr4) - write_cr4(ctxt->cr4); - write_cr3(ctxt->cr3); - write_cr2(ctxt->cr2); - write_cr0(ctxt->cr0); - - /* - * now restore the descriptor tables to their proper values - * ltr is done i fix_processor_context(). - */ - load_gdt(&ctxt->gdt); - load_idt(&ctxt->idt); - - /* - * segment registers - */ - loadsegment(es, ctxt->es); - loadsegment(fs, ctxt->fs); - loadsegment(gs, ctxt->gs); - loadsegment(ss, ctxt->ss); - - /* - * sysenter MSRs - */ - if (boot_cpu_has(X86_FEATURE_SEP)) - enable_sep_cpu(); - - /* - * restore XCR0 for xsave capable cpu's. - */ - if (cpu_has_xsave) - xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); - - fix_processor_context(); - do_fpu_end(); - mtrr_ap_init(); - mcheck_init(&boot_cpu_data); -} - -/* Needed by apm.c */ -void restore_processor_state(void) -{ - __restore_processor_state(&saved_context); -} -EXPORT_SYMBOL(restore_processor_state); |