diff options
Diffstat (limited to 'arch/x86')
33 files changed, 188 insertions, 135 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 61b6d51866f8..3942f74c92d7 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -17,6 +17,4 @@ obj-$(CONFIG_IA32_EMULATION) += ia32/ obj-y += platform/ obj-y += net/ -ifeq ($(CONFIG_X86_64),y) -obj-$(CONFIG_KEXEC) += purgatory/ -endif +obj-$(CONFIG_KEXEC_FILE) += purgatory/ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5d0bf1aa9dcb..36327438caf0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -23,6 +23,7 @@ config X86 def_bool y select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + select ARCH_HAS_FAST_MULTIPLIER select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select HAVE_AOUT if X86_32 @@ -1585,9 +1586,6 @@ source kernel/Kconfig.hz config KEXEC bool "kexec system call" - select BUILD_BIN2C - select CRYPTO - select CRYPTO_SHA256 ---help--- kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot @@ -1602,9 +1600,22 @@ config KEXEC interface is strongly in flux, so no good recommendation can be made. +config KEXEC_FILE + bool "kexec file based system call" + select BUILD_BIN2C + depends on KEXEC + depends on X86_64 + depends on CRYPTO=y + depends on CRYPTO_SHA256=y + ---help--- + This is new version of kexec system call. This system call is + file based and takes file descriptors as system call argument + for kernel and initramfs as opposed to list of segments as + accepted by previous system call. + config KEXEC_VERIFY_SIG bool "Verify kernel signature during kexec_file_load() syscall" - depends on KEXEC + depends on KEXEC_FILE ---help--- This option makes kernel signature verification mandatory for kexec_file_load() syscall. If kernel is signature can not be diff --git a/arch/x86/Makefile b/arch/x86/Makefile index c1aa36887843..60087ca37679 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -184,11 +184,8 @@ archheaders: $(Q)$(MAKE) $(build)=arch/x86/syscalls all archprepare: -ifeq ($(CONFIG_KEXEC),y) -# Build only for 64bit. No loaders for 32bit yet. - ifeq ($(CONFIG_X86_64),y) +ifeq ($(CONFIG_KEXEC_FILE),y) $(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c - endif endif ### @@ -254,6 +251,7 @@ archclean: $(Q)rm -rf $(objtree)/arch/x86_64 $(Q)$(MAKE) $(clean)=$(boot) $(Q)$(MAKE) $(clean)=arch/x86/tools + $(Q)$(MAKE) $(clean)=arch/x86/purgatory PHONY += kvmconfig kvmconfig: diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 7a801a310e37..0fcd9133790c 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -33,8 +33,7 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone ifeq ($(CONFIG_EFI_STUB), y) - VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \ - $(objtree)/drivers/firmware/efi/libstub/lib.a + VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o endif $(obj)/vmlinux: $(VMLINUX_OBJS) FORCE diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index fc6091abedb7..d39189ba7f8e 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -183,12 +183,27 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, static bool mem_avoid_overlap(struct mem_vector *img) { int i; + struct setup_data *ptr; for (i = 0; i < MEM_AVOID_MAX; i++) { if (mem_overlaps(img, &mem_avoid[i])) return true; } + /* Avoid all entries in the setup_data linked list. */ + ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data; + while (ptr) { + struct mem_vector avoid; + + avoid.start = (u64)ptr; + avoid.size = sizeof(*ptr) + ptr->len; + + if (mem_overlaps(img, &avoid)) + return true; + + ptr = (struct setup_data *)(unsigned long)ptr->next; + } + return false; } diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index f277184e2ac1..de8eebd6f67c 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -19,7 +19,10 @@ static efi_system_table_t *sys_table; -struct efi_config *efi_early; +static struct efi_config *efi_early; + +#define efi_call_early(f, ...) \ + efi_early->call(efi_early->f, __VA_ARGS__); #define BOOT_SERVICES(bits) \ static void setup_boot_services##bits(struct efi_config *c) \ @@ -265,21 +268,25 @@ void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) offset = offsetof(typeof(*out), output_string); output_string = efi_early->text_output + offset; + out = (typeof(out))(unsigned long)efi_early->text_output; func = (u64 *)output_string; - efi_early->call(*func, efi_early->text_output, str); + efi_early->call(*func, out, str); } else { struct efi_simple_text_output_protocol_32 *out; u32 *func; offset = offsetof(typeof(*out), output_string); output_string = efi_early->text_output + offset; + out = (typeof(out))(unsigned long)efi_early->text_output; func = (u32 *)output_string; - efi_early->call(*func, efi_early->text_output, str); + efi_early->call(*func, out, str); } } +#include "../../../../drivers/firmware/efi/libstub/efi-stub-helper.c" + static void find_bits(unsigned long mask, u8 *pos, u8 *size) { u8 first, len; @@ -360,7 +367,7 @@ free_struct: return status; } -static efi_status_t +static void setup_efi_pci32(struct boot_params *params, void **pci_handle, unsigned long size) { @@ -403,8 +410,6 @@ setup_efi_pci32(struct boot_params *params, void **pci_handle, data = (struct setup_data *)rom; } - - return status; } static efi_status_t @@ -463,7 +468,7 @@ free_struct: } -static efi_status_t +static void setup_efi_pci64(struct boot_params *params, void **pci_handle, unsigned long size) { @@ -506,11 +511,18 @@ setup_efi_pci64(struct boot_params *params, void **pci_handle, data = (struct setup_data *)rom; } - - return status; } -static efi_status_t setup_efi_pci(struct boot_params *params) +/* + * There's no way to return an informative status from this function, + * because any analysis (and printing of error messages) needs to be + * done directly at the EFI function call-site. + * + * For example, EFI_INVALID_PARAMETER could indicate a bug or maybe we + * just didn't find any PCI devices, but there's no way to tell outside + * the context of the call. + */ +static void setup_efi_pci(struct boot_params *params) { efi_status_t status; void **pci_handle = NULL; @@ -527,7 +539,7 @@ static efi_status_t setup_efi_pci(struct boot_params *params) size, (void **)&pci_handle); if (status != EFI_SUCCESS) - return status; + return; status = efi_call_early(locate_handle, EFI_LOCATE_BY_PROTOCOL, &pci_proto, @@ -538,13 +550,12 @@ static efi_status_t setup_efi_pci(struct boot_params *params) goto free_handle; if (efi_early->is64) - status = setup_efi_pci64(params, pci_handle, size); + setup_efi_pci64(params, pci_handle, size); else - status = setup_efi_pci32(params, pci_handle, size); + setup_efi_pci32(params, pci_handle, size); free_handle: efi_call_early(free_pool, pci_handle); - return status; } static void @@ -1032,7 +1043,6 @@ struct boot_params *make_boot_params(struct efi_config *c) int i; unsigned long ramdisk_addr; unsigned long ramdisk_size; - unsigned long initrd_addr_max; efi_early = c; sys_table = (efi_system_table_t *)(unsigned long)efi_early->table; @@ -1095,15 +1105,20 @@ struct boot_params *make_boot_params(struct efi_config *c) memset(sdt, 0, sizeof(*sdt)); - if (hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) - initrd_addr_max = -1UL; - else - initrd_addr_max = hdr->initrd_addr_max; - status = handle_cmdline_files(sys_table, image, (char *)(unsigned long)hdr->cmd_line_ptr, - "initrd=", initrd_addr_max, + "initrd=", hdr->initrd_addr_max, &ramdisk_addr, &ramdisk_size); + + if (status != EFI_SUCCESS && + hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) { + efi_printk(sys_table, "Trying to load files to higher address\n"); + status = handle_cmdline_files(sys_table, image, + (char *)(unsigned long)hdr->cmd_line_ptr, + "initrd=", -1UL, + &ramdisk_addr, &ramdisk_size); + } + if (status != EFI_SUCCESS) goto fail2; hdr->ramdisk_image = ramdisk_addr & 0xffffffff; @@ -1376,10 +1391,7 @@ struct boot_params *efi_main(struct efi_config *c, setup_graphics(boot_params); - status = setup_efi_pci(boot_params); - if (status != EFI_SUCCESS) { - efi_printk(sys_table, "setup_efi_pci() failed!\n"); - } + setup_efi_pci(boot_params); status = efi_call_early(allocate_pool, EFI_LOADER_DATA, sizeof(*gdt), (void **)&gdt); diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index d487e727f1ec..c88c31ecad12 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -103,4 +103,20 @@ struct efi_uga_draw_protocol { void *blt; }; +struct efi_config { + u64 image_handle; + u64 table; + u64 allocate_pool; + u64 allocate_pages; + u64 get_memory_map; + u64 free_pool; + u64 free_pages; + u64 locate_handle; + u64 handle_protocol; + u64 exit_boot_services; + u64 text_output; + efi_status_t (*call)(unsigned long, ...); + bool is64; +} __packed; + #endif /* BOOT_COMPRESSED_EBOOT_H */ diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 888950f29fd9..a7ccd57f19e4 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -481,7 +481,7 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx, crypto_inc(ctrblk, AES_BLOCK_SIZE); } -#ifdef CONFIG_AS_AVX +#if 0 /* temporary disabled due to failing crypto tests */ static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv) { @@ -1522,7 +1522,7 @@ static int __init aesni_init(void) aesni_gcm_dec_tfm = aesni_gcm_dec; } aesni_ctr_enc_tfm = aesni_ctr_enc; -#ifdef CONFIG_AS_AVX +#if 0 /* temporary disabled due to failing crypto tests */ if (cpu_has_avx) { /* optimize performance of ctr mode encryption transform */ aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm; diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index afcd35d331de..cfe3b954d5e4 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -497,8 +497,6 @@ static __always_inline int fls64(__u64 x) #include <asm-generic/bitops/sched.h> -#define ARCH_HAS_FAST_MULTIPLIER 1 - #include <asm/arch_hweight.h> #include <asm-generic/bitops/const_hweight.h> diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 044a2fd3c5fe..0ec241ede5a2 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -159,30 +159,6 @@ static inline efi_status_t efi_thunk_set_virtual_address_map( } #endif /* CONFIG_EFI_MIXED */ - -/* arch specific definitions used by the stub code */ - -struct efi_config { - u64 image_handle; - u64 table; - u64 allocate_pool; - u64 allocate_pages; - u64 get_memory_map; - u64 free_pool; - u64 free_pages; - u64 locate_handle; - u64 handle_protocol; - u64 exit_boot_services; - u64 text_output; - efi_status_t (*call)(unsigned long, ...); - bool is64; -} __packed; - -extern struct efi_config *efi_early; - -#define efi_call_early(f, ...) \ - efi_early->call(efi_early->f, __VA_ARGS__); - extern bool efi_reboot_required(void); #else diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index b0910f97a3ea..ffb1733ac91f 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -106,14 +106,14 @@ enum fixed_addresses { __end_of_permanent_fixed_addresses, /* - * 256 temporary boot-time mappings, used by early_ioremap(), + * 512 temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. * - * If necessary we round it up to the next 256 pages boundary so + * If necessary we round it up to the next 512 pages boundary so * that we can have a single pgd entry and a single pte table: */ #define NR_FIX_BTMAPS 64 -#define FIX_BTMAPS_SLOTS 4 +#define FIX_BTMAPS_SLOTS 8 #define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) FIX_BTMAP_END = (__end_of_permanent_fixed_addresses ^ diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 0aeed5ca356e..1733ab49ac5e 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -227,6 +227,8 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned extern void io_apic_eoi(unsigned int apic, unsigned int vector); +extern bool mp_should_keep_irq(struct device *dev); + #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 @@ -237,6 +239,7 @@ static inline int mp_find_ioapic(u32 gsi) { return 0; } static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; } static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; } static inline void mp_unmap_irq(int irq) { } +static inline bool mp_should_keep_irq(struct device *dev) { return 1; } static inline int save_ioapic_entries(void) { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 572460175ba5..7c492ed9087b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -95,7 +95,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define KVM_REFILL_PAGES 25 #define KVM_MAX_CPUID_ENTRIES 80 #define KVM_NR_FIXED_MTRR_REGION 88 -#define KVM_NR_VAR_MTRR 10 +#define KVM_NR_VAR_MTRR 8 #define ASYNC_PF_PER_VCPU 64 diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 0ec056012618..aa97a070f09f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -131,8 +131,13 @@ static inline int pte_exec(pte_t pte) static inline int pte_special(pte_t pte) { - return (pte_flags(pte) & (_PAGE_PRESENT|_PAGE_SPECIAL)) == - (_PAGE_PRESENT|_PAGE_SPECIAL); + /* + * See CONFIG_NUMA_BALANCING pte_numa in include/asm-generic/pgtable.h. + * On x86 we have _PAGE_BIT_NUMA == _PAGE_BIT_GLOBAL+1 == + * __PAGE_BIT_SOFTW1 == _PAGE_BIT_SPECIAL. + */ + return (pte_flags(pte) & _PAGE_SPECIAL) && + (pte_flags(pte) & (_PAGE_PRESENT|_PAGE_PROTNONE)); } static inline unsigned long pte_pfn(pte_t pte) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 5be9063545d2..3874693c0e53 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -19,6 +19,7 @@ extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_fixmap_pgt[512]; extern pmd_t level2_ident_pgt[512]; +extern pte_t level1_fixmap_pgt[512]; extern pgd_t init_level4_pgt[]; #define swapper_pg_dir init_level4_pgt diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b5ea75c4a4b4..ada2e2d6be3e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -71,6 +71,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_X86_TSC) += trace_clock.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o +obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-y += kprobes/ obj-$(CONFIG_MODULES) += module.o @@ -118,5 +119,4 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o obj-y += vsmp_64.o - obj-$(CONFIG_KEXEC) += kexec-bzimage64.o endif diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 29290f554e79..337ce5a9b15c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1070,6 +1070,11 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, } if (flags & IOAPIC_MAP_ALLOC) { + /* special handling for legacy IRQs */ + if (irq < nr_legacy_irqs() && info->count == 1 && + mp_irqdomain_map(domain, irq, pin) != 0) + irq = -1; + if (irq > 0) info->count++; else if (info->count == 0) @@ -3896,7 +3901,15 @@ int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, info->polarity = 1; } info->node = NUMA_NO_NODE; - info->set = 1; + + /* + * setup_IO_APIC_irqs() programs all legacy IRQs with default + * trigger and polarity attributes. Don't set the flag for that + * case so the first legacy IRQ user could reprogram the pin + * with real trigger and polarity attributes. + */ + if (virq >= nr_legacy_irqs() || info->count) + info->set = 1; } set_io_apic_irq_attr(&attr, ioapic, hwirq, info->trigger, info->polarity); @@ -3946,6 +3959,18 @@ int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node) return ret; } +bool mp_should_keep_irq(struct device *dev) +{ + if (dev->power.is_prepared) + return true; +#ifdef CONFIG_PM_RUNTIME + if (dev->power.runtime_status == RPM_SUSPENDING) + return true; +#endif + + return false; +} + /* Enable IOAPIC early just for system timer */ void __init pre_init_apic_IRQ0(void) { diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 0553a34fa0df..a618fcd2c07d 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -182,8 +182,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) crash_save_cpu(regs, safe_smp_processor_id()); } -#ifdef CONFIG_X86_64 - +#ifdef CONFIG_KEXEC_FILE static int get_nr_ram_ranges_callback(unsigned long start_pfn, unsigned long nr_pfn, void *arg) { @@ -696,5 +695,4 @@ int crash_load_segments(struct kimage *image) return ret; } - -#endif /* CONFIG_X86_64 */ +#endif /* CONFIG_KEXEC_FILE */ diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 47c410d99f5d..4b0e1dfa2226 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -683,7 +683,7 @@ END(syscall_badsys) sysenter_badsys: movl $-ENOSYS,%eax jmp sysenter_after_call -END(syscall_badsys) +END(sysenter_badsys) CFI_ENDPROC .macro FIXUP_ESPFIX_STACK diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 1e6cff5814fa..44f1ed42fdf2 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -203,7 +203,7 @@ void __init native_init_IRQ(void) set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); } - if (!acpi_ioapic && !of_ioapic) + if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) setup_irq(2, &irq2); #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index f304773285ae..f1314d0bcf0a 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -338,8 +338,10 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op) * a relative jump. */ rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; - if (abs(rel) > 0x7fffffff) + if (abs(rel) > 0x7fffffff) { + __arch_remove_optimized_kprobe(op, 0); return -ERANGE; + } buf = (u8 *)op->optinsn.insn; diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 8b04018e5d1f..485981059a40 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -25,9 +25,11 @@ #include <asm/debugreg.h> #include <asm/kexec-bzimage64.h> +#ifdef CONFIG_KEXEC_FILE static struct kexec_file_ops *kexec_file_loaders[] = { &kexec_bzImage64_ops, }; +#endif static void free_transition_pgtable(struct kimage *image) { @@ -178,6 +180,7 @@ static void load_segments(void) ); } +#ifdef CONFIG_KEXEC_FILE /* Update purgatory as needed after various image segments have been prepared */ static int arch_update_purgatory(struct kimage *image) { @@ -209,6 +212,12 @@ static int arch_update_purgatory(struct kimage *image) return ret; } +#else /* !CONFIG_KEXEC_FILE */ +static inline int arch_update_purgatory(struct kimage *image) +{ + return 0; +} +#endif /* CONFIG_KEXEC_FILE */ int machine_kexec_prepare(struct kimage *image) { @@ -329,6 +338,7 @@ void arch_crash_save_vmcoreinfo(void) /* arch-dependent functionality related to kexec file-based syscall */ +#ifdef CONFIG_KEXEC_FILE int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len) { @@ -522,3 +532,4 @@ overflow: (int)ELF64_R_TYPE(rel[i].r_info), value); return -ENOEXEC; } +#endif /* CONFIG_KEXEC_FILE */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2d872e08fab9..42a2dca984b3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1284,6 +1284,9 @@ static void remove_siblinginfo(int cpu) for_each_cpu(sibling, cpu_sibling_mask(cpu)) cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling)); + for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) + cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling)); + cpumask_clear(cpu_llc_shared_mask(cpu)); cpumask_clear(cpu_sibling_mask(cpu)); cpumask_clear(cpu_core_mask(cpu)); c->phys_proc_id = 0; diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index bf7ef5ce29df..0fa29609b2c4 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -68,6 +68,8 @@ static struct irqaction irq0 = { void __init setup_default_timer_irq(void) { + if (!nr_legacy_irqs()) + return; setup_irq(0, &irq0); } diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 56657b0bb3bb..03954f7900f5 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1491,9 +1491,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, goto exception; break; case VCPU_SREG_CS: - if (in_task_switch && rpl != dpl) - goto exception; - if (!(seg_desc.type & 8)) goto exception; @@ -4394,8 +4391,11 @@ done_prefixes: ctxt->execute = opcode.u.execute; + if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD))) + return EMULATION_FAILED; + if (unlikely(ctxt->d & - (NotImpl|EmulateOnUD|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) { + (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) { /* * These are copied unconditionally here, and checked unconditionally * in x86_emulate_insn. @@ -4406,9 +4406,6 @@ done_prefixes: if (ctxt->d & NotImpl) return EMULATION_FAILED; - if (!(ctxt->d & EmulateOnUD) && ctxt->ud) - return EMULATION_FAILED; - if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) ctxt->op_bytes = 8; diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 167ffcac16ed..95a427e57887 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -48,7 +48,9 @@ enum address_markers_idx { LOW_KERNEL_NR, VMALLOC_START_NR, VMEMMAP_START_NR, +# ifdef CONFIG_X86_ESPFIX64 ESPFIX_START_NR, +# endif HIGH_KERNEL_NR, MODULES_VADDR_NR, MODULES_END_NR, @@ -71,7 +73,9 @@ static struct addr_marker address_markers[] = { { PAGE_OFFSET, "Low Kernel Mapping" }, { VMALLOC_START, "vmalloc() Area" }, { VMEMMAP_START, "Vmemmap" }, +# ifdef CONFIG_X86_ESPFIX64 { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, +# endif { __START_KERNEL_map, "High Kernel Mapping" }, { MODULES_VADDR, "Modules" }, { MODULES_END, "End Modules" }, diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 25e7e1372bb2..919b91205cd4 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -31,7 +31,7 @@ #include <linux/sched.h> #include <asm/elf.h> -struct __read_mostly va_alignment va_align = { +struct va_alignment __read_mostly va_align = { .flags = -1, }; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 1fe33987de02..ee61c36d64f8 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -49,7 +49,13 @@ void leave_mm(int cpu) if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); load_cr3(swapper_pg_dir); - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); + /* + * This gets called in the idle path where RCU + * functions differently. Tracing normally + * uses RCU, so we have to call the tracepoint + * specially here. + */ + trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } } EXPORT_SYMBOL_GPL(leave_mm); @@ -174,7 +180,7 @@ void flush_tlb_current_task(void) * * This is in units of pages. */ -unsigned long tlb_single_page_flush_ceiling = 33; +static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long vmflag) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index c61ea57d1ba1..9a2b7101ae8a 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -326,27 +326,6 @@ static void pci_fixup_video(struct pci_dev *pdev) struct pci_bus *bus; u16 config; - if (!vga_default_device()) { - resource_size_t start, end; - int i; - - /* Does firmware framebuffer belong to us? */ - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) - continue; - - start = pci_resource_start(pdev, i); - end = pci_resource_end(pdev, i); - - if (!start || !end) - continue; - - if (screen_info.lfb_base >= start && - (screen_info.lfb_base + screen_info.lfb_size) < end) - vga_set_default_device(pdev); - } - } - /* Is VGA routed to us? */ bus = pdev->bus; while (bus) { @@ -371,8 +350,7 @@ static void pci_fixup_video(struct pci_dev *pdev) pci_read_config_word(pdev, PCI_COMMAND, &config); if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; - dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); - vga_set_default_device(pdev); + dev_printk(KERN_DEBUG, &pdev->dev, "Video device with shadowed ROM\n"); } } } diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 3865116c51fb..b9958c364075 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -229,7 +229,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) static void intel_mid_pci_irq_disable(struct pci_dev *dev) { - if (!dev->dev.power.is_prepared && dev->irq > 0) + if (!mp_should_keep_irq(&dev->dev) && dev->irq > 0) mp_unmap_irq(dev->irq); } diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index bc1a2c341891..eb500c2592ad 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1256,7 +1256,7 @@ static int pirq_enable_irq(struct pci_dev *dev) static void pirq_disable_irq(struct pci_dev *dev) { - if (io_apic_assign_pci_irqs && !dev->dev.power.is_prepared && + if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) && dev->irq) { mp_unmap_irq(dev->irq); dev->irq = 0; diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 7fde9ee438a4..899dd2454256 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -11,6 +11,7 @@ targets += purgatory.ro # sure how to relocate those. Like kexec-tools, use custom flags. KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large +KBUILD_CFLAGS += -m$(BITS) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) @@ -24,7 +25,4 @@ $(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE $(call if_changed,bin2c) -# No loaders for 32bits yet. -ifeq ($(CONFIG_X86_64),y) - obj-$(CONFIG_KEXEC) += kexec-purgatory.o -endif +obj-$(CONFIG_KEXEC_FILE) += kexec-purgatory.o diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index e8a1201c3293..16fb0099b7f2 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1866,12 +1866,11 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, * * We can construct this by grafting the Xen provided pagetable into * head_64.S's preconstructed pagetables. We copy the Xen L2's into - * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This - * means that only the kernel has a physical mapping to start with - - * but that's enough to get __va working. We need to fill in the rest - * of the physical mapping once some sort of allocator has been set - * up. - * NOTE: for PVH, the page tables are native. + * level2_ident_pgt, and level2_kernel_pgt. This means that only the + * kernel has a physical mapping to start with - but that's enough to + * get __va working. We need to fill in the rest of the physical + * mapping once some sort of allocator has been set up. NOTE: for + * PVH, the page tables are native. */ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { @@ -1902,8 +1901,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* L3_i[0] -> level2_ident_pgt */ convert_pfn_mfn(level3_ident_pgt); /* L3_k[510] -> level2_kernel_pgt - * L3_i[511] -> level2_fixmap_pgt */ + * L3_k[511] -> level2_fixmap_pgt */ convert_pfn_mfn(level3_kernel_pgt); + + /* L3_k[511][506] -> level1_fixmap_pgt */ + convert_pfn_mfn(level2_fixmap_pgt); } /* We get [511][511] and have Xen's version of level2_kernel_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); @@ -1913,21 +1915,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) addr[1] = (unsigned long)l3; addr[2] = (unsigned long)l2; /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: - * Both L4[272][0] and L4[511][511] have entries that point to the same + * Both L4[272][0] and L4[511][510] have entries that point to the same * L2 (PMD) tables. Meaning that if you modify it in __va space * it will be also modified in the __ka space! (But if you just * modify the PMD table to point to other PTE's or none, then you * are OK - which is what cleanup_highmap does) */ copy_page(level2_ident_pgt, l2); - /* Graft it onto L4[511][511] */ + /* Graft it onto L4[511][510] */ copy_page(level2_kernel_pgt, l2); - /* Get [511][510] and graft that in level2_fixmap_pgt */ - l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); - l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); - copy_page(level2_fixmap_pgt, l2); - /* Note that we don't do anything with level1_fixmap_pgt which - * we don't need. */ if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* Make pagetable pieces RO */ set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); @@ -1937,6 +1933,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); + set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO); /* Pin down new L4 */ pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, |