author | Jens Axboe <axboe@kernel.dk> | 2012-05-01 14:29:55 +0200
---|---|---
committer | Jens Axboe <axboe@kernel.dk> | 2012-05-01 14:29:55 +0200
commit | 0b7877d4eea3f93e3dd941999522bbd8c538cb53 (patch) |
tree | ade6d4e411b9b9b569c802e3b2179826162c934c /arch/x86 |
parent | bd1a68b59c8e3bce45fb76632c64e1e063c3962d (diff) |
parent | 69964ea4c7b68c9399f7977aa5b9aa6539a6a98a (diff) |
Merge tag 'v3.4-rc5' into for-3.5/core
The core branch is behind driver commits that we want to build
on for 3.5, hence I'm pulling in a later -rc.
Linux 3.4-rc5
Conflicts:
Documentation/feature-removal-schedule.txt
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'arch/x86')
62 files changed, 685 insertions, 547 deletions
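Before the diff body, one note on the most interesting addition in this merge: the new arch/x86/include/asm/word-at-a-time.h header and the rewritten strncpy_from_user() in arch/x86/lib/usercopy.c (both shown further down) scan the source string a word at a time and locate the terminating NUL with a byte-mask multiply instead of a byte loop. The following is a minimal standalone sketch of that trick, assuming a 64-bit little-endian host; the helpers mirror the kernel functions in the diff, while main() is purely illustrative:

```c
#include <stdio.h>
#include <string.h>

#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))	/* 0x0101... pattern of byte x */

/* High bit set in each byte of 'a' that is zero (as in the new header). */
static unsigned long has_zero(unsigned long a)
{
	return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
}

/* Jan Achrenius' multiply trick, from the 64-bit branch of the header. */
static long count_masked_bytes(unsigned long mask)
{
	return mask * 0x0001020304050608ul >> 56;
}

/* Mirrors count_bytes() in the rewritten arch/x86/lib/usercopy.c. */
static unsigned long count_bytes(unsigned long mask)
{
	mask = (mask - 1) & ~mask;	/* keep bits below the first NUL's 0x80 */
	mask >>= 7;			/* turn them into 0xff-filled bytes */
	return count_masked_bytes(mask);
}

int main(void)
{
	unsigned long w;

	memcpy(&w, "abc\0defg", sizeof(w));	/* little-endian word, NUL at byte 3 */
	printf("mask=%#lx index=%lu\n", has_zero(w), count_bytes(has_zero(w)));
	return 0;	/* prints mask=0x80000000 index=3 */
}
```

For a word holding "abc\0defg", has_zero() returns 0x80000000 (the high bit of byte 3) and count_bytes() maps that to index 3, exactly the string length do_strncpy_from_user() reports. As the diff's own comment notes, the word-sized store may write a few bytes past the NUL, which is acceptable for the kernel buffers involved.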
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 968dbe24a255..41a7237606a3 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -129,6 +129,7 @@ KBUILD_CFLAGS += -Wno-sign-compare KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # prevent gcc from generating any FP code by mistake KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) +KBUILD_CFLAGS += $(call cc-option,-mno-avx,) KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um index 4be406abeefd..36b62bc52638 100644 --- a/arch/x86/Makefile.um +++ b/arch/x86/Makefile.um @@ -14,6 +14,9 @@ LINK-y += $(call cc-option,-m32) export LDFLAGS +LDS_EXTRA := -Ui386 +export LDS_EXTRA + # First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. include $(srctree)/arch/x86/Makefile_32.cpu diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index a0559930a180..c85e3ac99bba 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -33,6 +33,9 @@ __HEAD ENTRY(startup_32) #ifdef CONFIG_EFI_STUB + jmp preferred_addr + + .balign 0x10 /* * We don't need the return address, so set up the stack so * efi_main() can find its arugments. @@ -41,12 +44,17 @@ ENTRY(startup_32) call efi_main cmpl $0, %eax - je preferred_addr movl %eax, %esi - call 1f + jne 2f 1: + /* EFI init failed, so hang. */ + hlt + jmp 1b +2: + call 3f +3: popl %eax - subl $1b, %eax + subl $3b, %eax subl BP_pref_address(%esi), %eax add BP_code32_start(%esi), %eax leal preferred_addr(%eax), %eax diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 558d76ce23bc..87e03a13d8e3 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -200,18 +200,28 @@ ENTRY(startup_64) * entire text+data+bss and hopefully all of memory. */ #ifdef CONFIG_EFI_STUB - pushq %rsi + /* + * The entry point for the PE/COFF executable is 0x210, so only + * legacy boot loaders will execute this jmp. + */ + jmp preferred_addr + + .org 0x210 mov %rcx, %rdi mov %rdx, %rsi call efi_main - popq %rsi - cmpq $0,%rax - je preferred_addr movq %rax,%rsi - call 1f + cmpq $0,%rax + jne 2f 1: + /* EFI init failed, so hang. */ + hlt + jmp 1b +2: + call 3f +3: popq %rax - subq $1b, %rax + subq $3b, %rax subq BP_pref_address(%rsi), %rax add BP_code32_start(%esi), %eax leaq preferred_addr(%rax), %rax diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index ed549767a231..24443a332083 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -205,8 +205,13 @@ int main(int argc, char ** argv) put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); #ifdef CONFIG_X86_32 - /* Address of entry point */ - put_unaligned_le32(i, &buf[pe_header + 0x28]); + /* + * Address of entry point. + * + * The EFI stub entry point is +16 bytes from the start of + * the .text section. + */ + put_unaligned_le32(i + 16, &buf[pe_header + 0x28]); /* .text size */ put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]); @@ -217,9 +222,11 @@ int main(int argc, char ** argv) /* * Address of entry point. startup_32 is at the beginning and * the 64-bit entry point (startup_64) is always 512 bytes - * after. + * after. 
The EFI stub entry point is 16 bytes after that, as + the first instruction allows legacy loaders to jump over + the EFI stub initialisation */ - put_unaligned_le32(i + 512, &buf[pe_header + 0x28]); + put_unaligned_le32(i + 528, &buf[pe_header + 0x28]); /* .text size */ put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]); diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index d511d951a052..4824fb45560f 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -119,9 +119,7 @@ static void set_brk(unsigned long start, unsigned long end) end = PAGE_ALIGN(end); if (end <= start) return; - down_write(&current->mm->mmap_sem); - do_brk(start, end - start); - up_write(&current->mm->mmap_sem); + vm_brk(start, end - start); } #ifdef CORE_DUMP @@ -332,9 +330,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) pos = 32; map_size = ex.a_text+ex.a_data; - down_write(&current->mm->mmap_sem); - error = do_brk(text_addr & PAGE_MASK, map_size); - up_write(&current->mm->mmap_sem); + error = vm_brk(text_addr & PAGE_MASK, map_size); if (error != (text_addr & PAGE_MASK)) { send_sig(SIGKILL, current, 0); @@ -373,9 +369,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) { loff_t pos = fd_offset; - down_write(&current->mm->mmap_sem); - do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); - up_write(&current->mm->mmap_sem); + vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); bprm->file->f_op->read(bprm->file, (char __user *)N_TXTADDR(ex), ex.a_text+ex.a_data, &pos); @@ -385,26 +379,22 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto beyond_if; } - down_write(&current->mm->mmap_sem); - error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, + error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, fd_offset); - up_write(&current->mm->mmap_sem); if (error != N_TXTADDR(ex)) { send_sig(SIGKILL, current, 0); return error; } - down_write(&current->mm->mmap_sem); - error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, + error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, fd_offset + ex.a_text); - up_write(&current->mm->mmap_sem); if (error != N_DATADDR(ex)) { send_sig(SIGKILL, current, 0); return error; @@ -476,9 +466,7 @@ static int load_aout_library(struct file *file) error_time = jiffies; } #endif - down_write(&current->mm->mmap_sem); - do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); - up_write(&current->mm->mmap_sem); + vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); file->f_op->read(file, (char __user *)start_addr, ex.a_text + ex.a_data, &pos); @@ -490,12 +478,10 @@ static int load_aout_library(struct file *file) goto out; } /* Now use mmap to map the library into memory. 
*/ - down_write(&current->mm->mmap_sem); - error = do_mmap(file, start_addr, ex.a_text + ex.a_data, + error = vm_mmap(file, start_addr, ex.a_text + ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT, N_TXTOFF(ex)); - up_write(&current->mm->mmap_sem); retval = error; if (error != start_addr) goto out; @@ -503,9 +489,7 @@ static int load_aout_library(struct file *file) len = PAGE_ALIGN(ex.a_text + ex.a_data); bss = ex.a_text + ex.a_data + ex.a_bss; if (bss > len) { - down_write(&current->mm->mmap_sem); - error = do_brk(start_addr + len, bss - len); - up_write(&current->mm->mmap_sem); + error = vm_brk(start_addr + len, bss - len); retval = error; if (error != start_addr + len) goto out; diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index b3b733262909..99480e55973d 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -43,7 +43,7 @@ extern void __add_wrong_size(void) switch (sizeof(*(ptr))) { \ case __X86_CASE_B: \ asm volatile (lock #op "b %b0, %1\n" \ - : "+r" (__ret), "+m" (*(ptr)) \ + : "+q" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ case __X86_CASE_W: \ @@ -173,7 +173,7 @@ extern void __add_wrong_size(void) switch (sizeof(*(ptr))) { \ case __X86_CASE_B: \ asm volatile (lock "addb %b1, %0\n" \ - : "+m" (*(ptr)) : "ri" (inc) \ + : "+m" (*(ptr)) : "qi" (inc) \ : "memory", "cc"); \ break; \ case __X86_CASE_W: \ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index ed3065fd6314..4b4331d71935 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -59,7 +59,8 @@ extern int dma_supported(struct device *hwdev, u64 mask); extern int dma_set_mask(struct device *dev, u64 mask); extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag); + dma_addr_t *dma_addr, gfp_t flag, + struct dma_attrs *attrs); static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { @@ -111,9 +112,11 @@ static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) return gfp; } +#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) + static inline void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp) +dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, struct dma_attrs *attrs) { struct dma_map_ops *ops = get_dma_ops(dev); void *memory; @@ -129,18 +132,21 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (!is_device_dma_capable(dev)) return NULL; - if (!ops->alloc_coherent) + if (!ops->alloc) return NULL; - memory = ops->alloc_coherent(dev, size, dma_handle, - dma_alloc_coherent_gfp_flags(dev, gfp)); + memory = ops->alloc(dev, size, dma_handle, + dma_alloc_coherent_gfp_flags(dev, gfp), attrs); debug_dma_alloc_coherent(dev, size, *dma_handle, memory); return memory; } -static inline void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) +#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *vaddr, dma_addr_t bus, + struct dma_attrs *attrs) { struct dma_map_ops *ops = get_dma_ops(dev); @@ -150,8 +156,8 @@ static inline void dma_free_coherent(struct device *dev, size_t size, return; debug_dma_free_coherent(dev, size, vaddr, bus); - if (ops->free_coherent) - ops->free_coherent(dev, size, vaddr, bus); + if (ops->free) + ops->free(dev, 
size, vaddr, bus, attrs); } #endif diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h index 3427b7798dbc..7ef7c3020e5c 100644 --- a/arch/x86/include/asm/posix_types.h +++ b/arch/x86/include/asm/posix_types.h @@ -7,9 +7,9 @@ #else # ifdef __i386__ # include "posix_types_32.h" -# elif defined(__LP64__) -# include "posix_types_64.h" -# else +# elif defined(__ILP32__) # include "posix_types_x32.h" +# else +# include "posix_types_64.h" # endif #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7284c9a6a0b5..4fa7dcceb6c0 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -974,16 +974,6 @@ extern bool cpu_has_amd_erratum(const int *); #define cpu_has_amd_erratum(x) (false) #endif /* CONFIG_CPU_SUP_AMD */ -#ifdef CONFIG_X86_32 -/* - * disable hlt during certain critical i/o operations - */ -#define HAVE_DISABLE_HLT -#endif - -void disable_hlt(void); -void enable_hlt(void); - void cpu_idle_wait(void); extern unsigned long arch_align_stack(unsigned long sp); diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 4a085383af27..5ca71c065eef 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -257,7 +257,7 @@ struct sigcontext { __u64 oldmask; __u64 cr2; struct _fpstate __user *fpstate; /* zero when no FPU context */ -#ifndef __LP64__ +#ifdef __ILP32__ __u32 __fpstate_pad; #endif __u64 reserved1[8]; diff --git a/arch/x86/include/asm/siginfo.h b/arch/x86/include/asm/siginfo.h index fc1aa5535646..34c47b3341c0 100644 --- a/arch/x86/include/asm/siginfo.h +++ b/arch/x86/include/asm/siginfo.h @@ -2,7 +2,13 @@ #define _ASM_X86_SIGINFO_H #ifdef __x86_64__ -# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +# ifdef __ILP32__ /* x32 */ +typedef long long __kernel_si_clock_t __attribute__((aligned(4))); +# define __ARCH_SI_CLOCK_T __kernel_si_clock_t +# define __ARCH_SI_ATTRIBUTES __attribute__((aligned(8))) +# else /* x86-64 */ +# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +# endif #endif #include <asm-generic/siginfo.h> diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 8be5f54d9360..e0544597cfe7 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -557,6 +557,8 @@ struct __large_struct { unsigned long buf[100]; }; extern unsigned long copy_from_user_nmi(void *to, const void __user *from, unsigned long n); +extern __must_check long +strncpy_from_user(char *dst, const char __user *src, long count); /* * movsl can be slow when source and dest are not both 8-byte aligned diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 566e803cc602..8084bc73b18c 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -213,11 +213,6 @@ static inline unsigned long __must_check copy_from_user(void *to, return n; } -long __must_check strncpy_from_user(char *dst, const char __user *src, - long count); -long __must_check __strncpy_from_user(char *dst, - const char __user *src, long count); - /** * strlen_user: - Get the size of a string in user space. * @str: The string to measure. 
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 1c66d30971ad..fcd4b6f3ef02 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -208,10 +208,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } } -__must_check long -strncpy_from_user(char *dst, const char __user *src, long count); -__must_check long -__strncpy_from_user(char *dst, const char __user *src, long count); __must_check long strnlen_user(const char __user *str, long n); __must_check long __strnlen_user(const char __user *str, long n); __must_check long strlen_user(const char __user *str); diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 37cdc9d99bb1..4437001d8e3d 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -63,10 +63,10 @@ #else # ifdef __i386__ # include <asm/unistd_32.h> -# elif defined(__LP64__) -# include <asm/unistd_64.h> -# else +# elif defined(__ILP32__) # include <asm/unistd_x32.h> +# else +# include <asm/unistd_64.h> # endif #endif diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h new file mode 100644 index 000000000000..6fe6767b7124 --- /dev/null +++ b/arch/x86/include/asm/word-at-a-time.h @@ -0,0 +1,46 @@ +#ifndef _ASM_WORD_AT_A_TIME_H +#define _ASM_WORD_AT_A_TIME_H + +/* + * This is largely generic for little-endian machines, but the + * optimal byte mask counting is probably going to be something + * that is architecture-specific. If you have a reliably fast + * bit count instruction, that might be better than the multiply + * and shift, for example. + */ + +#ifdef CONFIG_64BIT + +/* + * Jan Achrenius on G+: microoptimized version of + * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" + * that works for the bytemasks without having to + * mask them first. 
+ */ +static inline long count_masked_bytes(unsigned long mask) +{ + return mask*0x0001020304050608ul >> 56; +} + +#else /* 32-bit case */ + +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ +static inline long count_masked_bytes(long mask) +{ + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; +} + +#endif + +#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) + +/* Return the high bit set in the first byte that is a zero */ +static inline unsigned long has_zero(unsigned long a) +{ + return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80); +} + +#endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index baaca8defec8..764b66a4cf89 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -195,6 +195,5 @@ extern struct x86_msi_ops x86_msi; extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); -extern void x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node); #endif diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 103b6ab368d3..146a49c763a4 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -24,6 +24,10 @@ unsigned long acpi_realmode_flags; static char temp_stack[4096]; #endif +asmlinkage void acpi_enter_s3(void) +{ + acpi_enter_sleep_state(3, wake_sleep_flags); +} /** * acpi_suspend_lowlevel - save kernel state * diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index 416d4be13fef..d68677a2a010 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h @@ -3,12 +3,16 @@ */ #include <asm/trampoline.h> +#include <linux/linkage.h> extern unsigned long saved_video_mode; extern long saved_magic; extern int wakeup_pmode_return; +extern u8 wake_sleep_flags; +extern asmlinkage void acpi_enter_s3(void); + extern unsigned long acpi_copy_wakeup_routine(unsigned long); extern void wakeup_long64(void); diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index 13ab720573e3..72610839f03b 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -74,9 +74,7 @@ restore_registers: ENTRY(do_suspend_lowlevel) call save_processor_state call save_registers - pushl $3 - call acpi_enter_sleep_state - addl $4, %esp + call acpi_enter_s3 # In case of S3 failure, we'll emerge here. 
Jump # to ret_point to recover diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 8ea5164cbd04..014d1d28c397 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -71,9 +71,7 @@ ENTRY(do_suspend_lowlevel) movq %rsi, saved_rsi addq $8, %rsp - movl $3, %edi - xorl %eax, %eax - call acpi_enter_sleep_state + call acpi_enter_s3 /* in case something went wrong, restore the machine status and go on */ jmp resume_point diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index b1e7c7f7a0af..e66311200cbd 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -477,7 +477,7 @@ error: /* allocate and map a coherent mapping */ static void * gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, - gfp_t flag) + gfp_t flag, struct dma_attrs *attrs) { dma_addr_t paddr; unsigned long align_mask; @@ -500,7 +500,8 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, } __free_pages(page, get_order(size)); } else - return dma_generic_alloc_coherent(dev, size, dma_addr, flag); + return dma_generic_alloc_coherent(dev, size, dma_addr, flag, + attrs); return NULL; } @@ -508,7 +509,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, /* free a coherent mapping */ static void gart_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr) + dma_addr_t dma_addr, struct dma_attrs *attrs) { gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL); free_pages((unsigned long)vaddr, get_order(size)); @@ -700,8 +701,8 @@ static struct dma_map_ops gart_dma_ops = { .unmap_sg = gart_unmap_sg, .map_page = gart_map_page, .unmap_page = gart_unmap_page, - .alloc_coherent = gart_alloc_coherent, - .free_coherent = gart_free_coherent, + .alloc = gart_alloc_coherent, + .free = gart_free_coherent, .mapping_error = gart_mapping_error, }; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 11544d8f1e97..edc24480469f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1637,9 +1637,11 @@ static int __init apic_verify(void) mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; /* The BIOS may have set up the APIC at some other address */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (l & MSR_IA32_APICBASE_ENABLE) - mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; + if (boot_cpu_data.x86 >= 6) { + rdmsr(MSR_IA32_APICBASE, l, h); + if (l & MSR_IA32_APICBASE_ENABLE) + mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; + } pr_info("Found and enabled local APIC!\n"); return 0; @@ -1657,13 +1659,15 @@ int __init apic_force_enable(unsigned long addr) * MSR. This can only be done in software for Intel P6 or later * and AMD K7 (Model > 1) or later. */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (!(l & MSR_IA32_APICBASE_ENABLE)) { - pr_info("Local APIC disabled by BIOS -- reenabling.\n"); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | addr; - wrmsr(MSR_IA32_APICBASE, l, h); - enabled_via_apicbase = 1; + if (boot_cpu_data.x86 >= 6) { + rdmsr(MSR_IA32_APICBASE, l, h); + if (!(l & MSR_IA32_APICBASE_ENABLE)) { + pr_info("Local APIC disabled by BIOS -- reenabling.\n"); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | addr; + wrmsr(MSR_IA32_APICBASE, l, h); + enabled_via_apicbase = 1; + } } return apic_verify(); } @@ -2209,10 +2213,12 @@ static void lapic_resume(void) * FIXME! This will be wrong if we ever support suspend on * SMP! We'll need to do this as part of the CPU restore! 
*/ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); + if (boot_cpu_data.x86 >= 6) { + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); + } } maxlvt = lapic_get_maxlvt(); diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 899803e03214..23e75422e013 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -207,8 +207,11 @@ static void __init map_csrs(void) static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) { - c->phys_proc_id = node; - per_cpu(cpu_llc_id, smp_processor_id()) = node; + + if (c->phys_proc_id != node) { + c->phys_proc_id = node; + per_cpu(cpu_llc_id, smp_processor_id()) = node; + } } static int __init numachip_system_init(void) diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 8a778db45e3a..991e315f4227 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -24,6 +24,12 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (x2apic_phys) return x2apic_enabled(); + else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) && + x2apic_enabled()) { + printk(KERN_DEBUG "System requires x2apic physical mode\n"); + return 1; + } else return 0; } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 0a44b90602b0..1c67ca100e4c 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -26,7 +26,8 @@ * contact AMD for precise details and a CPU swap. * * See http://www.multimania.com/poulot/k6bug.html - * http://www.amd.com/K6/k6docs/revgd.html + * and section 2.6.2 of "AMD-K6 Processor Revision Guide - Model 6" + * (Publication # 21266 Issue Date: August 1998) * * The following test is erm.. interesting. AMD neglected to up * the chip setting when fixing the bug but they also tweaked some @@ -94,7 +95,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) "system stability may be impaired when more than 32 MB are used.\n"); else printk(KERN_CONT "probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); } /* K6 with old style WHCR */ @@ -353,10 +353,11 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) node = per_cpu(cpu_llc_id, cpu); /* - * If core numbers are inconsistent, it's likely a multi-fabric platform, - * so invoke platform-specific handler + * On multi-fabric platform (e.g. Numascale NumaChip) a + * platform-specific handler needs to be called to fixup some + * IDs of the CPU. */ - if (c->phys_proc_id != node) + if (x86_cpuinit.fixup_cpu_id) x86_cpuinit.fixup_cpu_id(c, node); if (!node_online(node)) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 67e258362a3d..cf79302198a6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1163,15 +1163,6 @@ static void dbg_restore_debug_regs(void) #endif /* ! CONFIG_KGDB */ /* - * Prints an error where the NUMA and configured core-number mismatch and the - * platform didn't override this to fix it up - */ -void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node) -{ - pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id); -} - -/* * cpu_init() initializes state that is per-CPU. 
Some data is already * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 73d08ed98a64..b8f3653dddbc 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -433,14 +433,14 @@ int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot, /* check if @slot is already used or the index is already disabled */ ret = amd_get_l3_disable_slot(nb, slot); if (ret >= 0) - return -EINVAL; + return -EEXIST; if (index > nb->l3_cache.indices) return -EINVAL; /* check whether the other slot has disabled the same index already */ if (index == amd_get_l3_disable_slot(nb, !slot)) - return -EINVAL; + return -EEXIST; amd_l3_disable_index(nb, cpu, slot, index); @@ -468,8 +468,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val); if (err) { if (err == -EEXIST) - printk(KERN_WARNING "L3 disable slot %d in use!\n", - slot); + pr_warning("L3 slot %d in use/index already disabled!\n", + slot); return err; } return count; diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ef484d9d0a25..a2dfacfd7103 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -1271,6 +1271,17 @@ done: return num ? -EINVAL : 0; } +PMU_FORMAT_ATTR(cccr, "config:0-31" ); +PMU_FORMAT_ATTR(escr, "config:32-62"); +PMU_FORMAT_ATTR(ht, "config:63" ); + +static struct attribute *intel_p4_formats_attr[] = { + &format_attr_cccr.attr, + &format_attr_escr.attr, + &format_attr_ht.attr, + NULL, +}; + static __initconst const struct x86_pmu p4_pmu = { .name = "Netburst P4/Xeon", .handle_irq = p4_pmu_handle_irq, @@ -1305,6 +1316,8 @@ static __initconst const struct x86_pmu p4_pmu = { * the former idea is taken from OProfile code */ .perfctr_second_write = 1, + + .format_attrs = intel_p4_formats_attr, }; __init int p4_pmu_init(void) diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 7734bcbb5a3a..2d6e6498c176 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -235,6 +235,7 @@ int init_fpu(struct task_struct *tsk) if (tsk_used_math(tsk)) { if (HAVE_HWFP && tsk == current) unlazy_fpu(tsk); + tsk->thread.fpu.last_cpu = ~0; return 0; } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 7943e0c21bde..3dafc6003b7c 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -282,8 +282,13 @@ void fixup_irqs(void) else if (!(warned++)) set_affinity = 0; + /* + * We unmask if the irq was not marked masked by the + * core code. That respects the lazy irq disable + * behaviour. 
+ */ if (!irqd_can_move_in_process_context(data) && - !irqd_irq_disabled(data) && chip->irq_unmask) + !irqd_irq_masked(data) && chip->irq_unmask) chip->irq_unmask(data); raw_spin_unlock(&desc->lock); diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 90fcf62854bb..1d5d31ea686b 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -68,16 +68,9 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf, return count; } -static int setup_data_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - - return 0; -} - static const struct file_operations fops_setup_data = { .read = setup_data_read, - .open = setup_data_open, + .open = simple_open, .llseek = default_llseek, }; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index db6720edfdd0..8bfb6146f753 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -43,6 +43,8 @@ #include <linux/smp.h> #include <linux/nmi.h> #include <linux/hw_breakpoint.h> +#include <linux/uaccess.h> +#include <linux/memory.h> #include <asm/debugreg.h> #include <asm/apicdef.h> @@ -741,6 +743,64 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) regs->ip = ip; } +int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) +{ + int err; + char opc[BREAK_INSTR_SIZE]; + + bpt->type = BP_BREAKPOINT; + err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + BREAK_INSTR_SIZE); + if (err) + return err; + err = probe_kernel_write((char *)bpt->bpt_addr, + arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); +#ifdef CONFIG_DEBUG_RODATA + if (!err) + return err; + /* + * It is safe to call text_poke() because normal kernel execution + * is stopped on all cores, so long as the text_mutex is not locked. + */ + if (mutex_is_locked(&text_mutex)) + return -EBUSY; + text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, + BREAK_INSTR_SIZE); + err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); + if (err) + return err; + if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE)) + return -EINVAL; + bpt->type = BP_POKE_BREAKPOINT; +#endif /* CONFIG_DEBUG_RODATA */ + return err; +} + +int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) +{ +#ifdef CONFIG_DEBUG_RODATA + int err; + char opc[BREAK_INSTR_SIZE]; + + if (bpt->type != BP_POKE_BREAKPOINT) + goto knl_write; + /* + * It is safe to call text_poke() because normal kernel execution + * is stopped on all cores, so long as the text_mutex is not locked. 
+ */ + if (mutex_is_locked(&text_mutex)) + goto knl_write; + text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE); + err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); + if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE)) + goto knl_write; + return err; +knl_write: +#endif /* CONFIG_DEBUG_RODATA */ + return probe_kernel_write((char *)bpt->bpt_addr, + (char *)bpt->saved_instr, BREAK_INSTR_SIZE); +} + struct kgdb_arch arch_kgdb_ops = { /* Breakpoint instruction: */ .gdb_bpt_instr = { 0xcc }, diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 694d801bf606..b8ba6e4a27e4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -38,6 +38,7 @@ #include <asm/traps.h> #include <asm/desc.h> #include <asm/tlbflush.h> +#include <asm/idle.h> static int kvmapf = 1; @@ -253,7 +254,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) kvm_async_pf_task_wait((u32)read_cr2()); break; case KVM_PV_REASON_PAGE_READY: + rcu_irq_enter(); + exit_idle(); kvm_async_pf_task_wake((u32)read_cr2()); + rcu_irq_exit(); break; } } diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 73465aab28f8..8a2ce8fd41c0 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -82,11 +82,6 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu); - if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { - pr_warning("CPU%d: family %d not supported\n", cpu, c->x86); - return -1; - } - csig->rev = c->microcode; pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); @@ -380,6 +375,13 @@ static struct microcode_ops microcode_amd_ops = { struct microcode_ops * __init init_amd_microcode(void) { + struct cpuinfo_x86 *c = &cpu_data(0); + + if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { + pr_warning("AMD CPU family 0x%x not supported\n", c->x86); + return NULL; + } + patch = (void *)get_zeroed_page(GFP_KERNEL); if (!patch) return NULL; diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 87a0f8688301..c9bda6d6035c 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -419,10 +419,8 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif) if (err) return err; - if (microcode_init_cpu(cpu) == UCODE_ERROR) { - sysfs_remove_group(&dev->kobj, &mc_attr_group); + if (microcode_init_cpu(cpu) == UCODE_ERROR) return -EINVAL; - } return err; } @@ -528,11 +526,11 @@ static int __init microcode_init(void) microcode_ops = init_intel_microcode(); else if (c->x86_vendor == X86_VENDOR_AMD) microcode_ops = init_amd_microcode(); - - if (!microcode_ops) { + else pr_err("no support for this CPU vendor\n"); + + if (!microcode_ops) return -ENODEV; - } microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 6ac5782f4d6b..d0b2fb9ccbb1 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -430,7 +430,7 @@ static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr, } static void* calgary_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) + dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs) { void *ret = NULL; dma_addr_t mapping; @@ -463,7 +463,8 @@ error: } static void calgary_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) + void *vaddr, 
dma_addr_t dma_handle, + struct dma_attrs *attrs) { unsigned int npages; struct iommu_table *tbl = find_iommu_table(dev); @@ -476,8 +477,8 @@ static void calgary_free_coherent(struct device *dev, size_t size, } static struct dma_map_ops calgary_dma_ops = { - .alloc_coherent = calgary_alloc_coherent, - .free_coherent = calgary_free_coherent, + .alloc = calgary_alloc_coherent, + .free = calgary_free_coherent, .map_sg = calgary_map_sg, .unmap_sg = calgary_unmap_sg, .map_page = calgary_map_page, diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 28e5e06fcba4..3003250ac51d 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -96,7 +96,8 @@ void __init pci_iommu_alloc(void) } } void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag) + dma_addr_t *dma_addr, gfp_t flag, + struct dma_attrs *attrs) { unsigned long dma_mask; struct page *page; diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 3af4af810c07..f96050685b46 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -75,7 +75,7 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, } static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr) + dma_addr_t dma_addr, struct dma_attrs *attrs) { free_pages((unsigned long)vaddr, get_order(size)); } @@ -96,8 +96,8 @@ static void nommu_sync_sg_for_device(struct device *dev, } struct dma_map_ops nommu_dma_ops = { - .alloc_coherent = dma_generic_alloc_coherent, - .free_coherent = nommu_free_coherent, + .alloc = dma_generic_alloc_coherent, + .free = nommu_free_coherent, .map_sg = nommu_map_sg, .map_page = nommu_map_page, .sync_single_for_device = nommu_sync_single_for_device, diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 8f972cbddef0..6c483ba98b9c 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -15,21 +15,30 @@ int swiotlb __read_mostly; static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) { void *vaddr; - vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags); + vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags, + attrs); if (vaddr) return vaddr; return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); } +static void x86_swiotlb_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_addr, + struct dma_attrs *attrs) +{ + swiotlb_free_coherent(dev, size, vaddr, dma_addr); +} + static struct dma_map_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, - .alloc_coherent = x86_swiotlb_alloc_coherent, - .free_coherent = swiotlb_free_coherent, + .alloc = x86_swiotlb_alloc_coherent, + .free = x86_swiotlb_free_coherent, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, .sync_single_for_device = swiotlb_sync_single_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index a33afaa5ddb7..1d92a5ab6e8b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -362,34 +362,10 @@ void (*pm_idle)(void); EXPORT_SYMBOL(pm_idle); #endif -#ifdef CONFIG_X86_32 -/* - * This halt magic was a workaround for ancient floppy DMA - * wreckage. It should be safe to remove. 
- */ -static int hlt_counter; -void disable_hlt(void) -{ - hlt_counter++; -} -EXPORT_SYMBOL(disable_hlt); - -void enable_hlt(void) -{ - hlt_counter--; -} -EXPORT_SYMBOL(enable_hlt); - -static inline int hlt_use_halt(void) -{ - return (!hlt_counter && boot_cpu_data.hlt_works_ok); -} -#else static inline int hlt_use_halt(void) { return 1; } -#endif #ifndef CONFIG_SMP static inline void play_dead(void) diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index f386dc49f988..7515cf0e1805 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -216,9 +216,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) current_thread_info()->sig_on_uaccess_error = 1; /* - * 0 is a valid user pointer (in the access_ok sense) on 32-bit and + * NULL is a valid user pointer (in the access_ok sense) on 32-bit and * 64-bit, so we don't need to special-case it here. For all the - * vsyscalls, 0 means "don't write anything" not "write it at + * vsyscalls, NULL means "don't write anything" not "write it at * address 0". */ ret = -EFAULT; @@ -247,7 +247,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) ret = sys_getcpu((unsigned __user *)regs->di, (unsigned __user *)regs->si, - 0); + NULL); break; } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index e9f265fd79ae..9cf71d0b2d37 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -93,7 +93,6 @@ struct x86_init_ops x86_init __initdata = { struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { .early_percpu_clock_init = x86_init_noop, .setup_percpu_clockev = setup_secondary_APIC_clock, - .fixup_cpu_id = x86_default_fixup_cpu_id, }; static void default_nmi_init(void) { }; diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index a73f0c104813..2e88438ffd83 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -369,7 +369,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) case MSR_CORE_PERF_FIXED_CTR_CTRL: if (pmu->fixed_ctr_ctrl == data) return 0; - if (!(data & 0xfffffffffffff444)) { + if (!(data & 0xfffffffffffff444ull)) { reprogram_fixed_counters(pmu, data); return 0; } @@ -459,17 +459,17 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1); if (pmu->version == 1) { - pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1; - return; + pmu->nr_arch_fixed_counters = 0; + } else { + pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), + X86_PMC_MAX_FIXED); + pmu->counter_bitmask[KVM_PMC_FIXED] = + ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; } - pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), - X86_PMC_MAX_FIXED); - pmu->counter_bitmask[KVM_PMC_FIXED] = - ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; - pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1) - | (((1ull << pmu->nr_arch_fixed_counters) - 1) - << X86_PMC_IDX_FIXED)); + pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | + (((1ull << pmu->nr_arch_fixed_counters) - 1) << X86_PMC_IDX_FIXED); + pmu->global_ctrl_mask = ~pmu->global_ctrl; } void kvm_pmu_init(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 280751c84724..4ff0ab9bc3c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2210,9 +2210,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) msr = find_msr_entry(vmx, msr_index); if (msr) { msr->data = data; - if (msr - vmx->guest_msrs < vmx->save_nmsrs) + if (msr - vmx->guest_msrs < 
vmx->save_nmsrs) { + preempt_disable(); kvm_set_shared_msr(msr->index, msr->data, msr->mask); + preempt_enable(); + } break; } ret = kvm_set_msr_common(vcpu, msr_index, data); @@ -3906,7 +3909,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); vmx_set_cr4(&vmx->vcpu, 0); vmx_set_efer(&vmx->vcpu, 0); vmx_fpu_activate(&vmx->vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4044ce0bf7c1..91a5e989abcf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6336,13 +6336,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (npages && !old.rmap) { unsigned long userspace_addr; - down_write(&current->mm->mmap_sem); - userspace_addr = do_mmap(NULL, 0, + userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE, PROT_READ | PROT_WRITE, map_flags, 0); - up_write(&current->mm->mmap_sem); if (IS_ERR((void *)userspace_addr)) return PTR_ERR((void *)userspace_addr); @@ -6366,10 +6364,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, if (!user_alloc && !old.user_alloc && old.rmap && !npages) { int ret; - down_write(&current->mm->mmap_sem); - ret = do_munmap(current->mm, old.userspace_addr, + ret = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); - up_write(&current->mm->mmap_sem); if (ret < 0) printk(KERN_WARNING "kvm_vm_ioctl_set_memory_region: " diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 25feb1ae71c5..b1e6c4b2e8eb 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -379,8 +379,8 @@ err_out: return; } -/* Decode moffset16/32/64 */ -static void __get_moffset(struct insn *insn) +/* Decode moffset16/32/64. Return 0 if failed */ +static int __get_moffset(struct insn *insn) { switch (insn->addr_bytes) { case 2: @@ -397,15 +397,19 @@ static void __get_moffset(struct insn *insn) insn->moffset2.value = get_next(int, insn); insn->moffset2.nbytes = 4; break; + default: /* opnd_bytes must be modified manually */ + goto err_out; } insn->moffset1.got = insn->moffset2.got = 1; + return 1; + err_out: - return; + return 0; } -/* Decode imm v32(Iz) */ -static void __get_immv32(struct insn *insn) +/* Decode imm v32(Iz). 
Return 0 if failed */ +static int __get_immv32(struct insn *insn) { switch (insn->opnd_bytes) { case 2: @@ -417,14 +421,18 @@ static void __get_immv32(struct insn *insn) insn->immediate.value = get_next(int, insn); insn->immediate.nbytes = 4; break; + default: /* opnd_bytes must be modified manually */ + goto err_out; } + return 1; + err_out: - return; + return 0; } -/* Decode imm v64(Iv/Ov) */ -static void __get_immv(struct insn *insn) +/* Decode imm v64(Iv/Ov), Return 0 if failed */ +static int __get_immv(struct insn *insn) { switch (insn->opnd_bytes) { case 2: @@ -441,15 +449,18 @@ static void __get_immv(struct insn *insn) insn->immediate2.value = get_next(int, insn); insn->immediate2.nbytes = 4; break; + default: /* opnd_bytes must be modified manually */ + goto err_out; } insn->immediate1.got = insn->immediate2.got = 1; + return 1; err_out: - return; + return 0; } /* Decode ptr16:16/32(Ap) */ -static void __get_immptr(struct insn *insn) +static int __get_immptr(struct insn *insn) { switch (insn->opnd_bytes) { case 2: @@ -462,14 +473,17 @@ static void __get_immptr(struct insn *insn) break; case 8: /* ptr16:64 is not exist (no segment) */ - return; + return 0; + default: /* opnd_bytes must be modified manually */ + goto err_out; } insn->immediate2.value = get_next(unsigned short, insn); insn->immediate2.nbytes = 2; insn->immediate1.got = insn->immediate2.got = 1; + return 1; err_out: - return; + return 0; } /** @@ -489,7 +503,8 @@ void insn_get_immediate(struct insn *insn) insn_get_displacement(insn); if (inat_has_moffset(insn->attr)) { - __get_moffset(insn); + if (!__get_moffset(insn)) + goto err_out; goto done; } @@ -517,16 +532,20 @@ void insn_get_immediate(struct insn *insn) insn->immediate2.nbytes = 4; break; case INAT_IMM_PTR: - __get_immptr(insn); + if (!__get_immptr(insn)) + goto err_out; break; case INAT_IMM_VWORD32: - __get_immv32(insn); + if (!__get_immv32(insn)) + goto err_out; break; case INAT_IMM_VWORD: - __get_immv(insn); + if (!__get_immv(insn)) + goto err_out; break; default: - break; + /* Here, insn must have an immediate, but failed */ + goto err_out; } if (inat_has_second_immediate(insn->attr)) { insn->immediate2.value = get_next(char, insn); diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 97be9cb54483..d6ae30bbd7bb 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -7,6 +7,8 @@ #include <linux/highmem.h> #include <linux/module.h> +#include <asm/word-at-a-time.h> + /* * best effort, GUP based copy_from_user() that is NMI-safe */ @@ -41,3 +43,104 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) return len; } EXPORT_SYMBOL_GPL(copy_from_user_nmi); + +static inline unsigned long count_bytes(unsigned long mask) +{ + mask = (mask - 1) & ~mask; + mask >>= 7; + return count_masked_bytes(mask); +} + +/* + * Do a strncpy, return length of string without final '\0'. + * 'count' is the user-supplied count (return 'count' if we + * hit it), 'max' is the address space maximum (and we return + * -EFAULT if we hit it). 
+ */ +static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max) +{ + long res = 0; + + /* + * Truncate 'max' to the user-specified limit, so that + * we only have one limit we need to check in the loop + */ + if (max > count) + max = count; + + while (max >= sizeof(unsigned long)) { + unsigned long c; + + /* Fall back to byte-at-a-time if we get a page fault */ + if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) + break; + /* This can write a few bytes past the NUL character, but that's ok */ + *(unsigned long *)(dst+res) = c; + c = has_zero(c); + if (c) + return res + count_bytes(c); + res += sizeof(unsigned long); + max -= sizeof(unsigned long); + } + + while (max) { + char c; + + if (unlikely(__get_user(c,src+res))) + return -EFAULT; + dst[res] = c; + if (!c) + return res; + res++; + max--; + } + + /* + * Uhhuh. We hit 'max'. But was that the user-specified maximum + * too? If so, that's ok - we got as much as the user asked for. + */ + if (res >= count) + return res; + + /* + * Nope: we hit the address space limit, and we still had more + * characters the caller would have wanted. That's an EFAULT. + */ + return -EFAULT; +} + +/** + * strncpy_from_user: - Copy a NUL terminated string from userspace. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ +long +strncpy_from_user(char *dst, const char __user *src, long count) +{ + unsigned long max_addr, src_addr; + + if (unlikely(count <= 0)) + return 0; + + max_addr = current_thread_info()->addr_limit.seg; + src_addr = (unsigned long)src; + if (likely(src_addr < max_addr)) { + unsigned long max = max_addr - src_addr; + return do_strncpy_from_user(dst, src, count, max); + } + return -EFAULT; +} +EXPORT_SYMBOL(strncpy_from_user); diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index d9b094ca7aaa..ef2a6a5d78e3 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -33,93 +33,6 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n)) /* - * Copy a null terminated string from userspace. - */ - -#define __do_strncpy_from_user(dst, src, count, res) \ -do { \ - int __d0, __d1, __d2; \ - might_fault(); \ - __asm__ __volatile__( \ - " testl %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decl %1\n" \ - " jnz 0b\n" \ - "1: subl %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(0b,3b) \ - : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) - -/** - * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. 
- * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * Caller must check the specified block with access_ok() before calling - * this function. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long -__strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(__strncpy_from_user); - -/** - * strncpy_from_user: - Copy a NUL terminated string from userspace. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long -strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(strncpy_from_user); - -/* * Zero Userspace */ diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index b7c2849ffb66..0d0326f388c0 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -9,55 +9,6 @@ #include <asm/uaccess.h> /* - * Copy a null terminated string from userspace. 
- */ - -#define __do_strncpy_from_user(dst,src,count,res) \ -do { \ - long __d0, __d1, __d2; \ - might_fault(); \ - __asm__ __volatile__( \ - " testq %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decq %1\n" \ - " jnz 0b\n" \ - "1: subq %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movq %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(0b,3b) \ - : "=&r"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) - -long -__strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(__strncpy_from_user); - -long -strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - return __strncpy_from_user(dst, src, count); - return res; -} -EXPORT_SYMBOL(strncpy_from_user); - -/* * Zero Userspace */ diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S index 66870223f8c5..877b9a1b2152 100644 --- a/arch/x86/net/bpf_jit.S +++ b/arch/x86/net/bpf_jit.S @@ -18,17 +18,17 @@ * r9d : hlen = skb->len - skb->data_len */ #define SKBDATA %r8 - -sk_load_word_ind: - .globl sk_load_word_ind - - add %ebx,%esi /* offset += X */ -# test %esi,%esi /* if (offset < 0) goto bpf_error; */ - js bpf_error +#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */ sk_load_word: .globl sk_load_word + test %esi,%esi + js bpf_slow_path_word_neg + +sk_load_word_positive_offset: + .globl sk_load_word_positive_offset + mov %r9d,%eax # hlen sub %esi,%eax # hlen - offset cmp $3,%eax @@ -37,16 +37,15 @@ sk_load_word: bswap %eax /* ntohl() */ ret - -sk_load_half_ind: - .globl sk_load_half_ind - - add %ebx,%esi /* offset += X */ - js bpf_error - sk_load_half: .globl sk_load_half + test %esi,%esi + js bpf_slow_path_half_neg + +sk_load_half_positive_offset: + .globl sk_load_half_positive_offset + mov %r9d,%eax sub %esi,%eax # hlen - offset cmp $1,%eax @@ -55,14 +54,15 @@ sk_load_half: rol $8,%ax # ntohs() ret -sk_load_byte_ind: - .globl sk_load_byte_ind - add %ebx,%esi /* offset += X */ - js bpf_error - sk_load_byte: .globl sk_load_byte + test %esi,%esi + js bpf_slow_path_byte_neg + +sk_load_byte_positive_offset: + .globl sk_load_byte_positive_offset + cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */ jle bpf_slow_path_byte movzbl (SKBDATA,%rsi),%eax @@ -73,25 +73,21 @@ sk_load_byte: * * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf) * Must preserve A accumulator (%eax) - * Inputs : %esi is the offset value, already known positive + * Inputs : %esi is the offset value */ -ENTRY(sk_load_byte_msh) - CFI_STARTPROC +sk_load_byte_msh: + .globl sk_load_byte_msh + test %esi,%esi + js bpf_slow_path_byte_msh_neg + +sk_load_byte_msh_positive_offset: + .globl sk_load_byte_msh_positive_offset cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */ jle bpf_slow_path_byte_msh movzbl (SKBDATA,%rsi),%ebx and $15,%bl shl $2,%bl ret - CFI_ENDPROC -ENDPROC(sk_load_byte_msh) - -bpf_error: -# force a return 0 from jit handler - xor %eax,%eax - mov -8(%rbp),%rbx - leaveq - ret /* rsi contains offset and can be scratched */ #define bpf_slow_path_common(LEN) \ @@ -138,3 +134,67 @@ bpf_slow_path_byte_msh: shl $2,%al xchg %eax,%ebx ret + +#define sk_negative_common(SIZE) \ + push %rdi; /* save skb */ \ + push %r9; \ + push SKBDATA; \ +/* rsi already has offset */ \ + mov 
$SIZE,%ecx; /* size */ \ + call bpf_internal_load_pointer_neg_helper; \ + test %rax,%rax; \ + pop SKBDATA; \ + pop %r9; \ + pop %rdi; \ + jz bpf_error + + +bpf_slow_path_word_neg: + cmp SKF_MAX_NEG_OFF, %esi /* test range */ + jl bpf_error /* offset lower -> error */ +sk_load_word_negative_offset: + .globl sk_load_word_negative_offset + sk_negative_common(4) + mov (%rax), %eax + bswap %eax + ret + +bpf_slow_path_half_neg: + cmp SKF_MAX_NEG_OFF, %esi + jl bpf_error +sk_load_half_negative_offset: + .globl sk_load_half_negative_offset + sk_negative_common(2) + mov (%rax),%ax + rol $8,%ax + movzwl %ax,%eax + ret + +bpf_slow_path_byte_neg: + cmp SKF_MAX_NEG_OFF, %esi + jl bpf_error +sk_load_byte_negative_offset: + .globl sk_load_byte_negative_offset + sk_negative_common(1) + movzbl (%rax), %eax + ret + +bpf_slow_path_byte_msh_neg: + cmp SKF_MAX_NEG_OFF, %esi + jl bpf_error +sk_load_byte_msh_negative_offset: + .globl sk_load_byte_msh_negative_offset + xchg %eax,%ebx /* dont lose A , X is about to be scratched */ + sk_negative_common(1) + movzbl (%rax),%eax + and $15,%al + shl $2,%al + xchg %eax,%ebx + ret + +bpf_error: +# force a return 0 from jit handler + xor %eax,%eax + mov -8(%rbp),%rbx + leaveq + ret diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 5671752f8d9c..0597f95b6da6 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -30,7 +30,10 @@ int bpf_jit_enable __read_mostly; * assembly code in arch/x86/net/bpf_jit.S */ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; -extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; +extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[]; +extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[]; +extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[]; +extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[]; static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { @@ -117,6 +120,8 @@ static inline void bpf_flush_icache(void *start, void *end) set_fs(old_fs); } +#define CHOOSE_LOAD_FUNC(K, func) \ + ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) void bpf_jit_compile(struct sk_filter *fp) { @@ -289,7 +294,7 @@ void bpf_jit_compile(struct sk_filter *fp) EMIT2(0x24, K & 0xFF); /* and imm8,%al */ } else if (K >= 0xFFFF0000) { EMIT2(0x66, 0x25); /* and imm16,%ax */ - EMIT2(K, 2); + EMIT(K, 2); } else { EMIT1_off32(0x25, K); /* and imm32,%eax */ } @@ -473,44 +478,46 @@ void bpf_jit_compile(struct sk_filter *fp) #endif break; case BPF_S_LD_W_ABS: - func = sk_load_word; + func = CHOOSE_LOAD_FUNC(K, sk_load_word); common_load: seen |= SEEN_DATAREF; - if ((int)K < 0) { - /* Abort the JIT because __load_pointer() is needed. */ - goto out; - } t_offset = func - (image + addrs[i]); EMIT1_off32(0xbe, K); /* mov imm32,%esi */ EMIT1_off32(0xe8, t_offset); /* call */ break; case BPF_S_LD_H_ABS: - func = sk_load_half; + func = CHOOSE_LOAD_FUNC(K, sk_load_half); goto common_load; case BPF_S_LD_B_ABS: - func = sk_load_byte; + func = CHOOSE_LOAD_FUNC(K, sk_load_byte); goto common_load; case BPF_S_LDX_B_MSH: - if ((int)K < 0) { - /* Abort the JIT because __load_pointer() is needed. 
*/ - goto out; - } + func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); seen |= SEEN_DATAREF | SEEN_XREG; - t_offset = sk_load_byte_msh - (image + addrs[i]); + t_offset = func - (image + addrs[i]); EMIT1_off32(0xbe, K); /* mov imm32,%esi */ EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */ break; case BPF_S_LD_W_IND: - func = sk_load_word_ind; + func = sk_load_word; common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; t_offset = func - (image + addrs[i]); - EMIT1_off32(0xbe, K); /* mov imm32,%esi */ + if (K) { + if (is_imm8(K)) { + EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */ + } else { + EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */ + EMIT(K, 4); + } + } else { + EMIT2(0x89,0xde); /* mov %ebx,%esi */ + } EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */ break; case BPF_S_LD_H_IND: - func = sk_load_half_ind; + func = sk_load_half; goto common_load_ind; case BPF_S_LD_B_IND: - func = sk_load_byte_ind; + func = sk_load_byte; goto common_load_ind; case BPF_S_JMP_JA: t_offset = addrs[i + K] - addrs[i]; diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index e0a37233c0af..e31bcd8f2eee 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -805,7 +805,7 @@ void intel_scu_devices_create(void) } else i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1); } - intel_scu_notifier_post(SCU_AVAILABLE, 0L); + intel_scu_notifier_post(SCU_AVAILABLE, NULL); } EXPORT_SYMBOL_GPL(intel_scu_devices_create); @@ -814,7 +814,7 @@ void intel_scu_devices_destroy(void) { int i; - intel_scu_notifier_post(SCU_DOWN, 0L); + intel_scu_notifier_post(SCU_DOWN, NULL); for (i = 0; i < ipc_next_dev; i++) platform_device_del(ipc_devs[i]); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 47936830968c..218cdb16163c 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -225,13 +225,13 @@ static void __restore_processor_state(struct saved_context *ctxt) fix_processor_context(); do_fpu_end(); + x86_platform.restore_sched_clock_state(); mtrr_bp_restore(); } /* Needed by apm.c */ void restore_processor_state(void) { - x86_platform.restore_sched_clock_state(); __restore_processor_state(&saved_context); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h new file mode 100644 index 000000000000..7d01b8c56c00 --- /dev/null +++ b/arch/x86/um/asm/barrier.h @@ -0,0 +1,75 @@ +#ifndef _ASM_UM_BARRIER_H_ +#define _ASM_UM_BARRIER_H_ + +#include <asm/asm.h> +#include <asm/segment.h> +#include <asm/cpufeature.h> +#include <asm/cmpxchg.h> +#include <asm/nops.h> + +#include <linux/kernel.h> +#include <linux/irqflags.h> + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. 
+ */ +#ifdef CONFIG_X86_32 + +#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) +#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) +#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) + +#else /* CONFIG_X86_32 */ + +#define mb() asm volatile("mfence" : : : "memory") +#define rmb() asm volatile("lfence" : : : "memory") +#define wmb() asm volatile("sfence" : : : "memory") + +#endif /* CONFIG_X86_32 */ + +#define read_barrier_depends() do { } while (0) + +#ifdef CONFIG_SMP + +#define smp_mb() mb() +#ifdef CONFIG_X86_PPRO_FENCE +#define smp_rmb() rmb() +#else /* CONFIG_X86_PPRO_FENCE */ +#define smp_rmb() barrier() +#endif /* CONFIG_X86_PPRO_FENCE */ + +#ifdef CONFIG_X86_OOSTORE +#define smp_wmb() wmb() +#else /* CONFIG_X86_OOSTORE */ +#define smp_wmb() barrier() +#endif /* CONFIG_X86_OOSTORE */ + +#define smp_read_barrier_depends() read_barrier_depends() +#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) + +#else /* CONFIG_SMP */ + +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while (0) +#define set_mb(var, value) do { var = value; barrier(); } while (0) + +#endif /* CONFIG_SMP */ + +/* + * Stop RDTSC speculation. This is needed when you need to use RDTSC + * (or get_cycles or vread that possibly accesses the TSC) in a defined + * code region. + * + * (Could use an alternative three way for this if there was one.) + */ +static inline void rdtsc_barrier(void) +{ + alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); + alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); +} + +#endif diff --git a/arch/x86/um/asm/system.h b/arch/x86/um/asm/system.h deleted file mode 100644 index a459fd9b7598..000000000000 --- a/arch/x86/um/asm/system.h +++ /dev/null @@ -1,135 +0,0 @@ -#ifndef _ASM_X86_SYSTEM_H_ -#define _ASM_X86_SYSTEM_H_ - -#include <asm/asm.h> -#include <asm/segment.h> -#include <asm/cpufeature.h> -#include <asm/cmpxchg.h> -#include <asm/nops.h> - -#include <linux/kernel.h> -#include <linux/irqflags.h> - -/* entries in ARCH_DLINFO: */ -#ifdef CONFIG_IA32_EMULATION -# define AT_VECTOR_SIZE_ARCH 2 -#else -# define AT_VECTOR_SIZE_ARCH 1 -#endif - -extern unsigned long arch_align_stack(unsigned long sp); - -void default_idle(void); - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - */ -#ifdef CONFIG_X86_32 -/* - * Some non-Intel clones support out of order store. wmb() ceases to be a - * nop for these. - */ -#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) -#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) -#else -#define mb() asm volatile("mfence":::"memory") -#define rmb() asm volatile("lfence":::"memory") -#define wmb() asm volatile("sfence" ::: "memory") -#endif - -/** - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). 
- * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * <programlisting> - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * </programlisting> - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * <programlisting> - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * </programlisting> - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - **/ - -#define read_barrier_depends() do { } while (0) - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#ifdef CONFIG_X86_PPRO_FENCE -# define smp_rmb() rmb() -#else -# define smp_rmb() barrier() -#endif -#ifdef CONFIG_X86_OOSTORE -# define smp_wmb() wmb() -#else -# define smp_wmb() barrier() -#endif -#define smp_read_barrier_depends() read_barrier_depends() -#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - -/* - * Stop RDTSC speculation. This is needed when you need to use RDTSC - * (or get_cycles or vread that possibly accesses the TSC) in a defined - * code region. - * - * (Could use an alternative three way for this if there was one.) 
- */ -static inline void rdtsc_barrier(void) -{ - alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); - alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); -} - -extern void *_switch_to(void *prev, void *next, void *last); -#define switch_to(prev, next, last) prev = _switch_to(prev, next, last) - -#endif diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b132ade26f77..a8f8844b8d32 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -261,7 +261,8 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, static bool __init xen_check_mwait(void) { -#ifdef CONFIG_ACPI +#if defined(CONFIG_ACPI) && !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR) && \ + !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR_MODULE) struct xen_platform_op op = { .cmd = XENPF_set_processor_pminfo, .u.set_pminfo.id = -1, @@ -349,7 +350,6 @@ static void __init xen_init_cpuid_mask(void) /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ if ((cx & xsave_mask) != xsave_mask) cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ - if (xen_check_mwait()) cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32)); } @@ -967,7 +967,7 @@ void xen_setup_shared_info(void) xen_setup_mfn_list_list(); } -/* This is called once we have the cpu_possible_map */ +/* This is called once we have the cpu_possible_mask */ void xen_setup_vcpu_info_placement(void) { int cpu; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 988828b479ed..b8e279479a6b 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1859,6 +1859,7 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, #endif /* CONFIG_X86_64 */ static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss; +static unsigned char fake_ioapic_mapping[PAGE_SIZE] __page_aligned_bss; static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) { @@ -1899,7 +1900,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) * We just don't map the IO APIC - all access is via * hypercalls. Keep the address in the pte for reference. */ - pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); + pte = pfn_pte(PFN_DOWN(__pa(fake_ioapic_mapping)), PAGE_KERNEL); break; #endif @@ -2064,6 +2065,7 @@ void __init xen_init_mmu_ops(void) pv_mmu_ops = xen_mmu_ops; memset(dummy_mapping, 0xff, PAGE_SIZE); + memset(fake_ioapic_mapping, 0xfd, PAGE_SIZE); } /* Protected by xen_reservation_lock. 
*/ diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index b480d4207a4c..967633ad98c4 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -12,8 +12,8 @@ int xen_swiotlb __read_mostly; static struct dma_map_ops xen_swiotlb_dma_ops = { .mapping_error = xen_swiotlb_dma_mapping_error, - .alloc_coherent = xen_swiotlb_alloc_coherent, - .free_coherent = xen_swiotlb_free_coherent, + .alloc = xen_swiotlb_alloc_coherent, + .free = xen_swiotlb_free_coherent, .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, .sync_single_for_device = xen_swiotlb_sync_single_for_device, .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu, diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 02900e8ce26c..0503c0c493a9 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -59,7 +59,7 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) static void __cpuinit cpu_bringup(void) { - int cpu = smp_processor_id(); + int cpu; cpu_init(); touch_softlockup_watchdog(); @@ -178,6 +178,7 @@ static void __init xen_fill_possible_map(void) static void __init xen_filter_cpu_maps(void) { int i, rc; + unsigned int subtract = 0; if (!xen_initial_domain()) return; @@ -192,8 +193,22 @@ static void __init xen_filter_cpu_maps(void) } else { set_cpu_possible(i, false); set_cpu_present(i, false); + subtract++; } } +#ifdef CONFIG_HOTPLUG_CPU + /* This is akin to using 'nr_cpus' on the Linux command line. + * Which is OK as when we use 'dom0_max_vcpus=X' we can only + * have up to X, while nr_cpu_ids is greater than X. This + * normally is not a problem, except when CPU hotplugging + * is involved and then there might be more than X CPUs + * in the guest - which will not work as there is no + * hypercall to expand the max number of VCPUs an already + * running guest has. So cap it up to X. */ + if (subtract) + nr_cpu_ids = nr_cpu_ids - subtract; +#endif + } static void __init xen_smp_prepare_boot_cpu(void) diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 79d7362ad6d1..3e45aa000718 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -96,7 +96,7 @@ ENTRY(xen_restore_fl_direct) /* check for unmasked and pending */ cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending - jz 1f + jnz 1f 2: call check_events 1: ENDPATCH(xen_restore_fl_direct) |
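
As a quick illustration of the CHOOSE_LOAD_FUNC() dispatch added in the bpf_jit_comp.c hunk above, the small userspace sketch below resolves a constant offset K the same way the JIT does and returns the name of the helper entry point that would be emitted as the call target. It is not part of the patch: the SKF_LL_OFF value (-0x200000) is assumed from the SKF_MAX_NEG_OFF comment in bpf_jit.S, and the string names stand in for the sk_load_word* labels defined there.

/*
 * Illustrative only -- not part of the patch.  Mirrors the
 * CHOOSE_LOAD_FUNC() macro from the bpf_jit_comp.c hunk: constant
 * offsets are resolved at JIT time to one of the entry points the
 * bpf_jit.S hunks add.  SKF_LL_OFF assumed to be -0x200000, per the
 * SKF_MAX_NEG_OFF comment in bpf_jit.S.
 */
#include <stdio.h>

#define SKF_LL_OFF (-0x200000)

static const char *choose_load_func(int k)
{
	if (k < 0)
		/*
		 * In-range negative offsets take the fast _negative_offset
		 * entry; anything below SKF_LL_OFF falls back to the plain
		 * entry, which re-checks the range at run time and bails
		 * out through bpf_error.
		 */
		return k >= SKF_LL_OFF ? "sk_load_word_negative_offset"
				       : "sk_load_word";
	return "sk_load_word_positive_offset";
}

int main(void)
{
	printf("K=14        -> %s\n", choose_load_func(14));
	printf("K=-0x1ffffe -> %s\n", choose_load_func(-0x1ffffe));
	printf("K=-0x300000 -> %s\n", choose_load_func(-0x300000));
	return 0;
}
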