From 1f3a8bae21a22808f242ccfb22aca37a635e261e Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 11 Nov 2013 16:18:59 +0000 Subject: x86/efi: Fix earlyprintk off-by-one bug Dave reported seeing the following incorrect output on his Thinkpad T420 when using earlyprintk=efi, [ 0.000000] efi: EFI v2.00 by Lenovo ACPI=0xdabfe000 ACPI 2.0=0xdabfe014 SMBIOS=0xdaa9e000 The output should be on one line, not split over two. The cause is an off-by-one error when checking that the efi_y coordinate hasn't been incremented out of bounds. Reported-by: Dave Young Signed-off-by: Matt Fleming --- arch/x86/platform/efi/early_printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c index 6599a0027b76..81b506d5befd 100644 --- a/arch/x86/platform/efi/early_printk.c +++ b/arch/x86/platform/efi/early_printk.c @@ -142,7 +142,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num) efi_y += font->height; } - if (efi_y + font->height >= si->lfb_height) { + if (efi_y + font->height > si->lfb_height) { u32 i; efi_y -= font->height; -- cgit v1.2.3 From 5551a34e5aeab868f8d37f70d8754868921b4ee5 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 20 Nov 2013 13:31:49 -0800 Subject: x86-64, build: Always pass in -mno-sse Always pass in the -mno-sse argument, regardless if -preferred-stack-boundary is supported. We never want to generate SSE instructions in the kernel unless we *really* know what we're doing. According to H. J. Lu, any version of gcc new enough that we support it at all should handle the -mno-sse option, so just add it unconditionally. Reported-by: Kevin B. Smith Signed-off-by: H. Peter Anvin Cc: H. J. Lu Link: http://lkml.kernel.org/n/tip-j21wzqv790q834n7yc6g80j1@git.kernel.org Cc: # build fix only --- arch/x86/Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 41250fb33985..eda00f9be0cf 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -31,6 +31,9 @@ ifeq ($(CONFIG_X86_32),y) KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return + # Don't autogenerate SSE instructions + KBUILD_CFLAGS += -mno-sse + # Never want PIC in a 32-bit kernel, prevent breakage with GCC built # with nonstandard options KBUILD_CFLAGS += -fno-pic @@ -57,8 +60,11 @@ else KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 + # Don't autogenerate SSE instructions + KBUILD_CFLAGS += -mno-sse + # Use -mpreferred-stack-boundary=3 if supported. - KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3) + KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3) # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) -- cgit v1.2.3 From e0f6dec35f9286e78879fe1ac92803fd69fc4fdc Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 4 Dec 2013 14:31:28 -0800 Subject: x86, bitops: Correct the assembly constraints to testing bitops In checkin: 0c44c2d0f459 x86: Use asm goto to implement better modify_and_test() functions the various functions which do modify and test were unified and optimized using "asm goto". However, this change missed the detail that the bitops require an "Ir" constraint rather than an "er" constraint ("I" = integer constant from 0-31, "e" = signed 32-bit integer constant). This would cause code to miscompile if these functions were used on constant bit positions 32-255 and the build to fail if used on constant bit positions above 255. Add the constraints as a parameter to the GEN_BINARY_RMWcc() macro to avoid this problem. Reported-by: Jesse Brandeburg Signed-off-by: H. Peter Anvin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/529E8719.4070202@zytor.com --- arch/x86/include/asm/atomic.h | 4 ++-- arch/x86/include/asm/atomic64_64.h | 4 ++-- arch/x86/include/asm/bitops.h | 6 +++--- arch/x86/include/asm/local.h | 4 ++-- arch/x86/include/asm/rmwcc.h | 8 ++++---- 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index da31c8b8a92d..b17f4f48ecd7 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -77,7 +77,7 @@ static inline void atomic_sub(int i, atomic_t *v) */ static inline int atomic_sub_and_test(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); } /** @@ -141,7 +141,7 @@ static inline int atomic_inc_and_test(atomic_t *v) */ static inline int atomic_add_negative(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); } /** diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 3f065c985aee..46e9052bbd28 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -72,7 +72,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) */ static inline int atomic64_sub_and_test(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e"); } /** @@ -138,7 +138,7 @@ static inline int atomic64_inc_and_test(atomic64_t *v) */ static inline int atomic64_add_negative(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s"); } /** diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 6d76d0935989..9fc1af74dc83 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -205,7 +205,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr) */ static inline int test_and_set_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c"); } /** @@ -251,7 +251,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline int test_and_clear_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c"); } /** @@ -304,7 +304,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr) */ static inline int test_and_change_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c"); } static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr) diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 5b23e605e707..4ad6560847b1 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -52,7 +52,7 @@ static inline void local_sub(long i, local_t *l) */ static inline int local_sub_and_test(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, i, "%0", "e"); + GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e"); } /** @@ -92,7 +92,7 @@ static inline int local_inc_and_test(local_t *l) */ static inline int local_add_negative(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, i, "%0", "s"); + GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s"); } /** diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 1ff990f1de8e..8f7866a5b9a4 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -16,8 +16,8 @@ cc_label: \ #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ __GEN_RMWcc(op " " arg0, var, cc) -#define GEN_BINARY_RMWcc(op, var, val, arg0, cc) \ - __GEN_RMWcc(op " %1, " arg0, var, cc, "er" (val)) +#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ + __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val)) #else /* !CC_HAVE_ASM_GOTO */ @@ -33,8 +33,8 @@ do { \ #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ __GEN_RMWcc(op " " arg0, var, cc) -#define GEN_BINARY_RMWcc(op, var, val, arg0, cc) \ - __GEN_RMWcc(op " %2, " arg0, var, cc, "er" (val)) +#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ + __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val)) #endif /* CC_HAVE_ASM_GOTO */ -- cgit v1.2.3 From 2885432aaf15c1b7e65c787bfe7c5fec428296f0 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 4 Dec 2013 16:07:49 -0800 Subject: x86/apic, doc: Justification for disabling IO APIC before Local APIC Since erratum AVR31 in "Intel Atom Processor C2000 Product Family Specification Update" is now published, I added a justification comment for disabling IO APIC before Local APIC, as changed in commit: 522e66464467 x86/apic: Disable I/O APIC before shutdown of the local APIC Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1386202069-51515-1-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/reboot.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index da3c599584a3..c752cb43e52f 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -558,6 +558,17 @@ void native_machine_shutdown(void) { /* Stop the cpus and apics */ #ifdef CONFIG_X86_IO_APIC + /* + * Disabling IO APIC before local APIC is a workaround for + * erratum AVR31 in "Intel Atom Processor C2000 Product Family + * Specification Update". In this situation, interrupts that target + * a Logical Processor whose Local APIC is either in the process of + * being hardware disabled or software disabled are neither delivered + * nor discarded. When this erratum occurs, the processor may hang. + * + * Even without the erratum, it still makes sense to quiet IO APIC + * before disabling Local APIC. + */ disable_IO_APIC(); #endif -- cgit v1.2.3 From cf30d52e2d11523c42048ab89ed4215b5021526a Mon Sep 17 00:00:00 2001 From: Maria Dimakopoulou Date: Thu, 5 Dec 2013 01:24:37 +0200 Subject: perf/x86: Fix constraint table end marker bug The EVENT_CONSTRAINT_END() macro defines the end marker as a constraint with a weight of zero. This was all fine until we blacklisted the corrupting memory events on Intel IvyBridge. These events are blacklisted by using a counter bitmask of zero. Thus, they also get a constraint weight of zero. The iteration macro: for_each_constraint tests the weight==0. Therefore, it was stopping at the first blacklisted event, i.e., 0xd0. The corrupting events were therefore considered as unconstrained and were scheduled on any of the generic counters. This patch fixes the end marker to have a weight of -1. With this, the blacklisted events get an empty constraint and cannot be scheduled which is what we want for now. Signed-off-by: Maria Dimakopoulou Reviewed-by: Stephane Eranian Cc: peterz@infradead.org Cc: ak@linux.intel.com Cc: jolsa@redhat.com Cc: zheng.z.yan@intel.com Link: http://lkml.kernel.org/r/20131204232437.GA10689@starlight Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index fd00bb29425d..c1a861829d81 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -262,11 +262,20 @@ struct cpu_hw_events { __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) -#define EVENT_CONSTRAINT_END \ - EVENT_CONSTRAINT(0, 0, 0) +/* + * We define the end marker as having a weight of -1 + * to enable blacklisting of events using a counter bitmask + * of zero and thus a weight of zero. + * The end marker has a weight that cannot possibly be + * obtained from counting the bits in the bitmask. + */ +#define EVENT_CONSTRAINT_END { .weight = -1 } +/* + * Check for end marker with weight == -1 + */ #define for_each_event_constraint(e, c) \ - for ((e) = (c); (e)->weight; (e)++) + for ((e) = (c); (e)->weight != -1; (e)++) /* * Extra registers for specific events. -- cgit v1.2.3 From 8b3b005d675726e38bc504d2e35a991e55819155 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 9 Dec 2013 15:43:38 -0800 Subject: x86, build: Pass in additional -mno-mmx, -mno-sse options In checkin 5551a34e5aea x86-64, build: Always pass in -mno-sse we unconditionally added -mno-sse to the main build, to keep newer compilers from generating SSE instructions from autovectorization. However, this did not extend to the special environments (arch/x86/boot, arch/x86/boot/compressed, and arch/x86/realmode/rm). Add -mno-sse to the compiler command line for these environments, and add -mno-mmx to all the environments as well, as we don't want a compiler to generate MMX code either. This patch also removes a $(cc-option) call for -m32, since we have long since stopped supporting compilers too old for the -m32 option, and in fact hardcode it in other places in the Makefiles. Reported-by: Kevin B. Smith Cc: Sunil K. Pandey Signed-off-by: H. Peter Anvin Cc: H. J. Lu Link: http://lkml.kernel.org/n/tip-j21wzqv790q834n7yc6g80j1@git.kernel.org Cc: # build fix only --- arch/x86/Makefile | 8 ++++---- arch/x86/boot/Makefile | 6 +++--- arch/x86/boot/compressed/Makefile | 1 + arch/x86/realmode/rm/Makefile | 3 ++- 4 files changed, 10 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index eda00f9be0cf..57d021507120 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -31,8 +31,8 @@ ifeq ($(CONFIG_X86_32),y) KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return - # Don't autogenerate SSE instructions - KBUILD_CFLAGS += -mno-sse + # Don't autogenerate MMX or SSE instructions + KBUILD_CFLAGS += -mno-mmx -mno-sse # Never want PIC in a 32-bit kernel, prevent breakage with GCC built # with nonstandard options @@ -60,8 +60,8 @@ else KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 - # Don't autogenerate SSE instructions - KBUILD_CFLAGS += -mno-sse + # Don't autogenerate MMX or SSE instructions + KBUILD_CFLAGS += -mno-mmx -mno-sse # Use -mpreferred-stack-boundary=3 if supported. KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index dce69a256896..d9c11956fce0 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -53,18 +53,18 @@ $(obj)/cpustr.h: $(obj)/mkcpustr FORCE # How to compile the 16-bit code. Note we always compile for -march=i386, # that way we can complain to the user if the CPU is insufficient. -KBUILD_CFLAGS := $(USERINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \ +KBUILD_CFLAGS := $(USERINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ \ -DDISABLE_BRANCH_PROFILING \ -Wall -Wstrict-prototypes \ -march=i386 -mregparm=3 \ -include $(srctree)/$(src)/code16gcc.h \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ + -mno-mmx -mno-sse \ $(call cc-option, -ffreestanding) \ $(call cc-option, -fno-toplevel-reorder,\ - $(call cc-option, -fno-unit-at-a-time)) \ + $(call cc-option, -fno-unit-at-a-time)) \ $(call cc-option, -fno-stack-protector) \ $(call cc-option, -mpreferred-stack-boundary=2) -KBUILD_CFLAGS += $(call cc-option, -m32) KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index dcd90df10ab4..c8a6792e7842 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -13,6 +13,7 @@ KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small KBUILD_CFLAGS += $(cflags-y) +KBUILD_CFLAGS += -mno-mmx -mno-sse KBUILD_CFLAGS += $(call cc-option,-ffreestanding) KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 88692871823f..9cac82588cbc 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -73,9 +73,10 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ -D_WAKEUP \ -march=i386 -mregparm=3 \ -include $(srctree)/$(src)/../../boot/code16gcc.h \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ + -mno-mmx -mno-sse \ $(call cc-option, -ffreestanding) \ $(call cc-option, -fno-toplevel-reorder,\ - $(call cc-option, -fno-unit-at-a-time)) \ + $(call cc-option, -fno-unit-at-a-time)) \ $(call cc-option, -fno-stack-protector) \ $(call cc-option, -mpreferred-stack-boundary=2) KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ -- cgit v1.2.3 From 3eae49ca8954f958b2001ab5643ef302cb7b67c7 Mon Sep 17 00:00:00 2001 From: cpw Date: Tue, 3 Dec 2013 17:15:30 -0600 Subject: x86/UV: Fix NULL pointer dereference in uv_flush_tlb_others() if the 'nobau' boot option is used The SGI UV tlb shootdown code panics the system with a NULL pointer deference if 'nobau' is specified on the boot commandline. uv_flush_tlb_other() gets called for every flush, whether the BAU is disabled or not. It should not be keeping the s_enters statistic while the BAU is disabled. The panic occurs because during initialization init_per_cpu_tunables() does not set the bcp->statp pointer if 'nobau' was specified. Signed-off-by: Cliff Wickman Cc: # 3.12.x Link: http://lkml.kernel.org/r/E1VnzBi-0005yF-MU@eag09.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/tlb_uv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 0f92173a12b6..efe4d7220397 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1070,12 +1070,13 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, unsigned long status; bcp = &per_cpu(bau_control, cpu); - stat = bcp->statp; - stat->s_enters++; if (bcp->nobau) return cpumask; + stat = bcp->statp; + stat->s_enters++; + if (bcp->busy) { descriptor_status = read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0); -- cgit v1.2.3 From 04bf9ba720fcc4fa313fa122b799ae0989b6cd50 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 29 Nov 2013 14:44:43 -0500 Subject: x86, efi: Don't use (U)EFI time services on 32 bit UEFI time services are often broken once we're in virtual mode. We were already refusing to use them on 64-bit systems, but it turns out that they're also broken on some 32-bit firmware, including the Dell Venue. Disable them for now, we can revisit once we have the 1:1 mappings code incorporated. Signed-off-by: Matthew Garrett Link: http://lkml.kernel.org/r/1385754283-2464-1-git-send-email-matthew.garrett@nebula.com Cc: Cc: Matt Fleming Signed-off-by: H. Peter Anvin --- arch/x86/platform/efi/efi.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 92c02344a060..cceb813044ef 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -690,13 +690,6 @@ void __init efi_init(void) set_bit(EFI_MEMMAP, &x86_efi_facility); -#ifdef CONFIG_X86_32 - if (efi_is_native()) { - x86_platform.get_wallclock = efi_get_time; - x86_platform.set_wallclock = efi_set_rtc_mmss; - } -#endif - #if EFI_DEBUG print_efi_memmap(); #endif -- cgit v1.2.3 From ba1f14fbe70965ae0fb1655a5275a62723f65b77 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2013 14:26:41 +0100 Subject: sched: Remove PREEMPT_NEED_RESCHED from generic code While hunting a preemption issue with Alexander, Ben noticed that the currently generic PREEMPT_NEED_RESCHED stuff is horribly broken for load-store architectures. We currently rely on the IPI to fold TIF_NEED_RESCHED into PREEMPT_NEED_RESCHED, but when this IPI lands while we already have a load for the preempt-count but before the store, the store will erase the PREEMPT_NEED_RESCHED change. The current preempt-count only works on load-store archs because interrupts are assumed to be completely balanced wrt their preempt_count fiddling; the previous preempt_count load will match the preempt_count state after the interrupt and therefore nothing gets lost. This patch removes the PREEMPT_NEED_RESCHED usage from generic code and pushes it into x86 arch code; the generic code goes back to relying on TIF_NEED_RESCHED. Boot tested on x86_64 and compile tested on ppc64. Reported-by: Benjamin Herrenschmidt Reported-and-Tested-by: Alexander Graf Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20131128132641.GP10022@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/preempt.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 8729723636fd..c8b051933b1b 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -7,6 +7,12 @@ DECLARE_PER_CPU(int, __preempt_count); +/* + * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such + * that a decrement hitting 0 means we can and should reschedule. + */ +#define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) + /* * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users * that think a non-zero value indicates we cannot preempt. @@ -74,6 +80,11 @@ static __always_inline void __preempt_count_sub(int val) __this_cpu_add_4(__preempt_count, -val); } +/* + * Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule + * a decrement which hits zero means we have no preempt_count and should + * reschedule. + */ static __always_inline bool __preempt_count_dec_and_test(void) { GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); -- cgit v1.2.3 From be5e610c0fd6ef772cafb9e0bd4128134804aef3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Nov 2013 18:27:06 +0100 Subject: math64: Add mul_u64_u32_shr() Introduce mul_u64_u32_shr() as proposed by Andy a while back; it allows using 64x64->128 muls on 64bit archs and recent GCC which defines __SIZEOF_INT128__ and __int128. (This new method will be used by the scheduler.) Signed-off-by: Peter Zijlstra Cc: fweisbec@gmail.com Cc: Andy Lutomirski Cc: Linus Torvalds Link: http://lkml.kernel.org/n/tip-hxjoeuzmrcaumR0uZwjpe2pv@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e903c71f7e69..0952ecd60eca 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -26,6 +26,7 @@ config X86 select HAVE_AOUT if X86_32 select HAVE_UNSTABLE_SCHED_CLOCK select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_SUPPORTS_INT128 if X86_64 select ARCH_WANTS_PROT_NUMA_PROT_NONE select HAVE_IDE select HAVE_OPROFILE -- cgit v1.2.3 From b963a22e6d1a266a67e9eecc88134713fd54775c Mon Sep 17 00:00:00 2001 From: Andy Honig Date: Tue, 19 Nov 2013 14:12:18 -0800 Subject: KVM: x86: Fix potential divide by 0 in lapic (CVE-2013-6367) Under guest controllable circumstances apic_get_tmcct will execute a divide by zero and cause a crash. If the guest cpuid support tsc deadline timers and performs the following sequence of requests the host will crash. - Set the mode to periodic - Set the TMICT to 0 - Set the mode bits to 11 (neither periodic, nor one shot, nor tsc deadline) - Set the TMICT to non-zero. Then the lapic_timer.period will be 0, but the TMICT will not be. If the guest then reads from the TMCCT then the host will perform a divide by 0. This patch ensures that if the lapic_timer.period is 0, then the division does not occur. Reported-by: Andrew Honig Cc: stable@vger.kernel.org Signed-off-by: Andrew Honig Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5439117d5c4c..89b52ec7d09c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -841,7 +841,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) ASSERT(apic != NULL); /* if initial count is 0, current count should also be 0 */ - if (kvm_apic_get_reg(apic, APIC_TMICT) == 0) + if (kvm_apic_get_reg(apic, APIC_TMICT) == 0 || + apic->lapic_timer.period == 0) return 0; remaining = hrtimer_get_remaining(&apic->lapic_timer.timer); -- cgit v1.2.3 From fda4e2e85589191b123d31cdc21fd33ee70f50fd Mon Sep 17 00:00:00 2001 From: Andy Honig Date: Wed, 20 Nov 2013 10:23:22 -0800 Subject: KVM: x86: Convert vapic synchronization to _cached functions (CVE-2013-6368) In kvm_lapic_sync_from_vapic and kvm_lapic_sync_to_vapic there is the potential to corrupt kernel memory if userspace provides an address that is at the end of a page. This patches concerts those functions to use kvm_write_guest_cached and kvm_read_guest_cached. It also checks the vapic_address specified by userspace during ioctl processing and returns an error to userspace if the address is not a valid GPA. This is generally not guest triggerable, because the required write is done by firmware that runs before the guest. Also, it only affects AMD processors and oldish Intel that do not have the FlexPriority feature (unless you disable FlexPriority, of course; then newer processors are also affected). Fixes: b93463aa59d6 ('KVM: Accelerated apic support') Reported-by: Andrew Honig Cc: stable@vger.kernel.org Signed-off-by: Andrew Honig Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 27 +++++++++++++++------------ arch/x86/kvm/lapic.h | 4 ++-- arch/x86/kvm/x86.c | 40 +--------------------------------------- 3 files changed, 18 insertions(+), 53 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 89b52ec7d09c..b8bec45c1610 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1692,7 +1692,6 @@ static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu, void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) { u32 data; - void *vapic; if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention)) apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic); @@ -1700,9 +1699,8 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) return; - vapic = kmap_atomic(vcpu->arch.apic->vapic_page); - data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)); - kunmap_atomic(vapic); + kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, + sizeof(u32)); apic_set_tpr(vcpu->arch.apic, data & 0xff); } @@ -1738,7 +1736,6 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) u32 data, tpr; int max_irr, max_isr; struct kvm_lapic *apic = vcpu->arch.apic; - void *vapic; apic_sync_pv_eoi_to_guest(vcpu, apic); @@ -1754,18 +1751,24 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) max_isr = 0; data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); - vapic = kmap_atomic(vcpu->arch.apic->vapic_page); - *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data; - kunmap_atomic(vapic); + kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, + sizeof(u32)); } -void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) +int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) { - vcpu->arch.apic->vapic_addr = vapic_addr; - if (vapic_addr) + if (vapic_addr) { + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, + &vcpu->arch.apic->vapic_cache, + vapic_addr, sizeof(u32))) + return -EINVAL; __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); - else + } else { __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); + } + + vcpu->arch.apic->vapic_addr = vapic_addr; + return 0; } int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index c730ac9fe801..c8b0d0d2da5c 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -34,7 +34,7 @@ struct kvm_lapic { */ void *regs; gpa_t vapic_addr; - struct page *vapic_page; + struct gfn_to_hva_cache vapic_cache; unsigned long pending_events; unsigned int sipi_vector; }; @@ -76,7 +76,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset); void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector); -void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); +int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 21ef1ba184ae..5d004da1e35d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3214,8 +3214,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&va, argp, sizeof va)) goto out; - r = 0; - kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); + r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); break; } case KVM_X86_SETUP_MCE: { @@ -5739,36 +5738,6 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) !kvm_event_needs_reinjection(vcpu); } -static int vapic_enter(struct kvm_vcpu *vcpu) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - struct page *page; - - if (!apic || !apic->vapic_addr) - return 0; - - page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); - if (is_error_page(page)) - return -EFAULT; - - vcpu->arch.apic->vapic_page = page; - return 0; -} - -static void vapic_exit(struct kvm_vcpu *vcpu) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - int idx; - - if (!apic || !apic->vapic_addr) - return; - - idx = srcu_read_lock(&vcpu->kvm->srcu); - kvm_release_page_dirty(apic->vapic_page); - mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); - srcu_read_unlock(&vcpu->kvm->srcu, idx); -} - static void update_cr8_intercept(struct kvm_vcpu *vcpu) { int max_irr, tpr; @@ -6069,11 +6038,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) struct kvm *kvm = vcpu->kvm; vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); - r = vapic_enter(vcpu); - if (r) { - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); - return r; - } r = 1; while (r > 0) { @@ -6132,8 +6096,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); - vapic_exit(vcpu); - return r; } -- cgit v1.2.3 From 17d68b763f09a9ce824ae23eb62c9efc57b69271 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 12 Dec 2013 21:20:08 +0100 Subject: KVM: x86: fix guest-initiated crash with x2apic (CVE-2013-6376) A guest can cause a BUG_ON() leading to a host kernel crash. When the guest writes to the ICR to request an IPI, while in x2apic mode the following things happen, the destination is read from ICR2, which is a register that the guest can control. kvm_irq_delivery_to_apic_fast uses the high 16 bits of ICR2 as the cluster id. A BUG_ON is triggered, which is a protection against accessing map->logical_map with an out-of-bounds access and manages to avoid that anything really unsafe occurs. The logic in the code is correct from real HW point of view. The problem is that KVM supports only one cluster with ID 0 in clustered mode, but the code that has the bug does not take this into account. Reported-by: Lars Bull Cc: stable@vger.kernel.org Signed-off-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index b8bec45c1610..dec48bfaddb8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -143,6 +143,8 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; } +#define KVM_X2APIC_CID_BITS 0 + static void recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; @@ -180,7 +182,8 @@ static void recalculate_apic_map(struct kvm *kvm) if (apic_x2apic_mode(apic)) { new->ldr_bits = 32; new->cid_shift = 16; - new->cid_mask = new->lid_mask = 0xffff; + new->cid_mask = (1 << KVM_X2APIC_CID_BITS) - 1; + new->lid_mask = 0xffff; } else if (kvm_apic_sw_enabled(apic) && !new->cid_mask /* flat mode */ && kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) { -- cgit v1.2.3