From 3cf5d076fb4d48979f382bc9452765bf8b79e740 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 23 May 2019 10:17:27 -0500 Subject: signal: Remove task parameter from force_sig All of the remaining callers pass current into force_sig so remove the task parameter to make this obvious and to make misuse more difficult in the future. This also makes it clear force_sig passes current into force_sig_info. Signed-off-by: "Eric W. Biederman" --- arch/x86/entry/vsyscall/vsyscall_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index d9d81ad7a400..7ea87f4ad0b7 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -268,7 +268,7 @@ do_ret: return true; sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return true; } -- cgit v1.2.3 From 2e1661d2673667d886cd40ad9f414cb6db48d8da Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 23 May 2019 11:04:24 -0500 Subject: signal: Remove the task parameter from force_sig_fault As synchronous exceptions really only make sense against the current task (otherwise how are you synchronous) remove the task parameter from from force_sig_fault to make it explicit that is what is going on. The two known exceptions that deliver a synchronous exception to a stopped ptraced task have already been changed to force_sig_fault_to_task. The callers have been changed with the following emacs regular expression (with obvious variations on the architectures that take more arguments) to avoid typos: force_sig_fault[(]\([^,]+\)[,]\([^,]+\)[,]\([^,]+\)[,]\W+current[)] -> force_sig_fault(\1,\2,\3) Signed-off-by: "Eric W. Biederman" --- arch/x86/entry/vsyscall/vsyscall_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 7ea87f4ad0b7..2f31faf339d5 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -106,7 +106,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) thread->cr2 = ptr; thread->trap_nr = X86_TRAP_PF; - force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr); return false; } else { return true; -- cgit v1.2.3 From cb1aaebea8d79860181559d7b5d482aea63db113 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Jun 2019 15:54:32 -0300 Subject: docs: fix broken documentation links Mostly due to x86 and acpi conversion, several documentation links are still pointing to the old file. Fix them. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Wolfram Sang Reviewed-by: Sven Van Asbroeck Reviewed-by: Bhupesh Sharma Acked-by: Mark Brown Signed-off-by: Jonathan Corbet --- arch/x86/entry/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 11aa3b2afa4d..33f9fc38d014 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -8,7 +8,7 @@ * * entry.S contains the system-call and fault low-level handling routines. * - * Some of this is documented in Documentation/x86/entry_64.txt + * Some of this is documented in Documentation/x86/entry_64.rst * * A note on terminology: * - iret frame: Architecture defined interrupt frame from SS to RIP -- cgit v1.2.3 From 8f3220a806545442f6f26195bc491520f5276e7c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 25 May 2019 11:37:43 +0200 Subject: arch: wire-up clone3() syscall Wire up the clone3() call on all arches that don't require hand-rolled assembly. Some of the arches look like they need special assembly massaging and it is probably smarter if the appropriate arch maintainers would do the actual wiring. Arches that are wired-up are: - x86{_32,64} - arm{64} - xtensa Signed-off-by: Christian Brauner Acked-by: Arnd Bergmann Cc: Kees Cook Cc: David Howells Cc: Andrew Morton Cc: Oleg Nesterov Cc: Adrian Reber Cc: Linus Torvalds Cc: Al Viro Cc: Florian Weimer Cc: linux-api@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: x86@kernel.org --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ad968b7bac72..80e26211feff 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -438,3 +438,4 @@ 431 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig 432 i386 fsmount sys_fsmount __ia32_sys_fsmount 433 i386 fspick sys_fspick __ia32_sys_fspick +436 i386 clone3 sys_clone3 __ia32_sys_clone3 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index b4e6f9e6204a..7968f0b5b5e8 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -355,6 +355,7 @@ 431 common fsconfig __x64_sys_fsconfig 432 common fsmount __x64_sys_fsmount 433 common fspick __x64_sys_fspick +436 common clone3 __x64_sys_clone3/ptregs # # x32-specific system call numbers start at 512 to avoid cache impact -- cgit v1.2.3 From 7615d9e1780e26e0178c93c55b73309a5dc093d7 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 24 May 2019 12:44:59 +0200 Subject: arch: wire-up pidfd_open() This wires up the pidfd_open() syscall into all arches at once. Signed-off-by: Christian Brauner Reviewed-by: David Howells Reviewed-by: Oleg Nesterov Acked-by: Arnd Bergmann Cc: "Eric W. Biederman" Cc: Kees Cook Cc: Joel Fernandes (Google) Cc: Thomas Gleixner Cc: Jann Horn Cc: Andy Lutomirsky Cc: Andrew Morton Cc: Aleksa Sarai Cc: Linus Torvalds Cc: Al Viro Cc: linux-api@vger.kernel.org Cc: linux-alpha@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-ia64@vger.kernel.org Cc: linux-m68k@lists.linux-m68k.org Cc: linux-mips@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-arch@vger.kernel.org Cc: x86@kernel.org --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ad968b7bac72..43e4429a5272 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -438,3 +438,4 @@ 431 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig 432 i386 fsmount sys_fsmount __ia32_sys_fsmount 433 i386 fspick sys_fspick __ia32_sys_fspick +434 i386 pidfd_open sys_pidfd_open __ia32_sys_pidfd_open diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index b4e6f9e6204a..1bee0a77fdd3 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -355,6 +355,7 @@ 431 common fsconfig __x64_sys_fsconfig 432 common fsmount __x64_sys_fsmount 433 common fspick __x64_sys_fspick +434 common pidfd_open __x64_sys_pidfd_open # # x32-specific system call numbers start at 512 to avoid cache impact -- cgit v1.2.3 From 1cbec37b3f9cff074a67bef4fc34b30a09958a0a Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 9 Jul 2019 08:34:02 +0200 Subject: x86/entry/32: Fix ENDPROC of common_spurious common_spurious is currently ENDed erroneously. common_interrupt is used in its ENDPROC. So fix this mistake. Found by my asm macros rewrite patchset. Fixes: f8a8fe61fec8 ("x86/irq: Seperate unused system vectors from spurious entry again") Signed-off-by: Jiri Slaby Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190709063402.19847-1-jslaby@suse.cz --- arch/x86/entry/entry_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 1285e5abf669..90b473297299 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1189,7 +1189,7 @@ common_spurious: movl %esp, %eax call smp_spurious_interrupt jmp ret_from_intr -ENDPROC(common_interrupt) +ENDPROC(common_spurious) #endif /* -- cgit v1.2.3 From e9a1379f9219be439f47a0f063431a92dc529eda Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Fri, 12 Jul 2019 19:15:55 +0900 Subject: x86/vdso: Fix flip/flop vdso build bug Two consecutive "make" on an already compiled kernel tree will show different behavior: $ make CALL scripts/checksyscalls.sh CALL scripts/atomic/check-atomics.sh DESCEND objtool CHK include/generated/compile.h VDSOCHK arch/x86/entry/vdso/vdso64.so.dbg VDSOCHK arch/x86/entry/vdso/vdso32.so.dbg Kernel: arch/x86/boot/bzImage is ready (#3) Building modules, stage 2. MODPOST 12 modules $ make make CALL scripts/checksyscalls.sh CALL scripts/atomic/check-atomics.sh DESCEND objtool CHK include/generated/compile.h VDSO arch/x86/entry/vdso/vdso64.so.dbg OBJCOPY arch/x86/entry/vdso/vdso64.so VDSO2C arch/x86/entry/vdso/vdso-image-64.c CC arch/x86/entry/vdso/vdso-image-64.o VDSO arch/x86/entry/vdso/vdso32.so.dbg OBJCOPY arch/x86/entry/vdso/vdso32.so VDSO2C arch/x86/entry/vdso/vdso-image-32.c CC arch/x86/entry/vdso/vdso-image-32.o AR arch/x86/entry/vdso/built-in.a AR arch/x86/entry/built-in.a AR arch/x86/built-in.a GEN .version CHK include/generated/compile.h UPD include/generated/compile.h CC init/version.o AR init/built-in.a LD vmlinux.o This is causing "LD vmlinux" once every two times even without any modifications. This is the same bug fixed in commit 92a4728608a8 ("x86/boot: Fix if_changed build flip/flop bug"). Two "if_changed" cannot be used in one target. Fix this merging two commands into one function. Fixes: 7ac870747988 ("x86/vdso: Switch to generic vDSO implementation") Signed-off-by: Naohiro Aota Signed-off-by: Thomas Gleixner Tested-by: Vincenzo Frascino Reviewed-by: Vincenzo Frascino Reviewed-by: Masahiro Yamada Link: https://lkml.kernel.org/r/20190712101556.17833-1-naohiro.aota@wdc.com --- arch/x86/entry/vdso/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 39106111be86..34773395139a 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -56,8 +56,7 @@ VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \ -z max-page-size=4096 $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE - $(call if_changed,vdso) - $(call if_changed,vdso_check) + $(call if_changed,vdso_and_check) HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi hostprogs-y += vdso2c @@ -127,8 +126,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE - $(call if_changed,vdso) - $(call if_changed,vdso_check) + $(call if_changed,vdso_and_check) CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 @@ -167,8 +165,7 @@ $(obj)/vdso32.so.dbg: FORCE \ $(obj)/vdso32/note.o \ $(obj)/vdso32/system_call.o \ $(obj)/vdso32/sigreturn.o - $(call if_changed,vdso) - $(call if_changed,vdso_check) + $(call if_changed,vdso_and_check) # # The DSO images are built using a special linker script. @@ -184,6 +181,9 @@ VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \ -Bsymbolic GCOV_PROFILE := n +quiet_cmd_vdso_and_check = VDSO $@ + cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check) + # # Install the unstripped copies of vdso*.so. If our toolchain supports # build-id, install .build-id links as well. -- cgit v1.2.3 From b23e5844dfe78a80ba672793187d3f52e4b528d7 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Sun, 14 Jul 2019 17:15:32 +0800 Subject: xen/pv: Fix a boot up hang revealed by int3 self test Commit 7457c0da024b ("x86/alternatives: Add int3_emulate_call() selftest") is used to ensure there is a gap setup in int3 exception stack which could be used for inserting call return address. This gap is missed in XEN PV int3 exception entry path, then below panic triggered: [ 0.772876] general protection fault: 0000 [#1] SMP NOPTI [ 0.772886] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.2.0+ #11 [ 0.772893] RIP: e030:int3_magic+0x0/0x7 [ 0.772905] RSP: 3507:ffffffff82203e98 EFLAGS: 00000246 [ 0.773334] Call Trace: [ 0.773334] alternative_instructions+0x3d/0x12e [ 0.773334] check_bugs+0x7c9/0x887 [ 0.773334] ? __get_locked_pte+0x178/0x1f0 [ 0.773334] start_kernel+0x4ff/0x535 [ 0.773334] ? set_init_arg+0x55/0x55 [ 0.773334] xen_start_kernel+0x571/0x57a For 64bit PV guests, Xen's ABI enters the kernel with using SYSRET, with %rcx/%r11 on the stack. To convert back to "normal" looking exceptions, the xen thunks do 'xen_*: pop %rcx; pop %r11; jmp *'. E.g. Extracting 'xen_pv_trap xenint3' we have: xen_xenint3: pop %rcx; pop %r11; jmp xenint3 As xenint3 and int3 entry code are same except xenint3 doesn't generate a gap, we can fix it by using int3 and drop useless xenint3. Signed-off-by: Zhenzhong Duan Reviewed-by: Juergen Gross Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Stefano Stabellini Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Andrew Cooper Signed-off-by: Juergen Gross --- arch/x86/entry/entry_64.S | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 0ea4831a72a4..35a66fcfcb91 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1176,7 +1176,6 @@ idtentry stack_segment do_stack_segment has_error_code=1 #ifdef CONFIG_XEN_PV idtentry xennmi do_nmi has_error_code=0 idtentry xendebug do_debug has_error_code=0 -idtentry xenint3 do_int3 has_error_code=0 #endif idtentry general_protection do_general_protection has_error_code=1 -- cgit v1.2.3 From 89ff7131f78ad083665382146e66430d66399076 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 13 Jul 2019 13:01:10 +0900 Subject: kbuild: add --hash-style= and --build-id unconditionally As commit 1e0221374e30 ("mips: vdso: drop unnecessary cc-ldoption") explained, these flags are supported by the minimal required version of binutils. They are supported by ld.lld too. Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor --- arch/x86/entry/vdso/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 34773395139a..8df549138193 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -176,9 +176,8 @@ quiet_cmd_vdso = VDSO $@ -T $(filter %.lds,$^) $(filter %.o,$^) && \ sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' -VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \ - $(call ld-option, --build-id) $(call ld-option, --eh-frame-hdr) \ - -Bsymbolic +VDSO_LDFLAGS = -shared --hash-style=both --build-id \ + $(call ld-option, --eh-frame-hdr) -Bsymbolic GCOV_PROFILE := n quiet_cmd_vdso_and_check = VDSO $@ -- cgit v1.2.3 From 55aedddb6149ab71bec9f050846855113977b033 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jul 2019 13:40:55 +0200 Subject: x86/paravirt: Make read_cr2() CALLEE_SAVE The one paravirt read_cr2() implementation (Xen) is actually quite trivial and doesn't need to clobber anything other than the return register. Making read_cr2() CALLEE_SAVE avoids all the PUSH/POP nonsense and allows more convenient use from assembly. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: bp@alien8.de Cc: rostedt@goodmis.org Cc: luto@kernel.org Cc: torvalds@linux-foundation.org Cc: hpa@zytor.com Cc: dave.hansen@linux.intel.com Cc: zhe.he@windriver.com Cc: joel@joelfernandes.org Cc: devel@etsukata.com Link: https://lkml.kernel.org/r/20190711114335.887392493@infradead.org --- arch/x86/entry/calling.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 9f1f9e3b8230..830bd984182b 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -343,3 +343,9 @@ For 32-bit we have the following conventions - kernel is built with .Lafter_call_\@: #endif .endm + +#ifdef CONFIG_PARAVIRT_XXL +#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg +#else +#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg +#endif -- cgit v1.2.3 From e67f1c11e5ea7fa47449a16325ecc997dbbf9bdf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jul 2019 13:40:56 +0200 Subject: x86/entry/32: Simplify common_exception Adding one more option to SAVE_ALL can be used in common_exception to simplify things. This also saves duplication later where page_fault will no longer use common_exception. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Andy Lutomirski Cc: bp@alien8.de Cc: torvalds@linux-foundation.org Cc: hpa@zytor.com Cc: dave.hansen@linux.intel.com Cc: jgross@suse.com Cc: zhe.he@windriver.com Cc: joel@joelfernandes.org Cc: devel@etsukata.com Link: https://lkml.kernel.org/r/20190711114335.945136187@infradead.org --- arch/x86/entry/entry_32.S | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 90b473297299..4d4b6100f0e8 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -294,9 +294,11 @@ .Lfinished_frame_\@: .endm -.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 +.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 cld +.if \skip_gs == 0 PUSH_GS +.endif FIXUP_FRAME pushl %fs pushl %es @@ -313,13 +315,13 @@ movl %edx, %es movl $(__KERNEL_PERCPU), %edx movl %edx, %fs +.if \skip_gs == 0 SET_KERNEL_GS %edx - +.endif /* Switch to kernel stack if necessary */ .if \switch_stacks > 0 SWITCH_TO_KERNEL_STACK .endif - .endm .macro SAVE_ALL_NMI cr3_reg:req @@ -1448,32 +1450,20 @@ END(page_fault) common_exception: /* the function address is in %gs's slot on the stack */ - FIXUP_FRAME - pushl %fs - pushl %es - pushl %ds - pushl %eax - movl $(__USER_DS), %eax - movl %eax, %ds - movl %eax, %es - movl $(__KERNEL_PERCPU), %eax - movl %eax, %fs - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - pushl %ecx - pushl %ebx - SWITCH_TO_KERNEL_STACK + SAVE_ALL switch_stacks=1 skip_gs=1 ENCODE_FRAME_POINTER - cld UNWIND_ESPFIX_STACK + + /* fixup %gs */ GS_TO_REG %ecx movl PT_GS(%esp), %edi # get the function address - movl PT_ORIG_EAX(%esp), %edx # get the error code - movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart REG_TO_PTGS %ecx SET_KERNEL_GS %ecx + + /* fixup orig %eax */ + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + TRACE_IRQS_OFF movl %esp, %eax # pt_regs pointer CALL_NOSPEC %edi -- cgit v1.2.3 From 2fd37912cfb019228bf246215938e6f7619516a2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jul 2019 13:40:57 +0200 Subject: x86/entry/64: Simplify idtentry a little There's a bunch of duplication in idtentry, namely the .Lfrom_usermode_switch_stack is a paranoid=0 copy of the normal flow. Make this explicit by creating a idtentry_part helper macro. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt (VMware) Acked-by: Andy Lutomirski Cc: bp@alien8.de Cc: torvalds@linux-foundation.org Cc: hpa@zytor.com Cc: dave.hansen@linux.intel.com Cc: jgross@suse.com Cc: zhe.he@windriver.com Cc: joel@joelfernandes.org Cc: devel@etsukata.com Link: https://lkml.kernel.org/r/20190711114336.002429503@infradead.org --- arch/x86/entry/entry_64.S | 102 ++++++++++++++++++++++------------------------ 1 file changed, 48 insertions(+), 54 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 0ea4831a72a4..3db5fede743b 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -864,6 +864,52 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8) +.macro idtentry_part do_sym, has_error_code:req, paranoid:req, shift_ist=-1, ist_offset=0 + + .if \paranoid + call paranoid_entry + /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ + .else + call error_entry + .endif + UNWIND_HINT_REGS + + .if \paranoid + .if \shift_ist != -1 + TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ + .else + TRACE_IRQS_OFF + .endif + .endif + + movq %rsp, %rdi /* pt_regs pointer */ + + .if \has_error_code + movq ORIG_RAX(%rsp), %rsi /* get error code */ + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ + .else + xorl %esi, %esi /* no error code */ + .endif + + .if \shift_ist != -1 + subq $\ist_offset, CPU_TSS_IST(\shift_ist) + .endif + + call \do_sym + + .if \shift_ist != -1 + addq $\ist_offset, CPU_TSS_IST(\shift_ist) + .endif + + .if \paranoid + /* this procedure expect "no swapgs" flag in ebx */ + jmp paranoid_exit + .else + jmp error_exit + .endif + +.endm + /** * idtentry - Generate an IDT entry stub * @sym: Name of the generated entry point @@ -934,47 +980,7 @@ ENTRY(\sym) .Lfrom_usermode_no_gap_\@: .endif - .if \paranoid - call paranoid_entry - .else - call error_entry - .endif - UNWIND_HINT_REGS - /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ - - .if \paranoid - .if \shift_ist != -1 - TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ - .else - TRACE_IRQS_OFF - .endif - .endif - - movq %rsp, %rdi /* pt_regs pointer */ - - .if \has_error_code - movq ORIG_RAX(%rsp), %rsi /* get error code */ - movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ - .else - xorl %esi, %esi /* no error code */ - .endif - - .if \shift_ist != -1 - subq $\ist_offset, CPU_TSS_IST(\shift_ist) - .endif - - call \do_sym - - .if \shift_ist != -1 - addq $\ist_offset, CPU_TSS_IST(\shift_ist) - .endif - - /* these procedures expect "no swapgs" flag in ebx */ - .if \paranoid - jmp paranoid_exit - .else - jmp error_exit - .endif + idtentry_part \do_sym, \has_error_code, \paranoid, \shift_ist, \ist_offset .if \paranoid == 1 /* @@ -983,21 +989,9 @@ ENTRY(\sym) * run in real process context if user_mode(regs). */ .Lfrom_usermode_switch_stack_\@: - call error_entry - - movq %rsp, %rdi /* pt_regs pointer */ - - .if \has_error_code - movq ORIG_RAX(%rsp), %rsi /* get error code */ - movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ - .else - xorl %esi, %esi /* no error code */ + idtentry_part \do_sym, \has_error_code, paranoid=0 .endif - call \do_sym - - jmp error_exit - .endif _ASM_NOKPROBE(\sym) END(\sym) .endm -- cgit v1.2.3 From 4234653e882740cbf6625eeee294e388b3176583 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jul 2019 13:40:58 +0200 Subject: x86/entry/64: Update comments and sanity tests for create_gap Commit 2700fefdb2d9 ("x86_64: Add gap to int3 to allow for call emulation") forgot to update the comment, do so now. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt (VMware) Acked-by: Andy Lutomirski Cc: bp@alien8.de Cc: torvalds@linux-foundation.org Cc: hpa@zytor.com Cc: dave.hansen@linux.intel.com Cc: jgross@suse.com Cc: zhe.he@windriver.com Cc: joel@joelfernandes.org Cc: devel@etsukata.com Link: https://lkml.kernel.org/r/20190711114336.059780563@infradead.org --- arch/x86/entry/entry_64.S | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3db5fede743b..95ae05f0edf2 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -913,15 +913,16 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt /** * idtentry - Generate an IDT entry stub * @sym: Name of the generated entry point - * @do_sym: C function to be called - * @has_error_code: True if this IDT vector has an error code on the stack - * @paranoid: non-zero means that this vector may be invoked from + * @do_sym: C function to be called + * @has_error_code: True if this IDT vector has an error code on the stack + * @paranoid: non-zero means that this vector may be invoked from * kernel mode with user GSBASE and/or user CR3. * 2 is special -- see below. * @shift_ist: Set to an IST index if entries from kernel mode should - * decrement the IST stack so that nested entries get a + * decrement the IST stack so that nested entries get a * fresh stack. (This is for #DB, which has a nasty habit - * of recursing.) + * of recursing.) + * @create_gap: create a 6-word stack gap when coming from kernel mode. * * idtentry generates an IDT stub that sets up a usable kernel context, * creates struct pt_regs, and calls @do_sym. The stub has the following @@ -951,10 +952,14 @@ ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 /* Sanity check */ - .if \shift_ist != -1 && \paranoid == 0 + .if \shift_ist != -1 && \paranoid != 1 .error "using shift_ist requires paranoid=1" .endif + .if \create_gap && \paranoid + .error "using create_gap requires paranoid=0" + .endif + ASM_CLAC .if \has_error_code == 0 -- cgit v1.2.3 From a0d14b8909de55139b8702fe0c7e80b69763dcfb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jul 2019 13:40:59 +0200 Subject: x86/mm, tracing: Fix CR2 corruption Despite the current efforts to read CR2 before tracing happens there still exist a number of possible holes: idtentry page_fault do_page_fault has_error_code=1 call error_entry TRACE_IRQS_OFF call trace_hardirqs_off* #PF // modifies CR2 CALL_enter_from_user_mode __context_tracking_exit() trace_user_exit(0) #PF // modifies CR2 call do_page_fault address = read_cr2(); /* whoopsie */ And similar for i386. Fix it by pulling the CR2 read into the entry code, before any of that stuff gets a chance to run and ruin things. Reported-by: He Zhe Reported-by: Eiichi Tsukata Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Cc: bp@alien8.de Cc: rostedt@goodmis.org Cc: torvalds@linux-foundation.org Cc: hpa@zytor.com Cc: dave.hansen@linux.intel.com Cc: jgross@suse.com Cc: joel@joelfernandes.org Link: https://lkml.kernel.org/r/20190711114336.116812491@infradead.org Debugged-by: Steven Rostedt --- arch/x86/entry/entry_32.S | 25 ++++++++++++++++++++++--- arch/x86/entry/entry_64.S | 35 ++++++++++++++++++----------------- 2 files changed, 40 insertions(+), 20 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 4d4b6100f0e8..2bb986f305ac 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1443,9 +1443,28 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR, ENTRY(page_fault) ASM_CLAC - pushl $do_page_fault - ALIGN - jmp common_exception + pushl $0; /* %gs's slot on the stack */ + + SAVE_ALL switch_stacks=1 skip_gs=1 + + ENCODE_FRAME_POINTER + UNWIND_ESPFIX_STACK + + /* fixup %gs */ + GS_TO_REG %ecx + REG_TO_PTGS %ecx + SET_KERNEL_GS %ecx + + GET_CR2_INTO(%ecx) # might clobber %eax + + /* fixup orig %eax */ + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + + TRACE_IRQS_OFF + movl %esp, %eax # pt_regs pointer + call do_page_fault + jmp ret_from_exception END(page_fault) common_exception: diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 95ae05f0edf2..7cb2e1f1ec09 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -864,7 +864,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8) -.macro idtentry_part do_sym, has_error_code:req, paranoid:req, shift_ist=-1, ist_offset=0 +.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0 .if \paranoid call paranoid_entry @@ -874,12 +874,21 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt .endif UNWIND_HINT_REGS - .if \paranoid + .if \read_cr2 + GET_CR2_INTO(%rdx); /* can clobber %rax */ + .endif + .if \shift_ist != -1 TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ .else TRACE_IRQS_OFF .endif + + .if \paranoid == 0 + testb $3, CS(%rsp) + jz .Lfrom_kernel_no_context_tracking_\@ + CALL_enter_from_user_mode +.Lfrom_kernel_no_context_tracking_\@: .endif movq %rsp, %rdi /* pt_regs pointer */ @@ -923,6 +932,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * fresh stack. (This is for #DB, which has a nasty habit * of recursing.) * @create_gap: create a 6-word stack gap when coming from kernel mode. + * @read_cr2: load CR2 into the 3rd argument; done before calling any C code * * idtentry generates an IDT stub that sets up a usable kernel context, * creates struct pt_regs, and calls @do_sym. The stub has the following @@ -947,7 +957,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * @paranoid == 2 is special: the stub will never switch stacks. This is for * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS. */ -.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 +.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0 ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 @@ -985,7 +995,7 @@ ENTRY(\sym) .Lfrom_usermode_no_gap_\@: .endif - idtentry_part \do_sym, \has_error_code, \paranoid, \shift_ist, \ist_offset + idtentry_part \do_sym, \has_error_code, \read_cr2, \paranoid, \shift_ist, \ist_offset .if \paranoid == 1 /* @@ -994,7 +1004,7 @@ ENTRY(\sym) * run in real process context if user_mode(regs). */ .Lfrom_usermode_switch_stack_\@: - idtentry_part \do_sym, \has_error_code, paranoid=0 + idtentry_part \do_sym, \has_error_code, \read_cr2, paranoid=0 .endif _ASM_NOKPROBE(\sym) @@ -1006,7 +1016,7 @@ idtentry overflow do_overflow has_error_code=0 idtentry bounds do_bounds has_error_code=0 idtentry invalid_op do_invalid_op has_error_code=0 idtentry device_not_available do_device_not_available has_error_code=0 -idtentry double_fault do_double_fault has_error_code=1 paranoid=2 +idtentry double_fault do_double_fault has_error_code=1 paranoid=2 read_cr2=1 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 idtentry invalid_TSS do_invalid_TSS has_error_code=1 idtentry segment_not_present do_segment_not_present has_error_code=1 @@ -1179,10 +1189,10 @@ idtentry xenint3 do_int3 has_error_code=0 #endif idtentry general_protection do_general_protection has_error_code=1 -idtentry page_fault do_page_fault has_error_code=1 +idtentry page_fault do_page_fault has_error_code=1 read_cr2=1 #ifdef CONFIG_KVM_GUEST -idtentry async_page_fault do_async_page_fault has_error_code=1 +idtentry async_page_fault do_async_page_fault has_error_code=1 read_cr2=1 #endif #ifdef CONFIG_X86_MCE @@ -1281,18 +1291,9 @@ ENTRY(error_entry) movq %rax, %rsp /* switch stack */ ENCODE_FRAME_POINTER pushq %r12 - - /* - * We need to tell lockdep that IRQs are off. We can't do this until - * we fix gsbase, and we should do it before enter_from_user_mode - * (which can take locks). - */ - TRACE_IRQS_OFF - CALL_enter_from_user_mode ret .Lerror_entry_done: - TRACE_IRQS_OFF ret /* -- cgit v1.2.3 From e6dd47394493061c605285a868fc72eae2e9c866 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Jul 2019 20:36:40 -0500 Subject: x86/entry: Fix thunk function ELF sizes Fix the following warnings: arch/x86/entry/thunk_64.o: warning: objtool: trace_hardirqs_on_thunk() is missing an ELF size annotation arch/x86/entry/thunk_64.o: warning: objtool: trace_hardirqs_off_thunk() is missing an ELF size annotation arch/x86/entry/thunk_64.o: warning: objtool: lockdep_sys_exit_thunk() is missing an ELF size annotation Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/89c97adc9f6cc44a0f5d03cde6d0357662938909.1563413318.git.jpoimboe@redhat.com --- arch/x86/entry/thunk_64.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index cfdca8b42c70..cc20465b2867 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -12,9 +12,7 @@ /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ .macro THUNK name, func, put_ret_addr_in_rdi=0 - .globl \name - .type \name, @function -\name: + ENTRY(\name) pushq %rbp movq %rsp, %rbp @@ -35,6 +33,7 @@ call \func jmp .L_restore + ENDPROC(\name) _ASM_NOKPROBE(\name) .endm -- cgit v1.2.3 From eec4844fae7c033a0c1fc1eb3b8517aeb8b6cc49 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 18 Jul 2019 15:58:50 -0700 Subject: proc/sysctl: add shared variables for range check In the sysctl code the proc_dointvec_minmax() function is often used to validate the user supplied value between an allowed range. This function uses the extra1 and extra2 members from struct ctl_table as minimum and maximum allowed value. On sysctl handler declaration, in every source file there are some readonly variables containing just an integer which address is assigned to the extra1 and extra2 members, so the sysctl range is enforced. The special values 0, 1 and INT_MAX are very often used as range boundary, leading duplication of variables like zero=0, one=1, int_max=INT_MAX in different source files: $ git grep -E '\.extra[12].*&(zero|one|int_max)' |wc -l 248 Add a const int array containing the most commonly used values, some macros to refer more easily to the correct array member, and use them instead of creating a local one for every object file. This is the bloat-o-meter output comparing the old and new binary compiled with the default Fedora config: # scripts/bloat-o-meter -d vmlinux.o.old vmlinux.o add/remove: 2/2 grow/shrink: 0/2 up/down: 24/-188 (-164) Data old new delta sysctl_vals - 12 +12 __kstrtab_sysctl_vals - 12 +12 max 14 10 -4 int_max 16 - -16 one 68 - -68 zero 128 28 -100 Total: Before=20583249, After=20583085, chg -0.00% [mcroce@redhat.com: tipc: remove two unused variables] Link: http://lkml.kernel.org/r/20190530091952.4108-1-mcroce@redhat.com [akpm@linux-foundation.org: fix net/ipv6/sysctl_net_ipv6.c] [arnd@arndb.de: proc/sysctl: make firmware loader table conditional] Link: http://lkml.kernel.org/r/20190617130014.1713870-1-arnd@arndb.de [akpm@linux-foundation.org: fix fs/eventpoll.c] Link: http://lkml.kernel.org/r/20190430180111.10688-1-mcroce@redhat.com Signed-off-by: Matteo Croce Signed-off-by: Arnd Bergmann Acked-by: Kees Cook Reviewed-by: Aaron Tomlin Cc: Matthew Wilcox Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/entry/vdso/vdso32-setup.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 42d4c89f990e..240626e7f55a 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -65,9 +65,6 @@ subsys_initcall(sysenter_setup); /* Register vsyscall32 into the ABI table */ #include -static const int zero; -static const int one = 1; - static struct ctl_table abi_table2[] = { { .procname = "vsyscall32", @@ -75,8 +72,8 @@ static struct ctl_table abi_table2[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (int *)&zero, - .extra2 = (int *)&one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, {} }; -- cgit v1.2.3 From 6879298bd0673840cadd1fb36d7225485504ceb4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 20 Jul 2019 10:56:41 +0200 Subject: x86/entry/64: Prevent clobbering of saved CR2 value The recent fix for CR2 corruption introduced a new way to reliably corrupt the saved CR2 value. CR2 is saved early in the entry code in RDX, which is the third argument to the fault handling functions. But it missed that between saving and invoking the fault handler enter_from_user_mode() can be called. RDX is a caller saved register so the invoked function can freely clobber it with the obvious consequences. The TRACE_IRQS_OFF call is safe as it calls through the thunk which preserves RDX, but TRACE_IRQS_OFF_DEBUG is not because it also calls into C-code outside of the thunk. Store CR2 in R12 instead which is a callee saved register and move R12 to RDX just before calling the fault handler. Fixes: a0d14b8909de ("x86/mm, tracing: Fix CR2 corruption") Reported-by: Sean Christopherson Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1907201020540.1782@nanos.tec.linutronix.de --- arch/x86/entry/entry_64.S | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 7cb2e1f1ec09..f7c70c1bee8b 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -875,7 +875,12 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt UNWIND_HINT_REGS .if \read_cr2 - GET_CR2_INTO(%rdx); /* can clobber %rax */ + /* + * Store CR2 early so subsequent faults cannot clobber it. Use R12 as + * intermediate storage as RDX can be clobbered in enter_from_user_mode(). + * GET_CR2_INTO can clobber RAX. + */ + GET_CR2_INTO(%r12); .endif .if \shift_ist != -1 @@ -904,6 +909,10 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt subq $\ist_offset, CPU_TSS_IST(\shift_ist) .endif + .if \read_cr2 + movq %r12, %rdx /* Move CR2 into 3rd argument */ + .endif + call \do_sym .if \shift_ist != -1 -- cgit v1.2.3 From b8f70953c1251d8b16276995816a95639f598e70 Mon Sep 17 00:00:00 2001 From: Matt Mullins Date: Tue, 23 Jul 2019 21:20:58 -0700 Subject: x86/entry/32: Pass cr2 to do_async_page_fault() Commit a0d14b8909de ("x86/mm, tracing: Fix CR2 corruption") added the address parameter to do_async_page_fault(), but does not pass it from the 32-bit entry point. To plumb it through, factor-out common_exception_read_cr2 in the same fashion as common_exception, and uses it from both page_fault and async_page_fault. For a 32-bit KVM guest, this fixes: Run /sbin/init as init process Starting init: /sbin/init exists but couldn't execute it (error -14) Fixes: a0d14b8909de ("x86/mm, tracing: Fix CR2 corruption") Signed-off-by: Matt Mullins Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190724042058.24506-1-mmullins@fb.com --- arch/x86/entry/entry_32.S | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2bb986f305ac..4f86928246e7 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1443,8 +1443,12 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR, ENTRY(page_fault) ASM_CLAC - pushl $0; /* %gs's slot on the stack */ + pushl $do_page_fault + jmp common_exception_read_cr2 +END(page_fault) +common_exception_read_cr2: + /* the function address is in %gs's slot on the stack */ SAVE_ALL switch_stacks=1 skip_gs=1 ENCODE_FRAME_POINTER @@ -1452,6 +1456,7 @@ ENTRY(page_fault) /* fixup %gs */ GS_TO_REG %ecx + movl PT_GS(%esp), %edi REG_TO_PTGS %ecx SET_KERNEL_GS %ecx @@ -1463,9 +1468,9 @@ ENTRY(page_fault) TRACE_IRQS_OFF movl %esp, %eax # pt_regs pointer - call do_page_fault + CALL_NOSPEC %edi jmp ret_from_exception -END(page_fault) +END(common_exception_read_cr2) common_exception: /* the function address is in %gs's slot on the stack */ @@ -1595,7 +1600,7 @@ END(general_protection) ENTRY(async_page_fault) ASM_CLAC pushl $do_async_page_fault - jmp common_exception + jmp common_exception_read_cr2 END(async_page_fault) #endif -- cgit v1.2.3