diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-10 19:34:26 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-10 19:34:26 -0800 |
| commit | 6f7e6393d1ce636bb7ec77a7fe7b77458fddf701 (patch) | |
| tree | 12b97016aeaf6d8e75db39705bf92b0250872788 | |
| parent | ca8f421ea0d3f1d39f773e14f68f93c978e470ef (diff) | |
| parent | ce9b1c10c3f1c723c3cc7b63aa8331fdb6c57a04 (diff) | |
Merge tag 'x86_entry_for_7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 entry code updates from Dave Hansen:
"This is entirely composed of a set of long overdue VDSO cleanups. They
makes the VDSO build much more logical and zap quite a bit of old
cruft.
It also results in a coveted net-code-removal diffstat"
* tag 'x86_entry_for_7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/entry/vdso: Add vdso2c to .gitignore
x86/entry/vdso32: Omit '.cfi_offset eflags' for LLVM < 16
MAINTAINERS: Adjust vdso file entry in INTEL SGX
x86/entry/vdso/selftest: Update location of vgetrandom-chacha.S
x86/entry/vdso: Fix filtering of vdso compiler flags
x86/entry/vdso: Update the object paths for "make vdso_install"
x86/entry/vdso32: When using int $0x80, use it directly
x86/cpufeature: Replace X86_FEATURE_SYSENTER32 with X86_FEATURE_SYSFAST32
x86/vdso: Abstract out vdso system call internals
x86/entry/vdso: Include GNU_PROPERTY and GNU_STACK PHDRs
x86/entry/vdso32: Remove open-coded DWARF in sigreturn.S
x86/entry/vdso32: Remove SYSCALL_ENTER_KERNEL macro in sigreturn.S
x86/entry/vdso32: Don't rely on int80_landing_pad for adjusting ip
x86/entry/vdso: Refactor the vdso build
x86/entry/vdso: Move vdso2c to arch/x86/tools
x86/entry/vdso: Rename vdso_image_* to vdso*_image
50 files changed, 459 insertions, 494 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 13b291d801bc..8d3ed73dc330 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13032,7 +13032,7 @@ S: Supported Q: https://patchwork.kernel.org/project/intel-sgx/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/sgx F: Documentation/arch/x86/sgx.rst -F: arch/x86/entry/vdso/vsgx.S +F: arch/x86/entry/vdso/vdso64/vsgx.S F: arch/x86/include/asm/sgx.h F: arch/x86/include/uapi/asm/sgx.h F: arch/x86/kernel/cpu/sgx/* diff --git a/arch/x86/Kconfig.cpufeatures b/arch/x86/Kconfig.cpufeatures index b435952249a0..532cbc276b1e 100644 --- a/arch/x86/Kconfig.cpufeatures +++ b/arch/x86/Kconfig.cpufeatures @@ -56,6 +56,10 @@ config X86_REQUIRED_FEATURE_MOVBE def_bool y depends on MATOM +config X86_REQUIRED_FEATURE_SYSFAST32 + def_bool y + depends on X86_64 && !X86_FRED + config X86_REQUIRED_FEATURE_CPUID def_bool y depends on X86_64 @@ -120,6 +124,10 @@ config X86_DISABLED_FEATURE_CENTAUR_MCR def_bool y depends on X86_64 +config X86_DISABLED_FEATURE_SYSCALL32 + def_bool y + depends on !X86_64 + config X86_DISABLED_FEATURE_PCID def_bool y depends on !X86_64 diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1d403a3612ea..5f881460a8b5 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -252,7 +252,7 @@ endif archscripts: scripts_basic - $(Q)$(MAKE) $(build)=arch/x86/tools relocs + $(Q)$(MAKE) $(build)=arch/x86/tools relocs vdso2c ### # Syscall table generation @@ -318,9 +318,9 @@ PHONY += install install: $(call cmd,install) -vdso-install-$(CONFIG_X86_64) += arch/x86/entry/vdso/vdso64.so.dbg -vdso-install-$(CONFIG_X86_X32_ABI) += arch/x86/entry/vdso/vdsox32.so.dbg -vdso-install-$(CONFIG_COMPAT_32) += arch/x86/entry/vdso/vdso32.so.dbg +vdso-install-$(CONFIG_X86_64) += arch/x86/entry/vdso/vdso64/vdso64.so.dbg +vdso-install-$(CONFIG_X86_X32_ABI) += arch/x86/entry/vdso/vdso64/vdsox32.so.dbg +vdso-install-$(CONFIG_COMPAT_32) += arch/x86/entry/vdso/vdso32/vdso32.so.dbg archprepare: checkbin checkbin: diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index a67a644d0cfe..8e829575e12f 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -319,7 +319,7 @@ __visible noinstr bool do_fast_syscall_32(struct pt_regs *regs) * convention. Adjust regs so it looks like we entered using int80. */ unsigned long landing_pad = (unsigned long)current->mm->context.vdso + - vdso_image_32.sym_int80_landing_pad; + vdso32_image.sym_int80_landing_pad; /* * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward diff --git a/arch/x86/entry/vdso/.gitignore b/arch/x86/entry/vdso/.gitignore index 37a6129d597b..eb60859dbcbf 100644 --- a/arch/x86/entry/vdso/.gitignore +++ b/arch/x86/entry/vdso/.gitignore @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -vdso.lds -vdsox32.lds -vdso32-syscall-syms.lds -vdso32-sysenter-syms.lds -vdso32-int80-syms.lds -vdso-image-*.c -vdso2c +*.lds +*.so +*.so.dbg +vdso*-image.c diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index f247f5f5cb44..987b43fd4cd3 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -3,160 +3,10 @@ # Building vDSO images for x86. # -# Include the generic Makefile to check the built vDSO: -include $(srctree)/lib/vdso/Makefile.include +# Regular kernel objects +obj-y := vma.o extable.o +obj-$(CONFIG_COMPAT_32) += vdso32-setup.o -# Files to link into the vDSO: -vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vgetrandom.o vgetrandom-chacha.o -vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o -vobjs32-y += vdso32/vclock_gettime.o vdso32/vgetcpu.o -vobjs-$(CONFIG_X86_SGX) += vsgx.o - -# Files to link into the kernel: -obj-y += vma.o extable.o - -# vDSO images to build: -obj-$(CONFIG_X86_64) += vdso-image-64.o -obj-$(CONFIG_X86_X32_ABI) += vdso-image-x32.o -obj-$(CONFIG_COMPAT_32) += vdso-image-32.o vdso32-setup.o - -vobjs := $(addprefix $(obj)/, $(vobjs-y)) -vobjs32 := $(addprefix $(obj)/, $(vobjs32-y)) - -$(obj)/vdso.o: $(obj)/vdso.so - -targets += vdso.lds $(vobjs-y) -targets += vdso32/vdso32.lds $(vobjs32-y) - -targets += $(foreach x, 64 x32 32, vdso-image-$(x).c vdso$(x).so vdso$(x).so.dbg) - -CPPFLAGS_vdso.lds += -P -C - -VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 \ - -z max-page-size=4096 - -$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE - $(call if_changed,vdso_and_check) - -HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi -hostprogs += vdso2c - -quiet_cmd_vdso2c = VDSO2C $@ - cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@ - -$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE - $(call if_changed,vdso2c) - -# -# Don't omit frame pointers for ease of userspace debugging, but do -# optimize sibling calls. -# -CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ - $(filter -g%,$(KBUILD_CFLAGS)) -fno-stack-protector \ - -fno-omit-frame-pointer -foptimize-sibling-calls \ - -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO - -ifdef CONFIG_MITIGATION_RETPOLINE -ifneq ($(RETPOLINE_VDSO_CFLAGS),) - CFL += $(RETPOLINE_VDSO_CFLAGS) -endif -endif - -$(vobjs): KBUILD_CFLAGS := $(filter-out $(PADDING_CFLAGS) $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(KSTACK_ERASE_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) -$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO - -# -# vDSO code runs in userspace and -pg doesn't help with profiling anyway. -# -CFLAGS_REMOVE_vclock_gettime.o = -pg -CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg -CFLAGS_REMOVE_vgetcpu.o = -pg -CFLAGS_REMOVE_vdso32/vgetcpu.o = -pg -CFLAGS_REMOVE_vsgx.o = -pg -CFLAGS_REMOVE_vgetrandom.o = -pg - -# -# X32 processes use x32 vDSO to access 64bit kernel data. -# -# Build x32 vDSO image: -# 1. Compile x32 vDSO as 64bit. -# 2. Convert object files to x32. -# 3. Build x32 VDSO image with x32 objects, which contains 64bit codes -# so that it can reach 64bit address space with 64bit pointers. -# - -CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds) -VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \ - -z max-page-size=4096 - -# x32-rebranded versions -vobjx32s-y := $(vobjs-y:.o=-x32.o) - -# same thing, but in the output directory -vobjx32s := $(addprefix $(obj)/, $(vobjx32s-y)) - -# Convert 64bit object file to x32 for x32 vDSO. -quiet_cmd_x32 = X32 $@ - cmd_x32 = $(OBJCOPY) -O elf32-x86-64 $< $@ - -$(obj)/%-x32.o: $(obj)/%.o FORCE - $(call if_changed,x32) - -targets += vdsox32.lds $(vobjx32s-y) - -$(obj)/%.so: OBJCOPYFLAGS := -S --remove-section __ex_table -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) - -$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE - $(call if_changed,vdso_and_check) - -CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds) -VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 - -KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO -$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) -$(obj)/vdso32.so.dbg: asflags-$(CONFIG_X86_64) += -m32 - -KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) -KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(RANDSTRUCT_CFLAGS),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(KSTACK_ERASE_CFLAGS),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_CFI),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(PADDING_CFLAGS),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic -KBUILD_CFLAGS_32 += -fno-stack-protector -KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) -KBUILD_CFLAGS_32 += -fno-omit-frame-pointer -KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS_32 += -DBUILD_VDSO - -ifdef CONFIG_MITIGATION_RETPOLINE -ifneq ($(RETPOLINE_VDSO_CFLAGS),) - KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) -endif -endif - -$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) - -$(obj)/vdso32.so.dbg: $(obj)/vdso32/vdso32.lds $(vobjs32) FORCE - $(call if_changed,vdso_and_check) - -# -# The DSO images are built using a special linker script. -# -quiet_cmd_vdso = VDSO $@ - cmd_vdso = $(LD) -o $@ \ - $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ - -T $(filter %.lds,$^) $(filter %.o,$^) - -VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 --no-undefined \ - $(call ld-option, --eh-frame-hdr) -Bsymbolic -z noexecstack - -quiet_cmd_vdso_and_check = VDSO $@ - cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check) +# vDSO directories +obj-$(CONFIG_X86_64) += vdso64/ +obj-$(CONFIG_COMPAT_32) += vdso32/ diff --git a/arch/x86/entry/vdso/common/Makefile.include b/arch/x86/entry/vdso/common/Makefile.include new file mode 100644 index 000000000000..687b3d89b40d --- /dev/null +++ b/arch/x86/entry/vdso/common/Makefile.include @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Building vDSO images for x86. +# + +# Include the generic Makefile to check the built vDSO: +include $(srctree)/lib/vdso/Makefile.include + +obj-y += $(foreach x,$(vdsos-y),vdso$(x)-image.o) + +targets += $(foreach x,$(vdsos-y),vdso$(x)-image.c vdso$(x).so vdso$(x).so.dbg vdso$(x).lds) +targets += $(vobjs-y) + +# vobjs-y with $(obj)/ prepended +vobjs := $(addprefix $(obj)/,$(vobjs-y)) + +# Options for vdso*.lds +CPPFLAGS_VDSO_LDS := -P -C -I$(src)/.. +$(obj)/%.lds : KBUILD_CPPFLAGS += $(CPPFLAGS_VDSO_LDS) + +# +# Options from KBUILD_[AC]FLAGS that should *NOT* be kept +# +flags-remove-y += \ + -D__KERNEL__ -mcmodel=kernel -mregparm=3 \ + -fno-pic -fno-PIC -fno-pie -fno-PIE \ + -mfentry -pg \ + $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(KSTACK_ERASE_CFLAGS) \ + $(RETPOLINE_CFLAGS) $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ + $(PADDING_CFLAGS) + +# +# Don't omit frame pointers for ease of userspace debugging, but do +# optimize sibling calls. +# +flags-y += -D__DISABLE_EXPORTS +flags-y += -DDISABLE_BRANCH_PROFILING +flags-y += -DBUILD_VDSO +flags-y += -I$(src)/.. -I$(srctree) +flags-y += -O2 -fpic +flags-y += -fno-stack-protector +flags-y += -fno-omit-frame-pointer +flags-y += -foptimize-sibling-calls +flags-y += -fasynchronous-unwind-tables + +# Reset cf protections enabled by compiler default +flags-y += $(call cc-option, -fcf-protection=none) +flags-$(X86_USER_SHADOW_STACK) += $(call cc-option, -fcf-protection=return) +# When user space IBT is supported, enable this. +# flags-$(CONFIG_USER_IBT) += $(call cc-option, -fcf-protection=branch) + +flags-$(CONFIG_MITIGATION_RETPOLINE) += $(RETPOLINE_VDSO_CFLAGS) + +# These need to be conditional on $(vobjs) as they do not apply to +# the output vdso*-image.o files which are standard kernel objects. +$(vobjs) : KBUILD_AFLAGS := \ + $(filter-out $(flags-remove-y),$(KBUILD_AFLAGS)) $(flags-y) +$(vobjs) : KBUILD_CFLAGS := \ + $(filter-out $(flags-remove-y),$(KBUILD_CFLAGS)) $(flags-y) + +# +# The VDSO images are built using a special linker script. +# +VDSO_LDFLAGS := -shared --hash-style=both --build-id=sha1 --no-undefined \ + $(call ld-option, --eh-frame-hdr) -Bsymbolic -z noexecstack + +quiet_cmd_vdso = VDSO $@ + cmd_vdso = $(LD) -o $@ \ + $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$*) \ + -T $(filter %.lds,$^) $(filter %.o,$^) +quiet_cmd_vdso_and_check = VDSO $@ + cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check) + +$(obj)/vdso%.so.dbg: $(obj)/vdso%.lds FORCE + $(call if_changed,vdso_and_check) + +$(obj)/%.so: OBJCOPYFLAGS := -S --remove-section __ex_table +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +VDSO2C = $(objtree)/arch/x86/tools/vdso2c + +quiet_cmd_vdso2c = VDSO2C $@ + cmd_vdso2c = $(VDSO2C) $< $(<:%.dbg=%) $@ + +$(obj)/%-image.c: $(obj)/%.so.dbg $(obj)/%.so $(VDSO2C) FORCE + $(call if_changed,vdso2c) + +$(obj)/%-image.o: $(obj)/%-image.c diff --git a/arch/x86/entry/vdso/vdso-note.S b/arch/x86/entry/vdso/common/note.S index 79423170118f..2cbd39939dc6 100644 --- a/arch/x86/entry/vdso/vdso-note.S +++ b/arch/x86/entry/vdso/common/note.S @@ -1,13 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. * Here we can supply some information useful to userland. */ #include <linux/build-salt.h> -#include <linux/uts.h> #include <linux/version.h> #include <linux/elfnote.h> +/* Ideally this would use UTS_NAME, but using a quoted string here + doesn't work. Remember to change this when changing the + kernel's name. */ ELFNOTE_START(Linux, 0, "a") .long LINUX_VERSION_CODE ELFNOTE_END diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/common/vclock_gettime.c index 027b7e88d753..027b7e88d753 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/common/vclock_gettime.c diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/common/vdso-layout.lds.S index ec1ac191a057..a1e30be3e83d 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/common/vdso-layout.lds.S @@ -47,18 +47,18 @@ SECTIONS *(.gnu.linkonce.b.*) } :text - /* - * Discard .note.gnu.property sections which are unused and have - * different alignment requirement from vDSO note sections. - */ - /DISCARD/ : { + .note.gnu.property : { *(.note.gnu.property) - } - .note : { *(.note.*) } :text :note - - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text + } :text :note :gnu_property + .note : { + *(.note*) + } :text :note + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { + KEEP (*(.eh_frame)) + *(.eh_frame.*) + } :text /* * Text is well-separated from actual data: there's plenty of @@ -87,15 +87,23 @@ SECTIONS * Very old versions of ld do not recognize this name token; use the constant. */ #define PT_GNU_EH_FRAME 0x6474e550 +#define PT_GNU_STACK 0x6474e551 +#define PT_GNU_PROPERTY 0x6474e553 /* * We must supply the ELF program headers explicitly to get just one * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ +*/ +#define PF_R FLAGS(4) +#define PF_RW FLAGS(6) +#define PF_RX FLAGS(5) + PHDRS { - text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr PT_GNU_EH_FRAME; + text PT_LOAD PF_RX FILEHDR PHDRS; + dynamic PT_DYNAMIC PF_R; + note PT_NOTE PF_R; + eh_frame_hdr PT_GNU_EH_FRAME PF_R; + gnu_stack PT_GNU_STACK PF_RW; + gnu_property PT_GNU_PROPERTY PF_R; } diff --git a/arch/x86/entry/vdso/vgetcpu.c b/arch/x86/entry/vdso/common/vgetcpu.c index 6381b472b7c5..6381b472b7c5 100644 --- a/arch/x86/entry/vdso/vgetcpu.c +++ b/arch/x86/entry/vdso/common/vgetcpu.c diff --git a/arch/x86/entry/vdso/vdso32/Makefile b/arch/x86/entry/vdso/vdso32/Makefile new file mode 100644 index 000000000000..add6afb484ba --- /dev/null +++ b/arch/x86/entry/vdso/vdso32/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# 32-bit vDSO images for x86. +# + +# The vDSOs built in this directory +vdsos-y := 32 + +# Files to link into the vDSO: +vobjs-y := note.o vclock_gettime.o vgetcpu.o +vobjs-y += system_call.o sigreturn.o + +# Compilation flags +flags-y := -DBUILD_VDSO32 -m32 -mregparm=0 +flags-$(CONFIG_X86_64) += -include $(src)/fake_32bit_build.h +flags-remove-y := -m64 + +# The location of this include matters! +include $(src)/../common/Makefile.include + +# Linker options for the vdso +VDSO_LDFLAGS_32 := -m elf_i386 -soname linux-gate.so.1 + +$(obj)/vdso32.so.dbg: $(vobjs) diff --git a/arch/x86/entry/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S index 2cbd39939dc6..62d8aa51ce99 100644 --- a/arch/x86/entry/vdso/vdso32/note.S +++ b/arch/x86/entry/vdso/vdso32/note.S @@ -1,18 +1 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. - * Here we can supply some information useful to userland. - */ - -#include <linux/build-salt.h> -#include <linux/version.h> -#include <linux/elfnote.h> - -/* Ideally this would use UTS_NAME, but using a quoted string here - doesn't work. Remember to change this when changing the - kernel's name. */ -ELFNOTE_START(Linux, 0, "a") - .long LINUX_VERSION_CODE -ELFNOTE_END - -BUILD_SALT +#include "common/note.S" diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S index 1bd068f72d4c..b433353bc8e3 100644 --- a/arch/x86/entry/vdso/vdso32/sigreturn.S +++ b/arch/x86/entry/vdso/vdso32/sigreturn.S @@ -1,140 +1,64 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/linkage.h> #include <asm/unistd_32.h> +#include <asm/dwarf2.h> #include <asm/asm-offsets.h> -#ifndef SYSCALL_ENTER_KERNEL -#define SYSCALL_ENTER_KERNEL int $0x80 +.macro STARTPROC_SIGNAL_FRAME sc + CFI_STARTPROC simple + CFI_SIGNAL_FRAME + /* -4 as pretcode has already been popped */ + CFI_DEF_CFA esp, \sc - 4 + CFI_OFFSET eip, IA32_SIGCONTEXT_ip + CFI_OFFSET eax, IA32_SIGCONTEXT_ax + CFI_OFFSET ebx, IA32_SIGCONTEXT_bx + CFI_OFFSET ecx, IA32_SIGCONTEXT_cx + CFI_OFFSET edx, IA32_SIGCONTEXT_dx + CFI_OFFSET esp, IA32_SIGCONTEXT_sp + CFI_OFFSET ebp, IA32_SIGCONTEXT_bp + CFI_OFFSET esi, IA32_SIGCONTEXT_si + CFI_OFFSET edi, IA32_SIGCONTEXT_di + CFI_OFFSET es, IA32_SIGCONTEXT_es + CFI_OFFSET cs, IA32_SIGCONTEXT_cs + CFI_OFFSET ss, IA32_SIGCONTEXT_ss + CFI_OFFSET ds, IA32_SIGCONTEXT_ds +/* + * .cfi_offset eflags requires LLVM 16 or newer: + * + * https://github.com/llvm/llvm-project/commit/67bd3c58c0c7389e39c5a2f4d3b1a30459ccf5b7 + * + * Check for 16.0.1 to ensure the support is present, as 16.0.0 may be a + * prerelease version. + */ +#if defined(CONFIG_AS_IS_GNU) || (defined(CONFIG_AS_IS_LLVM) && CONFIG_AS_VERSION >= 160001) + CFI_OFFSET eflags, IA32_SIGCONTEXT_flags #endif +.endm .text .globl __kernel_sigreturn .type __kernel_sigreturn,@function - nop /* this guy is needed for .LSTARTFDEDLSI1 below (watch for HACK) */ ALIGN __kernel_sigreturn: -.LSTART_sigreturn: - popl %eax /* XXX does this mean it needs unwind info? */ + STARTPROC_SIGNAL_FRAME IA32_SIGFRAME_sigcontext + popl %eax + CFI_ADJUST_CFA_OFFSET -4 movl $__NR_sigreturn, %eax - SYSCALL_ENTER_KERNEL -.LEND_sigreturn: + int $0x80 SYM_INNER_LABEL(vdso32_sigreturn_landing_pad, SYM_L_GLOBAL) - nop - .size __kernel_sigreturn,.-.LSTART_sigreturn + ud2a + CFI_ENDPROC + .size __kernel_sigreturn,.-__kernel_sigreturn .globl __kernel_rt_sigreturn .type __kernel_rt_sigreturn,@function ALIGN __kernel_rt_sigreturn: -.LSTART_rt_sigreturn: + STARTPROC_SIGNAL_FRAME IA32_RT_SIGFRAME_sigcontext movl $__NR_rt_sigreturn, %eax - SYSCALL_ENTER_KERNEL -.LEND_rt_sigreturn: + int $0x80 SYM_INNER_LABEL(vdso32_rt_sigreturn_landing_pad, SYM_L_GLOBAL) - nop - .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn - .previous - - .section .eh_frame,"a",@progbits -.LSTARTFRAMEDLSI1: - .long .LENDCIEDLSI1-.LSTARTCIEDLSI1 -.LSTARTCIEDLSI1: - .long 0 /* CIE ID */ - .byte 1 /* Version number */ - .string "zRS" /* NUL-terminated augmentation string */ - .uleb128 1 /* Code alignment factor */ - .sleb128 -4 /* Data alignment factor */ - .byte 8 /* Return address register column */ - .uleb128 1 /* Augmentation value length */ - .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ - .byte 0 /* DW_CFA_nop */ - .align 4 -.LENDCIEDLSI1: - .long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */ -.LSTARTFDEDLSI1: - .long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */ - /* HACK: The dwarf2 unwind routines will subtract 1 from the - return address to get an address in the middle of the - presumed call instruction. Since we didn't get here via - a call, we need to include the nop before the real start - to make up for it. */ - .long .LSTART_sigreturn-1-. /* PC-relative start address */ - .long .LEND_sigreturn-.LSTART_sigreturn+1 - .uleb128 0 /* Augmentation */ - /* What follows are the instructions for the table generation. - We record the locations of each register saved. This is - complicated by the fact that the "CFA" is always assumed to - be the value of the stack pointer in the caller. This means - that we must define the CFA of this body of code to be the - saved value of the stack pointer in the sigcontext. Which - also means that there is no fixed relation to the other - saved registers, which means that we must use DW_CFA_expression - to compute their addresses. It also means that when we - adjust the stack with the popl, we have to do it all over again. */ - -#define do_cfa_expr(offset) \ - .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ - .uleb128 1f-0f; /* length */ \ -0: .byte 0x74; /* DW_OP_breg4 */ \ - .sleb128 offset; /* offset */ \ - .byte 0x06; /* DW_OP_deref */ \ -1: - -#define do_expr(regno, offset) \ - .byte 0x10; /* DW_CFA_expression */ \ - .uleb128 regno; /* regno */ \ - .uleb128 1f-0f; /* length */ \ -0: .byte 0x74; /* DW_OP_breg4 */ \ - .sleb128 offset; /* offset */ \ -1: - - do_cfa_expr(IA32_SIGCONTEXT_sp+4) - do_expr(0, IA32_SIGCONTEXT_ax+4) - do_expr(1, IA32_SIGCONTEXT_cx+4) - do_expr(2, IA32_SIGCONTEXT_dx+4) - do_expr(3, IA32_SIGCONTEXT_bx+4) - do_expr(5, IA32_SIGCONTEXT_bp+4) - do_expr(6, IA32_SIGCONTEXT_si+4) - do_expr(7, IA32_SIGCONTEXT_di+4) - do_expr(8, IA32_SIGCONTEXT_ip+4) - - .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */ - - do_cfa_expr(IA32_SIGCONTEXT_sp) - do_expr(0, IA32_SIGCONTEXT_ax) - do_expr(1, IA32_SIGCONTEXT_cx) - do_expr(2, IA32_SIGCONTEXT_dx) - do_expr(3, IA32_SIGCONTEXT_bx) - do_expr(5, IA32_SIGCONTEXT_bp) - do_expr(6, IA32_SIGCONTEXT_si) - do_expr(7, IA32_SIGCONTEXT_di) - do_expr(8, IA32_SIGCONTEXT_ip) - - .align 4 -.LENDFDEDLSI1: - - .long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */ -.LSTARTFDEDLSI2: - .long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */ - /* HACK: See above wrt unwind library assumptions. */ - .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */ - .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1 - .uleb128 0 /* Augmentation */ - /* What follows are the instructions for the table generation. - We record the locations of each register saved. This is - slightly less complicated than the above, since we don't - modify the stack pointer in the process. */ - - do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_sp) - do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ax) - do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_cx) - do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_dx) - do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bx) - do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bp) - do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_si) - do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_di) - do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ip) - - .align 4 -.LENDFDEDLSI2: + ud2a + CFI_ENDPROC + .size __kernel_rt_sigreturn,.-__kernel_rt_sigreturn .previous diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index d33c6513fd2c..9157cf9c5749 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -14,6 +14,18 @@ ALIGN __kernel_vsyscall: CFI_STARTPROC + + /* + * If using int $0x80, there is no reason to muck about with the + * stack here. Unfortunately just overwriting the push instructions + * would mess up the CFI annotations, but it is only a 3-byte + * NOP in that case. This could be avoided by patching the + * vdso symbol table (not the code) and entry point, but that + * would a fair bit of tooling work or by simply compiling + * two different vDSO images, but that doesn't seem worth it. + */ + ALTERNATIVE "int $0x80; ret", "", X86_FEATURE_SYSFAST32 + /* * Reshuffle regs so that all of any of the entry instructions * will preserve enough state. @@ -52,15 +64,9 @@ __kernel_vsyscall: #define SYSENTER_SEQUENCE "movl %esp, %ebp; sysenter" #define SYSCALL_SEQUENCE "movl %ecx, %ebp; syscall" -#ifdef CONFIG_X86_64 - /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */ - ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \ - SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32 -#else - ALTERNATIVE "", SYSENTER_SEQUENCE, X86_FEATURE_SEP -#endif + ALTERNATIVE SYSENTER_SEQUENCE, SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32 - /* Enter using int $0x80 */ + /* Re-enter using int $0x80 */ int $0x80 SYM_INNER_LABEL(int80_landing_pad, SYM_L_GLOBAL) diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c index 86981decfea8..1481f0021b9f 100644 --- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c +++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c @@ -1,4 +1 @@ -// SPDX-License-Identifier: GPL-2.0 -#define BUILD_VDSO32 -#include "fake_32bit_build.h" -#include "../vclock_gettime.c" +#include "common/vclock_gettime.c" diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S index 6f977c103584..55554f80d930 100644 --- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S +++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S @@ -11,7 +11,7 @@ #define BUILD_VDSO32 -#include "../vdso-layout.lds.S" +#include "common/vdso-layout.lds.S" /* The ELF entry point can be used to set the AT_SYSINFO value. */ ENTRY(__kernel_vsyscall); diff --git a/arch/x86/entry/vdso/vdso32/vgetcpu.c b/arch/x86/entry/vdso/vdso32/vgetcpu.c index 3a9791f5e998..00cc8325a020 100644 --- a/arch/x86/entry/vdso/vdso32/vgetcpu.c +++ b/arch/x86/entry/vdso/vdso32/vgetcpu.c @@ -1,3 +1 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "fake_32bit_build.h" -#include "../vgetcpu.c" +#include "common/vgetcpu.c" diff --git a/arch/x86/entry/vdso/vdso64/Makefile b/arch/x86/entry/vdso/vdso64/Makefile new file mode 100644 index 000000000000..bfffaf1aeecc --- /dev/null +++ b/arch/x86/entry/vdso/vdso64/Makefile @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# 64-bit vDSO images for x86. +# + +# The vDSOs built in this directory +vdsos-y := 64 +vdsos-$(CONFIG_X86_X32_ABI) += x32 + +# Files to link into the vDSO: +vobjs-y := note.o vclock_gettime.o vgetcpu.o +vobjs-y += vgetrandom.o vgetrandom-chacha.o +vobjs-$(CONFIG_X86_SGX) += vsgx.o + +# Compilation flags +flags-y := -DBUILD_VDSO64 -m64 -mcmodel=small + +# The location of this include matters! +include $(src)/../common/Makefile.include + +# +# X32 processes use x32 vDSO to access 64bit kernel data. +# +# Build x32 vDSO image: +# 1. Compile x32 vDSO as 64bit. +# 2. Convert object files to x32. +# 3. Build x32 VDSO image with x32 objects, which contains 64bit codes +# so that it can reach 64bit address space with 64bit pointers. +# + +# Convert 64bit object file to x32 for x32 vDSO. +quiet_cmd_x32 = X32 $@ + cmd_x32 = $(OBJCOPY) -O elf32-x86-64 $< $@ + +$(obj)/%-x32.o: $(obj)/%.o FORCE + $(call if_changed,x32) + +vobjsx32 = $(patsubst %.o,%-x32.o,$(vobjs)) +targets += $(patsubst %.o,%-x32.o,$(vobjs-y)) + +# Linker options for the vdso +VDSO_LDFLAGS_64 := -m elf_x86_64 -soname linux-vdso.so.1 -z max-page-size=4096 +VDSO_LDFLAGS_x32 := $(subst elf_x86_64,elf32_x86_64,$(VDSO_LDFLAGS_64)) + +$(obj)/vdso64.so.dbg: $(vobjs) +$(obj)/vdsox32.so.dbg: $(vobjsx32) diff --git a/arch/x86/entry/vdso/vdso64/note.S b/arch/x86/entry/vdso/vdso64/note.S new file mode 100644 index 000000000000..62d8aa51ce99 --- /dev/null +++ b/arch/x86/entry/vdso/vdso64/note.S @@ -0,0 +1 @@ +#include "common/note.S" diff --git a/arch/x86/entry/vdso/vdso64/vclock_gettime.c b/arch/x86/entry/vdso/vdso64/vclock_gettime.c new file mode 100644 index 000000000000..1481f0021b9f --- /dev/null +++ b/arch/x86/entry/vdso/vdso64/vclock_gettime.c @@ -0,0 +1 @@ +#include "common/vclock_gettime.c" diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso64/vdso64.lds.S index 0bab5f4af6d1..5ce3f2b6373a 100644 --- a/arch/x86/entry/vdso/vdso.lds.S +++ b/arch/x86/entry/vdso/vdso64/vdso64.lds.S @@ -9,7 +9,7 @@ #define BUILD_VDSO64 -#include "vdso-layout.lds.S" +#include "common/vdso-layout.lds.S" /* * This controls what userland symbols we export from the vDSO. diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdso64/vdsox32.lds.S index 16a8050a4fb6..3dbd20c8dacc 100644 --- a/arch/x86/entry/vdso/vdsox32.lds.S +++ b/arch/x86/entry/vdso/vdso64/vdsox32.lds.S @@ -9,7 +9,7 @@ #define BUILD_VDSOX32 -#include "vdso-layout.lds.S" +#include "common/vdso-layout.lds.S" /* * This controls what userland symbols we export from the vDSO. diff --git a/arch/x86/entry/vdso/vdso64/vgetcpu.c b/arch/x86/entry/vdso/vdso64/vgetcpu.c new file mode 100644 index 000000000000..00cc8325a020 --- /dev/null +++ b/arch/x86/entry/vdso/vdso64/vgetcpu.c @@ -0,0 +1 @@ +#include "common/vgetcpu.c" diff --git a/arch/x86/entry/vdso/vgetrandom-chacha.S b/arch/x86/entry/vdso/vdso64/vgetrandom-chacha.S index bcba5639b8ee..bcba5639b8ee 100644 --- a/arch/x86/entry/vdso/vgetrandom-chacha.S +++ b/arch/x86/entry/vdso/vdso64/vgetrandom-chacha.S diff --git a/arch/x86/entry/vdso/vgetrandom.c b/arch/x86/entry/vdso/vdso64/vgetrandom.c index 430862b8977c..6a95d36b12d9 100644 --- a/arch/x86/entry/vdso/vgetrandom.c +++ b/arch/x86/entry/vdso/vdso64/vgetrandom.c @@ -4,7 +4,7 @@ */ #include <linux/types.h> -#include "../../../../lib/vdso/getrandom.c" +#include "lib/vdso/getrandom.c" ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) { diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vdso64/vsgx.S index 37a3d4c02366..37a3d4c02366 100644 --- a/arch/x86/entry/vdso/vsgx.S +++ b/arch/x86/entry/vdso/vdso64/vsgx.S diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index afe105b2f907..e7fd7517370f 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -65,16 +65,12 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, static void vdso_fix_landing(const struct vdso_image *image, struct vm_area_struct *new_vma) { - if (in_ia32_syscall() && image == &vdso_image_32) { - struct pt_regs *regs = current_pt_regs(); - unsigned long vdso_land = image->sym_int80_landing_pad; - unsigned long old_land_addr = vdso_land + - (unsigned long)current->mm->context.vdso; - - /* Fixing userspace landing - look at do_fast_syscall_32 */ - if (regs->ip == old_land_addr) - regs->ip = new_vma->vm_start + vdso_land; - } + struct pt_regs *regs = current_pt_regs(); + unsigned long ipoffset = regs->ip - + (unsigned long)current->mm->context.vdso; + + if (ipoffset < image->size) + regs->ip = new_vma->vm_start + ipoffset; } static int vdso_mremap(const struct vm_special_mapping *sm, @@ -230,7 +226,7 @@ static int load_vdso32(void) if (vdso32_enabled != 1) /* Other values all mean "disabled" */ return 0; - return map_vdso(&vdso_image_32, 0); + return map_vdso(&vdso32_image, 0); } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) @@ -239,7 +235,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!vdso64_enabled) return 0; - return map_vdso(&vdso_image_64, 0); + return map_vdso(&vdso64_image, 0); } return load_vdso32(); @@ -252,7 +248,7 @@ int compat_arch_setup_additional_pages(struct linux_binprm *bprm, if (IS_ENABLED(CONFIG_X86_X32_ABI) && x32) { if (!vdso64_enabled) return 0; - return map_vdso(&vdso_image_x32, 0); + return map_vdso(&vdsox32_image, 0); } if (IS_ENABLED(CONFIG_IA32_EMULATION)) @@ -267,7 +263,7 @@ bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs) const struct vdso_image *image = current->mm->context.vdso_image; unsigned long vdso = (unsigned long) current->mm->context.vdso; - if (in_ia32_syscall() && image == &vdso_image_32) { + if (in_ia32_syscall() && image == &vdso32_image) { if (regs->ip == vdso + image->sym_vdso32_sigreturn_landing_pad || regs->ip == vdso + image->sym_vdso32_rt_sigreturn_landing_pad) return true; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c3b53beb1300..63b0f9aa9b3e 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -84,7 +84,7 @@ #define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */ #define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */ +#define X86_FEATURE_SYSFAST32 ( 3*32+15) /* sysenter/syscall in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */ #define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */ #define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */ diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 302e11b15da8..09c9684d3ad6 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -20,6 +20,7 @@ #define CFI_RESTORE_STATE .cfi_restore_state #define CFI_UNDEFINED .cfi_undefined #define CFI_ESCAPE .cfi_escape +#define CFI_SIGNAL_FRAME .cfi_signal_frame #ifndef BUILD_VDSO /* diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 6c8fdc96be7e..2ba5f166e58f 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -361,7 +361,7 @@ else if (IS_ENABLED(CONFIG_IA32_EMULATION)) \ #define VDSO_ENTRY \ ((unsigned long)current->mm->context.vdso + \ - vdso_image_32.sym___kernel_vsyscall) + vdso32_image.sym___kernel_vsyscall) struct linux_binprm; diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index b7253ef3205a..e8afbe9faa5b 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -27,9 +27,9 @@ struct vdso_image { long sym_vdso32_rt_sigreturn_landing_pad; }; -extern const struct vdso_image vdso_image_64; -extern const struct vdso_image vdso_image_x32; -extern const struct vdso_image vdso_image_32; +extern const struct vdso_image vdso64_image; +extern const struct vdso_image vdsox32_image; +extern const struct vdso_image vdso32_image; extern int __init init_vdso_image(const struct vdso_image *image); diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 73b2e7ee8f0f..3cf214cc4a75 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -18,6 +18,7 @@ #include <asm/msr.h> #include <asm/pvclock.h> #include <clocksource/hyperv_timer.h> +#include <asm/vdso/sys_call.h> #define VDSO_HAS_TIME 1 @@ -53,130 +54,37 @@ extern struct ms_hyperv_tsc_page hvclock_page __attribute__((visibility("hidden"))); #endif -#ifndef BUILD_VDSO32 - static __always_inline long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) { - long ret; - - asm ("syscall" : "=a" (ret), "=m" (*_ts) : - "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) : - "rcx", "r11"); - - return ret; + return VDSO_SYSCALL2(clock_gettime,64,_clkid,_ts); } static __always_inline long gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz) { - long ret; - - asm("syscall" : "=a" (ret) : - "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory"); - - return ret; + return VDSO_SYSCALL2(gettimeofday,,_tv,_tz); } static __always_inline long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) { - long ret; - - asm ("syscall" : "=a" (ret), "=m" (*_ts) : - "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) : - "rcx", "r11"); - - return ret; + return VDSO_SYSCALL2(clock_getres,_time64,_clkid,_ts); } -#else - -static __always_inline -long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) -{ - long ret; - - asm ( - "mov %%ebx, %%edx \n" - "mov %[clock], %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret), "=m" (*_ts) - : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts) - : "edx"); - - return ret; -} +#ifndef CONFIG_X86_64 static __always_inline long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) { - long ret; - - asm ( - "mov %%ebx, %%edx \n" - "mov %[clock], %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret), "=m" (*_ts) - : "0" (__NR_clock_gettime), [clock] "g" (_clkid), "c" (_ts) - : "edx"); - - return ret; -} - -static __always_inline -long gettimeofday_fallback(struct __kernel_old_timeval *_tv, - struct timezone *_tz) -{ - long ret; - - asm( - "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret) - : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz) - : "memory", "edx"); - - return ret; + return VDSO_SYSCALL2(clock_gettime,,_clkid,_ts); } static __always_inline long -clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) -{ - long ret; - - asm ( - "mov %%ebx, %%edx \n" - "mov %[clock], %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret), "=m" (*_ts) - : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts) - : "edx"); - - return ret; -} - -static __always_inline -long clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) { - long ret; - - asm ( - "mov %%ebx, %%edx \n" - "mov %[clock], %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret), "=m" (*_ts) - : "0" (__NR_clock_getres), [clock] "g" (_clkid), "c" (_ts) - : "edx"); - - return ret; + return VDSO_SYSCALL2(clock_getres,,_clkid,_ts); } #endif diff --git a/arch/x86/include/asm/vdso/sys_call.h b/arch/x86/include/asm/vdso/sys_call.h new file mode 100644 index 000000000000..5806b1cd6aef --- /dev/null +++ b/arch/x86/include/asm/vdso/sys_call.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Macros for issuing an inline system call from the vDSO. + */ + +#ifndef X86_ASM_VDSO_SYS_CALL_H +#define X86_ASM_VDSO_SYS_CALL_H + +#include <linux/compiler.h> +#include <asm/cpufeatures.h> +#include <asm/alternative.h> + +#ifdef CONFIG_X86_64 +# define __sys_instr "syscall" +# define __sys_clobber "rcx", "r11", "memory" +# define __sys_nr(x,y) __NR_ ## x +# define __sys_reg1 "rdi" +# define __sys_reg2 "rsi" +# define __sys_reg3 "rdx" +# define __sys_reg4 "r10" +# define __sys_reg5 "r8" +#else +# define __sys_instr ALTERNATIVE("ds;ds;ds;int $0x80", \ + "call __kernel_vsyscall", \ + X86_FEATURE_SYSFAST32) +# define __sys_clobber "memory" +# define __sys_nr(x,y) __NR_ ## x ## y +# define __sys_reg1 "ebx" +# define __sys_reg2 "ecx" +# define __sys_reg3 "edx" +# define __sys_reg4 "esi" +# define __sys_reg5 "edi" +#endif + +/* + * Example usage: + * + * result = VDSO_SYSCALL3(foo,64,x,y,z); + * + * ... calls foo(x,y,z) on 64 bits, and foo64(x,y,z) on 32 bits. + * + * VDSO_SYSCALL6() is currently missing, because it would require + * special handling for %ebp on 32 bits when the vdso is compiled with + * frame pointers enabled (the default on 32 bits.) Add it as a special + * case when and if it becomes necessary. + */ +#define _VDSO_SYSCALL(name,suf32,...) \ + ({ \ + long _sys_num_ret = __sys_nr(name,suf32); \ + asm_inline volatile( \ + __sys_instr \ + : "+a" (_sys_num_ret) \ + : __VA_ARGS__ \ + : __sys_clobber); \ + _sys_num_ret; \ + }) + +#define VDSO_SYSCALL0(name,suf32) \ + _VDSO_SYSCALL(name,suf32) +#define VDSO_SYSCALL1(name,suf32,a1) \ + ({ \ + register long _sys_arg1 asm(__sys_reg1) = (long)(a1); \ + _VDSO_SYSCALL(name,suf32, \ + "r" (_sys_arg1)); \ + }) +#define VDSO_SYSCALL2(name,suf32,a1,a2) \ + ({ \ + register long _sys_arg1 asm(__sys_reg1) = (long)(a1); \ + register long _sys_arg2 asm(__sys_reg2) = (long)(a2); \ + _VDSO_SYSCALL(name,suf32, \ + "r" (_sys_arg1), "r" (_sys_arg2)); \ + }) +#define VDSO_SYSCALL3(name,suf32,a1,a2,a3) \ + ({ \ + register long _sys_arg1 asm(__sys_reg1) = (long)(a1); \ + register long _sys_arg2 asm(__sys_reg2) = (long)(a2); \ + register long _sys_arg3 asm(__sys_reg3) = (long)(a3); \ + _VDSO_SYSCALL(name,suf32, \ + "r" (_sys_arg1), "r" (_sys_arg2), \ + "r" (_sys_arg3)); \ + }) +#define VDSO_SYSCALL4(name,suf32,a1,a2,a3,a4) \ + ({ \ + register long _sys_arg1 asm(__sys_reg1) = (long)(a1); \ + register long _sys_arg2 asm(__sys_reg2) = (long)(a2); \ + register long _sys_arg3 asm(__sys_reg3) = (long)(a3); \ + register long _sys_arg4 asm(__sys_reg4) = (long)(a4); \ + _VDSO_SYSCALL(name,suf32, \ + "r" (_sys_arg1), "r" (_sys_arg2), \ + "r" (_sys_arg3), "r" (_sys_arg4)); \ + }) +#define VDSO_SYSCALL5(name,suf32,a1,a2,a3,a4,a5) \ + ({ \ + register long _sys_arg1 asm(__sys_reg1) = (long)(a1); \ + register long _sys_arg2 asm(__sys_reg2) = (long)(a2); \ + register long _sys_arg3 asm(__sys_reg3) = (long)(a3); \ + register long _sys_arg4 asm(__sys_reg4) = (long)(a4); \ + register long _sys_arg5 asm(__sys_reg5) = (long)(a5); \ + _VDSO_SYSCALL(name,suf32, \ + "r" (_sys_arg1), "r" (_sys_arg2), \ + "r" (_sys_arg3), "r" (_sys_arg4), \ + "r" (_sys_arg5)); \ + }) + +#endif /* X86_VDSO_SYS_CALL_H */ diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 25fcde525c68..081816888f7a 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -63,8 +63,14 @@ static void __used common(void) OFFSET(IA32_SIGCONTEXT_bp, sigcontext_32, bp); OFFSET(IA32_SIGCONTEXT_sp, sigcontext_32, sp); OFFSET(IA32_SIGCONTEXT_ip, sigcontext_32, ip); + OFFSET(IA32_SIGCONTEXT_es, sigcontext_32, es); + OFFSET(IA32_SIGCONTEXT_cs, sigcontext_32, cs); + OFFSET(IA32_SIGCONTEXT_ss, sigcontext_32, ss); + OFFSET(IA32_SIGCONTEXT_ds, sigcontext_32, ds); + OFFSET(IA32_SIGCONTEXT_flags, sigcontext_32, flags); BLANK(); + OFFSET(IA32_SIGFRAME_sigcontext, sigframe_ia32, sc); OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); #endif diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index c8398940b975..81695da9c524 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -102,9 +102,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c) (c->x86 >= 7)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSENTER32); -#endif if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e7ab22fce3b5..1c3261cae40c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1068,6 +1068,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c) init_scattered_cpuid_features(c); init_speculation_control(c); + if (IS_ENABLED(CONFIG_X86_64) || cpu_has(c, X86_FEATURE_SEP)) + set_cpu_cap(c, X86_FEATURE_SYSFAST32); + /* * Clear/Set all flags overridden by options, after probe. * This needs to happen each time we re-probe, which may happen @@ -1813,6 +1816,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) * that it can't be enabled in 32-bit mode. */ setup_clear_cpu_cap(X86_FEATURE_PCID); + + /* + * Never use SYSCALL on a 32-bit kernel + */ + setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); #endif /* diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 98ae4c37c93e..646ff33c4651 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -236,9 +236,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_PSE); } -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSENTER32); -#else +#ifndef CONFIG_X86_64 /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ if (c->x86 == 15 && c->x86_cache_alignment == 64) c->x86_cache_alignment = 128; diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c index 89b1c8a70fe8..031379b7d4fa 100644 --- a/arch/x86/kernel/cpu/zhaoxin.c +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -59,9 +59,7 @@ static void early_init_zhaoxin(struct cpuinfo_x86 *c) { if (c->x86 >= 0x6) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSENTER32); -#endif + if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c index 816187da3a47..e736b19e18de 100644 --- a/arch/x86/kernel/fred.c +++ b/arch/x86/kernel/fred.c @@ -68,7 +68,7 @@ void cpu_init_fred_exceptions(void) idt_invalidate(); /* Use int $0x80 for 32-bit system calls in FRED mode */ - setup_clear_cpu_cap(X86_FEATURE_SYSENTER32); + setup_clear_cpu_cap(X86_FEATURE_SYSFAST32); setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 432c0a004c60..08e72f429870 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -941,14 +941,14 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) #ifdef CONFIG_CHECKPOINT_RESTORE # ifdef CONFIG_X86_X32_ABI case ARCH_MAP_VDSO_X32: - return prctl_map_vdso(&vdso_image_x32, arg2); + return prctl_map_vdso(&vdsox32_image, arg2); # endif # ifdef CONFIG_IA32_EMULATION case ARCH_MAP_VDSO_32: - return prctl_map_vdso(&vdso_image_32, arg2); + return prctl_map_vdso(&vdso32_image, arg2); # endif case ARCH_MAP_VDSO_64: - return prctl_map_vdso(&vdso_image_64, arg2); + return prctl_map_vdso(&vdso64_image, arg2); #endif #ifdef CONFIG_ADDRESS_MASKING case ARCH_GET_UNTAG_MASK: diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 42bbc42bd350..e55cf19e68fe 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -282,7 +282,7 @@ int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs) /* Return stub is in 32bit vsyscall page */ if (current->mm->context.vdso) restorer = current->mm->context.vdso + - vdso_image_32.sym___kernel_sigreturn; + vdso32_image.sym___kernel_sigreturn; else restorer = &frame->retcode; } @@ -368,7 +368,7 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) restorer = ksig->ka.sa.sa_restorer; else restorer = current->mm->context.vdso + - vdso_image_32.sym___kernel_rt_sigreturn; + vdso32_image.sym___kernel_rt_sigreturn; unsafe_put_user(ptr_to_compat(restorer), &frame->pretcode, Efault); /* diff --git a/arch/x86/tools/.gitignore b/arch/x86/tools/.gitignore index d36dc7cf9115..51d5c22b38d7 100644 --- a/arch/x86/tools/.gitignore +++ b/arch/x86/tools/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only relocs +vdso2c diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 7278e2545c35..39a183fffd04 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -38,9 +38,14 @@ $(obj)/insn_decoder_test.o: $(srctree)/tools/arch/x86/lib/insn.c $(srctree)/tool $(obj)/insn_sanity.o: $(srctree)/tools/arch/x86/lib/insn.c $(srctree)/tools/arch/x86/lib/inat.c $(srctree)/tools/arch/x86/include/asm/inat_types.h $(srctree)/tools/arch/x86/include/asm/inat.h $(srctree)/tools/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c -HOST_EXTRACFLAGS += -I$(srctree)/tools/include -hostprogs += relocs -relocs-objs := relocs_32.o relocs_64.o relocs_common.o -PHONY += relocs -relocs: $(obj)/relocs +HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi \ + -I$(srctree)/arch/$(SUBARCH)/include/uapi + +hostprogs += relocs vdso2c +relocs-objs := relocs_32.o relocs_64.o relocs_common.o + +always-y := $(hostprogs) + +PHONY += $(hostprogs) +$(hostprogs): %: $(obj)/% @: diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/tools/vdso2c.c index f84e8f8fa5fe..f84e8f8fa5fe 100644 --- a/arch/x86/entry/vdso/vdso2c.c +++ b/arch/x86/tools/vdso2c.c diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/tools/vdso2c.h index 78ed1c1f28b9..78ed1c1f28b9 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/tools/vdso2c.h diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 3823e52aef52..ac8021c3a997 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -990,13 +990,6 @@ static int register_callback(unsigned type, const void *func) return HYPERVISOR_callback_op(CALLBACKOP_register, &callback); } -void xen_enable_sysenter(void) -{ - if (cpu_feature_enabled(X86_FEATURE_SYSENTER32) && - register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat)) - setup_clear_cpu_cap(X86_FEATURE_SYSENTER32); -} - void xen_enable_syscall(void) { int ret; @@ -1008,11 +1001,27 @@ void xen_enable_syscall(void) mechanism for syscalls. */ } - if (cpu_feature_enabled(X86_FEATURE_SYSCALL32) && - register_callback(CALLBACKTYPE_syscall32, xen_entry_SYSCALL_compat)) + if (!cpu_feature_enabled(X86_FEATURE_SYSFAST32)) + return; + + if (cpu_feature_enabled(X86_FEATURE_SYSCALL32)) { + /* Use SYSCALL32 */ + ret = register_callback(CALLBACKTYPE_syscall32, + xen_entry_SYSCALL_compat); + + } else { + /* Use SYSENTER32 */ + ret = register_callback(CALLBACKTYPE_sysenter, + xen_entry_SYSENTER_compat); + } + + if (ret) { setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); + setup_clear_cpu_cap(X86_FEATURE_SYSFAST32); + } } + static void __init xen_pvmmu_arch_setup(void) { HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); @@ -1022,7 +1031,6 @@ static void __init xen_pvmmu_arch_setup(void) register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) BUG(); - xen_enable_sysenter(); xen_enable_syscall(); } diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 9bb8ff8bff30..c40f326f0c3a 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -65,10 +65,9 @@ static void cpu_bringup(void) touch_softlockup_watchdog(); /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */ - if (!xen_feature(XENFEAT_supervisor_mode_kernel)) { - xen_enable_sysenter(); + if (!xen_feature(XENFEAT_supervisor_mode_kernel)) xen_enable_syscall(); - } + cpu = smp_processor_id(); identify_secondary_cpu(cpu); set_cpu_sibling_map(cpu); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 090349baec09..f6c331b20fad 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -60,7 +60,6 @@ phys_addr_t __init xen_find_free_area(phys_addr_t size); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); void xen_banner(void); -void xen_enable_sysenter(void); void xen_enable_syscall(void); void xen_vcpu_restore(void); diff --git a/tools/testing/selftests/vDSO/vgetrandom-chacha.S b/tools/testing/selftests/vDSO/vgetrandom-chacha.S index 8c3cbf4dfd6a..16f985b089d4 100644 --- a/tools/testing/selftests/vDSO/vgetrandom-chacha.S +++ b/tools/testing/selftests/vDSO/vgetrandom-chacha.S @@ -16,5 +16,5 @@ #elif defined(__s390x__) #include "../../../../arch/s390/kernel/vdso/vgetrandom-chacha.S" #elif defined(__x86_64__) -#include "../../../../arch/x86/entry/vdso/vgetrandom-chacha.S" +#include "../../../../arch/x86/entry/vdso/vdso64/vgetrandom-chacha.S" #endif |
