diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-10 19:34:26 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-10 19:34:26 -0800 |
| commit | 6f7e6393d1ce636bb7ec77a7fe7b77458fddf701 (patch) | |
| tree | 12b97016aeaf6d8e75db39705bf92b0250872788 /arch/x86/entry | |
| parent | ca8f421ea0d3f1d39f773e14f68f93c978e470ef (diff) | |
| parent | ce9b1c10c3f1c723c3cc7b63aa8331fdb6c57a04 (diff) | |
Merge tag 'x86_entry_for_7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 entry code updates from Dave Hansen:
"This is entirely composed of a set of long overdue VDSO cleanups. They
makes the VDSO build much more logical and zap quite a bit of old
cruft.
It also results in a coveted net-code-removal diffstat"
* tag 'x86_entry_for_7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/entry/vdso: Add vdso2c to .gitignore
x86/entry/vdso32: Omit '.cfi_offset eflags' for LLVM < 16
MAINTAINERS: Adjust vdso file entry in INTEL SGX
x86/entry/vdso/selftest: Update location of vgetrandom-chacha.S
x86/entry/vdso: Fix filtering of vdso compiler flags
x86/entry/vdso: Update the object paths for "make vdso_install"
x86/entry/vdso32: When using int $0x80, use it directly
x86/cpufeature: Replace X86_FEATURE_SYSENTER32 with X86_FEATURE_SYSFAST32
x86/vdso: Abstract out vdso system call internals
x86/entry/vdso: Include GNU_PROPERTY and GNU_STACK PHDRs
x86/entry/vdso32: Remove open-coded DWARF in sigreturn.S
x86/entry/vdso32: Remove SYSCALL_ENTER_KERNEL macro in sigreturn.S
x86/entry/vdso32: Don't rely on int80_landing_pad for adjusting ip
x86/entry/vdso: Refactor the vdso build
x86/entry/vdso: Move vdso2c to arch/x86/tools
x86/entry/vdso: Rename vdso_image_* to vdso*_image
Diffstat (limited to 'arch/x86/entry')
27 files changed, 273 insertions, 790 deletions
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index a67a644d0cfe..8e829575e12f 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -319,7 +319,7 @@ __visible noinstr bool do_fast_syscall_32(struct pt_regs *regs) * convention. Adjust regs so it looks like we entered using int80. */ unsigned long landing_pad = (unsigned long)current->mm->context.vdso + - vdso_image_32.sym_int80_landing_pad; + vdso32_image.sym_int80_landing_pad; /* * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward diff --git a/arch/x86/entry/vdso/.gitignore b/arch/x86/entry/vdso/.gitignore index 37a6129d597b..eb60859dbcbf 100644 --- a/arch/x86/entry/vdso/.gitignore +++ b/arch/x86/entry/vdso/.gitignore @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -vdso.lds -vdsox32.lds -vdso32-syscall-syms.lds -vdso32-sysenter-syms.lds -vdso32-int80-syms.lds -vdso-image-*.c -vdso2c +*.lds +*.so +*.so.dbg +vdso*-image.c diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index f247f5f5cb44..987b43fd4cd3 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -3,160 +3,10 @@ # Building vDSO images for x86. # -# Include the generic Makefile to check the built vDSO: -include $(srctree)/lib/vdso/Makefile.include +# Regular kernel objects +obj-y := vma.o extable.o +obj-$(CONFIG_COMPAT_32) += vdso32-setup.o -# Files to link into the vDSO: -vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vgetrandom.o vgetrandom-chacha.o -vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o -vobjs32-y += vdso32/vclock_gettime.o vdso32/vgetcpu.o -vobjs-$(CONFIG_X86_SGX) += vsgx.o - -# Files to link into the kernel: -obj-y += vma.o extable.o - -# vDSO images to build: -obj-$(CONFIG_X86_64) += vdso-image-64.o -obj-$(CONFIG_X86_X32_ABI) += vdso-image-x32.o -obj-$(CONFIG_COMPAT_32) += vdso-image-32.o vdso32-setup.o - -vobjs := $(addprefix $(obj)/, $(vobjs-y)) -vobjs32 := $(addprefix $(obj)/, $(vobjs32-y)) - -$(obj)/vdso.o: $(obj)/vdso.so - -targets += vdso.lds $(vobjs-y) -targets += vdso32/vdso32.lds $(vobjs32-y) - -targets += $(foreach x, 64 x32 32, vdso-image-$(x).c vdso$(x).so vdso$(x).so.dbg) - -CPPFLAGS_vdso.lds += -P -C - -VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 \ - -z max-page-size=4096 - -$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE - $(call if_changed,vdso_and_check) - -HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi -hostprogs += vdso2c - -quiet_cmd_vdso2c = VDSO2C $@ - cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@ - -$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE - $(call if_changed,vdso2c) - -# -# Don't omit frame pointers for ease of userspace debugging, but do -# optimize sibling calls. -# -CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ - $(filter -g%,$(KBUILD_CFLAGS)) -fno-stack-protector \ - -fno-omit-frame-pointer -foptimize-sibling-calls \ - -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO - -ifdef CONFIG_MITIGATION_RETPOLINE -ifneq ($(RETPOLINE_VDSO_CFLAGS),) - CFL += $(RETPOLINE_VDSO_CFLAGS) -endif -endif - -$(vobjs): KBUILD_CFLAGS := $(filter-out $(PADDING_CFLAGS) $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(KSTACK_ERASE_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) -$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO - -# -# vDSO code runs in userspace and -pg doesn't help with profiling anyway. -# -CFLAGS_REMOVE_vclock_gettime.o = -pg -CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg -CFLAGS_REMOVE_vgetcpu.o = -pg -CFLAGS_REMOVE_vdso32/vgetcpu.o = -pg -CFLAGS_REMOVE_vsgx.o = -pg -CFLAGS_REMOVE_vgetrandom.o = -pg - -# -# X32 processes use x32 vDSO to access 64bit kernel data. -# -# Build x32 vDSO image: -# 1. Compile x32 vDSO as 64bit. -# 2. Convert object files to x32. -# 3. Build x32 VDSO image with x32 objects, which contains 64bit codes -# so that it can reach 64bit address space with 64bit pointers. -# - -CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds) -VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \ - -z max-page-size=4096 - -# x32-rebranded versions -vobjx32s-y := $(vobjs-y:.o=-x32.o) - -# same thing, but in the output directory -vobjx32s := $(addprefix $(obj)/, $(vobjx32s-y)) - -# Convert 64bit object file to x32 for x32 vDSO. -quiet_cmd_x32 = X32 $@ - cmd_x32 = $(OBJCOPY) -O elf32-x86-64 $< $@ - -$(obj)/%-x32.o: $(obj)/%.o FORCE - $(call if_changed,x32) - -targets += vdsox32.lds $(vobjx32s-y) - -$(obj)/%.so: OBJCOPYFLAGS := -S --remove-section __ex_table -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) - -$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE - $(call if_changed,vdso_and_check) - -CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds) -VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 - -KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO -$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) -$(obj)/vdso32.so.dbg: asflags-$(CONFIG_X86_64) += -m32 - -KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) -KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(RANDSTRUCT_CFLAGS),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(KSTACK_ERASE_CFLAGS),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_CFI),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 := $(filter-out $(PADDING_CFLAGS),$(KBUILD_CFLAGS_32)) -KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic -KBUILD_CFLAGS_32 += -fno-stack-protector -KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) -KBUILD_CFLAGS_32 += -fno-omit-frame-pointer -KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS_32 += -DBUILD_VDSO - -ifdef CONFIG_MITIGATION_RETPOLINE -ifneq ($(RETPOLINE_VDSO_CFLAGS),) - KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) -endif -endif - -$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) - -$(obj)/vdso32.so.dbg: $(obj)/vdso32/vdso32.lds $(vobjs32) FORCE - $(call if_changed,vdso_and_check) - -# -# The DSO images are built using a special linker script. -# -quiet_cmd_vdso = VDSO $@ - cmd_vdso = $(LD) -o $@ \ - $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ - -T $(filter %.lds,$^) $(filter %.o,$^) - -VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 --no-undefined \ - $(call ld-option, --eh-frame-hdr) -Bsymbolic -z noexecstack - -quiet_cmd_vdso_and_check = VDSO $@ - cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check) +# vDSO directories +obj-$(CONFIG_X86_64) += vdso64/ +obj-$(CONFIG_COMPAT_32) += vdso32/ diff --git a/arch/x86/entry/vdso/common/Makefile.include b/arch/x86/entry/vdso/common/Makefile.include new file mode 100644 index 000000000000..687b3d89b40d --- /dev/null +++ b/arch/x86/entry/vdso/common/Makefile.include @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Building vDSO images for x86. +# + +# Include the generic Makefile to check the built vDSO: +include $(srctree)/lib/vdso/Makefile.include + +obj-y += $(foreach x,$(vdsos-y),vdso$(x)-image.o) + +targets += $(foreach x,$(vdsos-y),vdso$(x)-image.c vdso$(x).so vdso$(x).so.dbg vdso$(x).lds) +targets += $(vobjs-y) + +# vobjs-y with $(obj)/ prepended +vobjs := $(addprefix $(obj)/,$(vobjs-y)) + +# Options for vdso*.lds +CPPFLAGS_VDSO_LDS := -P -C -I$(src)/.. +$(obj)/%.lds : KBUILD_CPPFLAGS += $(CPPFLAGS_VDSO_LDS) + +# +# Options from KBUILD_[AC]FLAGS that should *NOT* be kept +# +flags-remove-y += \ + -D__KERNEL__ -mcmodel=kernel -mregparm=3 \ + -fno-pic -fno-PIC -fno-pie -fno-PIE \ + -mfentry -pg \ + $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(KSTACK_ERASE_CFLAGS) \ + $(RETPOLINE_CFLAGS) $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ + $(PADDING_CFLAGS) + +# +# Don't omit frame pointers for ease of userspace debugging, but do +# optimize sibling calls. +# +flags-y += -D__DISABLE_EXPORTS +flags-y += -DDISABLE_BRANCH_PROFILING +flags-y += -DBUILD_VDSO +flags-y += -I$(src)/.. -I$(srctree) +flags-y += -O2 -fpic +flags-y += -fno-stack-protector +flags-y += -fno-omit-frame-pointer +flags-y += -foptimize-sibling-calls +flags-y += -fasynchronous-unwind-tables + +# Reset cf protections enabled by compiler default +flags-y += $(call cc-option, -fcf-protection=none) +flags-$(X86_USER_SHADOW_STACK) += $(call cc-option, -fcf-protection=return) +# When user space IBT is supported, enable this. +# flags-$(CONFIG_USER_IBT) += $(call cc-option, -fcf-protection=branch) + +flags-$(CONFIG_MITIGATION_RETPOLINE) += $(RETPOLINE_VDSO_CFLAGS) + +# These need to be conditional on $(vobjs) as they do not apply to +# the output vdso*-image.o files which are standard kernel objects. +$(vobjs) : KBUILD_AFLAGS := \ + $(filter-out $(flags-remove-y),$(KBUILD_AFLAGS)) $(flags-y) +$(vobjs) : KBUILD_CFLAGS := \ + $(filter-out $(flags-remove-y),$(KBUILD_CFLAGS)) $(flags-y) + +# +# The VDSO images are built using a special linker script. +# +VDSO_LDFLAGS := -shared --hash-style=both --build-id=sha1 --no-undefined \ + $(call ld-option, --eh-frame-hdr) -Bsymbolic -z noexecstack + +quiet_cmd_vdso = VDSO $@ + cmd_vdso = $(LD) -o $@ \ + $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$*) \ + -T $(filter %.lds,$^) $(filter %.o,$^) +quiet_cmd_vdso_and_check = VDSO $@ + cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check) + +$(obj)/vdso%.so.dbg: $(obj)/vdso%.lds FORCE + $(call if_changed,vdso_and_check) + +$(obj)/%.so: OBJCOPYFLAGS := -S --remove-section __ex_table +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +VDSO2C = $(objtree)/arch/x86/tools/vdso2c + +quiet_cmd_vdso2c = VDSO2C $@ + cmd_vdso2c = $(VDSO2C) $< $(<:%.dbg=%) $@ + +$(obj)/%-image.c: $(obj)/%.so.dbg $(obj)/%.so $(VDSO2C) FORCE + $(call if_changed,vdso2c) + +$(obj)/%-image.o: $(obj)/%-image.c diff --git a/arch/x86/entry/vdso/vdso-note.S b/arch/x86/entry/vdso/common/note.S index 79423170118f..2cbd39939dc6 100644 --- a/arch/x86/entry/vdso/vdso-note.S +++ b/arch/x86/entry/vdso/common/note.S @@ -1,13 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. * Here we can supply some information useful to userland. */ #include <linux/build-salt.h> -#include <linux/uts.h> #include <linux/version.h> #include <linux/elfnote.h> +/* Ideally this would use UTS_NAME, but using a quoted string here + doesn't work. Remember to change this when changing the + kernel's name. */ ELFNOTE_START(Linux, 0, "a") .long LINUX_VERSION_CODE ELFNOTE_END diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/common/vclock_gettime.c index 027b7e88d753..027b7e88d753 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/common/vclock_gettime.c diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/common/vdso-layout.lds.S index ec1ac191a057..a1e30be3e83d 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/common/vdso-layout.lds.S @@ -47,18 +47,18 @@ SECTIONS *(.gnu.linkonce.b.*) } :text - /* - * Discard .note.gnu.property sections which are unused and have - * different alignment requirement from vDSO note sections. - */ - /DISCARD/ : { + .note.gnu.property : { *(.note.gnu.property) - } - .note : { *(.note.*) } :text :note - - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text + } :text :note :gnu_property + .note : { + *(.note*) + } :text :note + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { + KEEP (*(.eh_frame)) + *(.eh_frame.*) + } :text /* * Text is well-separated from actual data: there's plenty of @@ -87,15 +87,23 @@ SECTIONS * Very old versions of ld do not recognize this name token; use the constant. */ #define PT_GNU_EH_FRAME 0x6474e550 +#define PT_GNU_STACK 0x6474e551 +#define PT_GNU_PROPERTY 0x6474e553 /* * We must supply the ELF program headers explicitly to get just one * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ +*/ +#define PF_R FLAGS(4) +#define PF_RW FLAGS(6) +#define PF_RX FLAGS(5) + PHDRS { - text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr PT_GNU_EH_FRAME; + text PT_LOAD PF_RX FILEHDR PHDRS; + dynamic PT_DYNAMIC PF_R; + note PT_NOTE PF_R; + eh_frame_hdr PT_GNU_EH_FRAME PF_R; + gnu_stack PT_GNU_STACK PF_RW; + gnu_property PT_GNU_PROPERTY PF_R; } diff --git a/arch/x86/entry/vdso/vgetcpu.c b/arch/x86/entry/vdso/common/vgetcpu.c index 6381b472b7c5..6381b472b7c5 100644 --- a/arch/x86/entry/vdso/vgetcpu.c +++ b/arch/x86/entry/vdso/common/vgetcpu.c diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c deleted file mode 100644 index f84e8f8fa5fe..000000000000 --- a/arch/x86/entry/vdso/vdso2c.c +++ /dev/null @@ -1,233 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * vdso2c - A vdso image preparation tool - * Copyright (c) 2014 Andy Lutomirski and others - * - * vdso2c requires stripped and unstripped input. It would be trivial - * to fully strip the input in here, but, for reasons described below, - * we need to write a section table. Doing this is more or less - * equivalent to dropping all non-allocatable sections, but it's - * easier to let objcopy handle that instead of doing it ourselves. - * If we ever need to do something fancier than what objcopy provides, - * it would be straightforward to add here. - * - * We're keep a section table for a few reasons: - * - * The Go runtime had a couple of bugs: it would read the section - * table to try to figure out how many dynamic symbols there were (it - * shouldn't have looked at the section table at all) and, if there - * were no SHT_SYNDYM section table entry, it would use an - * uninitialized value for the number of symbols. An empty DYNSYM - * table would work, but I see no reason not to write a valid one (and - * keep full performance for old Go programs). This hack is only - * needed on x86_64. - * - * The bug was introduced on 2012-08-31 by: - * https://code.google.com/p/go/source/detail?r=56ea40aac72b - * and was fixed on 2014-06-13 by: - * https://code.google.com/p/go/source/detail?r=fc1cd5e12595 - * - * Binutils has issues debugging the vDSO: it reads the section table to - * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which - * would break build-id if we removed the section table. Binutils - * also requires that shstrndx != 0. See: - * https://sourceware.org/bugzilla/show_bug.cgi?id=17064 - * - * elfutils might not look for PT_NOTE if there is a section table at - * all. I don't know whether this matters for any practical purpose. - * - * For simplicity, rather than hacking up a partial section table, we - * just write a mostly complete one. We omit non-dynamic symbols, - * though, since they're rather large. - * - * Once binutils gets fixed, we might be able to drop this for all but - * the 64-bit vdso, since build-id only works in kernel RPMs, and - * systems that update to new enough kernel RPMs will likely update - * binutils in sync. build-id has never worked for home-built kernel - * RPMs without manual symlinking, and I suspect that no one ever does - * that. - */ - -#include <inttypes.h> -#include <stdint.h> -#include <unistd.h> -#include <stdarg.h> -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <fcntl.h> -#include <err.h> - -#include <sys/mman.h> -#include <sys/types.h> - -#include <tools/le_byteshift.h> - -#include <linux/elf.h> -#include <linux/types.h> -#include <linux/kernel.h> - -const char *outfilename; - -struct vdso_sym { - const char *name; - bool export; -}; - -struct vdso_sym required_syms[] = { - {"VDSO32_NOTE_MASK", true}, - {"__kernel_vsyscall", true}, - {"__kernel_sigreturn", true}, - {"__kernel_rt_sigreturn", true}, - {"int80_landing_pad", true}, - {"vdso32_rt_sigreturn_landing_pad", true}, - {"vdso32_sigreturn_landing_pad", true}, -}; - -__attribute__((format(printf, 1, 2))) __attribute__((noreturn)) -static void fail(const char *format, ...) -{ - va_list ap; - va_start(ap, format); - fprintf(stderr, "Error: "); - vfprintf(stderr, format, ap); - if (outfilename) - unlink(outfilename); - exit(1); - va_end(ap); -} - -/* - * Evil macros for little-endian reads and writes - */ -#define GLE(x, bits, ifnot) \ - __builtin_choose_expr( \ - (sizeof(*(x)) == bits/8), \ - (__typeof__(*(x)))get_unaligned_le##bits(x), ifnot) - -extern void bad_get_le(void); -#define LAST_GLE(x) \ - __builtin_choose_expr(sizeof(*(x)) == 1, *(x), bad_get_le()) - -#define GET_LE(x) \ - GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_GLE(x)))) - -#define PLE(x, val, bits, ifnot) \ - __builtin_choose_expr( \ - (sizeof(*(x)) == bits/8), \ - put_unaligned_le##bits((val), (x)), ifnot) - -extern void bad_put_le(void); -#define LAST_PLE(x, val) \ - __builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), bad_put_le()) - -#define PUT_LE(x, val) \ - PLE(x, val, 64, PLE(x, val, 32, PLE(x, val, 16, LAST_PLE(x, val)))) - - -#define NSYMS ARRAY_SIZE(required_syms) - -#define BITSFUNC3(name, bits, suffix) name##bits##suffix -#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix) -#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, ) - -#define INT_BITS BITSFUNC2(int, ELF_BITS, _t) - -#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x -#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x) -#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x) - -#define ELF_BITS 64 -#include "vdso2c.h" -#undef ELF_BITS - -#define ELF_BITS 32 -#include "vdso2c.h" -#undef ELF_BITS - -static void go(void *raw_addr, size_t raw_len, - void *stripped_addr, size_t stripped_len, - FILE *outfile, const char *name) -{ - Elf64_Ehdr *hdr = (Elf64_Ehdr *)raw_addr; - - if (hdr->e_ident[EI_CLASS] == ELFCLASS64) { - go64(raw_addr, raw_len, stripped_addr, stripped_len, - outfile, name); - } else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) { - go32(raw_addr, raw_len, stripped_addr, stripped_len, - outfile, name); - } else { - fail("unknown ELF class\n"); - } -} - -static void map_input(const char *name, void **addr, size_t *len, int prot) -{ - off_t tmp_len; - - int fd = open(name, O_RDONLY); - if (fd == -1) - err(1, "open(%s)", name); - - tmp_len = lseek(fd, 0, SEEK_END); - if (tmp_len == (off_t)-1) - err(1, "lseek"); - *len = (size_t)tmp_len; - - *addr = mmap(NULL, tmp_len, prot, MAP_PRIVATE, fd, 0); - if (*addr == MAP_FAILED) - err(1, "mmap"); - - close(fd); -} - -int main(int argc, char **argv) -{ - size_t raw_len, stripped_len; - void *raw_addr, *stripped_addr; - FILE *outfile; - char *name, *tmp; - int namelen; - - if (argc != 4) { - printf("Usage: vdso2c RAW_INPUT STRIPPED_INPUT OUTPUT\n"); - return 1; - } - - /* - * Figure out the struct name. If we're writing to a .so file, - * generate raw output instead. - */ - name = strdup(argv[3]); - namelen = strlen(name); - if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) { - name = NULL; - } else { - tmp = strrchr(name, '/'); - if (tmp) - name = tmp + 1; - tmp = strchr(name, '.'); - if (tmp) - *tmp = '\0'; - for (tmp = name; *tmp; tmp++) - if (*tmp == '-') - *tmp = '_'; - } - - map_input(argv[1], &raw_addr, &raw_len, PROT_READ); - map_input(argv[2], &stripped_addr, &stripped_len, PROT_READ); - - outfilename = argv[3]; - outfile = fopen(outfilename, "w"); - if (!outfile) - err(1, "fopen(%s)", outfilename); - - go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name); - - munmap(raw_addr, raw_len); - munmap(stripped_addr, stripped_len); - fclose(outfile); - - return 0; -} diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h deleted file mode 100644 index 78ed1c1f28b9..000000000000 --- a/arch/x86/entry/vdso/vdso2c.h +++ /dev/null @@ -1,208 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file is included twice from vdso2c.c. It generates code for 32-bit - * and 64-bit vDSOs. We need both for 64-bit builds, since 32-bit vDSOs - * are built for 32-bit userspace. - */ - -static void BITSFUNC(copy)(FILE *outfile, const unsigned char *data, size_t len) -{ - size_t i; - - for (i = 0; i < len; i++) { - if (i % 10 == 0) - fprintf(outfile, "\n\t"); - fprintf(outfile, "0x%02X, ", (int)(data)[i]); - } -} - - -/* - * Extract a section from the input data into a standalone blob. Used to - * capture kernel-only data that needs to persist indefinitely, e.g. the - * exception fixup tables, but only in the kernel, i.e. the section can - * be stripped from the final vDSO image. - */ -static void BITSFUNC(extract)(const unsigned char *data, size_t data_len, - FILE *outfile, ELF(Shdr) *sec, const char *name) -{ - unsigned long offset; - size_t len; - - offset = (unsigned long)GET_LE(&sec->sh_offset); - len = (size_t)GET_LE(&sec->sh_size); - - if (offset + len > data_len) - fail("section to extract overruns input data"); - - fprintf(outfile, "static const unsigned char %s[%zu] = {", name, len); - BITSFUNC(copy)(outfile, data + offset, len); - fprintf(outfile, "\n};\n\n"); -} - -static void BITSFUNC(go)(void *raw_addr, size_t raw_len, - void *stripped_addr, size_t stripped_len, - FILE *outfile, const char *image_name) -{ - int found_load = 0; - unsigned long load_size = -1; /* Work around bogus warning */ - unsigned long mapping_size; - ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr; - unsigned long i, syms_nr; - ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr, - *alt_sec = NULL, *extable_sec = NULL; - ELF(Dyn) *dyn = 0, *dyn_end = 0; - const char *secstrings; - INT_BITS syms[NSYMS] = {}; - - ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff)); - - if (GET_LE(&hdr->e_type) != ET_DYN) - fail("input is not a shared object\n"); - - /* Walk the segment table. */ - for (i = 0; i < GET_LE(&hdr->e_phnum); i++) { - if (GET_LE(&pt[i].p_type) == PT_LOAD) { - if (found_load) - fail("multiple PT_LOAD segs\n"); - - if (GET_LE(&pt[i].p_offset) != 0 || - GET_LE(&pt[i].p_vaddr) != 0) - fail("PT_LOAD in wrong place\n"); - - if (GET_LE(&pt[i].p_memsz) != GET_LE(&pt[i].p_filesz)) - fail("cannot handle memsz != filesz\n"); - - load_size = GET_LE(&pt[i].p_memsz); - found_load = 1; - } else if (GET_LE(&pt[i].p_type) == PT_DYNAMIC) { - dyn = raw_addr + GET_LE(&pt[i].p_offset); - dyn_end = raw_addr + GET_LE(&pt[i].p_offset) + - GET_LE(&pt[i].p_memsz); - } - } - if (!found_load) - fail("no PT_LOAD seg\n"); - - if (stripped_len < load_size) - fail("stripped input is too short\n"); - - if (!dyn) - fail("input has no PT_DYNAMIC section -- your toolchain is buggy\n"); - - /* Walk the dynamic table */ - for (i = 0; dyn + i < dyn_end && - GET_LE(&dyn[i].d_tag) != DT_NULL; i++) { - typeof(dyn[i].d_tag) tag = GET_LE(&dyn[i].d_tag); - if (tag == DT_REL || tag == DT_RELSZ || tag == DT_RELA || - tag == DT_RELENT || tag == DT_TEXTREL) - fail("vdso image contains dynamic relocations\n"); - } - - /* Walk the section table */ - secstrings_hdr = raw_addr + GET_LE(&hdr->e_shoff) + - GET_LE(&hdr->e_shentsize)*GET_LE(&hdr->e_shstrndx); - secstrings = raw_addr + GET_LE(&secstrings_hdr->sh_offset); - for (i = 0; i < GET_LE(&hdr->e_shnum); i++) { - ELF(Shdr) *sh = raw_addr + GET_LE(&hdr->e_shoff) + - GET_LE(&hdr->e_shentsize) * i; - if (GET_LE(&sh->sh_type) == SHT_SYMTAB) - symtab_hdr = sh; - - if (!strcmp(secstrings + GET_LE(&sh->sh_name), - ".altinstructions")) - alt_sec = sh; - if (!strcmp(secstrings + GET_LE(&sh->sh_name), "__ex_table")) - extable_sec = sh; - } - - if (!symtab_hdr) - fail("no symbol table\n"); - - strtab_hdr = raw_addr + GET_LE(&hdr->e_shoff) + - GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link); - - syms_nr = GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize); - /* Walk the symbol table */ - for (i = 0; i < syms_nr; i++) { - unsigned int k; - ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) + - GET_LE(&symtab_hdr->sh_entsize) * i; - const char *sym_name = raw_addr + - GET_LE(&strtab_hdr->sh_offset) + - GET_LE(&sym->st_name); - - for (k = 0; k < NSYMS; k++) { - if (!strcmp(sym_name, required_syms[k].name)) { - if (syms[k]) { - fail("duplicate symbol %s\n", - required_syms[k].name); - } - - /* - * Careful: we use negative addresses, but - * st_value is unsigned, so we rely - * on syms[k] being a signed type of the - * correct width. - */ - syms[k] = GET_LE(&sym->st_value); - } - } - } - - if (!image_name) { - fwrite(stripped_addr, stripped_len, 1, outfile); - return; - } - - mapping_size = (stripped_len + 4095) / 4096 * 4096; - - fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n"); - fprintf(outfile, "#include <linux/linkage.h>\n"); - fprintf(outfile, "#include <linux/init.h>\n"); - fprintf(outfile, "#include <asm/page_types.h>\n"); - fprintf(outfile, "#include <asm/vdso.h>\n"); - fprintf(outfile, "\n"); - fprintf(outfile, - "static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {", - mapping_size); - for (i = 0; i < stripped_len; i++) { - if (i % 10 == 0) - fprintf(outfile, "\n\t"); - fprintf(outfile, "0x%02X, ", - (int)((unsigned char *)stripped_addr)[i]); - } - fprintf(outfile, "\n};\n\n"); - if (extable_sec) - BITSFUNC(extract)(raw_addr, raw_len, outfile, - extable_sec, "extable"); - - fprintf(outfile, "const struct vdso_image %s = {\n", image_name); - fprintf(outfile, "\t.data = raw_data,\n"); - fprintf(outfile, "\t.size = %lu,\n", mapping_size); - if (alt_sec) { - fprintf(outfile, "\t.alt = %lu,\n", - (unsigned long)GET_LE(&alt_sec->sh_offset)); - fprintf(outfile, "\t.alt_len = %lu,\n", - (unsigned long)GET_LE(&alt_sec->sh_size)); - } - if (extable_sec) { - fprintf(outfile, "\t.extable_base = %lu,\n", - (unsigned long)GET_LE(&extable_sec->sh_offset)); - fprintf(outfile, "\t.extable_len = %lu,\n", - (unsigned long)GET_LE(&extable_sec->sh_size)); - fprintf(outfile, "\t.extable = extable,\n"); - } - - for (i = 0; i < NSYMS; i++) { - if (required_syms[i].export && syms[i]) - fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n", - required_syms[i].name, (int64_t)syms[i]); - } - fprintf(outfile, "};\n\n"); - fprintf(outfile, "static __init int init_%s(void) {\n", image_name); - fprintf(outfile, "\treturn init_vdso_image(&%s);\n", image_name); - fprintf(outfile, "};\n"); - fprintf(outfile, "subsys_initcall(init_%s);\n", image_name); - -} diff --git a/arch/x86/entry/vdso/vdso32/Makefile b/arch/x86/entry/vdso/vdso32/Makefile new file mode 100644 index 000000000000..add6afb484ba --- /dev/null +++ b/arch/x86/entry/vdso/vdso32/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# 32-bit vDSO images for x86. +# + +# The vDSOs built in this directory +vdsos-y := 32 + +# Files to link into the vDSO: +vobjs-y := note.o vclock_gettime.o vgetcpu.o +vobjs-y += system_call.o sigreturn.o + +# Compilation flags +flags-y := -DBUILD_VDSO32 -m32 -mregparm=0 +flags-$(CONFIG_X86_64) += -include $(src)/fake_32bit_build.h +flags-remove-y := -m64 + +# The location of this include matters! +include $(src)/../common/Makefile.include + +# Linker options for the vdso +VDSO_LDFLAGS_32 := -m elf_i386 -soname linux-gate.so.1 + +$(obj)/vdso32.so.dbg: $(vobjs) diff --git a/arch/x86/entry/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S index 2cbd39939dc6..62d8aa51ce99 100644 --- a/arch/x86/entry/vdso/vdso32/note.S +++ b/arch/x86/entry/vdso/vdso32/note.S @@ -1,18 +1 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. - * Here we can supply some information useful to userland. - */ - -#include <linux/build-salt.h> -#include <linux/version.h> -#include <linux/elfnote.h> - -/* Ideally this would use UTS_NAME, but using a quoted string here - doesn't work. Remember to change this when changing the - kernel's name. */ -ELFNOTE_START(Linux, 0, "a") - .long LINUX_VERSION_CODE -ELFNOTE_END - -BUILD_SALT +#include "common/note.S" diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S index 1bd068f72d4c..b433353bc8e3 100644 --- a/arch/x86/entry/vdso/vdso32/sigreturn.S +++ b/arch/x86/entry/vdso/vdso32/sigreturn.S @@ -1,140 +1,64 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/linkage.h> #include <asm/unistd_32.h> +#include <asm/dwarf2.h> #include <asm/asm-offsets.h> -#ifndef SYSCALL_ENTER_KERNEL -#define SYSCALL_ENTER_KERNEL int $0x80 +.macro STARTPROC_SIGNAL_FRAME sc + CFI_STARTPROC simple + CFI_SIGNAL_FRAME + /* -4 as pretcode has already been popped */ + CFI_DEF_CFA esp, \sc - 4 + CFI_OFFSET eip, IA32_SIGCONTEXT_ip + CFI_OFFSET eax, IA32_SIGCONTEXT_ax + CFI_OFFSET ebx, IA32_SIGCONTEXT_bx + CFI_OFFSET ecx, IA32_SIGCONTEXT_cx + CFI_OFFSET edx, IA32_SIGCONTEXT_dx + CFI_OFFSET esp, IA32_SIGCONTEXT_sp + CFI_OFFSET ebp, IA32_SIGCONTEXT_bp + CFI_OFFSET esi, IA32_SIGCONTEXT_si + CFI_OFFSET edi, IA32_SIGCONTEXT_di + CFI_OFFSET es, IA32_SIGCONTEXT_es + CFI_OFFSET cs, IA32_SIGCONTEXT_cs + CFI_OFFSET ss, IA32_SIGCONTEXT_ss + CFI_OFFSET ds, IA32_SIGCONTEXT_ds +/* + * .cfi_offset eflags requires LLVM 16 or newer: + * + * https://github.com/llvm/llvm-project/commit/67bd3c58c0c7389e39c5a2f4d3b1a30459ccf5b7 + * + * Check for 16.0.1 to ensure the support is present, as 16.0.0 may be a + * prerelease version. + */ +#if defined(CONFIG_AS_IS_GNU) || (defined(CONFIG_AS_IS_LLVM) && CONFIG_AS_VERSION >= 160001) + CFI_OFFSET eflags, IA32_SIGCONTEXT_flags #endif +.endm .text .globl __kernel_sigreturn .type __kernel_sigreturn,@function - nop /* this guy is needed for .LSTARTFDEDLSI1 below (watch for HACK) */ ALIGN __kernel_sigreturn: -.LSTART_sigreturn: - popl %eax /* XXX does this mean it needs unwind info? */ + STARTPROC_SIGNAL_FRAME IA32_SIGFRAME_sigcontext + popl %eax + CFI_ADJUST_CFA_OFFSET -4 movl $__NR_sigreturn, %eax - SYSCALL_ENTER_KERNEL -.LEND_sigreturn: + int $0x80 SYM_INNER_LABEL(vdso32_sigreturn_landing_pad, SYM_L_GLOBAL) - nop - .size __kernel_sigreturn,.-.LSTART_sigreturn + ud2a + CFI_ENDPROC + .size __kernel_sigreturn,.-__kernel_sigreturn .globl __kernel_rt_sigreturn .type __kernel_rt_sigreturn,@function ALIGN __kernel_rt_sigreturn: -.LSTART_rt_sigreturn: + STARTPROC_SIGNAL_FRAME IA32_RT_SIGFRAME_sigcontext movl $__NR_rt_sigreturn, %eax - SYSCALL_ENTER_KERNEL -.LEND_rt_sigreturn: + int $0x80 SYM_INNER_LABEL(vdso32_rt_sigreturn_landing_pad, SYM_L_GLOBAL) - nop - .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn - .previous - - .section .eh_frame,"a",@progbits -.LSTARTFRAMEDLSI1: - .long .LENDCIEDLSI1-.LSTARTCIEDLSI1 -.LSTARTCIEDLSI1: - .long 0 /* CIE ID */ - .byte 1 /* Version number */ - .string "zRS" /* NUL-terminated augmentation string */ - .uleb128 1 /* Code alignment factor */ - .sleb128 -4 /* Data alignment factor */ - .byte 8 /* Return address register column */ - .uleb128 1 /* Augmentation value length */ - .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ - .byte 0 /* DW_CFA_nop */ - .align 4 -.LENDCIEDLSI1: - .long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */ -.LSTARTFDEDLSI1: - .long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */ - /* HACK: The dwarf2 unwind routines will subtract 1 from the - return address to get an address in the middle of the - presumed call instruction. Since we didn't get here via - a call, we need to include the nop before the real start - to make up for it. */ - .long .LSTART_sigreturn-1-. /* PC-relative start address */ - .long .LEND_sigreturn-.LSTART_sigreturn+1 - .uleb128 0 /* Augmentation */ - /* What follows are the instructions for the table generation. - We record the locations of each register saved. This is - complicated by the fact that the "CFA" is always assumed to - be the value of the stack pointer in the caller. This means - that we must define the CFA of this body of code to be the - saved value of the stack pointer in the sigcontext. Which - also means that there is no fixed relation to the other - saved registers, which means that we must use DW_CFA_expression - to compute their addresses. It also means that when we - adjust the stack with the popl, we have to do it all over again. */ - -#define do_cfa_expr(offset) \ - .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ - .uleb128 1f-0f; /* length */ \ -0: .byte 0x74; /* DW_OP_breg4 */ \ - .sleb128 offset; /* offset */ \ - .byte 0x06; /* DW_OP_deref */ \ -1: - -#define do_expr(regno, offset) \ - .byte 0x10; /* DW_CFA_expression */ \ - .uleb128 regno; /* regno */ \ - .uleb128 1f-0f; /* length */ \ -0: .byte 0x74; /* DW_OP_breg4 */ \ - .sleb128 offset; /* offset */ \ -1: - - do_cfa_expr(IA32_SIGCONTEXT_sp+4) - do_expr(0, IA32_SIGCONTEXT_ax+4) - do_expr(1, IA32_SIGCONTEXT_cx+4) - do_expr(2, IA32_SIGCONTEXT_dx+4) - do_expr(3, IA32_SIGCONTEXT_bx+4) - do_expr(5, IA32_SIGCONTEXT_bp+4) - do_expr(6, IA32_SIGCONTEXT_si+4) - do_expr(7, IA32_SIGCONTEXT_di+4) - do_expr(8, IA32_SIGCONTEXT_ip+4) - - .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */ - - do_cfa_expr(IA32_SIGCONTEXT_sp) - do_expr(0, IA32_SIGCONTEXT_ax) - do_expr(1, IA32_SIGCONTEXT_cx) - do_expr(2, IA32_SIGCONTEXT_dx) - do_expr(3, IA32_SIGCONTEXT_bx) - do_expr(5, IA32_SIGCONTEXT_bp) - do_expr(6, IA32_SIGCONTEXT_si) - do_expr(7, IA32_SIGCONTEXT_di) - do_expr(8, IA32_SIGCONTEXT_ip) - - .align 4 -.LENDFDEDLSI1: - - .long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */ -.LSTARTFDEDLSI2: - .long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */ - /* HACK: See above wrt unwind library assumptions. */ - .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */ - .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1 - .uleb128 0 /* Augmentation */ - /* What follows are the instructions for the table generation. - We record the locations of each register saved. This is - slightly less complicated than the above, since we don't - modify the stack pointer in the process. */ - - do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_sp) - do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ax) - do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_cx) - do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_dx) - do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bx) - do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bp) - do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_si) - do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_di) - do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ip) - - .align 4 -.LENDFDEDLSI2: + ud2a + CFI_ENDPROC + .size __kernel_rt_sigreturn,.-__kernel_rt_sigreturn .previous diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index d33c6513fd2c..9157cf9c5749 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -14,6 +14,18 @@ ALIGN __kernel_vsyscall: CFI_STARTPROC + + /* + * If using int $0x80, there is no reason to muck about with the + * stack here. Unfortunately just overwriting the push instructions + * would mess up the CFI annotations, but it is only a 3-byte + * NOP in that case. This could be avoided by patching the + * vdso symbol table (not the code) and entry point, but that + * would a fair bit of tooling work or by simply compiling + * two different vDSO images, but that doesn't seem worth it. + */ + ALTERNATIVE "int $0x80; ret", "", X86_FEATURE_SYSFAST32 + /* * Reshuffle regs so that all of any of the entry instructions * will preserve enough state. @@ -52,15 +64,9 @@ __kernel_vsyscall: #define SYSENTER_SEQUENCE "movl %esp, %ebp; sysenter" #define SYSCALL_SEQUENCE "movl %ecx, %ebp; syscall" -#ifdef CONFIG_X86_64 - /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */ - ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \ - SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32 -#else - ALTERNATIVE "", SYSENTER_SEQUENCE, X86_FEATURE_SEP -#endif + ALTERNATIVE SYSENTER_SEQUENCE, SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32 - /* Enter using int $0x80 */ + /* Re-enter using int $0x80 */ int $0x80 SYM_INNER_LABEL(int80_landing_pad, SYM_L_GLOBAL) diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c index 86981decfea8..1481f0021b9f 100644 --- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c +++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c @@ -1,4 +1 @@ -// SPDX-License-Identifier: GPL-2.0 -#define BUILD_VDSO32 -#include "fake_32bit_build.h" -#include "../vclock_gettime.c" +#include "common/vclock_gettime.c" diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S index 6f977c103584..55554f80d930 100644 --- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S +++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S @@ -11,7 +11,7 @@ #define BUILD_VDSO32 -#include "../vdso-layout.lds.S" +#include "common/vdso-layout.lds.S" /* The ELF entry point can be used to set the AT_SYSINFO value. */ ENTRY(__kernel_vsyscall); diff --git a/arch/x86/entry/vdso/vdso32/vgetcpu.c b/arch/x86/entry/vdso/vdso32/vgetcpu.c index 3a9791f5e998..00cc8325a020 100644 --- a/arch/x86/entry/vdso/vdso32/vgetcpu.c +++ b/arch/x86/entry/vdso/vdso32/vgetcpu.c @@ -1,3 +1 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "fake_32bit_build.h" -#include "../vgetcpu.c" +#include "common/vgetcpu.c" diff --git a/arch/x86/entry/vdso/vdso64/Makefile b/arch/x86/entry/vdso/vdso64/Makefile new file mode 100644 index 000000000000..bfffaf1aeecc --- /dev/null +++ b/arch/x86/entry/vdso/vdso64/Makefile @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# 64-bit vDSO images for x86. +# + +# The vDSOs built in this directory +vdsos-y := 64 +vdsos-$(CONFIG_X86_X32_ABI) += x32 + +# Files to link into the vDSO: +vobjs-y := note.o vclock_gettime.o vgetcpu.o +vobjs-y += vgetrandom.o vgetrandom-chacha.o +vobjs-$(CONFIG_X86_SGX) += vsgx.o + +# Compilation flags +flags-y := -DBUILD_VDSO64 -m64 -mcmodel=small + +# The location of this include matters! +include $(src)/../common/Makefile.include + +# +# X32 processes use x32 vDSO to access 64bit kernel data. +# +# Build x32 vDSO image: +# 1. Compile x32 vDSO as 64bit. +# 2. Convert object files to x32. +# 3. Build x32 VDSO image with x32 objects, which contains 64bit codes +# so that it can reach 64bit address space with 64bit pointers. +# + +# Convert 64bit object file to x32 for x32 vDSO. +quiet_cmd_x32 = X32 $@ + cmd_x32 = $(OBJCOPY) -O elf32-x86-64 $< $@ + +$(obj)/%-x32.o: $(obj)/%.o FORCE + $(call if_changed,x32) + +vobjsx32 = $(patsubst %.o,%-x32.o,$(vobjs)) +targets += $(patsubst %.o,%-x32.o,$(vobjs-y)) + +# Linker options for the vdso +VDSO_LDFLAGS_64 := -m elf_x86_64 -soname linux-vdso.so.1 -z max-page-size=4096 +VDSO_LDFLAGS_x32 := $(subst elf_x86_64,elf32_x86_64,$(VDSO_LDFLAGS_64)) + +$(obj)/vdso64.so.dbg: $(vobjs) +$(obj)/vdsox32.so.dbg: $(vobjsx32) diff --git a/arch/x86/entry/vdso/vdso64/note.S b/arch/x86/entry/vdso/vdso64/note.S new file mode 100644 index 000000000000..62d8aa51ce99 --- /dev/null +++ b/arch/x86/entry/vdso/vdso64/note.S @@ -0,0 +1 @@ +#include "common/note.S" diff --git a/arch/x86/entry/vdso/vdso64/vclock_gettime.c b/arch/x86/entry/vdso/vdso64/vclock_gettime.c new file mode 100644 index 000000000000..1481f0021b9f --- /dev/null +++ b/arch/x86/entry/vdso/vdso64/vclock_gettime.c @@ -0,0 +1 @@ +#include "common/vclock_gettime.c" diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso64/vdso64.lds.S index 0bab5f4af6d1..5ce3f2b6373a 100644 --- a/arch/x86/entry/vdso/vdso.lds.S +++ b/arch/x86/entry/vdso/vdso64/vdso64.lds.S @@ -9,7 +9,7 @@ #define BUILD_VDSO64 -#include "vdso-layout.lds.S" +#include "common/vdso-layout.lds.S" /* * This controls what userland symbols we export from the vDSO. diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdso64/vdsox32.lds.S index 16a8050a4fb6..3dbd20c8dacc 100644 --- a/arch/x86/entry/vdso/vdsox32.lds.S +++ b/arch/x86/entry/vdso/vdso64/vdsox32.lds.S @@ -9,7 +9,7 @@ #define BUILD_VDSOX32 -#include "vdso-layout.lds.S" +#include "common/vdso-layout.lds.S" /* * This controls what userland symbols we export from the vDSO. diff --git a/arch/x86/entry/vdso/vdso64/vgetcpu.c b/arch/x86/entry/vdso/vdso64/vgetcpu.c new file mode 100644 index 000000000000..00cc8325a020 --- /dev/null +++ b/arch/x86/entry/vdso/vdso64/vgetcpu.c @@ -0,0 +1 @@ +#include "common/vgetcpu.c" diff --git a/arch/x86/entry/vdso/vgetrandom-chacha.S b/arch/x86/entry/vdso/vdso64/vgetrandom-chacha.S index bcba5639b8ee..bcba5639b8ee 100644 --- a/arch/x86/entry/vdso/vgetrandom-chacha.S +++ b/arch/x86/entry/vdso/vdso64/vgetrandom-chacha.S diff --git a/arch/x86/entry/vdso/vgetrandom.c b/arch/x86/entry/vdso/vdso64/vgetrandom.c index 430862b8977c..6a95d36b12d9 100644 --- a/arch/x86/entry/vdso/vgetrandom.c +++ b/arch/x86/entry/vdso/vdso64/vgetrandom.c @@ -4,7 +4,7 @@ */ #include <linux/types.h> -#include "../../../../lib/vdso/getrandom.c" +#include "lib/vdso/getrandom.c" ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) { diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vdso64/vsgx.S index 37a3d4c02366..37a3d4c02366 100644 --- a/arch/x86/entry/vdso/vsgx.S +++ b/arch/x86/entry/vdso/vdso64/vsgx.S diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index afe105b2f907..e7fd7517370f 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -65,16 +65,12 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, static void vdso_fix_landing(const struct vdso_image *image, struct vm_area_struct *new_vma) { - if (in_ia32_syscall() && image == &vdso_image_32) { - struct pt_regs *regs = current_pt_regs(); - unsigned long vdso_land = image->sym_int80_landing_pad; - unsigned long old_land_addr = vdso_land + - (unsigned long)current->mm->context.vdso; - - /* Fixing userspace landing - look at do_fast_syscall_32 */ - if (regs->ip == old_land_addr) - regs->ip = new_vma->vm_start + vdso_land; - } + struct pt_regs *regs = current_pt_regs(); + unsigned long ipoffset = regs->ip - + (unsigned long)current->mm->context.vdso; + + if (ipoffset < image->size) + regs->ip = new_vma->vm_start + ipoffset; } static int vdso_mremap(const struct vm_special_mapping *sm, @@ -230,7 +226,7 @@ static int load_vdso32(void) if (vdso32_enabled != 1) /* Other values all mean "disabled" */ return 0; - return map_vdso(&vdso_image_32, 0); + return map_vdso(&vdso32_image, 0); } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) @@ -239,7 +235,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!vdso64_enabled) return 0; - return map_vdso(&vdso_image_64, 0); + return map_vdso(&vdso64_image, 0); } return load_vdso32(); @@ -252,7 +248,7 @@ int compat_arch_setup_additional_pages(struct linux_binprm *bprm, if (IS_ENABLED(CONFIG_X86_X32_ABI) && x32) { if (!vdso64_enabled) return 0; - return map_vdso(&vdso_image_x32, 0); + return map_vdso(&vdsox32_image, 0); } if (IS_ENABLED(CONFIG_IA32_EMULATION)) @@ -267,7 +263,7 @@ bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs) const struct vdso_image *image = current->mm->context.vdso_image; unsigned long vdso = (unsigned long) current->mm->context.vdso; - if (in_ia32_syscall() && image == &vdso_image_32) { + if (in_ia32_syscall() && image == &vdso32_image) { if (regs->ip == vdso + image->sym_vdso32_sigreturn_landing_pad || regs->ip == vdso + image->sym_vdso32_rt_sigreturn_landing_pad) return true; |
