diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-14 10:53:44 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-14 10:53:44 -0700 |
| commit | f21f7b5162e9dbde6d3d5ce727d4ca2552d76ce9 (patch) | |
| tree | 2c1d858605001adedeff10f66f031e20da1db34d | |
| parent | c1fe867b5bf9c57ab7856486d342720e2b205eed (diff) | |
| parent | 7138a8698a39e81eb153e05500823fff76d5b3bd (diff) | |
Merge tag 'timers-vdso-2026-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull vdso updates from Thomas Gleixner:
- Make the handling of compat functions consistent and more robust
- Rework the underlying data store so that it is dynamically allocated,
which allows the conversion of the last holdout SPARC64 to the
generic VDSO implementation
- Rework the SPARC64 VDSO to utilize the generic implementation
- Mop up the leftovers of the non-generic VDSO support in the core
code
- Expand the VDSO selftests and make them more robust
- Allow time namespaces to be enabled independently of the generic VDSO
support, which was not possible before due to SPARC64 not using it
- Various cleanups and improvements in the related code
* tag 'timers-vdso-2026-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (51 commits)
timens: Use task_lock guard in timens_get*()
timens: Use mutex guard in proc_timens_set_offset()
timens: Simplify some calls to put_time_ns()
timens: Add a __free() wrapper for put_time_ns()
timens: Remove dependency on the vDSO
vdso/timens: Move functions to new file
selftests: vDSO: vdso_test_correctness: Add a test for time()
selftests: vDSO: vdso_test_correctness: Use facilities from parse_vdso.c
selftests: vDSO: vdso_test_correctness: Handle different tv_usec types
selftests: vDSO: vdso_test_correctness: Drop SYS_getcpu fallbacks
selftests: vDSO: vdso_test_gettimeofday: Remove nolibc checks
Revert "selftests: vDSO: parse_vdso: Use UAPI headers instead of libc headers"
random: vDSO: Remove ifdeffery
random: vDSO: Trim vDSO includes
vdso/datapage: Trim down unnecessary includes
vdso/datapage: Remove inclusion of gettimeofday.h
vdso/helpers: Explicitly include vdso/processor.h
vdso/gettimeofday: Add explicit includes
random: vDSO: Add explicit includes
MIPS: vdso: Explicitly include asm/vdso/vdso.h
...
56 files changed, 829 insertions, 1291 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index a54fedca451e..25d630e6e7bb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10796,6 +10796,7 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso F: include/asm-generic/vdso/vsyscall.h F: include/vdso/ +F: kernel/time/namespace_vdso.c F: kernel/time/vsyscall.c F: lib/vdso/ F: tools/testing/selftests/vDSO/ @@ -21042,6 +21043,7 @@ F: include/trace/events/timer* F: kernel/time/itimer.c F: kernel/time/posix-* F: kernel/time/namespace.c +F: kernel/time/namespace_vdso.c POWER MANAGEMENT CORE M: "Rafael J. Wysocki" <rafael@kernel.org> diff --git a/arch/arm/include/asm/vdso/gettimeofday.h b/arch/arm/include/asm/vdso/gettimeofday.h index 1e9f81639c88..26da5d8621cc 100644 --- a/arch/arm/include/asm/vdso/gettimeofday.h +++ b/arch/arm/include/asm/vdso/gettimeofday.h @@ -11,6 +11,8 @@ #include <asm/errno.h> #include <asm/unistd.h> #include <asm/vdso/cp15.h> +#include <vdso/clocksource.h> +#include <vdso/time32.h> #include <uapi/linux/time.h> #define VDSO_HAS_CLOCK_GETRES 1 diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index 0d513f924321..a03e34b572f1 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -7,6 +7,9 @@ #ifndef __ASSEMBLER__ +#include <vdso/clocksource.h> +#include <vdso/time32.h> + #include <asm/barrier.h> #include <asm/unistd_compat_32.h> #include <asm/errno.h> diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 3658a757e255..96d2eccd4995 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -9,6 +9,8 @@ #ifndef __ASSEMBLER__ +#include <vdso/clocksource.h> + #include <asm/alternative.h> #include <asm/arch_timer.h> #include <asm/barrier.h> diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 
4ac1c3086152..ac3a0baa5d00 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -52,6 +52,7 @@ #include <asm/switch_to.h> #include <asm/unwind.h> #include <asm/vdso.h> +#include <asm/vdso/vdso.h> #ifdef CONFIG_STACKPROTECTOR #include <linux/stackprotector.h> diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c index 0aa10cadb959..8ce8159c10b9 100644 --- a/arch/loongarch/kernel/vdso.c +++ b/arch/loongarch/kernel/vdso.c @@ -18,6 +18,7 @@ #include <asm/page.h> #include <asm/vdso.h> +#include <asm/vdso/vdso.h> #include <vdso/helpers.h> #include <vdso/vsyscall.h> #include <vdso/datapage.h> diff --git a/arch/mips/include/asm/vdso/vdso.h b/arch/mips/include/asm/vdso/vdso.h index 6889e0f2e5db..ef50d33f3439 100644 --- a/arch/mips/include/asm/vdso/vdso.h +++ b/arch/mips/include/asm/vdso/vdso.h @@ -4,6 +4,9 @@ * Author: Alex Smith <alex.smith@imgtec.com> */ +#ifndef __ASM_VDSO_VDSO_H +#define __ASM_VDSO_VDSO_H + #include <asm/sgidefs.h> #include <vdso/page.h> @@ -70,3 +73,5 @@ static inline void __iomem *get_gic(const struct vdso_time_data *data) #endif /* CONFIG_CLKSRC_MIPS_GIC */ #endif /* __ASSEMBLER__ */ + +#endif /* __ASM_VDSO_VDSO_H */ diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index de096777172f..2fa4df3e46e4 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -21,6 +21,7 @@ #include <asm/mips-cps.h> #include <asm/page.h> #include <asm/vdso.h> +#include <asm/vdso/vdso.h> #include <vdso/helpers.h> #include <vdso/vsyscall.h> diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 8ea397e26ad0..a853f853da6c 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -8,6 +8,7 @@ #include <asm/barrier.h> #include <asm/unistd.h> #include <uapi/linux/time.h> +#include <vdso/time32.h> #define VDSO_HAS_CLOCK_GETRES 1 diff --git a/arch/powerpc/include/asm/vdso/processor.h 
b/arch/powerpc/include/asm/vdso/processor.h index c1f3d7aaf3ee..4c6802c3a580 100644 --- a/arch/powerpc/include/asm/vdso/processor.h +++ b/arch/powerpc/include/asm/vdso/processor.h @@ -4,6 +4,9 @@ #ifndef __ASSEMBLER__ +#include <asm/cputable.h> +#include <asm/feature-fixups.h> + /* Macros for adjusting thread priority (hardware multi-threading) */ #ifdef CONFIG_PPC64 #define HMT_very_low() asm volatile("or 31, 31, 31 # very low priority") diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c index 57b38c592b9f..b4d81a57b2d9 100644 --- a/arch/powerpc/kernel/compat_audit.c +++ b/arch/powerpc/kernel/compat_audit.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#undef __powerpc64__ #include <linux/audit_arch.h> -#include <asm/unistd.h> +#include <asm/unistd_32.h> #include "audit_32.h" diff --git a/arch/s390/Makefile b/arch/s390/Makefile index c8d16aca1cdc..297976b41088 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -12,8 +12,7 @@ LD_BFD := elf64-s390 KBUILD_LDFLAGS := -m elf64_s390 KBUILD_AFLAGS_MODULE += -fPIC KBUILD_CFLAGS_MODULE += -fPIC -KBUILD_AFLAGS += -m64 -KBUILD_CFLAGS += -m64 +KBUILD_CPPFLAGS += -m64 KBUILD_CFLAGS += -fPIC LDFLAGS_vmlinux := $(call ld-option,-no-pie) extra_tools := relocs diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 8699be91fca9..a6b787efc2c4 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -104,7 +104,6 @@ config SPARC64 select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select GENERIC_TIME_VSYSCALL - select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_PTE_SPECIAL select PCI_DOMAINS if PCI select ARCH_HAS_GIGANTIC_PAGE @@ -115,6 +114,8 @@ config SPARC64 select ARCH_SUPPORTS_SCHED_SMT if SMP select ARCH_SUPPORTS_SCHED_MC if SMP select ARCH_HAS_LAZY_MMU_MODE + select HAVE_GENERIC_VDSO + select GENERIC_GETTIMEOFDAY config ARCH_PROC_KCORE_TEXT def_bool y diff --git a/arch/sparc/include/asm/clocksource.h b/arch/sparc/include/asm/clocksource.h index 
d63ef224befe..68303ad26eb2 100644 --- a/arch/sparc/include/asm/clocksource.h +++ b/arch/sparc/include/asm/clocksource.h @@ -5,13 +5,4 @@ #ifndef _ASM_SPARC_CLOCKSOURCE_H #define _ASM_SPARC_CLOCKSOURCE_H -/* VDSO clocksources */ -#define VCLOCK_NONE 0 /* Nothing userspace can do. */ -#define VCLOCK_TICK 1 /* Use %tick. */ -#define VCLOCK_STICK 2 /* Use %stick. */ - -struct arch_clocksource_data { - int vclock_mode; -}; - #endif /* _ASM_SPARC_CLOCKSOURCE_H */ diff --git a/arch/sparc/include/asm/processor.h b/arch/sparc/include/asm/processor.h index 18295ea625dd..e34de956519a 100644 --- a/arch/sparc/include/asm/processor.h +++ b/arch/sparc/include/asm/processor.h @@ -1,6 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef ___ASM_SPARC_PROCESSOR_H #define ___ASM_SPARC_PROCESSOR_H + +#include <asm/vdso/processor.h> + #if defined(__sparc__) && defined(__arch64__) #include <asm/processor_64.h> #else diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index ba8b70ffec08..a074d313f4f8 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -91,8 +91,6 @@ unsigned long __get_wchan(struct task_struct *); extern struct task_struct *last_task_used_math; int do_mathemu(struct pt_regs *regs, struct task_struct *fpt); -#define cpu_relax() barrier() - extern void (*sparc_idle)(void); #endif diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index 321859454ca4..485070495263 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -182,31 +182,6 @@ unsigned long __get_wchan(struct task_struct *task); #define KSTK_EIP(tsk) (task_pt_regs(tsk)->tpc) #define KSTK_ESP(tsk) (task_pt_regs(tsk)->u_regs[UREG_FP]) -/* Please see the commentary in asm/backoff.h for a description of - * what these instructions are doing and how they have been chosen. 
- * To make a long story short, we are trying to yield the current cpu - * strand during busy loops. - */ -#ifdef BUILD_VDSO -#define cpu_relax() asm volatile("\n99:\n\t" \ - "rd %%ccr, %%g0\n\t" \ - "rd %%ccr, %%g0\n\t" \ - "rd %%ccr, %%g0\n\t" \ - ::: "memory") -#else /* ! BUILD_VDSO */ -#define cpu_relax() asm volatile("\n99:\n\t" \ - "rd %%ccr, %%g0\n\t" \ - "rd %%ccr, %%g0\n\t" \ - "rd %%ccr, %%g0\n\t" \ - ".section .pause_3insn_patch,\"ax\"\n\t"\ - ".word 99b\n\t" \ - "wr %%g0, 128, %%asr27\n\t" \ - "nop\n\t" \ - "nop\n\t" \ - ".previous" \ - ::: "memory") -#endif - /* Prefetch support. This is tuned for UltraSPARC-III and later. * UltraSPARC-I will treat these as nops, and UltraSPARC-II has * a shallower prefetch queue than later chips. diff --git a/arch/sparc/include/asm/vdso.h b/arch/sparc/include/asm/vdso.h index 59e79d35cd73..f08562d10215 100644 --- a/arch/sparc/include/asm/vdso.h +++ b/arch/sparc/include/asm/vdso.h @@ -8,8 +8,6 @@ struct vdso_image { void *data; unsigned long size; /* Always a multiple of PAGE_SIZE */ - - long sym_vvar_start; /* Negative offset to the vvar area */ }; #ifdef CONFIG_SPARC64 diff --git a/arch/sparc/include/asm/vdso/clocksource.h b/arch/sparc/include/asm/vdso/clocksource.h new file mode 100644 index 000000000000..007aa8ceaf52 --- /dev/null +++ b/arch/sparc/include/asm/vdso/clocksource.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_CLOCKSOURCE_H +#define __ASM_VDSO_CLOCKSOURCE_H + +/* VDSO clocksources */ +#define VDSO_ARCH_CLOCKMODES \ + VDSO_CLOCKMODE_TICK, \ + VDSO_CLOCKMODE_STICK + +#endif /* __ASM_VDSO_CLOCKSOURCE_H */ diff --git a/arch/sparc/include/asm/vdso/gettimeofday.h b/arch/sparc/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..b0c80c8a28bb --- /dev/null +++ b/arch/sparc/include/asm/vdso/gettimeofday.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2006 Andi Kleen, SUSE Labs. 
+ */ + +#ifndef _ASM_SPARC_VDSO_GETTIMEOFDAY_H +#define _ASM_SPARC_VDSO_GETTIMEOFDAY_H + +#include <uapi/linux/time.h> +#include <uapi/linux/unistd.h> + +#include <vdso/align.h> +#include <vdso/clocksource.h> +#include <vdso/datapage.h> +#include <vdso/page.h> + +#include <linux/types.h> + +#ifdef CONFIG_SPARC64 +static __always_inline u64 vread_tick(void) +{ + u64 ret; + + __asm__ __volatile__("rd %%tick, %0" : "=r" (ret)); + return ret; +} + +static __always_inline u64 vread_tick_stick(void) +{ + u64 ret; + + __asm__ __volatile__("rd %%asr24, %0" : "=r" (ret)); + return ret; +} +#else +static __always_inline u64 vdso_shift_ns(u64 val, u32 amt) +{ + u64 ret; + + __asm__ __volatile__("sllx %H1, 32, %%g1\n\t" + "srl %L1, 0, %L1\n\t" + "or %%g1, %L1, %%g1\n\t" + "srlx %%g1, %2, %L0\n\t" + "srlx %L0, 32, %H0" + : "=r" (ret) + : "r" (val), "r" (amt) + : "g1"); + return ret; +} +#define vdso_shift_ns vdso_shift_ns + +static __always_inline u64 vread_tick(void) +{ + register unsigned long long ret asm("o4"); + + __asm__ __volatile__("rd %%tick, %L0\n\t" + "srlx %L0, 32, %H0" + : "=r" (ret)); + return ret; +} + +static __always_inline u64 vread_tick_stick(void) +{ + register unsigned long long ret asm("o4"); + + __asm__ __volatile__("rd %%asr24, %L0\n\t" + "srlx %L0, 32, %H0" + : "=r" (ret)); + return ret; +} +#endif + +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_time_data *vd) +{ + if (likely(clock_mode == VDSO_CLOCKMODE_STICK)) + return vread_tick_stick(); + else + return vread_tick(); +} + +#ifdef CONFIG_SPARC64 +#define SYSCALL_STRING \ + "ta 0x6d;" \ + "bcs,a 1f;" \ + " sub %%g0, %%o0, %%o0;" \ + "1:" +#else +#define SYSCALL_STRING \ + "ta 0x10;" \ + "bcs,a 1f;" \ + " sub %%g0, %%o0, %%o0;" \ + "1:" +#endif + +#define SYSCALL_CLOBBERS \ + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ + "f24", "f25", "f26", 
"f27", "f28", "f29", "f30", "f31", \ + "f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46", \ + "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", \ + "cc", "memory" + +#ifdef CONFIG_SPARC64 + +static __always_inline +long clock_gettime_fallback(clockid_t clock, struct __kernel_timespec *ts) +{ + register long num __asm__("g1") = __NR_clock_gettime; + register long o0 __asm__("o0") = clock; + register long o1 __asm__("o1") = (long) ts; + + __asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num), + "0" (o0), "r" (o1) : SYSCALL_CLOBBERS); + return o0; +} + +#else /* !CONFIG_SPARC64 */ + +static __always_inline +long clock_gettime_fallback(clockid_t clock, struct __kernel_timespec *ts) +{ + register long num __asm__("g1") = __NR_clock_gettime64; + register long o0 __asm__("o0") = clock; + register long o1 __asm__("o1") = (long) ts; + + __asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num), + "0" (o0), "r" (o1) : SYSCALL_CLOBBERS); + return o0; +} + +static __always_inline +long clock_gettime32_fallback(clockid_t clock, struct old_timespec32 *ts) +{ + register long num __asm__("g1") = __NR_clock_gettime; + register long o0 __asm__("o0") = clock; + register long o1 __asm__("o1") = (long) ts; + + __asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num), + "0" (o0), "r" (o1) : SYSCALL_CLOBBERS); + return o0; +} + +#endif /* CONFIG_SPARC64 */ + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *tv, struct timezone *tz) +{ + register long num __asm__("g1") = __NR_gettimeofday; + register long o0 __asm__("o0") = (long) tv; + register long o1 __asm__("o1") = (long) tz; + + __asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num), + "0" (o0), "r" (o1) : SYSCALL_CLOBBERS); + return o0; +} + +static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void) +{ + unsigned long ret; + + /* + * SPARC does not support native PC-relative code relocations. 
+ * Calculate the address manually, works for 32 and 64 bit code. + */ + __asm__ __volatile__( + "1:\n" + "call 3f\n" // Jump over the embedded data and set up %o7 + "nop\n" // Delay slot + "2:\n" + ".word vdso_u_time_data - .\n" // Embedded offset to external symbol + "3:\n" + "add %%o7, 2b - 1b, %%o7\n" // Point %o7 to the embedded offset + "ldsw [%%o7], %0\n" // Load the offset + "add %0, %%o7, %0\n" // Calculate the absolute address + : "=r" (ret) + : + : "o7"); + + return (const struct vdso_time_data *)ret; +} +#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data + +#endif /* _ASM_SPARC_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/sparc/include/asm/vdso/processor.h b/arch/sparc/include/asm/vdso/processor.h new file mode 100644 index 000000000000..f7a9adc807f7 --- /dev/null +++ b/arch/sparc/include/asm/vdso/processor.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_SPARC_VDSO_PROCESSOR_H +#define _ASM_SPARC_VDSO_PROCESSOR_H + +#include <linux/compiler.h> + +#if defined(__arch64__) + +/* Please see the commentary in asm/backoff.h for a description of + * what these instructions are doing and how they have been chosen. + * To make a long story short, we are trying to yield the current cpu + * strand during busy loops. + */ +#ifdef BUILD_VDSO +#define cpu_relax() asm volatile("\n99:\n\t" \ + "rd %%ccr, %%g0\n\t" \ + "rd %%ccr, %%g0\n\t" \ + "rd %%ccr, %%g0\n\t" \ + ::: "memory") +#else /* ! BUILD_VDSO */ +#define cpu_relax() asm volatile("\n99:\n\t" \ + "rd %%ccr, %%g0\n\t" \ + "rd %%ccr, %%g0\n\t" \ + "rd %%ccr, %%g0\n\t" \ + ".section .pause_3insn_patch,\"ax\"\n\t"\ + ".word 99b\n\t" \ + "wr %%g0, 128, %%asr27\n\t" \ + "nop\n\t" \ + "nop\n\t" \ + ".previous" \ + ::: "memory") +#endif /* BUILD_VDSO */ + +#else /* ! 
__arch64__ */ + +#define cpu_relax() barrier() + +#endif /* __arch64__ */ + +#endif /* _ASM_SPARC_VDSO_PROCESSOR_H */ diff --git a/arch/sparc/include/asm/vdso/vsyscall.h b/arch/sparc/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..8bfe703fedc5 --- /dev/null +++ b/arch/sparc/include/asm/vdso/vsyscall.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_SPARC_VDSO_VSYSCALL_H +#define _ASM_SPARC_VDSO_VSYSCALL_H + +#define __VDSO_PAGES 4 + +#include <asm-generic/vdso/vsyscall.h> + +#endif /* _ASM_SPARC_VDSO_VSYSCALL_H */ diff --git a/arch/sparc/include/asm/vvar.h b/arch/sparc/include/asm/vvar.h deleted file mode 100644 index 6eaf5cfcaae1..000000000000 --- a/arch/sparc/include/asm/vvar.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. - */ - -#ifndef _ASM_SPARC_VVAR_DATA_H -#define _ASM_SPARC_VVAR_DATA_H - -#include <asm/clocksource.h> -#include <asm/processor.h> -#include <asm/barrier.h> -#include <linux/time.h> -#include <linux/types.h> - -struct vvar_data { - unsigned int seq; - - int vclock_mode; - struct { /* extract of a clocksource struct */ - u64 cycle_last; - u64 mask; - int mult; - int shift; - } clock; - /* open coded 'struct timespec' */ - u64 wall_time_sec; - u64 wall_time_snsec; - u64 monotonic_time_snsec; - u64 monotonic_time_sec; - u64 monotonic_time_coarse_sec; - u64 monotonic_time_coarse_nsec; - u64 wall_time_coarse_sec; - u64 wall_time_coarse_nsec; - - int tz_minuteswest; - int tz_dsttime; -}; - -extern struct vvar_data *vvar_data; -extern int vdso_fix_stick; - -static inline unsigned int vvar_read_begin(const struct vvar_data *s) -{ - unsigned int ret; - -repeat: - ret = READ_ONCE(s->seq); - if (unlikely(ret & 1)) { - cpu_relax(); - goto repeat; - } - smp_rmb(); /* Finish all reads before we return seq */ - return ret; -} - -static inline int vvar_read_retry(const struct vvar_data *s, - unsigned int start) -{ - smp_rmb(); /* Finish all reads before 
checking the value of seq */ - return unlikely(s->seq != start); -} - -static inline void vvar_write_begin(struct vvar_data *s) -{ - ++s->seq; - smp_wmb(); /* Makes sure that increment of seq is reflected */ -} - -static inline void vvar_write_end(struct vvar_data *s) -{ - smp_wmb(); /* Makes the value of seq current before we increment */ - ++s->seq; -} - - -#endif /* _ASM_SPARC_VVAR_DATA_H */ diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 22170d4f8e06..497b5714fa8f 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -41,7 +41,6 @@ obj-$(CONFIG_SPARC32) += systbls_32.o obj-y += time_$(BITS).o obj-$(CONFIG_SPARC32) += windows.o obj-y += cpu.o -obj-$(CONFIG_SPARC64) += vdso.o obj-$(CONFIG_SPARC32) += devices.o obj-y += ptrace_$(BITS).o obj-y += unaligned_$(BITS).o diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index b32f27f929d1..87b267043ccd 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -838,14 +838,14 @@ void __init time_init_early(void) if (tlb_type == spitfire) { if (is_hummingbird()) { init_tick_ops(&hbtick_operations); - clocksource_tick.archdata.vclock_mode = VCLOCK_NONE; + clocksource_tick.vdso_clock_mode = VDSO_CLOCKMODE_NONE; } else { init_tick_ops(&tick_operations); - clocksource_tick.archdata.vclock_mode = VCLOCK_TICK; + clocksource_tick.vdso_clock_mode = VDSO_CLOCKMODE_TICK; } } else { init_tick_ops(&stick_operations); - clocksource_tick.archdata.vclock_mode = VCLOCK_STICK; + clocksource_tick.vdso_clock_mode = VDSO_CLOCKMODE_STICK; } } diff --git a/arch/sparc/kernel/vdso.c b/arch/sparc/kernel/vdso.c deleted file mode 100644 index 0e27437eb97b..000000000000 --- a/arch/sparc/kernel/vdso.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE - * Copyright 2003 Andi Kleen, SuSE Labs. - * - * Thanks to hpa@transmeta.com for some useful hint. 
- * Special thanks to Ingo Molnar for his early experience with - * a different vsyscall implementation for Linux/IA32 and for the name. - */ - -#include <linux/time.h> -#include <linux/timekeeper_internal.h> - -#include <asm/vvar.h> - -void update_vsyscall_tz(void) -{ - if (unlikely(vvar_data == NULL)) - return; - - vvar_data->tz_minuteswest = sys_tz.tz_minuteswest; - vvar_data->tz_dsttime = sys_tz.tz_dsttime; -} - -void update_vsyscall(struct timekeeper *tk) -{ - struct vvar_data *vdata = vvar_data; - - if (unlikely(vdata == NULL)) - return; - - vvar_write_begin(vdata); - vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; - vdata->clock.cycle_last = tk->tkr_mono.cycle_last; - vdata->clock.mask = tk->tkr_mono.mask; - vdata->clock.mult = tk->tkr_mono.mult; - vdata->clock.shift = tk->tkr_mono.shift; - - vdata->wall_time_sec = tk->xtime_sec; - vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec; - - vdata->monotonic_time_sec = tk->xtime_sec + - tk->wall_to_monotonic.tv_sec; - vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec + - (tk->wall_to_monotonic.tv_nsec << - tk->tkr_mono.shift); - - while (vdata->monotonic_time_snsec >= - (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { - vdata->monotonic_time_snsec -= - ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; - vdata->monotonic_time_sec++; - } - - vdata->wall_time_coarse_sec = tk->xtime_sec; - vdata->wall_time_coarse_nsec = - (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); - - vdata->monotonic_time_coarse_sec = - vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; - vdata->monotonic_time_coarse_nsec = - vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec; - - while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) { - vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC; - vdata->monotonic_time_coarse_sec++; - } - - vvar_write_end(vdata); -} diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index 683b2d408224..83fb2aca59cb 100644 --- a/arch/sparc/vdso/Makefile +++ 
b/arch/sparc/vdso/Makefile @@ -3,6 +3,9 @@ # Building vDSO images for sparc. # +# Include the generic Makefile to check the built vDSO: +include $(srctree)/lib/vdso/Makefile.include + # files to link into the vdso vobjs-y := vdso-note.o vclock_gettime.o @@ -90,6 +93,9 @@ KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING KBUILD_CFLAGS_32 += -mv8plus $(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) +CHECKFLAGS_32 := $(filter-out -m64 -D__sparc_v9__ -D__arch64__, $(CHECKFLAGS)) -m32 +$(obj)/vdso32.so.dbg: CHECKFLAGS = $(CHECKFLAGS_32) + $(obj)/vdso32.so.dbg: FORCE \ $(obj)/vdso32/vdso32.lds \ $(obj)/vdso32/vclock_gettime.o \ @@ -102,6 +108,7 @@ $(obj)/vdso32.so.dbg: FORCE \ quiet_cmd_vdso = VDSO $@ cmd_vdso = $(LD) -nostdlib -o $@ \ $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ - -T $(filter %.lds,$^) $(filter %.o,$^) + -T $(filter %.lds,$^) $(filter %.o,$^); \ + $(cmd_vdso_check) -VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 -Bsymbolic --no-undefined +VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 -Bsymbolic --no-undefined -z noexecstack diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c index 79607804ea1b..1d9859392e4c 100644 --- a/arch/sparc/vdso/vclock_gettime.c +++ b/arch/sparc/vdso/vclock_gettime.c @@ -12,382 +12,48 @@ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. 
*/ -#include <linux/kernel.h> -#include <linux/time.h> -#include <linux/string.h> -#include <asm/io.h> -#include <asm/unistd.h> -#include <asm/timex.h> -#include <asm/clocksource.h> -#include <asm/vvar.h> +#include <linux/compiler.h> +#include <linux/types.h> -#ifdef CONFIG_SPARC64 -#define SYSCALL_STRING \ - "ta 0x6d;" \ - "bcs,a 1f;" \ - " sub %%g0, %%o0, %%o0;" \ - "1:" -#else -#define SYSCALL_STRING \ - "ta 0x10;" \ - "bcs,a 1f;" \ - " sub %%g0, %%o0, %%o0;" \ - "1:" -#endif - -#define SYSCALL_CLOBBERS \ - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ - "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ - "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ - "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ - "f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46", \ - "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", \ - "cc", "memory" - -/* - * Compute the vvar page's address in the process address space, and return it - * as a pointer to the vvar_data. - */ -notrace static __always_inline struct vvar_data *get_vvar_data(void) -{ - unsigned long ret; - - /* - * vdso data page is the first vDSO page so grab the PC - * and move up a page to get to the data page. 
- */ - __asm__("rd %%pc, %0" : "=r" (ret)); - ret &= ~(8192 - 1); - ret -= 8192; - - return (struct vvar_data *) ret; -} +#include <vdso/gettime.h> -notrace static long vdso_fallback_gettime(long clock, struct __kernel_old_timespec *ts) -{ - register long num __asm__("g1") = __NR_clock_gettime; - register long o0 __asm__("o0") = clock; - register long o1 __asm__("o1") = (long) ts; +#include <asm/vdso/gettimeofday.h> - __asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num), - "0" (o0), "r" (o1) : SYSCALL_CLOBBERS); - return o0; -} +#include "../../../../lib/vdso/gettimeofday.c" -notrace static long vdso_fallback_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { - register long num __asm__("g1") = __NR_gettimeofday; - register long o0 __asm__("o0") = (long) tv; - register long o1 __asm__("o1") = (long) tz; - - __asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num), - "0" (o0), "r" (o1) : SYSCALL_CLOBBERS); - return o0; + return __cvdso_gettimeofday(tv, tz); } -#ifdef CONFIG_SPARC64 -notrace static __always_inline u64 __shr64(u64 val, int amt) -{ - return val >> amt; -} +int gettimeofday(struct __kernel_old_timeval *, struct timezone *) + __weak __alias(__vdso_gettimeofday); -notrace static __always_inline u64 vread_tick(void) +#if defined(CONFIG_SPARC64) +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { - u64 ret; - - __asm__ __volatile__("rd %%tick, %0" : "=r" (ret)); - return ret; + return __cvdso_clock_gettime(clock, ts); } -notrace static __always_inline u64 vread_tick_stick(void) -{ - u64 ret; +int clock_gettime(clockid_t, struct __kernel_timespec *) + __weak __alias(__vdso_clock_gettime); - __asm__ __volatile__("rd %%asr24, %0" : "=r" (ret)); - return ret; -} #else -notrace static __always_inline u64 __shr64(u64 val, int amt) -{ - u64 ret; - - __asm__ __volatile__("sllx %H1, 32, %%g1\n\t" - "srl %L1, 0, %L1\n\t" - "or %%g1, %L1, 
%%g1\n\t" - "srlx %%g1, %2, %L0\n\t" - "srlx %L0, 32, %H0" - : "=r" (ret) - : "r" (val), "r" (amt) - : "g1"); - return ret; -} - -notrace static __always_inline u64 vread_tick(void) -{ - register unsigned long long ret asm("o4"); - - __asm__ __volatile__("rd %%tick, %L0\n\t" - "srlx %L0, 32, %H0" - : "=r" (ret)); - return ret; -} - -notrace static __always_inline u64 vread_tick_stick(void) -{ - register unsigned long long ret asm("o4"); - - __asm__ __volatile__("rd %%asr24, %L0\n\t" - "srlx %L0, 32, %H0" - : "=r" (ret)); - return ret; -} -#endif -notrace static __always_inline u64 vgetsns(struct vvar_data *vvar) +int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts) { - u64 v; - u64 cycles; - - cycles = vread_tick(); - v = (cycles - vvar->clock.cycle_last) & vvar->clock.mask; - return v * vvar->clock.mult; + return __cvdso_clock_gettime32(clock, ts); } -notrace static __always_inline u64 vgetsns_stick(struct vvar_data *vvar) -{ - u64 v; - u64 cycles; +int clock_gettime(clockid_t, struct old_timespec32 *) + __weak __alias(__vdso_clock_gettime); - cycles = vread_tick_stick(); - v = (cycles - vvar->clock.cycle_last) & vvar->clock.mask; - return v * vvar->clock.mult; -} - -notrace static __always_inline int do_realtime(struct vvar_data *vvar, - struct __kernel_old_timespec *ts) +int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts) { - unsigned long seq; - u64 ns; - - do { - seq = vvar_read_begin(vvar); - ts->tv_sec = vvar->wall_time_sec; - ns = vvar->wall_time_snsec; - ns += vgetsns(vvar); - ns = __shr64(ns, vvar->clock.shift); - } while (unlikely(vvar_read_retry(vvar, seq))); - - ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; - - return 0; + return __cvdso_clock_gettime(clock, ts); } -notrace static __always_inline int do_realtime_stick(struct vvar_data *vvar, - struct __kernel_old_timespec *ts) -{ - unsigned long seq; - u64 ns; - - do { - seq = vvar_read_begin(vvar); - ts->tv_sec = vvar->wall_time_sec; 
- ns = vvar->wall_time_snsec; - ns += vgetsns_stick(vvar); - ns = __shr64(ns, vvar->clock.shift); - } while (unlikely(vvar_read_retry(vvar, seq))); - - ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; +int clock_gettime64(clockid_t, struct __kernel_timespec *) + __weak __alias(__vdso_clock_gettime64); - return 0; -} - -notrace static __always_inline int do_monotonic(struct vvar_data *vvar, - struct __kernel_old_timespec *ts) -{ - unsigned long seq; - u64 ns; - - do { - seq = vvar_read_begin(vvar); - ts->tv_sec = vvar->monotonic_time_sec; - ns = vvar->monotonic_time_snsec; - ns += vgetsns(vvar); - ns = __shr64(ns, vvar->clock.shift); - } while (unlikely(vvar_read_retry(vvar, seq))); - - ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; - - return 0; -} - -notrace static __always_inline int do_monotonic_stick(struct vvar_data *vvar, - struct __kernel_old_timespec *ts) -{ - unsigned long seq; - u64 ns; - - do { - seq = vvar_read_begin(vvar); - ts->tv_sec = vvar->monotonic_time_sec; - ns = vvar->monotonic_time_snsec; - ns += vgetsns_stick(vvar); - ns = __shr64(ns, vvar->clock.shift); - } while (unlikely(vvar_read_retry(vvar, seq))); - - ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; - - return 0; -} - -notrace static int do_realtime_coarse(struct vvar_data *vvar, - struct __kernel_old_timespec *ts) -{ - unsigned long seq; - - do { - seq = vvar_read_begin(vvar); - ts->tv_sec = vvar->wall_time_coarse_sec; - ts->tv_nsec = vvar->wall_time_coarse_nsec; - } while (unlikely(vvar_read_retry(vvar, seq))); - return 0; -} - -notrace static int do_monotonic_coarse(struct vvar_data *vvar, - struct __kernel_old_timespec *ts) -{ - unsigned long seq; - - do { - seq = vvar_read_begin(vvar); - ts->tv_sec = vvar->monotonic_time_coarse_sec; - ts->tv_nsec = vvar->monotonic_time_coarse_nsec; - } while (unlikely(vvar_read_retry(vvar, seq))); - - return 0; -} - -notrace int -__vdso_clock_gettime(clockid_t 
clock, struct __kernel_old_timespec *ts) -{ - struct vvar_data *vvd = get_vvar_data(); - - switch (clock) { - case CLOCK_REALTIME: - if (unlikely(vvd->vclock_mode == VCLOCK_NONE)) - break; - return do_realtime(vvd, ts); - case CLOCK_MONOTONIC: - if (unlikely(vvd->vclock_mode == VCLOCK_NONE)) - break; - return do_monotonic(vvd, ts); - case CLOCK_REALTIME_COARSE: - return do_realtime_coarse(vvd, ts); - case CLOCK_MONOTONIC_COARSE: - return do_monotonic_coarse(vvd, ts); - } - /* - * Unknown clock ID ? Fall back to the syscall. - */ - return vdso_fallback_gettime(clock, ts); -} -int -clock_gettime(clockid_t, struct __kernel_old_timespec *) - __attribute__((weak, alias("__vdso_clock_gettime"))); - -notrace int -__vdso_clock_gettime_stick(clockid_t clock, struct __kernel_old_timespec *ts) -{ - struct vvar_data *vvd = get_vvar_data(); - - switch (clock) { - case CLOCK_REALTIME: - if (unlikely(vvd->vclock_mode == VCLOCK_NONE)) - break; - return do_realtime_stick(vvd, ts); - case CLOCK_MONOTONIC: - if (unlikely(vvd->vclock_mode == VCLOCK_NONE)) - break; - return do_monotonic_stick(vvd, ts); - case CLOCK_REALTIME_COARSE: - return do_realtime_coarse(vvd, ts); - case CLOCK_MONOTONIC_COARSE: - return do_monotonic_coarse(vvd, ts); - } - /* - * Unknown clock ID ? Fall back to the syscall. - */ - return vdso_fallback_gettime(clock, ts); -} - -notrace int -__vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) -{ - struct vvar_data *vvd = get_vvar_data(); - - if (likely(vvd->vclock_mode != VCLOCK_NONE)) { - if (likely(tv != NULL)) { - union tstv_t { - struct __kernel_old_timespec ts; - struct __kernel_old_timeval tv; - } *tstv = (union tstv_t *) tv; - do_realtime(vvd, &tstv->ts); - /* - * Assign before dividing to ensure that the division is - * done in the type of tv_usec, not tv_nsec. - * - * There cannot be > 1 billion usec in a second: - * do_realtime() has already distributed such overflow - * into tv_sec. So we can assign it to an int safely. 
- */ - tstv->tv.tv_usec = tstv->ts.tv_nsec; - tstv->tv.tv_usec /= 1000; - } - if (unlikely(tz != NULL)) { - /* Avoid memcpy. Some old compilers fail to inline it */ - tz->tz_minuteswest = vvd->tz_minuteswest; - tz->tz_dsttime = vvd->tz_dsttime; - } - return 0; - } - return vdso_fallback_gettimeofday(tv, tz); -} -int -gettimeofday(struct __kernel_old_timeval *, struct timezone *) - __attribute__((weak, alias("__vdso_gettimeofday"))); - -notrace int -__vdso_gettimeofday_stick(struct __kernel_old_timeval *tv, struct timezone *tz) -{ - struct vvar_data *vvd = get_vvar_data(); - - if (likely(vvd->vclock_mode != VCLOCK_NONE)) { - if (likely(tv != NULL)) { - union tstv_t { - struct __kernel_old_timespec ts; - struct __kernel_old_timeval tv; - } *tstv = (union tstv_t *) tv; - do_realtime_stick(vvd, &tstv->ts); - /* - * Assign before dividing to ensure that the division is - * done in the type of tv_usec, not tv_nsec. - * - * There cannot be > 1 billion usec in a second: - * do_realtime() has already distributed such overflow - * into tv_sec. So we can assign it to an int safely. - */ - tstv->tv.tv_usec = tstv->ts.tv_nsec; - tstv->tv.tv_usec /= 1000; - } - if (unlikely(tz != NULL)) { - /* Avoid memcpy. Some old compilers fail to inline it */ - tz->tz_minuteswest = vvd->tz_minuteswest; - tz->tz_dsttime = vvd->tz_dsttime; - } - return 0; - } - return vdso_fallback_gettimeofday(tv, tz); -} +#endif diff --git a/arch/sparc/vdso/vdso-layout.lds.S b/arch/sparc/vdso/vdso-layout.lds.S index d31e57e8a3bb..180e5d0ee071 100644 --- a/arch/sparc/vdso/vdso-layout.lds.S +++ b/arch/sparc/vdso/vdso-layout.lds.S @@ -4,15 +4,9 @@ * This script controls its layout. */ -#if defined(BUILD_VDSO64) -# define SHDR_SIZE 64 -#elif defined(BUILD_VDSO32) -# define SHDR_SIZE 40 -#else -# error unknown VDSO target -#endif - -#define NUM_FAKE_SHDRS 7 +#include <vdso/datapage.h> +#include <vdso/page.h> +#include <asm/vdso/vsyscall.h> SECTIONS { @@ -23,8 +17,7 @@ SECTIONS * segment. 
Page size is 8192 for both 64-bit and 32-bit vdso binaries */ - vvar_start = . -8192; - vvar_data = vvar_start; + VDSO_VVAR_SYMS . = SIZEOF_HEADERS; @@ -47,19 +40,8 @@ SECTIONS *(.bss*) *(.dynbss*) *(.gnu.linkonce.b.*) - - /* - * Ideally this would live in a C file: kept in here for - * compatibility with x86-64. - */ - VDSO_FAKE_SECTION_TABLE_START = .; - . = . + NUM_FAKE_SHDRS * SHDR_SIZE; - VDSO_FAKE_SECTION_TABLE_END = .; } :text - .fake_shstrtab : { *(.fake_shstrtab) } :text - - .note : { *(.note.*) } :text :note .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr diff --git a/arch/sparc/vdso/vdso.lds.S b/arch/sparc/vdso/vdso.lds.S index 629ab6900df7..f3caa29a331c 100644 --- a/arch/sparc/vdso/vdso.lds.S +++ b/arch/sparc/vdso/vdso.lds.S @@ -18,10 +18,8 @@ VERSION { global: clock_gettime; __vdso_clock_gettime; - __vdso_clock_gettime_stick; gettimeofday; __vdso_gettimeofday; - __vdso_gettimeofday_stick; local: *; }; } diff --git a/arch/sparc/vdso/vdso2c.c b/arch/sparc/vdso/vdso2c.c index dc81240aab6f..e5c61214a0e2 100644 --- a/arch/sparc/vdso/vdso2c.c +++ b/arch/sparc/vdso/vdso2c.c @@ -58,28 +58,6 @@ const char *outfilename; -/* Symbols that we need in vdso2c. */ -enum { - sym_vvar_start, - sym_VDSO_FAKE_SECTION_TABLE_START, - sym_VDSO_FAKE_SECTION_TABLE_END, -}; - -struct vdso_sym { - const char *name; - int export; -}; - -struct vdso_sym required_syms[] = { - [sym_vvar_start] = {"vvar_start", 1}, - [sym_VDSO_FAKE_SECTION_TABLE_START] = { - "VDSO_FAKE_SECTION_TABLE_START", 0 - }, - [sym_VDSO_FAKE_SECTION_TABLE_END] = { - "VDSO_FAKE_SECTION_TABLE_END", 0 - }, -}; - __attribute__((format(printf, 1, 2))) __attribute__((noreturn)) static void fail(const char *format, ...) { @@ -119,8 +97,6 @@ static void fail(const char *format, ...) 
#define PUT_BE(x, val) \ PBE(x, val, 64, PBE(x, val, 32, PBE(x, val, 16, LAST_PBE(x, val)))) -#define NSYMS ARRAY_SIZE(required_syms) - #define BITSFUNC3(name, bits, suffix) name##bits##suffix #define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix) #define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, ) diff --git a/arch/sparc/vdso/vdso2c.h b/arch/sparc/vdso/vdso2c.h index 60d69acc748f..bad6a0593f4c 100644 --- a/arch/sparc/vdso/vdso2c.h +++ b/arch/sparc/vdso/vdso2c.h @@ -17,11 +17,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, unsigned long mapping_size; int i; unsigned long j; - ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr; + ELF(Shdr) *symtab_hdr = NULL; ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr; ELF(Dyn) *dyn = 0, *dyn_end = 0; - INT_BITS syms[NSYMS] = {}; - ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_BE(&hdr->e_phoff)); /* Walk the segment table. */ @@ -72,42 +70,6 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, if (!symtab_hdr) fail("no symbol table\n"); - strtab_hdr = raw_addr + GET_BE(&hdr->e_shoff) + - GET_BE(&hdr->e_shentsize) * GET_BE(&symtab_hdr->sh_link); - - /* Walk the symbol table */ - for (i = 0; - i < GET_BE(&symtab_hdr->sh_size) / GET_BE(&symtab_hdr->sh_entsize); - i++) { - int k; - - ELF(Sym) *sym = raw_addr + GET_BE(&symtab_hdr->sh_offset) + - GET_BE(&symtab_hdr->sh_entsize) * i; - const char *name = raw_addr + GET_BE(&strtab_hdr->sh_offset) + - GET_BE(&sym->st_name); - - for (k = 0; k < NSYMS; k++) { - if (!strcmp(name, required_syms[k].name)) { - if (syms[k]) { - fail("duplicate symbol %s\n", - required_syms[k].name); - } - - /* - * Careful: we use negative addresses, but - * st_value is unsigned, so we rely - * on syms[k] being a signed type of the - * correct width. - */ - syms[k] = GET_BE(&sym->st_value); - } - } - } - - /* Validate mapping addresses. 
*/ - if (syms[sym_vvar_start] % 8192) - fail("vvar_begin must be a multiple of 8192\n"); - if (!name) { fwrite(stripped_addr, stripped_len, 1, outfile); return; @@ -133,10 +95,5 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, fprintf(outfile, "const struct vdso_image %s_builtin = {\n", name); fprintf(outfile, "\t.data = raw_data,\n"); fprintf(outfile, "\t.size = %lu,\n", mapping_size); - for (i = 0; i < NSYMS; i++) { - if (required_syms[i].export && syms[i]) - fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n", - required_syms[i].name, (int64_t)syms[i]); - } fprintf(outfile, "};\n"); } diff --git a/arch/sparc/vdso/vdso32/vdso32.lds.S b/arch/sparc/vdso/vdso32/vdso32.lds.S index 218930fdff03..a14e4f77e6f2 100644 --- a/arch/sparc/vdso/vdso32/vdso32.lds.S +++ b/arch/sparc/vdso/vdso32/vdso32.lds.S @@ -17,10 +17,10 @@ VERSION { global: clock_gettime; __vdso_clock_gettime; - __vdso_clock_gettime_stick; + clock_gettime64; + __vdso_clock_gettime64; gettimeofday; __vdso_gettimeofday; - __vdso_gettimeofday_stick; local: *; }; } diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c index c454689ce5fa..60029d60f4d3 100644 --- a/arch/sparc/vdso/vma.c +++ b/arch/sparc/vdso/vma.c @@ -16,17 +16,16 @@ #include <linux/linkage.h> #include <linux/random.h> #include <linux/elf.h> +#include <linux/vdso_datastore.h> #include <asm/cacheflush.h> #include <asm/spitfire.h> #include <asm/vdso.h> -#include <asm/vvar.h> #include <asm/page.h> -unsigned int __read_mostly vdso_enabled = 1; +#include <vdso/datapage.h> +#include <asm/vdso/vsyscall.h> -static struct vm_special_mapping vvar_mapping = { - .name = "[vvar]" -}; +unsigned int __read_mostly vdso_enabled = 1; #ifdef CONFIG_SPARC64 static struct vm_special_mapping vdso_mapping64 = { @@ -40,207 +39,8 @@ static struct vm_special_mapping vdso_mapping32 = { }; #endif -struct vvar_data *vvar_data; - -struct vdso_elfinfo32 { - Elf32_Ehdr *hdr; - Elf32_Sym *dynsym; - unsigned long dynsymsize; - const char *dynstr; - unsigned long text; 
-}; - -struct vdso_elfinfo64 { - Elf64_Ehdr *hdr; - Elf64_Sym *dynsym; - unsigned long dynsymsize; - const char *dynstr; - unsigned long text; -}; - -struct vdso_elfinfo { - union { - struct vdso_elfinfo32 elf32; - struct vdso_elfinfo64 elf64; - } u; -}; - -static void *one_section64(struct vdso_elfinfo64 *e, const char *name, - unsigned long *size) -{ - const char *snames; - Elf64_Shdr *shdrs; - unsigned int i; - - shdrs = (void *)e->hdr + e->hdr->e_shoff; - snames = (void *)e->hdr + shdrs[e->hdr->e_shstrndx].sh_offset; - for (i = 1; i < e->hdr->e_shnum; i++) { - if (!strcmp(snames+shdrs[i].sh_name, name)) { - if (size) - *size = shdrs[i].sh_size; - return (void *)e->hdr + shdrs[i].sh_offset; - } - } - return NULL; -} - -static int find_sections64(const struct vdso_image *image, struct vdso_elfinfo *_e) -{ - struct vdso_elfinfo64 *e = &_e->u.elf64; - - e->hdr = image->data; - e->dynsym = one_section64(e, ".dynsym", &e->dynsymsize); - e->dynstr = one_section64(e, ".dynstr", NULL); - - if (!e->dynsym || !e->dynstr) { - pr_err("VDSO64: Missing symbol sections.\n"); - return -ENODEV; - } - return 0; -} - -static Elf64_Sym *find_sym64(const struct vdso_elfinfo64 *e, const char *name) -{ - unsigned int i; - - for (i = 0; i < (e->dynsymsize / sizeof(Elf64_Sym)); i++) { - Elf64_Sym *s = &e->dynsym[i]; - if (s->st_name == 0) - continue; - if (!strcmp(e->dynstr + s->st_name, name)) - return s; - } - return NULL; -} - -static int patchsym64(struct vdso_elfinfo *_e, const char *orig, - const char *new) -{ - struct vdso_elfinfo64 *e = &_e->u.elf64; - Elf64_Sym *osym = find_sym64(e, orig); - Elf64_Sym *nsym = find_sym64(e, new); - - if (!nsym || !osym) { - pr_err("VDSO64: Missing symbols.\n"); - return -ENODEV; - } - osym->st_value = nsym->st_value; - osym->st_size = nsym->st_size; - osym->st_info = nsym->st_info; - osym->st_other = nsym->st_other; - osym->st_shndx = nsym->st_shndx; - - return 0; -} - -static void *one_section32(struct vdso_elfinfo32 *e, const char *name, - 
unsigned long *size) -{ - const char *snames; - Elf32_Shdr *shdrs; - unsigned int i; - - shdrs = (void *)e->hdr + e->hdr->e_shoff; - snames = (void *)e->hdr + shdrs[e->hdr->e_shstrndx].sh_offset; - for (i = 1; i < e->hdr->e_shnum; i++) { - if (!strcmp(snames+shdrs[i].sh_name, name)) { - if (size) - *size = shdrs[i].sh_size; - return (void *)e->hdr + shdrs[i].sh_offset; - } - } - return NULL; -} - -static int find_sections32(const struct vdso_image *image, struct vdso_elfinfo *_e) -{ - struct vdso_elfinfo32 *e = &_e->u.elf32; - - e->hdr = image->data; - e->dynsym = one_section32(e, ".dynsym", &e->dynsymsize); - e->dynstr = one_section32(e, ".dynstr", NULL); - - if (!e->dynsym || !e->dynstr) { - pr_err("VDSO32: Missing symbol sections.\n"); - return -ENODEV; - } - return 0; -} - -static Elf32_Sym *find_sym32(const struct vdso_elfinfo32 *e, const char *name) -{ - unsigned int i; - - for (i = 0; i < (e->dynsymsize / sizeof(Elf32_Sym)); i++) { - Elf32_Sym *s = &e->dynsym[i]; - if (s->st_name == 0) - continue; - if (!strcmp(e->dynstr + s->st_name, name)) - return s; - } - return NULL; -} - -static int patchsym32(struct vdso_elfinfo *_e, const char *orig, - const char *new) -{ - struct vdso_elfinfo32 *e = &_e->u.elf32; - Elf32_Sym *osym = find_sym32(e, orig); - Elf32_Sym *nsym = find_sym32(e, new); - - if (!nsym || !osym) { - pr_err("VDSO32: Missing symbols.\n"); - return -ENODEV; - } - osym->st_value = nsym->st_value; - osym->st_size = nsym->st_size; - osym->st_info = nsym->st_info; - osym->st_other = nsym->st_other; - osym->st_shndx = nsym->st_shndx; - - return 0; -} - -static int find_sections(const struct vdso_image *image, struct vdso_elfinfo *e, - bool elf64) -{ - if (elf64) - return find_sections64(image, e); - else - return find_sections32(image, e); -} - -static int patch_one_symbol(struct vdso_elfinfo *e, const char *orig, - const char *new_target, bool elf64) -{ - if (elf64) - return patchsym64(e, orig, new_target); - else - return patchsym32(e, orig, 
new_target); -} - -static int stick_patch(const struct vdso_image *image, struct vdso_elfinfo *e, bool elf64) -{ - int err; - - err = find_sections(image, e, elf64); - if (err) - return err; - - err = patch_one_symbol(e, - "__vdso_gettimeofday", - "__vdso_gettimeofday_stick", elf64); - if (err) - return err; - - return patch_one_symbol(e, - "__vdso_clock_gettime", - "__vdso_clock_gettime_stick", elf64); - return 0; -} - /* - * Allocate pages for the vdso and vvar, and copy in the vdso text from the + * Allocate pages for the vdso and copy in the vdso text from the * kernel image. */ static int __init init_vdso_image(const struct vdso_image *image, @@ -248,16 +48,8 @@ static int __init init_vdso_image(const struct vdso_image *image, bool elf64) { int cnpages = (image->size) / PAGE_SIZE; - struct page *dp, **dpp = NULL; struct page *cp, **cpp = NULL; - struct vdso_elfinfo ei; - int i, dnpages = 0; - - if (tlb_type != spitfire) { - int err = stick_patch(image, &ei, elf64); - if (err) - return err; - } + int i; /* * First, the vdso text. This is initialied data, an integral number of @@ -280,31 +72,6 @@ static int __init init_vdso_image(const struct vdso_image *image, copy_page(page_address(cp), image->data + i * PAGE_SIZE); } - /* - * Now the vvar page. This is uninitialized data. 
- */ - - if (vvar_data == NULL) { - dnpages = (sizeof(struct vvar_data) / PAGE_SIZE) + 1; - if (WARN_ON(dnpages != 1)) - goto oom; - dpp = kzalloc_objs(struct page *, dnpages); - vvar_mapping.pages = dpp; - - if (!dpp) - goto oom; - - dp = alloc_page(GFP_KERNEL); - if (!dp) - goto oom; - - dpp[0] = dp; - vvar_data = page_address(dp); - memset(vvar_data, 0, PAGE_SIZE); - - vvar_data->seq = 0; - } - return 0; oom: if (cpp != NULL) { @@ -316,15 +83,6 @@ static int __init init_vdso_image(const struct vdso_image *image, vdso_mapping->pages = NULL; } - if (dpp != NULL) { - for (i = 0; i < dnpages; i++) { - if (dpp[i] != NULL) - __free_page(dpp[i]); - } - kfree(dpp); - vvar_mapping.pages = NULL; - } - pr_warn("Cannot allocate vdso\n"); vdso_enabled = 0; return -ENOMEM; @@ -359,9 +117,12 @@ static unsigned long vdso_addr(unsigned long start, unsigned int len) return start + (offset << PAGE_SHIFT); } +static_assert(VDSO_NR_PAGES == __VDSO_PAGES); + static int map_vdso(const struct vdso_image *image, struct vm_special_mapping *vdso_mapping) { + const size_t area_size = image->size + VDSO_NR_PAGES * PAGE_SIZE; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long text_start, addr = 0; @@ -374,23 +135,20 @@ static int map_vdso(const struct vdso_image *image, * region is free. 
*/ if (current->flags & PF_RANDOMIZE) { - addr = get_unmapped_area(NULL, 0, - image->size - image->sym_vvar_start, - 0, 0); + addr = get_unmapped_area(NULL, 0, area_size, 0, 0); if (IS_ERR_VALUE(addr)) { ret = addr; goto up_fail; } - addr = vdso_addr(addr, image->size - image->sym_vvar_start); + addr = vdso_addr(addr, area_size); } - addr = get_unmapped_area(NULL, addr, - image->size - image->sym_vvar_start, 0, 0); + addr = get_unmapped_area(NULL, addr, area_size, 0, 0); if (IS_ERR_VALUE(addr)) { ret = addr; goto up_fail; } - text_start = addr - image->sym_vvar_start; + text_start = addr + VDSO_NR_PAGES * PAGE_SIZE; current->mm->context.vdso = (void __user *)text_start; /* @@ -408,11 +166,7 @@ static int map_vdso(const struct vdso_image *image, goto up_fail; } - vma = _install_special_mapping(mm, - addr, - -image->sym_vvar_start, - VM_READ|VM_MAYREAD, - &vvar_mapping); + vma = vdso_install_vvar_mapping(mm, addr); if (IS_ERR(vma)) { ret = PTR_ERR(vma); diff --git a/arch/x86/entry/vdso/vdso32/Makefile b/arch/x86/entry/vdso/vdso32/Makefile index add6afb484ba..ded4fc6a48cd 100644 --- a/arch/x86/entry/vdso/vdso32/Makefile +++ b/arch/x86/entry/vdso/vdso32/Makefile @@ -15,6 +15,10 @@ flags-y := -DBUILD_VDSO32 -m32 -mregparm=0 flags-$(CONFIG_X86_64) += -include $(src)/fake_32bit_build.h flags-remove-y := -m64 +# Checker flags +CHECKFLAGS := $(subst -m64,-m32,$(CHECKFLAGS)) +CHECKFLAGS := $(subst -D__x86_64__,-D__i386__,$(CHECKFLAGS)) + # The location of this include matters! 
include $(src)/../common/Makefile.include diff --git a/drivers/char/random.c b/drivers/char/random.c index 7ff4d29911fd..b4da1fb976c1 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -56,11 +56,7 @@ #include <linux/sched/isolation.h> #include <crypto/chacha.h> #include <crypto/blake2s.h> -#ifdef CONFIG_VDSO_GETRANDOM -#include <vdso/getrandom.h> #include <vdso/datapage.h> -#include <vdso/vsyscall.h> -#endif #include <asm/archrandom.h> #include <asm/processor.h> #include <asm/irq.h> @@ -269,7 +265,7 @@ static void crng_reseed(struct work_struct *work) if (next_gen == ULONG_MAX) ++next_gen; WRITE_ONCE(base_crng.generation, next_gen); -#ifdef CONFIG_VDSO_GETRANDOM + /* base_crng.generation's invalid value is ULONG_MAX, while * vdso_k_rng_data->generation's invalid value is 0, so add one to the * former to arrive at the latter. Use smp_store_release so that this @@ -283,8 +279,9 @@ static void crng_reseed(struct work_struct *work) * because the vDSO side only checks whether the value changed, without * actually using or interpreting the value. 
*/ - smp_store_release((unsigned long *)&vdso_k_rng_data->generation, next_gen + 1); -#endif + if (IS_ENABLED(CONFIG_VDSO_GETRANDOM)) + smp_store_release((unsigned long *)&vdso_k_rng_data->generation, next_gen + 1); + if (!static_branch_likely(&crng_is_ready)) crng_init = CRNG_READY; spin_unlock_irqrestore(&base_crng.lock, flags); @@ -734,9 +731,8 @@ static void __cold _credit_init_bits(size_t bits) if (system_dfl_wq) queue_work(system_dfl_wq, &set_ready); atomic_notifier_call_chain(&random_ready_notifier, 0, NULL); -#ifdef CONFIG_VDSO_GETRANDOM - WRITE_ONCE(vdso_k_rng_data->is_ready, true); -#endif + if (IS_ENABLED(CONFIG_VDSO_GETRANDOM)) + WRITE_ONCE(vdso_k_rng_data->is_ready, true); wake_up_interruptible(&crng_init_wait); kill_fasync(&fasync, SIGIO, POLL_IN); pr_notice("crng init done\n"); diff --git a/include/asm-generic/bitsperlong.h b/include/asm-generic/bitsperlong.h index 1023e2a4bd37..90e8aeebfd2f 100644 --- a/include/asm-generic/bitsperlong.h +++ b/include/asm-generic/bitsperlong.h @@ -19,6 +19,15 @@ #error Inconsistent word size. Check asm/bitsperlong.h #endif +#if __CHAR_BIT__ * __SIZEOF_LONG__ != __BITS_PER_LONG +#error Inconsistent word size. Check asm/bitsperlong.h +#endif + +#ifndef __ASSEMBLER__ +_Static_assert(sizeof(long) * 8 == __BITS_PER_LONG, + "Inconsistent word size. 
Check asm/bitsperlong.h"); +#endif + #ifndef BITS_PER_LONG_LONG #define BITS_PER_LONG_LONG 64 #endif diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index ccf5c0ca26b7..7c38190b10bf 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -25,8 +25,7 @@ struct clocksource_base; struct clocksource; struct module; -#if defined(CONFIG_ARCH_CLOCKSOURCE_DATA) || \ - defined(CONFIG_GENERIC_GETTIMEOFDAY) +#if defined(CONFIG_GENERIC_GETTIMEOFDAY) #include <asm/clocksource.h> #endif @@ -103,9 +102,6 @@ struct clocksource { u32 shift; u64 max_idle_ns; u32 maxadj; -#ifdef CONFIG_ARCH_CLOCKSOURCE_DATA - struct arch_clocksource_data archdata; -#endif u64 max_cycles; u64 max_raw_delta; const char *name; diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index c514d0e5a45c..58bd9728df58 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -8,6 +8,7 @@ #include <linux/ns_common.h> #include <linux/err.h> #include <linux/time64.h> +#include <linux/cleanup.h> struct user_namespace; extern struct user_namespace init_user_ns; @@ -25,7 +26,9 @@ struct time_namespace { struct ucounts *ucounts; struct ns_common ns; struct timens_offsets offsets; +#ifdef CONFIG_TIME_NS_VDSO struct page *vvar_page; +#endif /* If set prevents changing offsets after any task joined namespace. 
*/ bool frozen_offsets; } __randomize_layout; @@ -38,9 +41,6 @@ static inline struct time_namespace *to_time_ns(struct ns_common *ns) return container_of(ns, struct time_namespace, ns); } void __init time_ns_init(void); -extern int vdso_join_timens(struct task_struct *task, - struct time_namespace *ns); -extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns); static inline struct time_namespace *get_time_ns(struct time_namespace *ns) { @@ -53,7 +53,6 @@ struct time_namespace *copy_time_ns(u64 flags, struct time_namespace *old_ns); void free_time_ns(struct time_namespace *ns); void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk); -struct page *find_timens_vvar_page(struct vm_area_struct *vma); static inline void put_time_ns(struct time_namespace *ns) { @@ -117,17 +116,6 @@ static inline void __init time_ns_init(void) { } -static inline int vdso_join_timens(struct task_struct *task, - struct time_namespace *ns) -{ - return 0; -} - -static inline void timens_commit(struct task_struct *tsk, - struct time_namespace *ns) -{ -} - static inline struct time_namespace *get_time_ns(struct time_namespace *ns) { return NULL; @@ -154,11 +142,6 @@ static inline void timens_on_fork(struct nsproxy *nsproxy, return; } -static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) -{ - return NULL; -} - static inline void timens_add_monotonic(struct timespec64 *ts) { } static inline void timens_add_boottime(struct timespec64 *ts) { } @@ -175,4 +158,20 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim) } #endif +#ifdef CONFIG_TIME_NS_VDSO +extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns); +struct page *find_timens_vvar_page(struct vm_area_struct *vma); +#else /* !CONFIG_TIME_NS_VDSO */ +static inline void timens_commit(struct task_struct *tsk, struct time_namespace *ns) +{ +} + +static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} 
+#endif /* CONFIG_TIME_NS_VDSO */ + +DEFINE_FREE(time_ns, struct time_namespace *, if (_T) put_time_ns(_T)) + #endif /* _LINUX_TIMENS_H */ diff --git a/include/linux/vdso_datastore.h b/include/linux/vdso_datastore.h index a91fa24b06e0..0b530428db71 100644 --- a/include/linux/vdso_datastore.h +++ b/include/linux/vdso_datastore.h @@ -2,9 +2,15 @@ #ifndef _LINUX_VDSO_DATASTORE_H #define _LINUX_VDSO_DATASTORE_H +#ifdef CONFIG_HAVE_GENERIC_VDSO #include <linux/mm_types.h> extern const struct vm_special_mapping vdso_vvar_mapping; struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned long addr); +void __init vdso_setup_data_pages(void); +#else /* !CONFIG_HAVE_GENERIC_VDSO */ +static inline void vdso_setup_data_pages(void) { } +#endif /* CONFIG_HAVE_GENERIC_VDSO */ + #endif /* _LINUX_VDSO_DATASTORE_H */ diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 23c39b96190f..5977723fb3b5 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -4,24 +4,16 @@ #ifndef __ASSEMBLY__ -#include <linux/compiler.h> +#include <linux/types.h> + #include <uapi/linux/bits.h> #include <uapi/linux/time.h> -#include <uapi/linux/types.h> -#include <uapi/asm-generic/errno-base.h> #include <vdso/align.h> #include <vdso/bits.h> #include <vdso/cache.h> -#include <vdso/clocksource.h> -#include <vdso/ktime.h> -#include <vdso/limits.h> -#include <vdso/math64.h> #include <vdso/page.h> -#include <vdso/processor.h> #include <vdso/time.h> -#include <vdso/time32.h> -#include <vdso/time64.h> #ifdef CONFIG_ARCH_HAS_VDSO_TIME_DATA #include <asm/vdso/time_data.h> @@ -80,8 +72,8 @@ struct vdso_timestamp { * @mask: clocksource mask * @mult: clocksource multiplier * @shift: clocksource shift - * @basetime[clock_id]: basetime per clock_id - * @offset[clock_id]: time namespace offset per clock_id + * @basetime: basetime per clock_id + * @offset: time namespace offset per clock_id * * See also struct vdso_time_data for basic access and ordering information as * 
struct vdso_clock is used there. @@ -184,17 +176,6 @@ enum vdso_pages { VDSO_NR_PAGES }; -/* - * The generic vDSO implementation requires that gettimeofday.h - * provides: - * - __arch_get_hw_counter(): to get the hw counter based on the - * clock_mode. - * - gettimeofday_fallback(): fallback for gettimeofday. - * - clock_gettime_fallback(): fallback for clock_gettime. - * - clock_getres_fallback(): fallback for clock_getres. - */ -#include <asm/vdso/gettimeofday.h> - #else /* !__ASSEMBLY__ */ #ifdef CONFIG_VDSO_GETRANDOM diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h index 1a5ee9d9052c..a3bf4f1c0d37 100644 --- a/include/vdso/helpers.h +++ b/include/vdso/helpers.h @@ -6,6 +6,13 @@ #include <asm/barrier.h> #include <vdso/datapage.h> +#include <vdso/processor.h> +#include <vdso/clocksource.h> + +static __always_inline bool vdso_is_timens_clock(const struct vdso_clock *vc) +{ + return IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS; +} static __always_inline u32 vdso_read_begin(const struct vdso_clock *vc) { @@ -18,6 +25,28 @@ static __always_inline u32 vdso_read_begin(const struct vdso_clock *vc) return seq; } +/* + * Variant of vdso_read_begin() to handle VDSO_CLOCKMODE_TIMENS. + * + * Time namespace enabled tasks have a special VVAR page installed which has + * vc->seq set to 1 and vc->clock_mode set to VDSO_CLOCKMODE_TIMENS. For non + * time namespace affected tasks this does not affect performance because if + * vc->seq is odd, i.e. a concurrent update is in progress the extra check for + * vc->clock_mode is just a few extra instructions while spin waiting for + * vc->seq to become even again. 
+ */ +static __always_inline bool vdso_read_begin_timens(const struct vdso_clock *vc, u32 *seq) +{ + while (unlikely((*seq = READ_ONCE(vc->seq)) & 1)) { + if (vdso_is_timens_clock(vc)) + return true; + cpu_relax(); + } + smp_rmb(); + + return false; +} + static __always_inline u32 vdso_read_retry(const struct vdso_clock *vc, u32 start) { @@ -25,7 +54,7 @@ static __always_inline u32 vdso_read_retry(const struct vdso_clock *vc, smp_rmb(); seq = READ_ONCE(vc->seq); - return seq != start; + return unlikely(seq != start); } static __always_inline void vdso_write_seq_begin(struct vdso_clock *vc) diff --git a/init/Kconfig b/init/Kconfig index 87af5d7aa4c0..43875ef36752 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1400,12 +1400,14 @@ config UTS_NS config TIME_NS bool "TIME namespace" - depends on GENERIC_GETTIMEOFDAY default y help In this namespace boottime and monotonic clocks can be set. The time will keep going with the same pace. +config TIME_NS_VDSO + def_bool TIME_NS && GENERIC_GETTIMEOFDAY + config IPC_NS bool "IPC namespace" depends on (SYSVIPC || POSIX_MQUEUE) diff --git a/init/main.c b/init/main.c index 9481e835406f..96f93bb06c49 100644 --- a/init/main.c +++ b/init/main.c @@ -106,6 +106,7 @@ #include <linux/ptdump.h> #include <linux/time_namespace.h> #include <linux/unaligned.h> +#include <linux/vdso_datastore.h> #include <net/net_namespace.h> #include <asm/io.h> @@ -1127,6 +1128,7 @@ void start_kernel(void) srcu_init(); hrtimers_init(); softirq_init(); + vdso_setup_data_pages(); timekeeping_init(); time_init(); diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 6a11964377e6..02aac7c5aa76 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -9,10 +9,6 @@ config CLOCKSOURCE_WATCHDOG bool -# Architecture has extra clocksource data -config ARCH_CLOCKSOURCE_DATA - bool - # Architecture has extra clocksource init called from registration config ARCH_CLOCKSOURCE_INIT bool diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 
f7d52d9543cc..eaf290c972f9 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -30,5 +30,6 @@ obj-$(CONFIG_GENERIC_GETTIMEOFDAY) += vsyscall.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o obj-$(CONFIG_TIME_NS) += namespace.o +obj-$(CONFIG_TIME_NS_VDSO) += namespace_vdso.o obj-$(CONFIG_TEST_CLOCKSOURCE_WATCHDOG) += clocksource-wdtest.o obj-$(CONFIG_TIME_KUNIT_TEST) += time_test.o diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 652744e00eb4..4bca3f78c8ea 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -18,8 +18,9 @@ #include <linux/cred.h> #include <linux/err.h> #include <linux/mm.h> +#include <linux/cleanup.h> -#include <vdso/datapage.h> +#include "namespace_internal.h" ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim, struct timens_offsets *ns_offsets) @@ -93,8 +94,8 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns, if (!ns) goto fail_dec; - ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); - if (!ns->vvar_page) + err = timens_vdso_alloc_vvar_page(ns); + if (err) goto fail_free; err = ns_common_init(ns); @@ -109,7 +110,7 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns, return ns; fail_free_page: - __free_page(ns->vvar_page); + timens_vdso_free_vvar_page(ns); fail_free: kfree(ns); fail_dec: @@ -138,117 +139,7 @@ struct time_namespace *copy_time_ns(u64 flags, return clone_time_ns(user_ns, old_ns); } -static struct timens_offset offset_from_ts(struct timespec64 off) -{ - struct timens_offset ret; - - ret.sec = off.tv_sec; - ret.nsec = off.tv_nsec; - - return ret; -} - -/* - * A time namespace VVAR page has the same layout as the VVAR page which - * contains the system wide VDSO data. 
- * - * For a normal task the VVAR pages are installed in the normal ordering: - * VVAR - * PVCLOCK - * HVCLOCK - * TIMENS <- Not really required - * - * Now for a timens task the pages are installed in the following order: - * TIMENS - * PVCLOCK - * HVCLOCK - * VVAR - * - * The check for vdso_clock->clock_mode is in the unlikely path of - * the seq begin magic. So for the non-timens case most of the time - * 'seq' is even, so the branch is not taken. - * - * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check - * for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the - * update to finish and for 'seq' to become even anyway. - * - * Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which - * enforces the time namespace handling path. - */ -static void timens_setup_vdso_clock_data(struct vdso_clock *vc, - struct time_namespace *ns) -{ - struct timens_offset *offset = vc->offset; - struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic); - struct timens_offset boottime = offset_from_ts(ns->offsets.boottime); - - vc->seq = 1; - vc->clock_mode = VDSO_CLOCKMODE_TIMENS; - offset[CLOCK_MONOTONIC] = monotonic; - offset[CLOCK_MONOTONIC_RAW] = monotonic; - offset[CLOCK_MONOTONIC_COARSE] = monotonic; - offset[CLOCK_BOOTTIME] = boottime; - offset[CLOCK_BOOTTIME_ALARM] = boottime; -} - -struct page *find_timens_vvar_page(struct vm_area_struct *vma) -{ - if (likely(vma->vm_mm == current->mm)) - return current->nsproxy->time_ns->vvar_page; - - /* - * VM_PFNMAP | VM_IO protect .fault() handler from being called - * through interfaces like /proc/$pid/mem or - * process_vm_{readv,writev}() as long as there's no .access() - * in special_mapping_vmops(). - * For more details check_vma_flags() and __access_remote_vm() - */ - - WARN(1, "vvar_page accessed remotely"); - - return NULL; -} - -/* - * Protects possibly multiple offsets writers racing each other - * and tasks entering the namespace. 
- */ -static DEFINE_MUTEX(offset_lock); - -static void timens_set_vvar_page(struct task_struct *task, - struct time_namespace *ns) -{ - struct vdso_time_data *vdata; - struct vdso_clock *vc; - unsigned int i; - - if (ns == &init_time_ns) - return; - - /* Fast-path, taken by every task in namespace except the first. */ - if (likely(ns->frozen_offsets)) - return; - - mutex_lock(&offset_lock); - /* Nothing to-do: vvar_page has been already initialized. */ - if (ns->frozen_offsets) - goto out; - - ns->frozen_offsets = true; - vdata = page_address(ns->vvar_page); - vc = vdata->clock_data; - - for (i = 0; i < CS_BASES; i++) - timens_setup_vdso_clock_data(&vc[i], ns); - - if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) { - for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++) - timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns); - } - -out: - mutex_unlock(&offset_lock); -} +DEFINE_MUTEX(timens_offset_lock); void free_time_ns(struct time_namespace *ns) { @@ -256,41 +147,39 @@ void free_time_ns(struct time_namespace *ns) dec_time_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_common_free(ns); - __free_page(ns->vvar_page); + timens_vdso_free_vvar_page(ns); /* Concurrent nstree traversal depends on a grace period. */ kfree_rcu(ns, ns.ns_rcu); } static struct ns_common *timens_get(struct task_struct *task) { - struct time_namespace *ns = NULL; + struct time_namespace *ns; struct nsproxy *nsproxy; - task_lock(task); + guard(task_lock)(task); nsproxy = task->nsproxy; - if (nsproxy) { - ns = nsproxy->time_ns; - get_time_ns(ns); - } - task_unlock(task); + if (!nsproxy) + return NULL; - return ns ? 
&ns->ns : NULL; + ns = nsproxy->time_ns; + get_time_ns(ns); + return &ns->ns; } static struct ns_common *timens_for_children_get(struct task_struct *task) { - struct time_namespace *ns = NULL; + struct time_namespace *ns; struct nsproxy *nsproxy; - task_lock(task); + guard(task_lock)(task); nsproxy = task->nsproxy; - if (nsproxy) { - ns = nsproxy->time_ns_for_children; - get_time_ns(ns); - } - task_unlock(task); + if (!nsproxy) + return NULL; - return ns ? &ns->ns : NULL; + ns = nsproxy->time_ns_for_children; + get_time_ns(ns); + return &ns->ns; } static void timens_put(struct ns_common *ns) @@ -298,12 +187,6 @@ static void timens_put(struct ns_common *ns) put_time_ns(to_time_ns(ns)); } -void timens_commit(struct task_struct *tsk, struct time_namespace *ns) -{ - timens_set_vvar_page(tsk, ns); - vdso_join_timens(tsk, ns); -} - static int timens_install(struct nsset *nsset, struct ns_common *new) { struct nsproxy *nsproxy = nsset->nsproxy; @@ -367,36 +250,33 @@ static void show_offset(struct seq_file *m, int clockid, struct timespec64 *ts) void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m) { - struct ns_common *ns; - struct time_namespace *time_ns; + struct time_namespace *time_ns __free(time_ns) = NULL; + struct ns_common *ns = timens_for_children_get(p); - ns = timens_for_children_get(p); if (!ns) return; + time_ns = to_time_ns(ns); show_offset(m, CLOCK_MONOTONIC, &time_ns->offsets.monotonic); show_offset(m, CLOCK_BOOTTIME, &time_ns->offsets.boottime); - put_time_ns(time_ns); } int proc_timens_set_offset(struct file *file, struct task_struct *p, struct proc_timens_offset *offsets, int noffsets) { - struct ns_common *ns; - struct time_namespace *time_ns; + struct time_namespace *time_ns __free(time_ns) = NULL; + struct ns_common *ns = timens_for_children_get(p); struct timespec64 tp; - int i, err; + int i; - ns = timens_for_children_get(p); if (!ns) return -ESRCH; + time_ns = to_time_ns(ns); - if (!file_ns_capable(file, time_ns->user_ns, 
CAP_SYS_TIME)) { - put_time_ns(time_ns); + if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME)) return -EPERM; - } for (i = 0; i < noffsets; i++) { struct proc_timens_offset *off = &offsets[i]; @@ -409,15 +289,12 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p, ktime_get_boottime_ts64(&tp); break; default: - err = -EINVAL; - goto out; + return -EINVAL; } - err = -ERANGE; - if (off->val.tv_sec > KTIME_SEC_MAX || off->val.tv_sec < -KTIME_SEC_MAX) - goto out; + return -ERANGE; tp = timespec64_add(tp, off->val); /* @@ -425,16 +302,13 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p, * still unreachable. */ if (tp.tv_sec < 0 || tp.tv_sec > KTIME_SEC_MAX / 2) - goto out; + return -ERANGE; } - mutex_lock(&offset_lock); - if (time_ns->frozen_offsets) { - err = -EACCES; - goto out_unlock; - } + guard(mutex)(&timens_offset_lock); + if (time_ns->frozen_offsets) + return -EACCES; - err = 0; /* Don't report errors after this line */ for (i = 0; i < noffsets; i++) { struct proc_timens_offset *off = &offsets[i]; @@ -452,12 +326,7 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p, *offset = off->val; } -out_unlock: - mutex_unlock(&offset_lock); -out: - put_time_ns(time_ns); - - return err; + return 0; } const struct proc_ns_operations timens_operations = { diff --git a/kernel/time/namespace_internal.h b/kernel/time/namespace_internal.h new file mode 100644 index 000000000000..b37ba179f43b --- /dev/null +++ b/kernel/time/namespace_internal.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TIME_NAMESPACE_INTERNAL_H +#define _TIME_NAMESPACE_INTERNAL_H + +#include <linux/mutex.h> + +struct time_namespace; + +/* + * Protects possibly multiple offsets writers racing each other + * and tasks entering the namespace. 
+ */ +extern struct mutex timens_offset_lock; + +#ifdef CONFIG_TIME_NS_VDSO +int timens_vdso_alloc_vvar_page(struct time_namespace *ns); +void timens_vdso_free_vvar_page(struct time_namespace *ns); +#else /* !CONFIG_TIME_NS_VDSO */ +static inline int timens_vdso_alloc_vvar_page(struct time_namespace *ns) +{ + return 0; +} +static inline void timens_vdso_free_vvar_page(struct time_namespace *ns) +{ +} +#endif /* CONFIG_TIME_NS_VDSO */ + +#endif /* _TIME_NAMESPACE_INTERNAL_H */ diff --git a/kernel/time/namespace_vdso.c b/kernel/time/namespace_vdso.c new file mode 100644 index 000000000000..88c075cd16a3 --- /dev/null +++ b/kernel/time/namespace_vdso.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Andrei Vagin <avagin@openvz.org> + * Author: Dmitry Safonov <dima@arista.com> + */ + +#include <linux/cleanup.h> +#include <linux/mm.h> +#include <linux/time_namespace.h> +#include <linux/time.h> +#include <linux/vdso_datastore.h> + +#include <vdso/clocksource.h> +#include <vdso/datapage.h> + +#include "namespace_internal.h" + +static struct timens_offset offset_from_ts(struct timespec64 off) +{ + struct timens_offset ret; + + ret.sec = off.tv_sec; + ret.nsec = off.tv_nsec; + + return ret; +} + +/* + * A time namespace VVAR page has the same layout as the VVAR page which + * contains the system wide VDSO data. + * + * For a normal task the VVAR pages are installed in the normal ordering: + * VVAR + * PVCLOCK + * HVCLOCK + * TIMENS <- Not really required + * + * Now for a timens task the pages are installed in the following order: + * TIMENS + * PVCLOCK + * HVCLOCK + * VVAR + * + * The check for vdso_clock->clock_mode is in the unlikely path of + * the seq begin magic. So for the non-timens case most of the time + * 'seq' is even, so the branch is not taken. + * + * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check + * for vdso_clock->clock_mode is a non-issue. 
The task is spin waiting for the + * update to finish and for 'seq' to become even anyway. + * + * Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which + * enforces the time namespace handling path. + */ +static void timens_setup_vdso_clock_data(struct vdso_clock *vc, + struct time_namespace *ns) +{ + struct timens_offset *offset = vc->offset; + struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic); + struct timens_offset boottime = offset_from_ts(ns->offsets.boottime); + + vc->seq = 1; + vc->clock_mode = VDSO_CLOCKMODE_TIMENS; + offset[CLOCK_MONOTONIC] = monotonic; + offset[CLOCK_MONOTONIC_RAW] = monotonic; + offset[CLOCK_MONOTONIC_COARSE] = monotonic; + offset[CLOCK_BOOTTIME] = boottime; + offset[CLOCK_BOOTTIME_ALARM] = boottime; +} + +struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops(). + * For more details check_vma_flags() and __access_remote_vm() + */ + + WARN(1, "vvar_page accessed remotely"); + + return NULL; +} + +static void timens_set_vvar_page(struct task_struct *task, + struct time_namespace *ns) +{ + struct vdso_time_data *vdata; + struct vdso_clock *vc; + unsigned int i; + + if (ns == &init_time_ns) + return; + + /* Fast-path, taken by every task in namespace except the first. */ + if (likely(ns->frozen_offsets)) + return; + + guard(mutex)(&timens_offset_lock); + /* Nothing to-do: vvar_page has been already initialized. 
*/ + if (ns->frozen_offsets) + return; + + ns->frozen_offsets = true; + vdata = page_address(ns->vvar_page); + vc = vdata->clock_data; + + for (i = 0; i < CS_BASES; i++) + timens_setup_vdso_clock_data(&vc[i], ns); + + if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) { + for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++) + timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns); + } +} + +/* + * The vvar page layout depends on whether a task belongs to the root or + * non-root time namespace. Whenever a task changes its namespace, the VVAR + * page tables are cleared and then they will be re-faulted with a + * corresponding layout. + * See also the comment near timens_setup_vdso_clock_data() for details. + */ +static int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); + + guard(mmap_read_lock)(mm); + for_each_vma(vmi, vma) { + if (vma_is_special_mapping(vma, &vdso_vvar_mapping)) + zap_vma_pages(vma); + } + return 0; +} + +void timens_commit(struct task_struct *tsk, struct time_namespace *ns) +{ + timens_set_vvar_page(tsk, ns); + vdso_join_timens(tsk, ns); +} + +int timens_vdso_alloc_vvar_page(struct time_namespace *ns) +{ + ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!ns->vvar_page) + return -ENOMEM; + + return 0; +} + +void timens_vdso_free_vvar_page(struct time_namespace *ns) +{ + __free_page(ns->vvar_page); +} diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c index a565c30c71a0..cf5d784a4a5a 100644 --- a/lib/vdso/datastore.c +++ b/lib/vdso/datastore.c @@ -1,64 +1,92 @@ // SPDX-License-Identifier: GPL-2.0-only -#include <linux/linkage.h> -#include <linux/mmap_lock.h> +#include <linux/gfp.h> +#include <linux/init.h> #include <linux/mm.h> #include <linux/time_namespace.h> #include <linux/types.h> #include <linux/vdso_datastore.h> #include <vdso/datapage.h> -/* - * The vDSO data page. 
- */ +static u8 vdso_initdata[VDSO_NR_PAGES * PAGE_SIZE] __aligned(PAGE_SIZE) __initdata = {}; + #ifdef CONFIG_GENERIC_GETTIMEOFDAY -static union { - struct vdso_time_data data; - u8 page[PAGE_SIZE]; -} vdso_time_data_store __page_aligned_data; -struct vdso_time_data *vdso_k_time_data = &vdso_time_data_store.data; -static_assert(sizeof(vdso_time_data_store) == PAGE_SIZE); +struct vdso_time_data *vdso_k_time_data __refdata = + (void *)&vdso_initdata[VDSO_TIME_PAGE_OFFSET * PAGE_SIZE]; + +static_assert(sizeof(struct vdso_time_data) <= PAGE_SIZE); #endif /* CONFIG_GENERIC_GETTIMEOFDAY */ #ifdef CONFIG_VDSO_GETRANDOM -static union { - struct vdso_rng_data data; - u8 page[PAGE_SIZE]; -} vdso_rng_data_store __page_aligned_data; -struct vdso_rng_data *vdso_k_rng_data = &vdso_rng_data_store.data; -static_assert(sizeof(vdso_rng_data_store) == PAGE_SIZE); +struct vdso_rng_data *vdso_k_rng_data __refdata = + (void *)&vdso_initdata[VDSO_RNG_PAGE_OFFSET * PAGE_SIZE]; + +static_assert(sizeof(struct vdso_rng_data) <= PAGE_SIZE); #endif /* CONFIG_VDSO_GETRANDOM */ #ifdef CONFIG_ARCH_HAS_VDSO_ARCH_DATA -static union { - struct vdso_arch_data data; - u8 page[VDSO_ARCH_DATA_SIZE]; -} vdso_arch_data_store __page_aligned_data; -struct vdso_arch_data *vdso_k_arch_data = &vdso_arch_data_store.data; +struct vdso_arch_data *vdso_k_arch_data __refdata = + (void *)&vdso_initdata[VDSO_ARCH_PAGES_START * PAGE_SIZE]; #endif /* CONFIG_ARCH_HAS_VDSO_ARCH_DATA */ +void __init vdso_setup_data_pages(void) +{ + unsigned int order = get_order(VDSO_NR_PAGES * PAGE_SIZE); + struct page *pages; + + /* + * Allocate the data pages dynamically. SPARC does not support mapping + * static pages to be mapped into userspace. + * It is also a requirement for mlockall() support. + * + * Do not use folios. In time namespaces the pages are mapped in a different order + * to userspace, which is not handled by the folio optimizations in finish_fault(). 
+ */ + pages = alloc_pages(GFP_KERNEL, order); + if (!pages) + panic("Unable to allocate VDSO storage pages"); + + /* The pages are mapped one-by-one into userspace and each one needs to be refcounted. */ + split_page(pages, order); + + /* Move the data already written by other subsystems to the new pages */ + memcpy(page_address(pages), vdso_initdata, VDSO_NR_PAGES * PAGE_SIZE); + + if (IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY)) + vdso_k_time_data = page_address(pages + VDSO_TIME_PAGE_OFFSET); + + if (IS_ENABLED(CONFIG_VDSO_GETRANDOM)) + vdso_k_rng_data = page_address(pages + VDSO_RNG_PAGE_OFFSET); + + if (IS_ENABLED(CONFIG_ARCH_HAS_VDSO_ARCH_DATA)) + vdso_k_arch_data = page_address(pages + VDSO_ARCH_PAGES_START); +} + static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long addr, pfn; - vm_fault_t err; + struct page *page, *timens_page; + + timens_page = find_timens_vvar_page(vma); switch (vmf->pgoff) { case VDSO_TIME_PAGE_OFFSET: if (!IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY)) return VM_FAULT_SIGBUS; - pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data)); + page = virt_to_page(vdso_k_time_data); if (timens_page) { /* * Fault in VVAR page too, since it will be accessed * to get clock data anyway. 
*/ + unsigned long addr; + vm_fault_t err; + addr = vmf->address + VDSO_TIMENS_PAGE_OFFSET * PAGE_SIZE; - err = vmf_insert_pfn(vma, addr, pfn); + err = vmf_insert_page(vma, addr, page); if (unlikely(err & VM_FAULT_ERROR)) return err; - pfn = page_to_pfn(timens_page); + page = timens_page; } break; case VDSO_TIMENS_PAGE_OFFSET: @@ -71,24 +99,25 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, */ if (!IS_ENABLED(CONFIG_TIME_NS) || !timens_page) return VM_FAULT_SIGBUS; - pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data)); + page = virt_to_page(vdso_k_time_data); break; case VDSO_RNG_PAGE_OFFSET: if (!IS_ENABLED(CONFIG_VDSO_GETRANDOM)) return VM_FAULT_SIGBUS; - pfn = __phys_to_pfn(__pa_symbol(vdso_k_rng_data)); + page = virt_to_page(vdso_k_rng_data); break; case VDSO_ARCH_PAGES_START ... VDSO_ARCH_PAGES_END: if (!IS_ENABLED(CONFIG_ARCH_HAS_VDSO_ARCH_DATA)) return VM_FAULT_SIGBUS; - pfn = __phys_to_pfn(__pa_symbol(vdso_k_arch_data)) + - vmf->pgoff - VDSO_ARCH_PAGES_START; + page = virt_to_page(vdso_k_arch_data) + vmf->pgoff - VDSO_ARCH_PAGES_START; break; default: return VM_FAULT_SIGBUS; } - return vmf_insert_pfn(vma, vmf->address, pfn); + get_page(page); + vmf->page = page; + return 0; } const struct vm_special_mapping vdso_vvar_mapping = { @@ -100,31 +129,6 @@ struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned { return _install_special_mapping(mm, addr, VDSO_NR_PAGES * PAGE_SIZE, VM_READ | VM_MAYREAD | VM_IO | VM_DONTDUMP | - VM_PFNMAP | VM_SEALED_SYSMAP, + VM_MIXEDMAP | VM_SEALED_SYSMAP, &vdso_vvar_mapping); } - -#ifdef CONFIG_TIME_NS -/* - * The vvar page layout depends on whether a task belongs to the root or - * non-root time namespace. Whenever a task changes its namespace, the VVAR - * page tables are cleared and then they will be re-faulted with a - * corresponding layout. - * See also the comment near timens_setup_vdso_clock_data() for details. 
- */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, 0); - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - if (vma_is_special_mapping(vma, &vdso_vvar_mapping)) - zap_vma_pages(vma); - } - mmap_read_unlock(mm); - - return 0; -} -#endif diff --git a/lib/vdso/getrandom.c b/lib/vdso/getrandom.c index 440f8a6203a6..7e29005aa208 100644 --- a/lib/vdso/getrandom.c +++ b/lib/vdso/getrandom.c @@ -7,8 +7,11 @@ #include <linux/minmax.h> #include <vdso/datapage.h> #include <vdso/getrandom.h> +#include <vdso/limits.h> #include <vdso/unaligned.h> +#include <asm/barrier.h> #include <asm/vdso/getrandom.h> +#include <uapi/linux/errno.h> #include <uapi/linux/mman.h> #include <uapi/linux/random.h> diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 4399e143d43a..a5798bd26d20 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -3,8 +3,25 @@ * Generic userspace implementations of gettimeofday() and similar. */ #include <vdso/auxclock.h> +#include <vdso/clocksource.h> #include <vdso/datapage.h> #include <vdso/helpers.h> +#include <vdso/ktime.h> +#include <vdso/limits.h> +#include <vdso/math64.h> +#include <vdso/time32.h> +#include <vdso/time64.h> + +/* + * The generic vDSO implementation requires that gettimeofday.h + * provides: + * - __arch_get_hw_counter(): to get the hw counter based on the + * clock_mode. + * - gettimeofday_fallback(): fallback for gettimeofday. + * - clock_gettime_fallback(): fallback for clock_gettime. + * - clock_getres_fallback(): fallback for clock_getres. 
+ */ +#include <asm/vdso/gettimeofday.h> /* Bring in default accessors */ #include <vdso/vsyscall.h> @@ -135,7 +152,7 @@ bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock * if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns)) return false; - } while (unlikely(vdso_read_retry(vc, seq))); + } while (vdso_read_retry(vc, seq)); /* Add the namespace offset */ sec += offs->sec; @@ -158,28 +175,12 @@ bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, return false; do { - /* - * Open coded function vdso_read_begin() to handle - * VDSO_CLOCKMODE_TIMENS. Time namespace enabled tasks have a - * special VVAR page installed which has vc->seq set to 1 and - * vc->clock_mode set to VDSO_CLOCKMODE_TIMENS. For non time - * namespace affected tasks this does not affect performance - * because if vc->seq is odd, i.e. a concurrent update is in - * progress the extra check for vc->clock_mode is just a few - * extra instructions while spin waiting for vc->seq to become - * even again. - */ - while (unlikely((seq = READ_ONCE(vc->seq)) & 1)) { - if (IS_ENABLED(CONFIG_TIME_NS) && - vc->clock_mode == VDSO_CLOCKMODE_TIMENS) - return do_hres_timens(vd, vc, clk, ts); - cpu_relax(); - } - smp_rmb(); + if (vdso_read_begin_timens(vc, &seq)) + return do_hres_timens(vd, vc, clk, ts); if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns)) return false; - } while (unlikely(vdso_read_retry(vc, seq))); + } while (vdso_read_retry(vc, seq)); vdso_set_timespec(ts, sec, ns); @@ -204,7 +205,7 @@ bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock seq = vdso_read_begin(vc); sec = vdso_ts->sec; nsec = vdso_ts->nsec; - } while (unlikely(vdso_read_retry(vc, seq))); + } while (vdso_read_retry(vc, seq)); /* Add the namespace offset */ sec += offs->sec; @@ -223,21 +224,12 @@ bool do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc, u32 seq; do { - /* - * Open coded function vdso_read_begin() to handle - * VDSO_CLOCK_TIMENS. 
See comment in do_hres(). - */ - while ((seq = READ_ONCE(vc->seq)) & 1) { - if (IS_ENABLED(CONFIG_TIME_NS) && - vc->clock_mode == VDSO_CLOCKMODE_TIMENS) - return do_coarse_timens(vd, vc, clk, ts); - cpu_relax(); - } - smp_rmb(); + if (vdso_read_begin_timens(vc, &seq)) + return do_coarse_timens(vd, vc, clk, ts); ts->tv_sec = vdso_ts->sec; ts->tv_nsec = vdso_ts->nsec; - } while (unlikely(vdso_read_retry(vc, seq))); + } while (vdso_read_retry(vc, seq)); return true; } @@ -256,20 +248,12 @@ bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_ti vc = &vd->aux_clock_data[idx]; do { - /* - * Open coded function vdso_read_begin() to handle - * VDSO_CLOCK_TIMENS. See comment in do_hres(). - */ - while ((seq = READ_ONCE(vc->seq)) & 1) { - if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) { - vd = __arch_get_vdso_u_timens_data(vd); - vc = &vd->aux_clock_data[idx]; - /* Re-read from the real time data page */ - continue; - } - cpu_relax(); + if (vdso_read_begin_timens(vc, &seq)) { + vd = __arch_get_vdso_u_timens_data(vd); + vc = &vd->aux_clock_data[idx]; + /* Re-read from the real time data page */ + continue; } - smp_rmb(); /* Auxclock disabled? 
*/ if (vc->clock_mode == VDSO_CLOCKMODE_NONE) @@ -277,7 +261,7 @@ bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_ti if (!vdso_get_timestamp(vd, vc, VDSO_BASE_AUX, &sec, &ns)) return false; - } while (unlikely(vdso_read_retry(vc, seq))); + } while (vdso_read_retry(vc, seq)); vdso_set_timespec(ts, sec, ns); @@ -313,7 +297,7 @@ __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock, return do_hres(vd, vc, clock, ts); } -static __maybe_unused int +static int __cvdso_clock_gettime_data(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts) { @@ -333,7 +317,7 @@ __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) } #ifdef BUILD_VDSO32 -static __maybe_unused int +static int __cvdso_clock_gettime32_data(const struct vdso_time_data *vd, clockid_t clock, struct old_timespec32 *res) { @@ -359,7 +343,7 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res) } #endif /* BUILD_VDSO32 */ -static __maybe_unused int +static int __cvdso_gettimeofday_data(const struct vdso_time_data *vd, struct __kernel_old_timeval *tv, struct timezone *tz) { @@ -376,8 +360,7 @@ __cvdso_gettimeofday_data(const struct vdso_time_data *vd, } if (unlikely(tz != NULL)) { - if (IS_ENABLED(CONFIG_TIME_NS) && - vc->clock_mode == VDSO_CLOCKMODE_TIMENS) + if (vdso_is_timens_clock(vc)) vd = __arch_get_vdso_u_timens_data(vd); tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest; @@ -394,14 +377,13 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) } #ifdef VDSO_HAS_TIME -static __maybe_unused __kernel_old_time_t +static __kernel_old_time_t __cvdso_time_data(const struct vdso_time_data *vd, __kernel_old_time_t *time) { const struct vdso_clock *vc = vd->clock_data; __kernel_old_time_t t; - if (IS_ENABLED(CONFIG_TIME_NS) && - vc->clock_mode == VDSO_CLOCKMODE_TIMENS) { + if (vdso_is_timens_clock(vc)) { vd = __arch_get_vdso_u_timens_data(vd); vc = vd->clock_data; } @@ -432,8 
+414,7 @@ bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t cloc if (!vdso_clockid_valid(clock)) return false; - if (IS_ENABLED(CONFIG_TIME_NS) && - vc->clock_mode == VDSO_CLOCKMODE_TIMENS) + if (vdso_is_timens_clock(vc)) vd = __arch_get_vdso_u_timens_data(vd); /* @@ -464,7 +445,7 @@ bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t cloc return true; } -static __maybe_unused +static int __cvdso_clock_getres_data(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *res) { @@ -484,7 +465,7 @@ int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res) } #ifdef BUILD_VDSO32 -static __maybe_unused int +static int __cvdso_clock_getres_time32_data(const struct vdso_time_data *vd, clockid_t clock, struct old_timespec32 *res) { diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index e361aca22a74..a61047bdcd57 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -19,8 +19,6 @@ endif include ../lib.mk -CFLAGS += $(TOOLS_INCLUDES) - CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tables \ -fno-stack-protector -include $(top_srcdir)/tools/include/nolibc/nolibc.h \ -I$(top_srcdir)/tools/include/nolibc/ $(KHDR_INCLUDES) @@ -28,13 +26,11 @@ CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tab $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c +$(OUTPUT)/vdso_test_correctness: parse_vdso.c vdso_test_correctness.c $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c | headers $(OUTPUT)/vdso_standalone_test_x86: CFLAGS:=$(CFLAGS_NOLIBC) $(CFLAGS) -$(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c -$(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl - $(OUTPUT)/vdso_test_getrandom: parse_vdso.c 
$(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \ $(KHDR_INCLUDES) \ diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 3ff00fb624a4..c6ff4413ea36 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -19,8 +19,7 @@ #include <stdint.h> #include <string.h> #include <limits.h> -#include <linux/auxvec.h> -#include <linux/elf.h> +#include <elf.h> #include "parse_vdso.h" diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c index 055af95aa552..5c5a07dd1128 100644 --- a/tools/testing/selftests/vDSO/vdso_test_correctness.c +++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c @@ -11,28 +11,22 @@ #include <time.h> #include <stdlib.h> #include <unistd.h> +#include <sys/auxv.h> #include <sys/syscall.h> -#include <dlfcn.h> #include <string.h> #include <errno.h> #include <sched.h> #include <stdbool.h> #include <limits.h> +#include "parse_vdso.h" #include "vdso_config.h" #include "vdso_call.h" #include "kselftest.h" +static const char *version; static const char **name; -#ifndef SYS_getcpu -# ifdef __x86_64__ -# define SYS_getcpu 309 -# else -# define SYS_getcpu 318 -# endif -#endif - #ifndef __NR_clock_gettime64 #define __NR_clock_gettime64 403 #endif @@ -61,6 +55,10 @@ typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz); vgtod_t vdso_gettimeofday; +typedef time_t (*vtime_t)(__kernel_time_t *tloc); + +vtime_t vdso_time; + typedef long (*getcpu_t)(unsigned *, unsigned *, void *); getcpu_t vgetcpu; @@ -110,42 +108,39 @@ static void *vsyscall_getcpu(void) static void fill_function_pointers(void) { - void *vdso = dlopen("linux-vdso.so.1", - RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); - if (!vdso) - vdso = dlopen("linux-gate.so.1", - RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); - if (!vdso) - vdso = dlopen("linux-vdso32.so.1", - RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); - if 
(!vdso) - vdso = dlopen("linux-vdso64.so.1", - RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); - if (!vdso) { + unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); + + if (!sysinfo_ehdr) { printf("[WARN]\tfailed to find vDSO\n"); return; } - vdso_getcpu = (getcpu_t)dlsym(vdso, name[4]); + vdso_init_from_sysinfo_ehdr(sysinfo_ehdr); + + vdso_getcpu = (getcpu_t)vdso_sym(version, name[4]); if (!vdso_getcpu) printf("Warning: failed to find getcpu in vDSO\n"); vgetcpu = (getcpu_t) vsyscall_getcpu(); - vdso_clock_gettime = (vgettime_t)dlsym(vdso, name[1]); + vdso_clock_gettime = (vgettime_t)vdso_sym(version, name[1]); if (!vdso_clock_gettime) printf("Warning: failed to find clock_gettime in vDSO\n"); #if defined(VDSO_32BIT) - vdso_clock_gettime64 = (vgettime64_t)dlsym(vdso, name[5]); + vdso_clock_gettime64 = (vgettime64_t)vdso_sym(version, name[5]); if (!vdso_clock_gettime64) printf("Warning: failed to find clock_gettime64 in vDSO\n"); #endif - vdso_gettimeofday = (vgtod_t)dlsym(vdso, name[0]); + vdso_gettimeofday = (vgtod_t)vdso_sym(version, name[0]); if (!vdso_gettimeofday) printf("Warning: failed to find gettimeofday in vDSO\n"); + vdso_time = (vtime_t)vdso_sym(version, name[2]); + if (!vdso_time) + printf("Warning: failed to find time in vDSO\n"); + } static long sys_getcpu(unsigned * cpu, unsigned * node, @@ -169,6 +164,16 @@ static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz) return syscall(__NR_gettimeofday, tv, tz); } +static inline __kernel_old_time_t sys_time(__kernel_old_time_t *tloc) +{ +#ifdef __NR_time + return syscall(__NR_time, tloc); +#else + errno = ENOSYS; + return -1; +#endif +} + static void test_getcpu(void) { printf("[RUN]\tTesting getcpu...\n"); @@ -412,10 +417,10 @@ static void test_gettimeofday(void) return; } - printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n", - (unsigned long long)start.tv_sec, start.tv_usec, - (unsigned long long)vdso.tv_sec, vdso.tv_usec, - (unsigned long long)end.tv_sec, end.tv_usec); + printf("\t%llu.%06lld 
%llu.%06lld %llu.%06lld\n", + (unsigned long long)start.tv_sec, (long long)start.tv_usec, + (unsigned long long)vdso.tv_sec, (long long)vdso.tv_usec, + (unsigned long long)end.tv_sec, (long long)end.tv_usec); if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) { printf("[FAIL]\tTimes are out of sequence\n"); @@ -435,8 +440,56 @@ static void test_gettimeofday(void) VDSO_CALL(vdso_gettimeofday, 2, &vdso, NULL); } +static void test_time(void) +{ + __kernel_old_time_t start, end, vdso_ret, vdso_param; + + if (!vdso_time) + return; + + printf("[RUN]\tTesting time...\n"); + + if (sys_time(&start) < 0) { + if (errno == -ENOSYS) { + printf("[SKIP]\tNo time() support\n"); + } else { + printf("[FAIL]\tsys_time failed (%d)\n", errno); + nerrs++; + } + return; + } + + vdso_ret = VDSO_CALL(vdso_time, 1, &vdso_param); + end = sys_time(NULL); + + if (vdso_ret < 0 || end < 0) { + printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", + (int)vdso_ret, errno); + nerrs++; + return; + } + + printf("\t%lld %lld %lld\n", + (long long)start, + (long long)vdso_ret, + (long long)end); + + if (vdso_ret != vdso_param) { + printf("[FAIL]\tinconsistent return values: %lld %lld\n", + (long long)vdso_ret, (long long)vdso_param); + nerrs++; + return; + } + + if (!(start <= vdso_ret) || !(vdso_ret <= end)) { + printf("[FAIL]\tTimes are out of sequence\n"); + nerrs++; + } +} + int main(int argc, char **argv) { + version = versions[VDSO_VERSION]; name = (const char **)&names[VDSO_NAMES]; fill_function_pointers(); @@ -444,6 +497,7 @@ int main(int argc, char **argv) test_clock_gettime(); test_clock_gettime64(); test_gettimeofday(); + test_time(); /* * Test getcpu() last so that, if something goes wrong setting affinity, diff --git a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c index 912edadad92c..990b29e0e272 100644 --- a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c +++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c @@ 
-11,10 +11,8 @@ */ #include <stdio.h> -#ifndef NOLIBC #include <sys/auxv.h> #include <sys/time.h> -#endif #include "kselftest.h" #include "parse_vdso.h" |
