diff options
Diffstat (limited to 'arch/um/os-Linux')
| -rw-r--r-- | arch/um/os-Linux/Makefile | 4 | ||||
| -rw-r--r-- | arch/um/os-Linux/internal.h | 8 | ||||
| -rw-r--r-- | arch/um/os-Linux/process.c | 20 | ||||
| -rw-r--r-- | arch/um/os-Linux/signal.c | 31 | ||||
| -rw-r--r-- | arch/um/os-Linux/skas/process.c | 39 | ||||
| -rw-r--r-- | arch/um/os-Linux/smp.c | 148 | ||||
| -rw-r--r-- | arch/um/os-Linux/start_up.c | 4 | ||||
| -rw-r--r-- | arch/um/os-Linux/time.c | 38 |
8 files changed, 267 insertions, 25 deletions
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index fae836713487..70c73c22f715 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -16,8 +16,10 @@ CFLAGS_main.o += -Wno-frame-larger-than obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o +obj-$(CONFIG_SMP) += smp.o + USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \ main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \ - tty.o umid.o util.o + tty.o umid.o util.o smp.o include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h index c2c7a0dc673c..bac9fcc8c14c 100644 --- a/arch/um/os-Linux/internal.h +++ b/arch/um/os-Linux/internal.h @@ -4,6 +4,7 @@ #include <mm_id.h> #include <stub-data.h> +#include <signal.h> /* * elf_aux.c @@ -18,6 +19,7 @@ void check_tmpexec(void); /* * signal.c */ +extern __thread int signals_enabled; int timer_alarm_pending(void); /* @@ -25,4 +27,10 @@ int timer_alarm_pending(void); */ void wait_stub_done(int pid); void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys); + +/* + * smp.c + */ +#define IPI_SIGNAL SIGRTMIN + #endif /* __UM_OS_LINUX_INTERNAL_H */ diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index 00b49e90d05f..3a2a84ab9325 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -10,6 +10,8 @@ #include <errno.h> #include <signal.h> #include <fcntl.h> +#include <limits.h> +#include <linux/futex.h> #include <sys/mman.h> #include <sys/ptrace.h> #include <sys/prctl.h> @@ -189,3 +191,21 @@ void os_set_pdeathsig(void) { prctl(PR_SET_PDEATHSIG, SIGKILL); } + +int os_futex_wait(void *uaddr, unsigned int val) +{ + int r; + + CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAIT, val, + NULL, NULL, 0)); + return r < 0 ? -errno : r; +} + +int os_futex_wake(void *uaddr) +{ + int r; + + CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAKE, INT_MAX, + NULL, NULL, 0)); + return r < 0 ? -errno : r; +} diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 554a87dd32cc..327fb3c52fc7 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -69,7 +69,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) #define SIGCHLD_BIT 2 #define SIGCHLD_MASK (1 << SIGCHLD_BIT) -static __thread int signals_enabled; +__thread int signals_enabled; #if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) static int signals_blocked, signals_blocked_pending; #endif @@ -259,9 +259,29 @@ int change_sig(int signal, int on) return 0; } -void block_signals(void) +static inline void __block_signals(void) { + if (!signals_enabled) + return; + + os_local_ipi_disable(); + barrier(); signals_enabled = 0; +} + +static inline void __unblock_signals(void) +{ + if (signals_enabled) + return; + + signals_enabled = 1; + barrier(); + os_local_ipi_enable(); +} + +void block_signals(void) +{ + __block_signals(); /* * This must return with signals disabled, so this barrier * ensures that writes are flushed out before the return. @@ -278,7 +298,8 @@ void unblock_signals(void) if (signals_enabled == 1) return; - signals_enabled = 1; + __unblock_signals(); + #if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) deliver_time_travel_irqs(); #endif @@ -312,7 +333,7 @@ void unblock_signals(void) * tracing that happens inside the handlers we call for the * pending signals will mess up the tracing state. */ - signals_enabled = 0; + __block_signals(); um_trace_signals_off(); /* @@ -344,7 +365,7 @@ void unblock_signals(void) /* Re-enable signals and trace that we're doing so. */ um_trace_signals_on(); - signals_enabled = 1; + __unblock_signals(); } } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 820846ff7179..d6c22f8aa06d 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -546,7 +546,7 @@ extern unsigned long tt_extra_sched_jiffies; void userspace(struct uml_pt_regs *regs) { int err, status, op; - siginfo_t si_ptrace; + siginfo_t si_local; siginfo_t *si; int sig; @@ -557,6 +557,13 @@ void userspace(struct uml_pt_regs *regs) struct mm_id *mm_id = current_mm_id(); /* + * At any given time, only one CPU thread can enter the + * turnstile to operate on the same stub process, including + * executing stub system calls (mmap and munmap). + */ + enter_turnstile(mm_id); + + /* * When we are in time-travel mode, userspace can theoretically * do a *lot* of work without being scheduled. The problem with * this is that it will prevent kernel bookkeeping (primarily @@ -623,9 +630,10 @@ void userspace(struct uml_pt_regs *regs) } if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si)) - panic("%s - Invalid siginfo offset from child", - __func__); - si = (void *)&proc_data->sigstack[proc_data->si_offset]; + panic("%s - Invalid siginfo offset from child", __func__); + + si = &si_local; + memcpy(si, &proc_data->sigstack[proc_data->si_offset], sizeof(*si)); regs->is_user = 1; @@ -721,8 +729,8 @@ void userspace(struct uml_pt_regs *regs) case SIGFPE: case SIGWINCH: ptrace(PTRACE_GETSIGINFO, pid, 0, - (struct siginfo *)&si_ptrace); - si = &si_ptrace; + (struct siginfo *)&si_local); + si = &si_local; break; default: si = NULL; @@ -733,6 +741,8 @@ void userspace(struct uml_pt_regs *regs) } } + exit_turnstile(mm_id); + UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ if (sig) { @@ -802,10 +812,9 @@ void switch_threads(jmp_buf *me, jmp_buf *you) static jmp_buf initial_jmpbuf; -/* XXX Make these percpu */ -static void (*cb_proc)(void *arg); -static void *cb_arg; -static jmp_buf *cb_back; +static __thread void (*cb_proc)(void *arg); +static __thread void *cb_arg; +static __thread jmp_buf *cb_back; int start_idle_thread(void *stack, jmp_buf *switch_buf) { @@ -859,10 +868,10 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg) cb_arg = arg; cb_back = &here; - block_signals_trace(); + initial_jmpbuf_lock(); if (UML_SETJMP(&here) == 0) UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK); - unblock_signals_trace(); + initial_jmpbuf_unlock(); cb_proc = NULL; cb_arg = NULL; @@ -871,8 +880,9 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg) void halt_skas(void) { - block_signals_trace(); + initial_jmpbuf_lock(); UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT); + /* unreachable */ } static bool noreboot; @@ -892,6 +902,7 @@ __uml_setup("noreboot", noreboot_cmd_param, void reboot_skas(void) { - block_signals_trace(); + initial_jmpbuf_lock(); UML_LONGJMP(&initial_jmpbuf, noreboot ? INIT_JMP_HALT : INIT_JMP_REBOOT); + /* unreachable */ } diff --git a/arch/um/os-Linux/smp.c b/arch/um/os-Linux/smp.c new file mode 100644 index 000000000000..18d3858a7cd2 --- /dev/null +++ b/arch/um/os-Linux/smp.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025 Ant Group + * Author: Tiwei Bie <tiwei.btw@antgroup.com> + */ + +#include <errno.h> +#include <pthread.h> +#include <signal.h> +#include <kern_util.h> +#include <um_malloc.h> +#include <init.h> +#include <os.h> +#include <smp.h> +#include "internal.h" + +struct cpu_thread_data { + int cpu; + sigset_t sigset; +}; + +static __thread int __curr_cpu; + +int uml_curr_cpu(void) +{ + return __curr_cpu; +} + +static pthread_t cpu_threads[CONFIG_NR_CPUS]; + +static void *cpu_thread(void *arg) +{ + struct cpu_thread_data *data = arg; + + __curr_cpu = data->cpu; + + uml_start_secondary(data); + + return NULL; +} + +int os_start_cpu_thread(int cpu) +{ + struct cpu_thread_data *data; + sigset_t sigset, oset; + int err; + + data = uml_kmalloc(sizeof(*data), UM_GFP_ATOMIC); + if (!data) + return -ENOMEM; + + sigfillset(&sigset); + if (sigprocmask(SIG_SETMASK, &sigset, &oset) < 0) { + err = errno; + goto err; + } + + data->cpu = cpu; + data->sigset = oset; + + err = pthread_create(&cpu_threads[cpu], NULL, cpu_thread, data); + if (sigprocmask(SIG_SETMASK, &oset, NULL) < 0) + panic("Failed to restore the signal mask, errno = %d", errno); + if (err != 0) + goto err; + + return 0; + +err: + kfree(data); + return -err; +} + +void os_start_secondary(void *arg, jmp_buf *switch_buf) +{ + struct cpu_thread_data *data = arg; + + sigaddset(&data->sigset, IPI_SIGNAL); + sigaddset(&data->sigset, SIGIO); + + if (sigprocmask(SIG_SETMASK, &data->sigset, NULL) < 0) + panic("Failed to restore the signal mask, errno = %d", errno); + + kfree(data); + longjmp(*switch_buf, 1); + + /* unreachable */ + printk(UM_KERN_ERR "impossible long jump!"); + fatal_sigsegv(); +} + +int os_send_ipi(int cpu, int vector) +{ + union sigval value = { .sival_int = vector }; + + return pthread_sigqueue(cpu_threads[cpu], IPI_SIGNAL, value); +} + +static void __local_ipi_set(int enable) +{ + sigset_t sigset; + + sigemptyset(&sigset); + sigaddset(&sigset, IPI_SIGNAL); + + if (sigprocmask(enable ? SIG_UNBLOCK : SIG_BLOCK, &sigset, NULL) < 0) + panic("%s: sigprocmask failed, errno = %d", __func__, errno); +} + +void os_local_ipi_enable(void) +{ + __local_ipi_set(1); +} + +void os_local_ipi_disable(void) +{ + __local_ipi_set(0); +} + +static void ipi_sig_handler(int sig, siginfo_t *si, void *uc) +{ + int save_errno = errno; + + signals_enabled = 0; + um_trace_signals_off(); + + uml_ipi_handler(si->si_value.sival_int); + + um_trace_signals_on(); + signals_enabled = 1; + + errno = save_errno; +} + +void __init os_init_smp(void) +{ + struct sigaction action = { + .sa_sigaction = ipi_sig_handler, + .sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART, + }; + + sigfillset(&action.sa_mask); + + if (sigaction(IPI_SIGNAL, &action, NULL) < 0) + panic("%s: sigaction failed, errno = %d", __func__, errno); + + cpu_threads[0] = pthread_self(); +} diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 8b19dca83f71..054ac03bbf5e 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -22,6 +22,7 @@ #include <asm/unistd.h> #include <init.h> #include <os.h> +#include <smp.h> #include <kern_util.h> #include <mem_user.h> #include <ptrace_user.h> @@ -481,6 +482,9 @@ void __init os_early_checks(void) fatal("SECCOMP userspace requested but not functional!\n"); } + if (uml_ncpus > 1) + fatal("SMP is not supported with PTRACE userspace.\n"); + using_seccomp = 0; check_ptrace(); diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index e0197bfe4ac9..13ebc86918d4 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -11,9 +11,11 @@ #include <errno.h> #include <signal.h> #include <time.h> +#include <sys/signalfd.h> #include <sys/time.h> #include <kern_util.h> #include <os.h> +#include <smp.h> #include <string.h> #include "internal.h" @@ -41,7 +43,8 @@ long long os_persistent_clock_emulation(void) */ int os_timer_create(void) { - timer_t *t = &event_high_res_timer[0]; + int cpu = uml_curr_cpu(); + timer_t *t = &event_high_res_timer[cpu]; struct sigevent sev = { .sigev_notify = SIGEV_THREAD_ID, .sigev_signo = SIGALRM, @@ -105,24 +108,49 @@ long long os_nsecs(void) return timespec_to_ns(&ts); } +static __thread int wake_signals; + +void os_idle_prepare(void) +{ + sigset_t set; + + sigemptyset(&set); + sigaddset(&set, SIGALRM); + sigaddset(&set, IPI_SIGNAL); + + /* + * We need to use signalfd rather than sigsuspend in idle sleep + * because the IPI signal is a real-time signal that carries data, + * and unlike handling SIGALRM, we cannot simply flag it in + * signals_pending. + */ + wake_signals = signalfd(-1, &set, SFD_CLOEXEC); + if (wake_signals < 0) + panic("Failed to create signal FD, errno = %d", errno); +} + /** * os_idle_sleep() - sleep until interrupted */ void os_idle_sleep(void) { - sigset_t set, old; + sigset_t set; - /* Block SIGALRM while performing the need_resched check. */ + /* + * Block SIGALRM while performing the need_resched check. + * Note that, because IRQs are disabled, the IPI signal is + * already blocked. + */ sigemptyset(&set); sigaddset(&set, SIGALRM); - sigprocmask(SIG_BLOCK, &set, &old); + sigprocmask(SIG_BLOCK, &set, NULL); /* * Because disabling IRQs does not block SIGALRM, it is also * necessary to check for any pending timer alarms. */ if (!uml_need_resched() && !timer_alarm_pending()) - sigsuspend(&old); + os_poll(1, &wake_signals); /* Restore the signal mask. */ sigprocmask(SIG_UNBLOCK, &set, NULL); |
