summary | refs | log | tree | commit | diff
path: root/arch/um/os-Linux
diff options
context:
space:
mode:
Diffstat (limited to 'arch/um/os-Linux')
-rw-r--r-- arch/um/os-Linux/Makefile 4
-rw-r--r-- arch/um/os-Linux/internal.h 8
-rw-r--r-- arch/um/os-Linux/process.c 20
-rw-r--r-- arch/um/os-Linux/signal.c 31
-rw-r--r-- arch/um/os-Linux/skas/process.c 39
-rw-r--r-- arch/um/os-Linux/smp.c 148
-rw-r--r-- arch/um/os-Linux/start_up.c 4
-rw-r--r-- arch/um/os-Linux/time.c 38
8 files changed, 267 insertions, 25 deletions
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index fae836713487..70c73c22f715 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -16,8 +16,10 @@ CFLAGS_main.o += -Wno-frame-larger-than
obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o
+obj-$(CONFIG_SMP) += smp.o
+
USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \
main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \
- tty.o umid.o util.o
+ tty.o umid.o util.o smp.o
include $(srctree)/arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h
index c2c7a0dc673c..bac9fcc8c14c 100644
--- a/arch/um/os-Linux/internal.h
+++ b/arch/um/os-Linux/internal.h
@@ -4,6 +4,7 @@
#include <mm_id.h>
#include <stub-data.h>
+#include <signal.h>
/*
* elf_aux.c
@@ -18,6 +19,7 @@ void check_tmpexec(void);
/*
* signal.c
*/
+extern __thread int signals_enabled;
int timer_alarm_pending(void);
/*
@@ -25,4 +27,10 @@ int timer_alarm_pending(void);
*/
void wait_stub_done(int pid);
void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys);
+
+/*
+ * smp.c
+ */
+#define IPI_SIGNAL SIGRTMIN
+
#endif /* __UM_OS_LINUX_INTERNAL_H */
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 00b49e90d05f..3a2a84ab9325 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -10,6 +10,8 @@
#include <errno.h>
#include <signal.h>
#include <fcntl.h>
+#include <limits.h>
+#include <linux/futex.h>
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/prctl.h>
@@ -189,3 +191,21 @@ void os_set_pdeathsig(void)
{
prctl(PR_SET_PDEATHSIG, SIGKILL);
}
+
+int os_futex_wait(void *uaddr, unsigned int val) /* FUTEX_WAIT on uaddr with expected value val */
+{
+ int r;
+
+ CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAIT, val,
+ NULL, NULL, 0)); /* timeout is NULL; CATCH_EINTR (defined elsewhere) presumably retries on EINTR */
+ return r < 0 ? -errno : r; /* syscall result, or -errno on failure */
+}
+
+int os_futex_wake(void *uaddr) /* FUTEX_WAKE on uaddr with a wake count of INT_MAX */
+{
+ int r;
+
+ CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAKE, INT_MAX,
+ NULL, NULL, 0)); /* the three trailing futex arguments are passed as NULL, NULL, 0 */
+ return r < 0 ? -errno : r; /* syscall result, or -errno on failure */
+}
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 554a87dd32cc..327fb3c52fc7 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -69,7 +69,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
#define SIGCHLD_BIT 2
#define SIGCHLD_MASK (1 << SIGCHLD_BIT)
-static __thread int signals_enabled;
+__thread int signals_enabled;
#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
static int signals_blocked, signals_blocked_pending;
#endif
@@ -259,9 +259,29 @@ int change_sig(int signal, int on)
return 0;
}
-void block_signals(void)
+static inline void __block_signals(void) /* disable local IPIs, then clear signals_enabled */
{
+ if (!signals_enabled) /* already disabled: leave the IPI state untouched */
+ return;
+
+ os_local_ipi_disable();
+ barrier(); /* NOTE(review): presumably a compiler barrier keeping the call ahead of the store below */
signals_enabled = 0;
+}
+
+static inline void __unblock_signals(void) /* set signals_enabled, then re-enable local IPIs */
+{
+ if (signals_enabled) /* already enabled: nothing to do */
+ return;
+
+ signals_enabled = 1;
+ barrier(); /* reverse order of __block_signals(): flag first, IPI enable second */
+ os_local_ipi_enable();
+}
+
+void block_signals(void)
+{
+ __block_signals();
/*
* This must return with signals disabled, so this barrier
* ensures that writes are flushed out before the return.
@@ -278,7 +298,8 @@ void unblock_signals(void)
if (signals_enabled == 1)
return;
- signals_enabled = 1;
+ __unblock_signals();
+
#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
deliver_time_travel_irqs();
#endif
@@ -312,7 +333,7 @@ void unblock_signals(void)
* tracing that happens inside the handlers we call for the
* pending signals will mess up the tracing state.
*/
- signals_enabled = 0;
+ __block_signals();
um_trace_signals_off();
/*
@@ -344,7 +365,7 @@ void unblock_signals(void)
/* Re-enable signals and trace that we're doing so. */
um_trace_signals_on();
- signals_enabled = 1;
+ __unblock_signals();
}
}
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 820846ff7179..d6c22f8aa06d 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -546,7 +546,7 @@ extern unsigned long tt_extra_sched_jiffies;
void userspace(struct uml_pt_regs *regs)
{
int err, status, op;
- siginfo_t si_ptrace;
+ siginfo_t si_local;
siginfo_t *si;
int sig;
@@ -557,6 +557,13 @@ void userspace(struct uml_pt_regs *regs)
struct mm_id *mm_id = current_mm_id();
/*
+ * At any given time, only one CPU thread can enter the
+ * turnstile to operate on the same stub process, including
+ * executing stub system calls (mmap and munmap).
+ */
+ enter_turnstile(mm_id);
+
+ /*
* When we are in time-travel mode, userspace can theoretically
* do a *lot* of work without being scheduled. The problem with
* this is that it will prevent kernel bookkeeping (primarily
@@ -623,9 +630,10 @@ void userspace(struct uml_pt_regs *regs)
}
if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si))
- panic("%s - Invalid siginfo offset from child",
- __func__);
- si = (void *)&proc_data->sigstack[proc_data->si_offset];
+ panic("%s - Invalid siginfo offset from child", __func__);
+
+ si = &si_local;
+ memcpy(si, &proc_data->sigstack[proc_data->si_offset], sizeof(*si));
regs->is_user = 1;
@@ -721,8 +729,8 @@ void userspace(struct uml_pt_regs *regs)
case SIGFPE:
case SIGWINCH:
ptrace(PTRACE_GETSIGINFO, pid, 0,
- (struct siginfo *)&si_ptrace);
- si = &si_ptrace;
+ (struct siginfo *)&si_local);
+ si = &si_local;
break;
default:
si = NULL;
@@ -733,6 +741,8 @@ void userspace(struct uml_pt_regs *regs)
}
}
+ exit_turnstile(mm_id);
+
UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
if (sig) {
@@ -802,10 +812,9 @@ void switch_threads(jmp_buf *me, jmp_buf *you)
static jmp_buf initial_jmpbuf;
-/* XXX Make these percpu */
-static void (*cb_proc)(void *arg);
-static void *cb_arg;
-static jmp_buf *cb_back;
+static __thread void (*cb_proc)(void *arg);
+static __thread void *cb_arg;
+static __thread jmp_buf *cb_back;
int start_idle_thread(void *stack, jmp_buf *switch_buf)
{
@@ -859,10 +868,10 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg)
cb_arg = arg;
cb_back = &here;
- block_signals_trace();
+ initial_jmpbuf_lock();
if (UML_SETJMP(&here) == 0)
UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK);
- unblock_signals_trace();
+ initial_jmpbuf_unlock();
cb_proc = NULL;
cb_arg = NULL;
@@ -871,8 +880,9 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg)
void halt_skas(void)
{
- block_signals_trace();
+ initial_jmpbuf_lock();
UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT);
+ /* unreachable */
}
static bool noreboot;
@@ -892,6 +902,7 @@ __uml_setup("noreboot", noreboot_cmd_param,
void reboot_skas(void)
{
- block_signals_trace();
+ initial_jmpbuf_lock();
UML_LONGJMP(&initial_jmpbuf, noreboot ? INIT_JMP_HALT : INIT_JMP_REBOOT);
+ /* unreachable */
}
diff --git a/arch/um/os-Linux/smp.c b/arch/um/os-Linux/smp.c
new file mode 100644
index 000000000000..18d3858a7cd2
--- /dev/null
+++ b/arch/um/os-Linux/smp.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Ant Group
+ * Author: Tiwei Bie <tiwei.btw@antgroup.com>
+ */
+
+#include <errno.h>
+#include <pthread.h>
+#include <signal.h>
+#include <kern_util.h>
+#include <um_malloc.h>
+#include <init.h>
+#include <os.h>
+#include <smp.h>
+#include "internal.h"
+
+struct cpu_thread_data { /* startup arguments handed to a new CPU thread */
+ int cpu; /* CPU index; cpu_thread() copies it into __curr_cpu */
+ sigset_t sigset; /* signal mask os_start_cpu_thread() had before it blocked everything */
+};
+
+static __thread int __curr_cpu; /* per-thread CPU index; written only in cpu_thread(), otherwise stays 0 */
+
+int uml_curr_cpu(void) /* return the calling thread's CPU index */
+{
+ return __curr_cpu;
+}
+
+static pthread_t cpu_threads[CONFIG_NR_CPUS]; /* pthread handle per CPU, indexed by CPU number */
+
+static void *cpu_thread(void *arg) /* pthread start routine used by os_start_cpu_thread() */
+{
+ struct cpu_thread_data *data = arg;
+
+ __curr_cpu = data->cpu; /* record which CPU this thread represents */
+
+ uml_start_secondary(data); /* defined elsewhere; presumably leads to os_start_secondary(), which frees data - confirm */
+
+ return NULL;
+}
+
+int os_start_cpu_thread(int cpu) /* create the pthread for cpu; returns 0 or a negative errno value */
+{
+ struct cpu_thread_data *data;
+ sigset_t sigset, oset;
+ int err;
+
+ data = uml_kmalloc(sizeof(*data), UM_GFP_ATOMIC);
+ if (!data)
+ return -ENOMEM;
+
+ sigfillset(&sigset); /* block every signal around pthread_create(); the new thread inherits this mask */
+ if (sigprocmask(SIG_SETMASK, &sigset, &oset) < 0) {
+ err = errno;
+ goto err;
+ }
+
+ data->cpu = cpu;
+ data->sigset = oset; /* pre-block mask, applied by os_start_secondary() */
+
+ err = pthread_create(&cpu_threads[cpu], NULL, cpu_thread, data);
+ if (sigprocmask(SIG_SETMASK, &oset, NULL) < 0) /* restore our mask whether or not the create succeeded */
+ panic("Failed to restore the signal mask, errno = %d", errno);
+ if (err != 0) /* pthread_create() reports failure as a positive error number */
+ goto err;
+
+ return 0; /* data is not freed here; it was handed to cpu_thread() */
+
+err:
+ kfree(data);
+ return -err;
+}
+
+void os_start_secondary(void *arg, jmp_buf *switch_buf) /* apply the saved mask, free arg, longjmp to *switch_buf */
+{
+ struct cpu_thread_data *data = arg; /* assumes arg is the data built by os_start_cpu_thread() - confirm */
+
+ sigaddset(&data->sigset, IPI_SIGNAL); /* IPI_SIGNAL and SIGIO stay blocked ... */
+ sigaddset(&data->sigset, SIGIO); /* ... in addition to whatever the saved mask blocks */
+
+ if (sigprocmask(SIG_SETMASK, &data->sigset, NULL) < 0)
+ panic("Failed to restore the signal mask, errno = %d", errno);
+
+ kfree(data); /* last use of the startup arguments */
+ longjmp(*switch_buf, 1);
+
+ /* unreachable */
+ printk(UM_KERN_ERR "impossible long jump!");
+ fatal_sigsegv();
+}
+
+int os_send_ipi(int cpu, int vector) /* queue IPI_SIGNAL carrying vector to the thread of cpu */
+{
+ union sigval value = { .sival_int = vector }; /* read back as si_value.sival_int in ipi_sig_handler() */
+
+ return pthread_sigqueue(cpu_threads[cpu], IPI_SIGNAL, value); /* pthread_sigqueue() result is returned unchanged */
+}
+
+static void __local_ipi_set(int enable) /* unblock IPI_SIGNAL if enable is nonzero, otherwise block it */
+{
+ sigset_t sigset;
+
+ sigemptyset(&sigset);
+ sigaddset(&sigset, IPI_SIGNAL); /* the set holds IPI_SIGNAL only */
+
+ if (sigprocmask(enable ? SIG_UNBLOCK : SIG_BLOCK, &sigset, NULL) < 0)
+ panic("%s: sigprocmask failed, errno = %d", __func__, errno);
+}
+
+void os_local_ipi_enable(void) /* unblock IPI_SIGNAL; called from __unblock_signals() */
+{
+ __local_ipi_set(1);
+}
+
+void os_local_ipi_disable(void) /* block IPI_SIGNAL; called from __block_signals() */
+{
+ __local_ipi_set(0);
+}
+
+static void ipi_sig_handler(int sig, siginfo_t *si, void *uc) /* IPI_SIGNAL handler; sig and uc are not used */
+{
+ int save_errno = errno; /* put back on exit so the interrupted code keeps its errno */
+
+ signals_enabled = 0; /* the IPI runs with signals marked disabled */
+ um_trace_signals_off();
+
+ uml_ipi_handler(si->si_value.sival_int); /* the vector queued by os_send_ipi() */
+
+ um_trace_signals_on();
+ signals_enabled = 1; /* set to 1 unconditionally, not to a saved value */
+
+ errno = save_errno;
+}
+
+void __init os_init_smp(void) /* install the IPI_SIGNAL handler and record the caller as CPU 0 */
+{
+ struct sigaction action = {
+ .sa_sigaction = ipi_sig_handler,
+ .sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART,
+ };
+
+ sigfillset(&action.sa_mask); /* every signal is blocked while the handler runs */
+
+ if (sigaction(IPI_SIGNAL, &action, NULL) < 0)
+ panic("%s: sigaction failed, errno = %d", __func__, errno);
+
+ cpu_threads[0] = pthread_self(); /* lets os_send_ipi(0, ...) target this thread */
+}
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 8b19dca83f71..054ac03bbf5e 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -22,6 +22,7 @@
#include <asm/unistd.h>
#include <init.h>
#include <os.h>
+#include <smp.h>
#include <kern_util.h>
#include <mem_user.h>
#include <ptrace_user.h>
@@ -481,6 +482,9 @@ void __init os_early_checks(void)
fatal("SECCOMP userspace requested but not functional!\n");
}
+ if (uml_ncpus > 1)
+ fatal("SMP is not supported with PTRACE userspace.\n");
+
using_seccomp = 0;
check_ptrace();
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index e0197bfe4ac9..13ebc86918d4 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -11,9 +11,11 @@
#include <errno.h>
#include <signal.h>
#include <time.h>
+#include <sys/signalfd.h>
#include <sys/time.h>
#include <kern_util.h>
#include <os.h>
+#include <smp.h>
#include <string.h>
#include "internal.h"
@@ -41,7 +43,8 @@ long long os_persistent_clock_emulation(void)
*/
int os_timer_create(void)
{
- timer_t *t = &event_high_res_timer[0];
+ int cpu = uml_curr_cpu();
+ timer_t *t = &event_high_res_timer[cpu];
struct sigevent sev = {
.sigev_notify = SIGEV_THREAD_ID,
.sigev_signo = SIGALRM,
@@ -105,24 +108,49 @@ long long os_nsecs(void)
return timespec_to_ns(&ts);
}
+static __thread int wake_signals; /* per-thread signalfd; passed to os_poll() in os_idle_sleep() */
+
+void os_idle_prepare(void) /* create the calling thread's signalfd for SIGALRM and IPI_SIGNAL */
+{
+ sigset_t set;
+
+ sigemptyset(&set);
+ sigaddset(&set, SIGALRM);
+ sigaddset(&set, IPI_SIGNAL);
+
+ /*
+ * We need to use signalfd rather than sigsuspend in idle sleep
+ * because the IPI signal is a real-time signal that carries data,
+ * and unlike handling SIGALRM, we cannot simply flag it in
+ * signals_pending.
+ */
+ wake_signals = signalfd(-1, &set, SFD_CLOEXEC); /* -1 asks for a new descriptor */
+ if (wake_signals < 0)
+ panic("Failed to create signal FD, errno = %d", errno);
+}
+
/**
* os_idle_sleep() - sleep until interrupted
*/
void os_idle_sleep(void)
{
- sigset_t set, old;
+ sigset_t set;
- /* Block SIGALRM while performing the need_resched check. */
+ /*
+ * Block SIGALRM while performing the need_resched check.
+ * Note that, because IRQs are disabled, the IPI signal is
+ * already blocked.
+ */
sigemptyset(&set);
sigaddset(&set, SIGALRM);
- sigprocmask(SIG_BLOCK, &set, &old);
+ sigprocmask(SIG_BLOCK, &set, NULL);
/*
* Because disabling IRQs does not block SIGALRM, it is also
* necessary to check for any pending timer alarms.
*/
if (!uml_need_resched() && !timer_alarm_pending())
- sigsuspend(&old);
+ os_poll(1, &wake_signals);
/* Restore the signal mask. */
sigprocmask(SIG_UNBLOCK, &set, NULL);