summary | refs | log | tree | commit | diff
path: root/arch/um/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/um/kernel')
-rw-r--r-- arch/um/kernel/skas/mmu.c 3
-rw-r--r-- arch/um/kernel/skas/stub.c 87
-rw-r--r-- arch/um/kernel/skas/stub_exe.c 40
3 files changed, 111 insertions, 19 deletions
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 87a18ae4da19..849fafa4b54f 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -78,6 +78,9 @@ void destroy_context(struct mm_struct *mm)
mmu->id.pid = -1;
}
+ if (using_seccomp && mmu->id.sock)
+ os_close_file(mmu->id.sock);
+
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
guard(spinlock_irqsave)(&mm_list_lock);
diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c
index 9041f6b6e28b..67cab46a602c 100644
--- a/arch/um/kernel/skas/stub.c
+++ b/arch/um/kernel/skas/stub.c
@@ -6,23 +6,53 @@
#include <sysdep/stub.h>
#include <linux/futex.h>
+#include <sys/socket.h>
#include <errno.h>
-static __always_inline int syscall_handler(struct stub_data *d)
+/*
+ * Known security issues
+ *
+ * Userspace can jump to this address to execute *any* syscall that is
+ * permitted by the stub. As we will return afterwards, it can do
+ * whatever it likes, including:
+ * - Tricking the kernel into handing out the memory FD
+ * - Using this memory FD to read/write all physical memory
+ * - Running in parallel to the kernel processing a syscall
+ * (possibly creating data races?)
+ * - Blocking e.g. SIGALRM to avoid time based scheduling
+ *
+ * To avoid this, the permitted location for each syscall needs to be
+ * checked in the SECCOMP filter (which is reasonably simple). Also,
+ * more care will need to go into considering how the code might be
+ * tricked by using a prepared stack (or even by modifying the stack
+ * from another thread in case SMP support is added).
+ *
+ * As for SIGALRM, the best countermeasure will be to check in the
+ * kernel that the process is reporting back the SIGALRM in a timely
+ * fashion.
+ */
+static __always_inline int syscall_handler(int fd_map[STUB_MAX_FDS])
{
+ struct stub_data *d = get_stub_data();
int i;
unsigned long res;
+ int fd;
for (i = 0; i < d->syscall_data_len; i++) {
struct stub_syscall *sc = &d->syscall_data[i];
switch (sc->syscall) {
case STUB_SYSCALL_MMAP:
+ if (fd_map)
+ fd = fd_map[sc->mem.fd];
+ else
+ fd = sc->mem.fd;
+
res = stub_syscall6(STUB_MMAP_NR,
sc->mem.addr, sc->mem.length,
sc->mem.prot,
MAP_SHARED | MAP_FIXED,
- sc->mem.fd, sc->mem.offset);
+ fd, sc->mem.offset);
if (res != sc->mem.addr) {
d->err = res;
d->syscall_data_len = i;
@@ -54,9 +84,7 @@ static __always_inline int syscall_handler(struct stub_data *d)
void __section(".__syscall_stub")
stub_syscall_handler(void)
{
- struct stub_data *d = get_stub_data();
-
- syscall_handler(d);
+ syscall_handler(NULL);
trap_myself();
}
@@ -65,7 +93,25 @@ void __section(".__syscall_stub")
stub_signal_interrupt(int sig, siginfo_t *info, void *p)
{
struct stub_data *d = get_stub_data();
+ char rcv_data;
+ union {
+ char data[CMSG_SPACE(sizeof(int) * STUB_MAX_FDS)];
+ struct cmsghdr align;
+ } ctrl = {};
+ struct iovec iov = {
+ .iov_base = &rcv_data,
+ .iov_len = 1,
+ };
+ struct msghdr msghdr = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &ctrl,
+ .msg_controllen = sizeof(ctrl),
+ };
ucontext_t *uc = p;
+ struct cmsghdr *fd_msg;
+ int *fd_map;
+ int num_fds;
long res;
d->signal = sig;
@@ -78,6 +124,7 @@ restart_wait:
res = stub_syscall3(__NR_futex, (unsigned long)&d->futex,
FUTEX_WAKE, 1);
} while (res == -EINTR);
+
do {
res = stub_syscall4(__NR_futex, (unsigned long)&d->futex,
FUTEX_WAIT, FUTEX_IN_KERN, 0);
@@ -86,11 +133,37 @@ restart_wait:
if (res < 0 && res != -EAGAIN)
stub_syscall1(__NR_exit_group, 1);
- /* Try running queued syscalls. */
- if (syscall_handler(d) < 0 || d->restart_wait) {
+ if (d->syscall_data_len) {
+ /* Read passed FDs (if any) */
+ do {
+ res = stub_syscall3(__NR_recvmsg, 0, (unsigned long)&msghdr, 0);
+ } while (res == -EINTR);
+
+ /* We should never have a receive error (other than -EAGAIN) */
+ if (res < 0 && res != -EAGAIN)
+ stub_syscall1(__NR_exit_group, 1);
+
+ /* Receive the FDs */
+ num_fds = 0;
+ fd_msg = msghdr.msg_control;
+ fd_map = (void *)&CMSG_DATA(fd_msg);
+ if (res == iov.iov_len && msghdr.msg_controllen > sizeof(struct cmsghdr))
+ num_fds = (fd_msg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+
+ /* Try running queued syscalls. */
+ res = syscall_handler(fd_map);
+
+ while (num_fds)
+ stub_syscall2(__NR_close, fd_map[--num_fds], 0);
+ } else {
+ res = 0;
+ }
+
+ if (res < 0 || d->restart_wait) {
/* Report SIGSYS if we restart. */
d->signal = SIGSYS;
d->restart_wait = 0;
+
goto restart_wait;
}
diff --git a/arch/um/kernel/skas/stub_exe.c b/arch/um/kernel/skas/stub_exe.c
index f40f2332b676..cbafaa684e66 100644
--- a/arch/um/kernel/skas/stub_exe.c
+++ b/arch/um/kernel/skas/stub_exe.c
@@ -1,5 +1,6 @@
#include <sys/ptrace.h>
#include <sys/prctl.h>
+#include <sys/fcntl.h>
#include <asm/unistd.h>
#include <sysdep/stub.h>
#include <stub-data.h>
@@ -45,7 +46,11 @@ noinline static void real_init(void)
if (res != sizeof(init_data))
stub_syscall1(__NR_exit, 10);
- stub_syscall1(__NR_close, 0);
+ /* In SECCOMP mode, FD 0 is a socket and is later used for FD passing */
+ if (!init_data.seccomp)
+ stub_syscall1(__NR_close, 0);
+ else
+ stub_syscall3(__NR_fcntl, 0, F_SETFL, O_NONBLOCK);
/* map stub code + data */
res = stub_syscall6(STUB_MMAP_NR,
@@ -63,6 +68,13 @@ noinline static void real_init(void)
if (res != init_data.stub_start + UM_KERN_PAGE_SIZE)
stub_syscall1(__NR_exit, 12);
+ /* In SECCOMP mode, we only need the signalling FD from now on */
+ if (init_data.seccomp) {
+ res = stub_syscall3(__NR_close_range, 1, ~0U, 0);
+ if (res != 0)
+ stub_syscall1(__NR_exit, 13);
+ }
+
/* setup signal stack inside stub data */
stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE;
stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0);
@@ -77,7 +89,7 @@ noinline static void real_init(void)
res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
if (res != 0)
- stub_syscall1(__NR_exit, 13);
+ stub_syscall1(__NR_exit, 14);
} else {
/* SECCOMP mode uses rt_sigreturn, need to mask all signals */
sa.sa_mask = ~0ULL;
@@ -85,32 +97,32 @@ noinline static void real_init(void)
res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
if (res != 0)
- stub_syscall1(__NR_exit, 14);
+ stub_syscall1(__NR_exit, 15);
res = stub_syscall4(__NR_rt_sigaction, SIGSYS,
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
if (res != 0)
- stub_syscall1(__NR_exit, 15);
+ stub_syscall1(__NR_exit, 16);
res = stub_syscall4(__NR_rt_sigaction, SIGALRM,
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
if (res != 0)
- stub_syscall1(__NR_exit, 16);
+ stub_syscall1(__NR_exit, 17);
res = stub_syscall4(__NR_rt_sigaction, SIGTRAP,
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
if (res != 0)
- stub_syscall1(__NR_exit, 17);
+ stub_syscall1(__NR_exit, 18);
res = stub_syscall4(__NR_rt_sigaction, SIGILL,
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
if (res != 0)
- stub_syscall1(__NR_exit, 18);
+ stub_syscall1(__NR_exit, 19);
res = stub_syscall4(__NR_rt_sigaction, SIGFPE,
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
if (res != 0)
- stub_syscall1(__NR_exit, 19);
+ stub_syscall1(__NR_exit, 20);
}
/*
@@ -153,8 +165,12 @@ noinline static void real_init(void)
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
offsetof(struct seccomp_data, nr)),
- /* [10-14] Check against permitted syscalls */
+ /* [10-16] Check against permitted syscalls */
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex,
+ 7, 0),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_recvmsg,
+ 6, 0),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_close,
5, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR,
4, 0),
@@ -170,10 +186,10 @@ noinline static void real_init(void)
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn,
1, 0),
- /* [15] Not one of the permitted syscalls */
+ /* [17] Not one of the permitted syscalls */
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
- /* [16] Permitted call for the stub */
+ /* [18] Permitted call for the stub */
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog = {
@@ -184,7 +200,7 @@ noinline static void real_init(void)
if (stub_syscall3(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
SECCOMP_FILTER_FLAG_TSYNC,
(unsigned long)&prog) != 0)
- stub_syscall1(__NR_exit, 20);
+ stub_syscall1(__NR_exit, 21);
/* Fall through, the exit syscall will cause SIGSYS */
} else {