diff options
Diffstat (limited to 'arch/um/kernel')
| -rw-r--r-- | arch/um/kernel/skas/mmu.c | 3 | ||||
| -rw-r--r-- | arch/um/kernel/skas/stub.c | 87 | ||||
| -rw-r--r-- | arch/um/kernel/skas/stub_exe.c | 40 |
3 files changed, 111 insertions, 19 deletions
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 87a18ae4da19..849fafa4b54f 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -78,6 +78,9 @@ void destroy_context(struct mm_struct *mm) mmu->id.pid = -1; } + if (using_seccomp && mmu->id.sock) + os_close_file(mmu->id.sock); + free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES)); guard(spinlock_irqsave)(&mm_list_lock); diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c index 9041f6b6e28b..67cab46a602c 100644 --- a/arch/um/kernel/skas/stub.c +++ b/arch/um/kernel/skas/stub.c @@ -6,23 +6,53 @@ #include <sysdep/stub.h> #include <linux/futex.h> +#include <sys/socket.h> #include <errno.h> -static __always_inline int syscall_handler(struct stub_data *d) +/* + * Known security issues + * + * Userspace can jump to this address to execute *any* syscall that is + * permitted by the stub. As we will return afterwards, it can do + * whatever it likes, including: + * - Tricking the kernel into handing out the memory FD + * - Using this memory FD to read/write all physical memory + * - Running in parallel to the kernel processing a syscall + * (possibly creating data races?) + * - Blocking e.g. SIGALRM to avoid time based scheduling + * + * To avoid this, the permitted location for each syscall needs to be + * checked for in the SECCOMP filter (which is reasonably simple). Also, + * more care will need to go into considerations how the code might be + * tricked by using a prepared stack (or even modifying the stack from + * another thread in case SMP support is added). + * + * As for the SIGALRM, the best counter measure will be to check in the + * kernel that the process is reporting back the SIGALRM in a timely + * fashion. + */ +static __always_inline int syscall_handler(int fd_map[STUB_MAX_FDS]) { + struct stub_data *d = get_stub_data(); int i; unsigned long res; + int fd; for (i = 0; i < d->syscall_data_len; i++) { struct stub_syscall *sc = &d->syscall_data[i]; switch (sc->syscall) { case STUB_SYSCALL_MMAP: + if (fd_map) + fd = fd_map[sc->mem.fd]; + else + fd = sc->mem.fd; + res = stub_syscall6(STUB_MMAP_NR, sc->mem.addr, sc->mem.length, sc->mem.prot, MAP_SHARED | MAP_FIXED, - sc->mem.fd, sc->mem.offset); + fd, sc->mem.offset); if (res != sc->mem.addr) { d->err = res; d->syscall_data_len = i; @@ -54,9 +84,7 @@ static __always_inline int syscall_handler(struct stub_data *d) void __section(".__syscall_stub") stub_syscall_handler(void) { - struct stub_data *d = get_stub_data(); - - syscall_handler(d); + syscall_handler(NULL); trap_myself(); } @@ -65,7 +93,25 @@ void __section(".__syscall_stub") stub_signal_interrupt(int sig, siginfo_t *info, void *p) { struct stub_data *d = get_stub_data(); + char rcv_data; + union { + char data[CMSG_SPACE(sizeof(int) * STUB_MAX_FDS)]; + struct cmsghdr align; + } ctrl = {}; + struct iovec iov = { + .iov_base = &rcv_data, + .iov_len = 1, + }; + struct msghdr msghdr = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = &ctrl, + .msg_controllen = sizeof(ctrl), + }; ucontext_t *uc = p; + struct cmsghdr *fd_msg; + int *fd_map; + int num_fds; long res; d->signal = sig; @@ -78,6 +124,7 @@ restart_wait: res = stub_syscall3(__NR_futex, (unsigned long)&d->futex, FUTEX_WAKE, 1); } while (res == -EINTR); + do { res = stub_syscall4(__NR_futex, (unsigned long)&d->futex, FUTEX_WAIT, FUTEX_IN_KERN, 0); @@ -86,11 +133,37 @@ restart_wait: if (res < 0 && res != -EAGAIN) stub_syscall1(__NR_exit_group, 1); - /* Try running queued syscalls. */ - if (syscall_handler(d) < 0 || d->restart_wait) { + if (d->syscall_data_len) { + /* Read passed FDs (if any) */ + do { + res = stub_syscall3(__NR_recvmsg, 0, (unsigned long)&msghdr, 0); + } while (res == -EINTR); + + /* We should never have a receive error (other than -EAGAIN) */ + if (res < 0 && res != -EAGAIN) + stub_syscall1(__NR_exit_group, 1); + + /* Receive the FDs */ + num_fds = 0; + fd_msg = msghdr.msg_control; + fd_map = (void *)&CMSG_DATA(fd_msg); + if (res == iov.iov_len && msghdr.msg_controllen > sizeof(struct cmsghdr)) + num_fds = (fd_msg->cmsg_len - CMSG_LEN(0)) / sizeof(int); + + /* Try running queued syscalls. */ + res = syscall_handler(fd_map); + + while (num_fds) + stub_syscall2(__NR_close, fd_map[--num_fds], 0); + } else { + res = 0; + } + + if (res < 0 || d->restart_wait) { /* Report SIGSYS if we restart. */ d->signal = SIGSYS; d->restart_wait = 0; + goto restart_wait; } diff --git a/arch/um/kernel/skas/stub_exe.c b/arch/um/kernel/skas/stub_exe.c index f40f2332b676..cbafaa684e66 100644 --- a/arch/um/kernel/skas/stub_exe.c +++ b/arch/um/kernel/skas/stub_exe.c @@ -1,5 +1,6 @@ #include <sys/ptrace.h> #include <sys/prctl.h> +#include <sys/fcntl.h> #include <asm/unistd.h> #include <sysdep/stub.h> #include <stub-data.h> @@ -45,7 +46,11 @@ noinline static void real_init(void) if (res != sizeof(init_data)) stub_syscall1(__NR_exit, 10); - stub_syscall1(__NR_close, 0); + /* In SECCOMP mode, FD 0 is a socket and is later used for FD passing */ + if (!init_data.seccomp) + stub_syscall1(__NR_close, 0); + else + stub_syscall3(__NR_fcntl, 0, F_SETFL, O_NONBLOCK); /* map stub code + data */ res = stub_syscall6(STUB_MMAP_NR, @@ -63,6 +68,13 @@ noinline static void real_init(void) if (res != init_data.stub_start + UM_KERN_PAGE_SIZE) stub_syscall1(__NR_exit, 12); + /* In SECCOMP mode, we only need the signalling FD from now on */ + if (init_data.seccomp) { + res = stub_syscall3(__NR_close_range, 1, ~0U, 0); + if (res != 0) + stub_syscall1(__NR_exit, 13); + } + /* setup signal stack inside stub data */ stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE; stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0); @@ -77,7 +89,7 @@ noinline static void real_init(void) res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0, sizeof(sa.sa_mask)); if (res != 0) - stub_syscall1(__NR_exit, 13); + stub_syscall1(__NR_exit, 14); } else { /* SECCOMP mode uses rt_sigreturn, need to mask all signals */ sa.sa_mask = ~0ULL; @@ -85,32 +97,32 @@ noinline static void real_init(void) res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0, sizeof(sa.sa_mask)); if (res != 0) - stub_syscall1(__NR_exit, 14); + stub_syscall1(__NR_exit, 15); res = stub_syscall4(__NR_rt_sigaction, SIGSYS, (unsigned long)&sa, 0, sizeof(sa.sa_mask)); if (res != 0) - stub_syscall1(__NR_exit, 15); + stub_syscall1(__NR_exit, 16); res = stub_syscall4(__NR_rt_sigaction, SIGALRM, (unsigned long)&sa, 0, sizeof(sa.sa_mask)); if (res != 0) - stub_syscall1(__NR_exit, 16); + stub_syscall1(__NR_exit, 17); res = stub_syscall4(__NR_rt_sigaction, SIGTRAP, (unsigned long)&sa, 0, sizeof(sa.sa_mask)); if (res != 0) - stub_syscall1(__NR_exit, 17); + stub_syscall1(__NR_exit, 18); res = stub_syscall4(__NR_rt_sigaction, SIGILL, (unsigned long)&sa, 0, sizeof(sa.sa_mask)); if (res != 0) - stub_syscall1(__NR_exit, 18); + stub_syscall1(__NR_exit, 19); res = stub_syscall4(__NR_rt_sigaction, SIGFPE, (unsigned long)&sa, 0, sizeof(sa.sa_mask)); if (res != 0) - stub_syscall1(__NR_exit, 19); + stub_syscall1(__NR_exit, 20); } /* @@ -153,8 +165,12 @@ noinline static void real_init(void) BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)), - /* [10-14] Check against permitted syscalls */ + /* [10-16] Check against permitted syscalls */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex, + 7, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_recvmsg, + 6, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_close, 5, 0), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR, 4, 0), @@ -170,10 +186,10 @@ noinline static void real_init(void) BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn, 1, 0), - /* [15] Not one of the permitted syscalls */ + /* [17] Not one of the permitted syscalls */ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS), - /* [16] Permitted call for the stub */ + /* [18] Permitted call for the stub */ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), }; struct sock_fprog prog = { @@ -184,7 +200,7 @@ noinline static void real_init(void) if (stub_syscall3(__NR_seccomp, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, (unsigned long)&prog) != 0) - stub_syscall1(__NR_exit, 20); + stub_syscall1(__NR_exit, 21); /* Fall through, the exit syscall will cause SIGSYS */ } else { |
