diff options
author | Will Drewry <wad@chromium.org> | 2012-04-12 16:48:04 -0500 |
---|---|---|
committer | James Morris <james.l.morris@oracle.com> | 2012-04-14 11:13:22 +1000 |
commit | 8ac270d1e29f0428228ab2b9a8ae5e1ed4a5cd84 (patch) | |
tree | 6deba4ed83da9ace758004b29d15aa0d2ec875a7 /samples/seccomp/bpf-direct.c | |
parent | c6cfbeb4029610c8c330c312dcf4d514cc067554 (diff) |
Documentation: prctl/seccomp_filter
Documents how system call filtering using Berkeley Packet
Filter programs works and how it may be used.
Includes an example for x86 and a semi-generic
example using a macro-based code generator.
Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Will Drewry <wad@chromium.org>
Acked-by: Kees Cook <keescook@chromium.org>
v18: - added acked by
- update no new privs numbers
v17: - remove @compat note and add Pitfalls section for arch checking
(keescook@chromium.org)
v16: -
v15: -
v14: - rebase/nochanges
v13: - rebase on to 88ebdda6159ffc15699f204c33feb3e431bf9bdc
v12: - comment on the ptrace_event use
- update arch support comment
- note the behavior of SECCOMP_RET_DATA when there are multiple filters
(keescook@chromium.org)
- lots of samples/ clean up incl 64-bit bpf-direct support
(markus@chromium.org)
- rebase to linux-next
v11: - overhaul return value language, updates (keescook@chromium.org)
- comment on do_exit(SIGSYS)
v10: - update for SIGSYS
- update for new seccomp_data layout
- update for ptrace option use
v9: - updated bpf-direct.c for SIGILL
v8: - add PR_SET_NO_NEW_PRIVS to the samples.
v7: - updated for all the new stuff in v7: TRAP, TRACE
- only talk about PR_SET_SECCOMP now
- fixed bad JLE32 check (coreyb@linux.vnet.ibm.com)
- adds dropper.c: a simple system call disabler
v6: - tweak the language to note the requirement of
PR_SET_NO_NEW_PRIVS being called prior to use. (luto@mit.edu)
v5: - update sample to use system call arguments
- adds a "fancy" example using a macro-based generator
- cleaned up bpf in the sample
- update docs to mention arguments
- fix prctl value (eparis@redhat.com)
- language cleanup (rdunlap@xenotime.net)
v4: - update for no_new_privs use
- minor tweaks
v3: - call out BPF <-> Berkeley Packet Filter (rdunlap@xenotime.net)
- document use of tentative always-unprivileged
- guard sample compilation for i386 and x86_64
v2: - move code to samples (corbet@lwn.net)
Signed-off-by: James Morris <james.l.morris@oracle.com>
Diffstat (limited to 'samples/seccomp/bpf-direct.c')
-rw-r--r-- | samples/seccomp/bpf-direct.c | 176 |
1 files changed, 176 insertions, 0 deletions
diff --git a/samples/seccomp/bpf-direct.c b/samples/seccomp/bpf-direct.c new file mode 100644 index 000000000000..26f523e6ed74 --- /dev/null +++ b/samples/seccomp/bpf-direct.c @@ -0,0 +1,176 @@ +/* + * Seccomp filter example for x86 (32-bit and 64-bit) with BPF macros + * + * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org> + * Author: Will Drewry <wad@chromium.org> + * + * The code may be used by anyone for any purpose, + * and can serve as a starting point for developing + * applications using prctl(PR_SET_SECCOMP, 2, ...). + */ +#define __USE_GNU 1 +#define _GNU_SOURCE 1 + +#include <linux/types.h> +#include <linux/filter.h> +#include <linux/seccomp.h> +#include <linux/unistd.h> +#include <signal.h> +#include <stdio.h> +#include <stddef.h> +#include <string.h> +#include <sys/prctl.h> +#include <unistd.h> + +#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) +#define syscall_nr (offsetof(struct seccomp_data, nr)) + +#if defined(__i386__) +#define REG_RESULT REG_EAX +#define REG_SYSCALL REG_EAX +#define REG_ARG0 REG_EBX +#define REG_ARG1 REG_ECX +#define REG_ARG2 REG_EDX +#define REG_ARG3 REG_ESI +#define REG_ARG4 REG_EDI +#define REG_ARG5 REG_EBP +#elif defined(__x86_64__) +#define REG_RESULT REG_RAX +#define REG_SYSCALL REG_RAX +#define REG_ARG0 REG_RDI +#define REG_ARG1 REG_RSI +#define REG_ARG2 REG_RDX +#define REG_ARG3 REG_R10 +#define REG_ARG4 REG_R8 +#define REG_ARG5 REG_R9 +#else +#error Unsupported platform +#endif + +#ifndef PR_SET_NO_NEW_PRIVS +#define PR_SET_NO_NEW_PRIVS 38 +#endif + +#ifndef SYS_SECCOMP +#define SYS_SECCOMP 1 +#endif + +static void emulator(int nr, siginfo_t *info, void *void_context) +{ + ucontext_t *ctx = (ucontext_t *)(void_context); + int syscall; + char *buf; + ssize_t bytes; + size_t len; + if (info->si_code != SYS_SECCOMP) + return; + if (!ctx) + return; + syscall = ctx->uc_mcontext.gregs[REG_SYSCALL]; + buf = (char *) ctx->uc_mcontext.gregs[REG_ARG1]; + len = (size_t) ctx->uc_mcontext.gregs[REG_ARG2]; + + if (syscall != __NR_write) + return; + if (ctx->uc_mcontext.gregs[REG_ARG0] != STDERR_FILENO) + return; + /* Redirect stderr messages to stdout. Doesn't handle EINTR, etc */ + ctx->uc_mcontext.gregs[REG_RESULT] = -1; + if (write(STDOUT_FILENO, "[ERR] ", 6) > 0) { + bytes = write(STDOUT_FILENO, buf, len); + ctx->uc_mcontext.gregs[REG_RESULT] = bytes; + } + return; +} + +static int install_emulator(void) +{ + struct sigaction act; + sigset_t mask; + memset(&act, 0, sizeof(act)); + sigemptyset(&mask); + sigaddset(&mask, SIGSYS); + + act.sa_sigaction = &emulator; + act.sa_flags = SA_SIGINFO; + if (sigaction(SIGSYS, &act, NULL) < 0) { + perror("sigaction"); + return -1; + } + if (sigprocmask(SIG_UNBLOCK, &mask, NULL)) { + perror("sigprocmask"); + return -1; + } + return 0; +} + +static int install_filter(void) +{ + struct sock_filter filter[] = { + /* Grab the system call number */ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_nr), + /* Jump table for the allowed syscalls */ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_rt_sigreturn, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), +#ifdef __NR_sigreturn + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_sigreturn, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), +#endif + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_exit_group, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_exit, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_read, 1, 0), + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_write, 3, 2), + + /* Check that read is only using stdin. */ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)), + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDIN_FILENO, 4, 0), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL), + + /* Check that write is only using stdout */ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)), + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDOUT_FILENO, 1, 0), + /* Trap attempts to write to stderr */ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDERR_FILENO, 1, 2), + + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL), + }; + struct sock_fprog prog = { + .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])), + .filter = filter, + }; + + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + perror("prctl(NO_NEW_PRIVS)"); + return 1; + } + + + if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { + perror("prctl"); + return 1; + } + return 0; +} + +#define payload(_c) (_c), sizeof((_c)) +int main(int argc, char **argv) +{ + char buf[4096]; + ssize_t bytes = 0; + if (install_emulator()) + return 1; + if (install_filter()) + return 1; + syscall(__NR_write, STDOUT_FILENO, + payload("OHAI! WHAT IS YOUR NAME? ")); + bytes = syscall(__NR_read, STDIN_FILENO, buf, sizeof(buf)); + syscall(__NR_write, STDOUT_FILENO, payload("HELLO, ")); + syscall(__NR_write, STDOUT_FILENO, buf, bytes); + syscall(__NR_write, STDERR_FILENO, + payload("Error message going to STDERR\n")); + return 0; +} |