diff options
-rw-r--r-- | Documentation/networking/filter.txt | 2 | ||||
-rw-r--r-- | Documentation/sysctl/kernel.txt | 1 | ||||
-rw-r--r-- | Documentation/userspace-api/seccomp_filter.rst | 52 | ||||
-rw-r--r-- | include/linux/audit.h | 6 | ||||
-rw-r--r-- | include/linux/seccomp.h | 3 | ||||
-rw-r--r-- | include/uapi/linux/seccomp.h | 23 | ||||
-rw-r--r-- | kernel/seccomp.c | 321 | ||||
-rw-r--r-- | tools/testing/selftests/seccomp/Makefile | 18 | ||||
-rw-r--r-- | tools/testing/selftests/seccomp/seccomp_benchmark.c | 99 | ||||
-rw-r--r-- | tools/testing/selftests/seccomp/seccomp_bpf.c | 610 |
10 files changed, 1006 insertions, 129 deletions
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 789b74dbe1d9..87814859cfc2 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -337,7 +337,7 @@ Examples for low-level BPF: jeq #14, good /* __NR_rt_sigprocmask */ jeq #13, good /* __NR_rt_sigaction */ jeq #35, good /* __NR_nanosleep */ - bad: ret #0 /* SECCOMP_RET_KILL */ + bad: ret #0 /* SECCOMP_RET_KILL_THREAD */ good: ret #0x7fff0000 /* SECCOMP_RET_ALLOW */ The above example code can be placed into a file (here called "foo"), and diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index ce61d1fe08ca..694968c7523c 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -75,6 +75,7 @@ show up in /proc/sys/kernel: - reboot-cmd [ SPARC only ] - rtsig-max - rtsig-nr +- seccomp/ ==> Documentation/userspace-api/seccomp_filter.rst - sem - sem_next_id [ sysv ipc ] - sg-big-buff [ generic SCSI device (sg) ] diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index f71eb5ef1f2d..099c412951d6 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -87,11 +87,16 @@ Return values A seccomp filter may return any of the following values. If multiple filters exist, the return value for the evaluation of a given system call will always use the highest precedent value. (For example, -``SECCOMP_RET_KILL`` will always take precedence.) +``SECCOMP_RET_KILL_PROCESS`` will always take precedence.) In precedence order, they are: -``SECCOMP_RET_KILL``: +``SECCOMP_RET_KILL_PROCESS``: + Results in the entire process exiting immediately without executing + the system call. The exit status of the task (``status & 0x7f``) + will be ``SIGSYS``, not ``SIGKILL``. + +``SECCOMP_RET_KILL_THREAD``: Results in the task exiting immediately without executing the system call. The exit status of the task (``status & 0x7f``) will be ``SIGSYS``, not ``SIGKILL``. @@ -141,6 +146,15 @@ In precedence order, they are: allow use of ptrace, even of other sandboxed processes, without extreme care; ptracers can use this mechanism to escape.) +``SECCOMP_RET_LOG``: + Results in the system call being executed after it is logged. This + should be used by application developers to learn which syscalls their + application needs without having to iterate through multiple test and + development cycles to build the list. + + This action will only be logged if "log" is present in the + actions_logged sysctl string. + ``SECCOMP_RET_ALLOW``: Results in the system call being executed. @@ -169,7 +183,41 @@ The ``samples/seccomp/`` directory contains both an x86-specific example and a more generic example of a higher level macro interface for BPF program generation. +Sysctls +======= +Seccomp's sysctl files can be found in the ``/proc/sys/kernel/seccomp/`` +directory. Here's a description of each file in that directory: + +``actions_avail``: + A read-only ordered list of seccomp return values (refer to the + ``SECCOMP_RET_*`` macros above) in string form. The ordering, from + left-to-right, is the least permissive return value to the most + permissive return value. + + The list represents the set of seccomp return values supported + by the kernel. A userspace program may use this list to + determine if the actions found in the ``seccomp.h``, when the + program was built, differs from the set of actions actually + supported in the current running kernel. + +``actions_logged``: + A read-write ordered list of seccomp return values (refer to the + ``SECCOMP_RET_*`` macros above) that are allowed to be logged. Writes + to the file do not need to be in ordered form but reads from the file + will be ordered in the same way as the actions_avail sysctl. + + It is important to note that the value of ``actions_logged`` does not + prevent certain actions from being logged when the audit subsystem is + configured to audit a task. If the action is not found in + ``actions_logged`` list, the final decision on whether to audit the + action for that task is ultimately left up to the audit subsystem to + decide for all seccomp return values other than ``SECCOMP_RET_ALLOW``. + + The ``allow`` string is not accepted in the ``actions_logged`` sysctl + as it is not possible to log ``SECCOMP_RET_ALLOW`` actions. Attempting + to write ``allow`` to the sysctl will result in an EINVAL being + returned. Adding architecture support =========================== diff --git a/include/linux/audit.h b/include/linux/audit.h index 74d4d4e8e3db..cb708eb8accc 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -314,11 +314,7 @@ void audit_core_dumps(long signr); static inline void audit_seccomp(unsigned long syscall, long signr, int code) { - if (!audit_enabled) - return; - - /* Force a record to be reported if a signal was delivered. */ - if (signr || unlikely(!audit_dummy_context())) + if (audit_enabled && unlikely(!audit_dummy_context())) __audit_seccomp(syscall, signr, code); } diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index ecc296c137cd..c8bef436b61d 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,7 +3,8 @@ #include <uapi/linux/seccomp.h> -#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ + SECCOMP_FILTER_FLAG_LOG) #ifdef CONFIG_SECCOMP diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 0f238a43ff1e..f6bc1dea3247 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -11,27 +11,34 @@ #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ /* Valid operations for seccomp syscall. */ -#define SECCOMP_SET_MODE_STRICT 0 -#define SECCOMP_SET_MODE_FILTER 1 +#define SECCOMP_SET_MODE_STRICT 0 +#define SECCOMP_SET_MODE_FILTER 1 +#define SECCOMP_GET_ACTION_AVAIL 2 /* Valid flags for SECCOMP_SET_MODE_FILTER */ #define SECCOMP_FILTER_FLAG_TSYNC 1 +#define SECCOMP_FILTER_FLAG_LOG 2 /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. - * The upper 16-bits are ordered from least permissive values to most. + * The upper 16-bits are ordered from least permissive values to most, + * as a signed value (so 0x8000000 is negative). * * The ordering ensures that a min_t() over composed return values always * selects the least permissive choice. */ -#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */ -#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ -#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ -#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ -#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ +#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ +#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ +#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD +#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ +#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ +#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ /* Masks for the return value sections. */ +#define SECCOMP_RET_ACTION_FULL 0xffff0000U #define SECCOMP_RET_ACTION 0x7fff0000U #define SECCOMP_RET_DATA 0x0000ffffU diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 98b59b5db90b..c24579dfa7a1 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -17,11 +17,13 @@ #include <linux/audit.h> #include <linux/compat.h> #include <linux/coredump.h> +#include <linux/kmemleak.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <linux/seccomp.h> #include <linux/slab.h> #include <linux/syscalls.h> +#include <linux/sysctl.h> #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER #include <asm/syscall.h> @@ -42,6 +44,7 @@ * get/put helpers should be used when accessing an instance * outside of a lifetime-guarded section. In general, this * is only needed for handling filters shared across tasks. + * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged * @prev: points to a previously installed, or inherited, filter * @prog: the BPF program to evaluate * @@ -57,6 +60,7 @@ */ struct seccomp_filter { refcount_t usage; + bool log; struct seccomp_filter *prev; struct bpf_prog *prog; }; @@ -171,10 +175,15 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) /** * seccomp_run_filters - evaluates all seccomp filters against @sd * @sd: optional seccomp data to be passed to filters + * @match: stores struct seccomp_filter that resulted in the return value, + * unless filter returned SECCOMP_RET_ALLOW, in which case it will + * be unchanged. * * Returns valid seccomp BPF response codes. */ -static u32 seccomp_run_filters(const struct seccomp_data *sd) +#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL))) +static u32 seccomp_run_filters(const struct seccomp_data *sd, + struct seccomp_filter **match) { struct seccomp_data sd_local; u32 ret = SECCOMP_RET_ALLOW; @@ -184,7 +193,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd) /* Ensure unexpected behavior doesn't result in failing open. */ if (unlikely(WARN_ON(f == NULL))) - return SECCOMP_RET_KILL; + return SECCOMP_RET_KILL_PROCESS; if (!sd) { populate_seccomp_data(&sd_local); @@ -198,8 +207,10 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd) for (; f; f = f->prev) { u32 cur_ret = BPF_PROG_RUN(f->prog, sd); - if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) + if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) { ret = cur_ret; + *match = f; + } } return ret; } @@ -444,6 +455,10 @@ static long seccomp_attach_filter(unsigned int flags, return ret; } + /* Set log flag, if present. */ + if (flags & SECCOMP_FILTER_FLAG_LOG) + filter->log = true; + /* * If there is an existing filter, make it the prev and don't drop its * task reference. @@ -514,6 +529,65 @@ static void seccomp_send_sigsys(int syscall, int reason) } #endif /* CONFIG_SECCOMP_FILTER */ +/* For use with seccomp_actions_logged */ +#define SECCOMP_LOG_KILL_PROCESS (1 << 0) +#define SECCOMP_LOG_KILL_THREAD (1 << 1) +#define SECCOMP_LOG_TRAP (1 << 2) +#define SECCOMP_LOG_ERRNO (1 << 3) +#define SECCOMP_LOG_TRACE (1 << 4) +#define SECCOMP_LOG_LOG (1 << 5) +#define SECCOMP_LOG_ALLOW (1 << 6) + +static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS | + SECCOMP_LOG_KILL_THREAD | + SECCOMP_LOG_TRAP | + SECCOMP_LOG_ERRNO | + SECCOMP_LOG_TRACE | + SECCOMP_LOG_LOG; + +static inline void seccomp_log(unsigned long syscall, long signr, u32 action, + bool requested) +{ + bool log = false; + + switch (action) { + case SECCOMP_RET_ALLOW: + break; + case SECCOMP_RET_TRAP: + log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP; + break; + case SECCOMP_RET_ERRNO: + log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO; + break; + case SECCOMP_RET_TRACE: + log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE; + break; + case SECCOMP_RET_LOG: + log = seccomp_actions_logged & SECCOMP_LOG_LOG; + break; + case SECCOMP_RET_KILL_THREAD: + log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD; + break; + case SECCOMP_RET_KILL_PROCESS: + default: + log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS; + } + + /* + * Force an audit message to be emitted when the action is RET_KILL_*, + * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is + * allowed to be logged by the admin. + */ + if (log) + return __audit_seccomp(syscall, signr, action); + + /* + * Let the audit subsystem decide if the action should be audited based + * on whether the current task itself is being audited. + */ + return audit_seccomp(syscall, signr, action); +} + /* * Secure computing mode 1 allows only read/write/exit/sigreturn. * To be fully secure this must be combined with rlimit @@ -539,7 +613,7 @@ static void __secure_computing_strict(int this_syscall) #ifdef SECCOMP_DEBUG dump_stack(); #endif - audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL); + seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true); do_exit(SIGKILL); } @@ -566,6 +640,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, const bool recheck_after_trace) { u32 filter_ret, action; + struct seccomp_filter *match = NULL; int data; /* @@ -574,9 +649,9 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, */ rmb(); - filter_ret = seccomp_run_filters(sd); + filter_ret = seccomp_run_filters(sd, &match); data = filter_ret & SECCOMP_RET_DATA; - action = filter_ret & SECCOMP_RET_ACTION; + action = filter_ret & SECCOMP_RET_ACTION_FULL; switch (action) { case SECCOMP_RET_ERRNO: @@ -637,14 +712,25 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, return 0; + case SECCOMP_RET_LOG: + seccomp_log(this_syscall, 0, action, true); + return 0; + case SECCOMP_RET_ALLOW: + /* + * Note that the "match" filter will always be NULL for + * this action since SECCOMP_RET_ALLOW is the starting + * state in seccomp_run_filters(). + */ return 0; - case SECCOMP_RET_KILL: + case SECCOMP_RET_KILL_THREAD: + case SECCOMP_RET_KILL_PROCESS: default: - audit_seccomp(this_syscall, SIGSYS, action); + seccomp_log(this_syscall, SIGSYS, action, true); /* Dump core only if this is the last remaining thread. */ - if (get_nr_threads(current) == 1) { + if (action == SECCOMP_RET_KILL_PROCESS || + get_nr_threads(current) == 1) { siginfo_t info; /* Show the original registers in the dump. */ @@ -653,13 +739,16 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, seccomp_init_siginfo(&info, this_syscall, data); do_coredump(&info); } - do_exit(SIGSYS); + if (action == SECCOMP_RET_KILL_PROCESS) + do_group_exit(SIGSYS); + else + do_exit(SIGSYS); } unreachable(); skip: - audit_seccomp(this_syscall, 0, action); + seccomp_log(this_syscall, 0, action, match ? match->log : false); return -1; } #else @@ -794,6 +883,29 @@ static inline long seccomp_set_mode_filter(unsigned int flags, } #endif +static long seccomp_get_action_avail(const char __user *uaction) +{ + u32 action; + + if (copy_from_user(&action, uaction, sizeof(action))) + return -EFAULT; + + switch (action) { + case SECCOMP_RET_KILL_PROCESS: + case SECCOMP_RET_KILL_THREAD: + case SECCOMP_RET_TRAP: + case SECCOMP_RET_ERRNO: + case SECCOMP_RET_TRACE: + case SECCOMP_RET_LOG: + case SECCOMP_RET_ALLOW: + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + /* Common entry point for both prctl and syscall. */ static long do_seccomp(unsigned int op, unsigned int flags, const char __user *uargs) @@ -805,6 +917,11 @@ static long do_seccomp(unsigned int op, unsigned int flags, return seccomp_set_mode_strict(); case SECCOMP_SET_MODE_FILTER: return seccomp_set_mode_filter(flags, uargs); + case SECCOMP_GET_ACTION_AVAIL: + if (flags != 0) + return -EINVAL; + + return seccomp_get_action_avail(uargs); default: return -EINVAL; } @@ -922,3 +1039,185 @@ out: return ret; } #endif + +#ifdef CONFIG_SYSCTL + +/* Human readable action names for friendly sysctl interaction */ +#define SECCOMP_RET_KILL_PROCESS_NAME "kill_process" +#define SECCOMP_RET_KILL_THREAD_NAME "kill_thread" +#define SECCOMP_RET_TRAP_NAME "trap" +#define SECCOMP_RET_ERRNO_NAME "errno" +#define SECCOMP_RET_TRACE_NAME "trace" +#define SECCOMP_RET_LOG_NAME "log" +#define SECCOMP_RET_ALLOW_NAME "allow" + +static const char seccomp_actions_avail[] = + SECCOMP_RET_KILL_PROCESS_NAME " " + SECCOMP_RET_KILL_THREAD_NAME " " + SECCOMP_RET_TRAP_NAME " " + SECCOMP_RET_ERRNO_NAME " " + SECCOMP_RET_TRACE_NAME " " + SECCOMP_RET_LOG_NAME " " + SECCOMP_RET_ALLOW_NAME; + +struct seccomp_log_name { + u32 log; + const char *name; +}; + +static const struct seccomp_log_name seccomp_log_names[] = { + { SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME }, + { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME }, + { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME }, + { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME }, + { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME }, + { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME }, + { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME }, + { } +}; + +static bool seccomp_names_from_actions_logged(char *names, size_t size, + u32 actions_logged) +{ + const struct seccomp_log_name *cur; + bool append_space = false; + + for (cur = seccomp_log_names; cur->name && size; cur++) { + ssize_t ret; + + if (!(actions_logged & cur->log)) + continue; + + if (append_space) { + ret = strscpy(names, " ", size); + if (ret < 0) + return false; + + names += ret; + size -= ret; + } else + append_space = true; + + ret = strscpy(names, cur->name, size); + if (ret < 0) + return false; + + names += ret; + size -= ret; + } + + return true; +} + +static bool seccomp_action_logged_from_name(u32 *action_logged, + const char *name) +{ + const struct seccomp_log_name *cur; + + for (cur = seccomp_log_names; cur->name; cur++) { + if (!strcmp(cur->name, name)) { + *action_logged = cur->log; + return true; + } + } + + return false; +} + +static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names) +{ + char *name; + + *actions_logged = 0; + while ((name = strsep(&names, " ")) && *name) { + u32 action_logged = 0; + + if (!seccomp_action_logged_from_name(&action_logged, name)) + return false; + + *actions_logged |= action_logged; + } + + return true; +} + +static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + char names[sizeof(seccomp_actions_avail)]; + struct ctl_table table; + int ret; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + memset(names, 0, sizeof(names)); + + if (!write) { + if (!seccomp_names_from_actions_logged(names, sizeof(names), + seccomp_actions_logged)) + return -EINVAL; + } + + table = *ro_table; + table.data = names; + table.maxlen = sizeof(names); + ret = proc_dostring(&table, write, buffer, lenp, ppos); + if (ret) + return ret; + + if (write) { + u32 actions_logged; + + if (!seccomp_actions_logged_from_names(&actions_logged, + table.data)) + return -EINVAL; + + if (actions_logged & SECCOMP_LOG_ALLOW) + return -EINVAL; + + seccomp_actions_logged = actions_logged; + } + + return 0; +} + +static struct ctl_path seccomp_sysctl_path[] = { + { .procname = "kernel", }, + { .procname = "seccomp", }, + { } +}; + +static struct ctl_table seccomp_sysctl_table[] = { + { + .procname = "actions_avail", + .data = (void *) &seccomp_actions_avail, + .maxlen = sizeof(seccomp_actions_avail), + .mode = 0444, + .proc_handler = proc_dostring, + }, + { + .procname = "actions_logged", + .mode = 0644, + .proc_handler = seccomp_actions_logged_handler, + }, + { } +}; + +static int __init seccomp_sysctl_init(void) +{ + struct ctl_table_header *hdr; + + hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table); + if (!hdr) + pr_warn("seccomp: sysctl registration failed\n"); + else + kmemleak_not_leak(hdr); + + return 0; +} + +device_initcall(seccomp_sysctl_init) + +#endif /* CONFIG_SYSCTL */ diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile index aeb0c805f3ca..553d870b4ca9 100644 --- a/tools/testing/selftests/seccomp/Makefile +++ b/tools/testing/selftests/seccomp/Makefile @@ -1,8 +1,16 @@ -TEST_GEN_PROGS := seccomp_bpf -CFLAGS += -Wl,-no-as-needed -Wall -LDFLAGS += -lpthread +all: include ../lib.mk -$(TEST_GEN_PROGS): seccomp_bpf.c ../kselftest_harness.h - $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ +.PHONY: all clean + +BINARIES := seccomp_bpf seccomp_benchmark +CFLAGS += -Wl,-no-as-needed -Wall + +seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h + $(CC) $(CFLAGS) $(LDFLAGS) -lpthread $< -o $@ + +TEST_PROGS += $(BINARIES) +EXTRA_CLEAN := $(BINARIES) + +all: $(BINARIES) diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c new file mode 100644 index 000000000000..5838c8697ec3 --- /dev/null +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -0,0 +1,99 @@ +/* + * Strictly speaking, this is not a test. But it can report during test + * runs so relative performace can be measured. + */ +#define _GNU_SOURCE +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <unistd.h> +#include <linux/filter.h> +#include <linux/seccomp.h> +#include <sys/prctl.h> +#include <sys/syscall.h> +#include <sys/types.h> + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + +unsigned long long timing(clockid_t clk_id, unsigned long long samples) +{ + pid_t pid, ret; + unsigned long long i; + struct timespec start, finish; + + pid = getpid(); + assert(clock_gettime(clk_id, &start) == 0); + for (i = 0; i < samples; i++) { + ret = syscall(__NR_getpid); + assert(pid == ret); + } + assert(clock_gettime(clk_id, &finish) == 0); + + i = finish.tv_sec - start.tv_sec; + i *= 1000000000; + i += finish.tv_nsec - start.tv_nsec; + + printf("%lu.%09lu - %lu.%09lu = %llu\n", + finish.tv_sec, finish.tv_nsec, + start.tv_sec, start.tv_nsec, + i); + + return i; +} + +unsigned long long calibrate(void) +{ + unsigned long long i; + + printf("Calibrating reasonable sample size...\n"); + + for (i = 5; ; i++) { + unsigned long long samples = 1 << i; + + /* Find something that takes more than 5 seconds to run. */ + if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5) + return samples; + } +} + +int main(int argc, char *argv[]) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + unsigned long long samples; + unsigned long long native, filtered; + + if (argc > 1) + samples = strtoull(argv[1], NULL, 0); + else + samples = calibrate(); + + printf("Benchmarking %llu samples...\n", samples); + + native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid native: %llu ns\n", native); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + assert(ret == 0); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + assert(ret == 0); + + filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid RET_ALLOW: %llu ns\n", filtered); + + printf("Estimated seccomp overhead per syscall: %llu ns\n", + filtered - native); + + if (filtered == native) + printf("Trying running again with more samples.\n"); + + return 0; +} diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 4d6f92a9df6b..67c3e2764303 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -68,17 +68,7 @@ #define SECCOMP_MODE_FILTER 2 #endif -#ifndef SECCOMP_RET_KILL -#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */ -#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ -#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ -#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ -#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ - -/* Masks for the return value sections. */ -#define SECCOMP_RET_ACTION 0x7fff0000U -#define SECCOMP_RET_DATA 0x0000ffffU - +#ifndef SECCOMP_RET_ALLOW struct seccomp_data { int nr; __u32 arch; @@ -87,6 +77,70 @@ struct seccomp_data { }; #endif +#ifndef SECCOMP_RET_KILL_PROCESS +#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ +#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ +#endif +#ifndef SECCOMP_RET_KILL +#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD +#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ +#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ +#endif +#ifndef SECCOMP_RET_LOG +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ +#endif + +#ifndef __NR_seccomp +# if defined(__i386__) +# define __NR_seccomp 354 +# elif defined(__x86_64__) +# define __NR_seccomp 317 +# elif defined(__arm__) +# define __NR_seccomp 383 +# elif defined(__aarch64__) +# define __NR_seccomp 277 +# elif defined(__hppa__) +# define __NR_seccomp 338 +# elif defined(__powerpc__) +# define __NR_seccomp 358 +# elif defined(__s390__) +# define __NR_seccomp 348 +# else +# warning "seccomp syscall number unknown for this architecture" +# define __NR_seccomp 0xffff +# endif +#endif + +#ifndef SECCOMP_SET_MODE_STRICT +#define SECCOMP_SET_MODE_STRICT 0 +#endif + +#ifndef SECCOMP_SET_MODE_FILTER +#define SECCOMP_SET_MODE_FILTER 1 +#endif + +#ifndef SECCOMP_GET_ACTION_AVAIL +#define SECCOMP_GET_ACTION_AVAIL 2 +#endif + +#ifndef SECCOMP_FILTER_FLAG_TSYNC +#define SECCOMP_FILTER_FLAG_TSYNC 1 +#endif + +#ifndef SECCOMP_FILTER_FLAG_LOG +#define SECCOMP_FILTER_FLAG_LOG 2 +#endif + +#ifndef seccomp +int seccomp(unsigned int op, unsigned int flags, void *args) +{ + errno = 0; + return syscall(__NR_seccomp, op, flags, args); +} +#endif + #if __BYTE_ORDER == __LITTLE_ENDIAN #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) #elif __BYTE_ORDER == __BIG_ENDIAN @@ -136,7 +190,7 @@ TEST(no_new_privs_support) } } -/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */ +/* Tests kernel support by checking for a copy_from_user() fault on NULL. */ TEST(mode_filter_support) { long ret; @@ -342,6 +396,28 @@ TEST(empty_prog) EXPECT_EQ(EINVAL, errno); } +TEST(log_all) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + /* getppid() should succeed and be logged (no check for logging) */ + EXPECT_EQ(parent, syscall(__NR_getppid)); +} + TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) { struct sock_filter filter[] = { @@ -520,6 +596,117 @@ TEST_SIGNAL(KILL_one_arg_six, SIGSYS) close(fd); } +/* This is a thread task to die via seccomp filter violation. */ +void *kill_thread(void *data) +{ + bool die = (bool)data; + + if (die) { + prctl(PR_GET_SECCOMP, 0, 0, 0, 0); + return (void *)SIBLING_EXIT_FAILURE; + } + + return (void *)SIBLING_EXIT_UNKILLED; +} + +/* Prepare a thread that will kill itself or both of us. */ +void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) +{ + pthread_t thread; + void *status; + /* Kill only when calling __NR_prctl. */ + struct sock_filter filter_thread[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog_thread = { + .len = (unsigned short)ARRAY_SIZE(filter_thread), + .filter = filter_thread, + }; + struct sock_filter filter_process[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog_process = { + .len = (unsigned short)ARRAY_SIZE(filter_process), + .filter = filter_process, + }; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, + kill_process ? &prog_process : &prog_thread)); + + /* + * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS + * flag cannot be downgraded by a new filter. + */ + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); + + /* Start a thread that will exit immediately. */ + ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); + ASSERT_EQ(0, pthread_join(thread, &status)); + ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status); + + /* Start a thread that will die immediately. */ + ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true)); + ASSERT_EQ(0, pthread_join(thread, &status)); + ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status); + + /* + * If we get here, only the spawned thread died. Let the parent know + * the whole process didn't die (i.e. this thread, the spawner, + * stayed running). + */ + exit(42); +} + +TEST(KILL_thread) +{ + int status; + pid_t child_pid; + + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (child_pid == 0) { + kill_thread_or_group(_metadata, false); + _exit(38); + } + + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + + /* If only the thread was killed, we'll see exit 42. */ + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(42, WEXITSTATUS(status)); +} + +TEST(KILL_process) +{ + int status; + pid_t child_pid; + + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (child_pid == 0) { + kill_thread_or_group(_metadata, true); + _exit(38); + } + + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + + /* If the entire process was killed, we'll see SIGSYS. */ + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_EQ(SIGSYS, WTERMSIG(status)); +} + /* TODO(wad) add 64-bit versus 32-bit arg tests. */ TEST(arg_out_of_range) { @@ -541,26 +728,30 @@ TEST(arg_out_of_range) EXPECT_EQ(EINVAL, errno); } +#define ERRNO_FILTER(name, errno) \ + struct sock_filter _read_filter_##name[] = { \ + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \ + offsetof(struct seccomp_data, nr)), \ + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), \ + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno), \ + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \ + }; \ + struct sock_fprog prog_##name = { \ + .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \ + .filter = _read_filter_##name, \ + } + +/* Make sure basic errno values are correctly passed through a filter. */ TEST(ERRNO_valid) { - struct sock_filter filter[] = { - BPF_STMT(BPF_LD|BPF_W|BPF_ABS, - offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; + ERRNO_FILTER(valid, E2BIG); long ret; pid_t parent = getppid(); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid); ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); @@ -568,26 +759,17 @@ TEST(ERRNO_valid) EXPECT_EQ(E2BIG, errno); } +/* Make sure an errno of zero is correctly handled by the arch code. */ TEST(ERRNO_zero) { - struct sock_filter filter[] = { - BPF_STMT(BPF_LD|BPF_W|BPF_ABS, - offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; + ERRNO_FILTER(zero, 0); long ret; pid_t parent = getppid(); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero); ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); @@ -595,26 +777,21 @@ TEST(ERRNO_zero) EXPECT_EQ(0, read(0, NULL, 0)); } +/* + * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller. + * This tests that the errno value gets capped correctly, fixed by + * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO"). + */ TEST(ERRNO_capped) { - struct sock_filter filter[] = { - BPF_STMT(BPF_LD|BPF_W|BPF_ABS, - offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; + ERRNO_FILTER(capped, 4096); long ret; pid_t parent = getppid(); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped); ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); @@ -622,6 +799,37 @@ TEST(ERRNO_capped) EXPECT_EQ(4095, errno); } +/* + * Filters are processed in reverse order: last applied is executed first. + * Since only the SECCOMP_RET_ACTION mask is tested for return values, the + * SECCOMP_RET_DATA mask results will follow the most recently applied + * matching filter return (and not the lowest or highest value). + */ +TEST(ERRNO_order) +{ + ERRNO_FILTER(first, 11); + ERRNO_FILTER(second, 13); + ERRNO_FILTER(third, 12); + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(12, errno); +} + FIXTURE_DATA(TRAP) { struct sock_fprog prog; }; @@ -735,6 +943,7 @@ TEST_F(TRAP, handler) FIXTURE_DATA(precedence) { struct sock_fprog allow; + struct sock_fprog log; struct sock_fprog trace; struct sock_fprog error; struct sock_fprog trap; @@ -746,6 +955,13 @@ FIXTURE_SETUP(precedence) struct sock_filter allow_insns[] = { BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; + struct sock_filter log_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), + }; struct sock_filter trace_insns[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), @@ -782,6 +998,7 @@ FIXTURE_SETUP(precedence) memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) FILTER_ALLOC(allow); + FILTER_ALLOC(log); FILTER_ALLOC(trace); FILTER_ALLOC(error); FILTER_ALLOC(trap); @@ -792,6 +1009,7 @@ FIXTURE_TEARDOWN(precedence) { #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) FILTER_FREE(allow); + FILTER_FREE(log); FILTER_FREE(trace); FILTER_FREE(error); FILTER_FREE(trap); @@ -809,6 +1027,8 @@ TEST_F(precedence, allow_ok) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -833,6 +1053,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -864,6 +1086,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); @@ -885,6 +1109,8 @@ TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -910,6 +1136,8 @@ TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -931,6 +1159,8 @@ TEST_F(precedence, errno_is_third) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -949,6 +1179,8 @@ TEST_F(precedence, errno_is_third_in_any_order) ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); @@ -971,6 +1203,8 @@ TEST_F(precedence, trace_is_fourth) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); /* Should work just fine. */ @@ -992,12 +1226,54 @@ TEST_F(precedence, trace_is_fourth_in_any_order) ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); /* Should work just fine. */ EXPECT_EQ(parent, syscall(__NR_getppid)); /* No ptracer */ EXPECT_EQ(-1, syscall(__NR_getpid)); } +TEST_F(precedence, log_is_fifth) +{ + pid_t mypid, parent; + long ret; + + mypid = getpid(); + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* Should also work just fine */ + EXPECT_EQ(mypid, syscall(__NR_getpid)); +} + +TEST_F(precedence, log_is_fifth_in_any_order) +{ + pid_t mypid, parent; + long ret; + + mypid = getpid(); + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* Should also work just fine */ + EXPECT_EQ(mypid, syscall(__NR_getpid)); +} + #ifndef PTRACE_O_TRACESECCOMP #define PTRACE_O_TRACESECCOMP 0x00000080 #endif @@ -1262,6 +1538,13 @@ TEST_F(TRACE_poke, getpid_runs_normally) # error "Do not know how to find your architecture's registers and syscalls" #endif +/* When the syscall return can't be changed, stub out the tests for it. */ +#ifdef SYSCALL_NUM_RET_SHARE_REG +# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) +#else +# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(val, action) +#endif + /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). */ @@ -1357,7 +1640,7 @@ void change_syscall(struct __test_metadata *_metadata, #ifdef SYSCALL_NUM_RET_SHARE_REG TH_LOG("Can't modify syscall return on this architecture"); #else - regs.SYSCALL_RET = 1; + regs.SYSCALL_RET = EPERM; #endif #ifdef HAVE_GETREGS @@ -1426,6 +1709,8 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, if (nr == __NR_getpid) change_syscall(_metadata, tracee, __NR_getppid); + if (nr == __NR_open) + change_syscall(_metadata, tracee, -1); } FIXTURE_DATA(TRACE_syscall) { @@ -1480,6 +1765,28 @@ FIXTURE_TEARDOWN(TRACE_syscall) free(self->prog.filter); } +TEST_F(TRACE_syscall, ptrace_syscall_redirected) +{ + /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ + teardown_trace_fixture(_metadata, self->tracer); + self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, + true); + + /* Tracer will redirect getpid to getppid. */ + EXPECT_NE(self->mypid, syscall(__NR_getpid)); +} + +TEST_F(TRACE_syscall, ptrace_syscall_dropped) +{ + /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ + teardown_trace_fixture(_metadata, self->tracer); + self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, + true); + + /* Tracer should skip the open syscall, resulting in EPERM. */ + EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open)); +} + TEST_F(TRACE_syscall, syscall_allowed) { long ret; @@ -1520,13 +1827,8 @@ TEST_F(TRACE_syscall, syscall_dropped) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); ASSERT_EQ(0, ret); -#ifdef SYSCALL_NUM_RET_SHARE_REG - /* gettid has been skipped */ - EXPECT_EQ(-1, syscall(__NR_gettid)); -#else /* gettid has been skipped and an altered return value stored. */ - EXPECT_EQ(1, syscall(__NR_gettid)); -#endif + EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid)); EXPECT_NE(self->mytid, syscall(__NR_gettid)); } @@ -1557,6 +1859,7 @@ TEST_F(TRACE_syscall, skip_after_RET_TRACE) ASSERT_EQ(0, ret); /* Tracer will redirect getpid to getppid, and we should see EPERM. */ + errno = 0; EXPECT_EQ(-1, syscall(__NR_getpid)); EXPECT_EQ(EPERM, errno); } @@ -1654,47 +1957,6 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) EXPECT_NE(self->mypid, syscall(__NR_getpid)); } -#ifndef __NR_seccomp -# if defined(__i386__) -# define __NR_seccomp 354 -# elif defined(__x86_64__) -# define __NR_seccomp 317 -# elif defined(__arm__) -# define __NR_seccomp 383 -# elif defined(__aarch64__) -# define __NR_seccomp 277 -# elif defined(__hppa__) -# define __NR_seccomp 338 -# elif defined(__powerpc__) -# define __NR_seccomp 358 -# elif defined(__s390__) -# define __NR_seccomp 348 -# else -# warning "seccomp syscall number unknown for this architecture" -# define __NR_seccomp 0xffff -# endif -#endif - -#ifndef SECCOMP_SET_MODE_STRICT -#define SECCOMP_SET_MODE_STRICT 0 -#endif - -#ifndef SECCOMP_SET_MODE_FILTER -#define SECCOMP_SET_MODE_FILTER 1 -#endif - -#ifndef SECCOMP_FILTER_FLAG_TSYNC -#define SECCOMP_FILTER_FLAG_TSYNC 1 -#endif - -#ifndef seccomp -int seccomp(unsigned int op, unsigned int flags, void *args) -{ - errno = 0; - return syscall(__NR_seccomp, op, flags, args); -} -#endif - TEST(seccomp_syscall) { struct sock_filter filter[] = { @@ -1783,6 +2045,67 @@ TEST(seccomp_syscall_mode_lock) } } +/* + * Test detection of known and unknown filter flags. Userspace needs to be able + * to check if a filter flag is supported by the current kernel and a good way + * of doing that is by attempting to enter filter mode, with the flag bit in + * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates + * that the flag is valid and EINVAL indicates that the flag is invalid. + */ +TEST(detect_seccomp_filter_flags) +{ + unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, + SECCOMP_FILTER_FLAG_LOG }; + unsigned int flag, all_flags; + int i; + long ret; + + /* Test detection of known-good filter flags */ + for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { + flag = flags[i]; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", + flag); + } + + all_flags |= flag; + } + + /* Test detection of all known-good filter flags */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", + all_flags); + } + + /* Test detection of an unknown filter flag */ + flag = -1; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", + flag); + } + + /* + * Test detection of an unknown filter flag that may simply need to be + * added to this test + */ + flag = flags[ARRAY_SIZE(flags) - 1] << 1; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?", + flag); + } +} + TEST(TSYNC_first) { struct sock_filter filter[] = { @@ -2421,6 +2744,99 @@ TEST(syscall_restart) _metadata->passed = 0; } +TEST_SIGNAL(filter_flag_log, SIGSYS) +{ + struct sock_filter allow_filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter kill_filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog allow_prog = { + .len = (unsigned short)ARRAY_SIZE(allow_filter), + .filter = allow_filter, + }; + struct sock_fprog kill_prog = { + .len = (unsigned short)ARRAY_SIZE(kill_filter), + .filter = kill_filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */ + ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG, + &allow_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + EXPECT_NE(0, ret) { + TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!"); + } + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!"); + } + + /* Verify that a simple, permissive filter can be added with no flags */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); + EXPECT_EQ(0, ret); + + /* See if the same filter can be added with the FILTER_FLAG_LOG flag */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, + &allow_prog); + ASSERT_NE(EINVAL, errno) { + TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!"); + } + EXPECT_EQ(0, ret); + + /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, + &kill_prog); + EXPECT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST(get_action_avail) +{ + __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP, + SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, + SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; + __u32 unknown_action = 0x10000000U; + int i; + long ret; + + ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + ASSERT_NE(EINVAL, errno) { + TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!"); + } + EXPECT_EQ(ret, 0); + + for (i = 0; i < ARRAY_SIZE(actions); i++) { + ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]); + EXPECT_EQ(ret, 0) { + TH_LOG("Expected action (0x%X) not available!", + actions[i]); + } + } + + /* Check that an unknown action is handled properly (EOPNOTSUPP) */ + ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action); + EXPECT_EQ(ret, -1); + EXPECT_EQ(errno, EOPNOTSUPP); +} + /* * TODO: * - add microbenchmarks @@ -2429,6 +2845,8 @@ TEST(syscall_restart) * - endianness checking when appropriate * - 64-bit arg prodding * - arch value testing (x86 modes especially) + * - verify that FILTER_FLAG_LOG filters generate log messages + * - verify that RET_LOG generates log messages * - ... */ |