diff options
Diffstat (limited to 'kernel/bpf/verifier.c')
| -rw-r--r-- | kernel/bpf/verifier.c | 1490 |
1 files changed, 1018 insertions, 472 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e76b55917905..a39eedecc93a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4,6 +4,7 @@ * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io */ #include <uapi/linux/btf.h> +#include <linux/bpf-cgroup.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/slab.h> @@ -240,12 +241,6 @@ static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn) insn->src_reg == BPF_PSEUDO_KFUNC_CALL; } -static bool bpf_pseudo_func(const struct bpf_insn *insn) -{ - return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && - insn->src_reg == BPF_PSEUDO_FUNC; -} - struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; @@ -299,13 +294,15 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, "verifier log line truncated - local buffer too short\n"); - n = min(log->len_total - log->len_used - 1, n); - log->kbuf[n] = '\0'; - if (log->level == BPF_LOG_KERNEL) { - pr_err("BPF:%s\n", log->kbuf); + bool newline = n > 0 && log->kbuf[n - 1] == '\n'; + + pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n"); return; } + + n = min(log->len_total - log->len_used - 1, n); + log->kbuf[n] = '\0'; if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) log->len_used += n; else @@ -445,18 +442,6 @@ static bool reg_type_not_null(enum bpf_reg_type type) type == PTR_TO_SOCK_COMMON; } -static bool reg_type_may_be_null(enum bpf_reg_type type) -{ - return type == PTR_TO_MAP_VALUE_OR_NULL || - type == PTR_TO_SOCKET_OR_NULL || - type == PTR_TO_SOCK_COMMON_OR_NULL || - type == PTR_TO_TCP_SOCK_OR_NULL || - type == PTR_TO_BTF_ID_OR_NULL || - type == PTR_TO_MEM_OR_NULL || - type == PTR_TO_RDONLY_BUF_OR_NULL || - type == PTR_TO_RDWR_BUF_OR_NULL; -} - static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) { return reg->type == PTR_TO_MAP_VALUE && @@ -465,12 +450,14 @@ static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) { - return type == PTR_TO_SOCKET || - type == PTR_TO_SOCKET_OR_NULL || - type == PTR_TO_TCP_SOCK || - type == PTR_TO_TCP_SOCK_OR_NULL || - type == PTR_TO_MEM || - type == PTR_TO_MEM_OR_NULL; + return base_type(type) == PTR_TO_SOCKET || + base_type(type) == PTR_TO_TCP_SOCK || + base_type(type) == PTR_TO_MEM; +} + +static bool type_is_rdonly_mem(u32 type) +{ + return type & MEM_RDONLY; } static bool arg_type_may_be_refcounted(enum bpf_arg_type type) @@ -478,14 +465,9 @@ static bool arg_type_may_be_refcounted(enum bpf_arg_type type) return type == ARG_PTR_TO_SOCK_COMMON; } -static bool arg_type_may_be_null(enum bpf_arg_type type) +static bool type_may_be_null(u32 type) { - return type == ARG_PTR_TO_MAP_VALUE_OR_NULL || - type == ARG_PTR_TO_MEM_OR_NULL || - type == ARG_PTR_TO_CTX_OR_NULL || - type == ARG_PTR_TO_SOCKET_OR_NULL || - type == ARG_PTR_TO_ALLOC_MEM_OR_NULL || - type == ARG_PTR_TO_STACK_OR_NULL; + return type & PTR_MAYBE_NULL; } /* Determine whether the function releases some resources allocated by another @@ -545,39 +527,56 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn) insn->imm == BPF_CMPXCHG; } -/* string representation of 'enum bpf_reg_type' */ -static const char * const reg_type_str[] = { - [NOT_INIT] = "?", - [SCALAR_VALUE] = "inv", - [PTR_TO_CTX] = "ctx", - [CONST_PTR_TO_MAP] = "map_ptr", - [PTR_TO_MAP_VALUE] = "map_value", - [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", - [PTR_TO_STACK] = "fp", - [PTR_TO_PACKET] = "pkt", - [PTR_TO_PACKET_META] = "pkt_meta", - [PTR_TO_PACKET_END] = "pkt_end", - [PTR_TO_FLOW_KEYS] = "flow_keys", - [PTR_TO_SOCKET] = "sock", - [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", - [PTR_TO_SOCK_COMMON] = "sock_common", - [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", - [PTR_TO_TCP_SOCK] = "tcp_sock", - [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", - [PTR_TO_TP_BUFFER] = "tp_buffer", - [PTR_TO_XDP_SOCK] = "xdp_sock", - [PTR_TO_BTF_ID] = "ptr_", - [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_", - [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", - [PTR_TO_MEM] = "mem", - [PTR_TO_MEM_OR_NULL] = "mem_or_null", - [PTR_TO_RDONLY_BUF] = "rdonly_buf", - [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null", - [PTR_TO_RDWR_BUF] = "rdwr_buf", - [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", - [PTR_TO_FUNC] = "func", - [PTR_TO_MAP_KEY] = "map_key", -}; +/* string representation of 'enum bpf_reg_type' + * + * Note that reg_type_str() can not appear more than once in a single verbose() + * statement. + */ +static const char *reg_type_str(struct bpf_verifier_env *env, + enum bpf_reg_type type) +{ + char postfix[16] = {0}, prefix[16] = {0}; + static const char * const str[] = { + [NOT_INIT] = "?", + [SCALAR_VALUE] = "inv", + [PTR_TO_CTX] = "ctx", + [CONST_PTR_TO_MAP] = "map_ptr", + [PTR_TO_MAP_VALUE] = "map_value", + [PTR_TO_STACK] = "fp", + [PTR_TO_PACKET] = "pkt", + [PTR_TO_PACKET_META] = "pkt_meta", + [PTR_TO_PACKET_END] = "pkt_end", + [PTR_TO_FLOW_KEYS] = "flow_keys", + [PTR_TO_SOCKET] = "sock", + [PTR_TO_SOCK_COMMON] = "sock_common", + [PTR_TO_TCP_SOCK] = "tcp_sock", + [PTR_TO_TP_BUFFER] = "tp_buffer", + [PTR_TO_XDP_SOCK] = "xdp_sock", + [PTR_TO_BTF_ID] = "ptr_", + [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", + [PTR_TO_MEM] = "mem", + [PTR_TO_BUF] = "buf", + [PTR_TO_FUNC] = "func", + [PTR_TO_MAP_KEY] = "map_key", + }; + + if (type & PTR_MAYBE_NULL) { + if (base_type(type) == PTR_TO_BTF_ID || + base_type(type) == PTR_TO_PERCPU_BTF_ID) + strncpy(postfix, "or_null_", 16); + else + strncpy(postfix, "_or_null", 16); + } + + if (type & MEM_RDONLY) + strncpy(prefix, "rdonly_", 16); + if (type & MEM_ALLOC) + strncpy(prefix, "alloc_", 16); + + snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", + prefix, str[base_type(type)], postfix); + return env->type_str_buf; +} static char slot_type_char[] = { [STACK_INVALID] = '?', @@ -612,8 +611,61 @@ static const char *kernel_type_name(const struct btf* btf, u32 id) return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); } +static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) +{ + env->scratched_regs |= 1U << regno; +} + +static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi) +{ + env->scratched_stack_slots |= 1ULL << spi; +} + +static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno) +{ + return (env->scratched_regs >> regno) & 1; +} + +static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno) +{ + return (env->scratched_stack_slots >> regno) & 1; +} + +static bool verifier_state_scratched(const struct bpf_verifier_env *env) +{ + return env->scratched_regs || env->scratched_stack_slots; +} + +static void mark_verifier_state_clean(struct bpf_verifier_env *env) +{ + env->scratched_regs = 0U; + env->scratched_stack_slots = 0ULL; +} + +/* Used for printing the entire verifier state. */ +static void mark_verifier_state_scratched(struct bpf_verifier_env *env) +{ + env->scratched_regs = ~0U; + env->scratched_stack_slots = ~0ULL; +} + +/* The reg state of a pointer or a bounded scalar was saved when + * it was spilled to the stack. + */ +static bool is_spilled_reg(const struct bpf_stack_state *stack) +{ + return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL; +} + +static void scrub_spilled_slot(u8 *stype) +{ + if (*stype != STACK_INVALID) + *stype = STACK_MISC; +} + static void print_verifier_state(struct bpf_verifier_env *env, - const struct bpf_func_state *state) + const struct bpf_func_state *state, + bool print_all) { const struct bpf_reg_state *reg; enum bpf_reg_type t; @@ -626,9 +678,11 @@ static void print_verifier_state(struct bpf_verifier_env *env, t = reg->type; if (t == NOT_INIT) continue; + if (!print_all && !reg_scratched(env, i)) + continue; verbose(env, " R%d", i); print_liveness(env, reg->live); - verbose(env, "=%s", reg_type_str[t]); + verbose(env, "=%s", reg_type_str(env, t)); if (t == SCALAR_VALUE && reg->precise) verbose(env, "P"); if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && @@ -636,9 +690,8 @@ static void print_verifier_state(struct bpf_verifier_env *env, /* reg->off should be 0 for SCALAR_VALUE */ verbose(env, "%lld", reg->var_off.value + reg->off); } else { - if (t == PTR_TO_BTF_ID || - t == PTR_TO_BTF_ID_OR_NULL || - t == PTR_TO_PERCPU_BTF_ID) + if (base_type(t) == PTR_TO_BTF_ID || + base_type(t) == PTR_TO_PERCPU_BTF_ID) verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id)); verbose(env, "(id=%d", reg->id); if (reg_type_may_be_refcounted_or_null(t)) @@ -647,10 +700,9 @@ static void print_verifier_state(struct bpf_verifier_env *env, verbose(env, ",off=%d", reg->off); if (type_is_pkt_pointer(t)) verbose(env, ",r=%d", reg->range); - else if (t == CONST_PTR_TO_MAP || - t == PTR_TO_MAP_KEY || - t == PTR_TO_MAP_VALUE || - t == PTR_TO_MAP_VALUE_OR_NULL) + else if (base_type(t) == CONST_PTR_TO_MAP || + base_type(t) == PTR_TO_MAP_KEY || + base_type(t) == PTR_TO_MAP_VALUE) verbose(env, ",ks=%d,vs=%d", reg->map_ptr->key_size, reg->map_ptr->value_size); @@ -715,12 +767,14 @@ static void print_verifier_state(struct bpf_verifier_env *env, types_buf[BPF_REG_SIZE] = 0; if (!valid) continue; + if (!print_all && !stack_slot_scratched(env, i)) + continue; verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); print_liveness(env, state->stack[i].spilled_ptr.live); - if (state->stack[i].slot_type[0] == STACK_SPILL) { + if (is_spilled_reg(&state->stack[i])) { reg = &state->stack[i].spilled_ptr; t = reg->type; - verbose(env, "=%s", reg_type_str[t]); + verbose(env, "=%s", reg_type_str(env, t)); if (t == SCALAR_VALUE && reg->precise) verbose(env, "P"); if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) @@ -740,6 +794,26 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (state->in_async_callback_fn) verbose(env, " async_cb"); verbose(env, "\n"); + mark_verifier_state_clean(env); +} + +static inline u32 vlog_alignment(u32 pos) +{ + return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT), + BPF_LOG_MIN_ALIGNMENT) - pos - 1; +} + +static void print_insn_state(struct bpf_verifier_env *env, + const struct bpf_func_state *state) +{ + if (env->prev_log_len && env->prev_log_len == env->log.len_used) { + /* remove new line character */ + bpf_vlog_reset(&env->log, env->prev_log_len - 1); + verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' '); + } else { + verbose(env, "%d:", env->insn_idx); + } + print_verifier_state(env, state, false); } /* copy array src of length n * size bytes to dst. dst is reallocated if it's too @@ -1133,8 +1207,7 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env, static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) { - switch (reg->type) { - case PTR_TO_MAP_VALUE_OR_NULL: { + if (base_type(reg->type) == PTR_TO_MAP_VALUE) { const struct bpf_map *map = reg->map_ptr; if (map->inner_map_meta) { @@ -1143,7 +1216,8 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) /* transfer reg's id which is unique for every map_lookup_elem * as UID of the inner map. */ - reg->map_uid = reg->id; + if (map_value_has_timer(map->inner_map_meta)) + reg->map_uid = reg->id; } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { reg->type = PTR_TO_XDP_SOCK; } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || @@ -1152,32 +1226,10 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) } else { reg->type = PTR_TO_MAP_VALUE; } - break; - } - case PTR_TO_SOCKET_OR_NULL: - reg->type = PTR_TO_SOCKET; - break; - case PTR_TO_SOCK_COMMON_OR_NULL: - reg->type = PTR_TO_SOCK_COMMON; - break; - case PTR_TO_TCP_SOCK_OR_NULL: - reg->type = PTR_TO_TCP_SOCK; - break; - case PTR_TO_BTF_ID_OR_NULL: - reg->type = PTR_TO_BTF_ID; - break; - case PTR_TO_MEM_OR_NULL: - reg->type = PTR_TO_MEM; - break; - case PTR_TO_RDONLY_BUF_OR_NULL: - reg->type = PTR_TO_RDONLY_BUF; - break; - case PTR_TO_RDWR_BUF_OR_NULL: - reg->type = PTR_TO_RDWR_BUF; - break; - default: - WARN_ONCE(1, "unknown nullable register type"); + return; } + + reg->type &= ~PTR_MAYBE_NULL; } static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg) @@ -1357,22 +1409,28 @@ static void __reg_bound_offset(struct bpf_reg_state *reg) reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off); } +static bool __reg32_bound_s64(s32 a) +{ + return a >= 0 && a <= S32_MAX; +} + static void __reg_assign_32_into_64(struct bpf_reg_state *reg) { reg->umin_value = reg->u32_min_value; reg->umax_value = reg->u32_max_value; - /* Attempt to pull 32-bit signed bounds into 64-bit bounds - * but must be positive otherwise set to worse case bounds - * and refine later from tnum. + + /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must + * be positive otherwise set to worse case bounds and refine later + * from tnum. */ - if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0) - reg->smax_value = reg->s32_max_value; - else - reg->smax_value = U32_MAX; - if (reg->s32_min_value >= 0) + if (__reg32_bound_s64(reg->s32_min_value) && + __reg32_bound_s64(reg->s32_max_value)) { reg->smin_value = reg->s32_min_value; - else + reg->smax_value = reg->s32_max_value; + } else { reg->smin_value = 0; + reg->smax_value = U32_MAX; + } } static void __reg_combine_32_into_64(struct bpf_reg_state *reg) @@ -1406,12 +1464,12 @@ static void __reg_combine_32_into_64(struct bpf_reg_state *reg) static bool __reg64_bound_s32(s64 a) { - return a > S32_MIN && a < S32_MAX; + return a >= S32_MIN && a <= S32_MAX; } static bool __reg64_bound_u32(u64 a) { - return a > U32_MIN && a < U32_MAX; + return a >= U32_MIN && a <= U32_MAX; } static void __reg_combine_64_into_32(struct bpf_reg_state *reg) @@ -1529,6 +1587,7 @@ static void init_func_state(struct bpf_verifier_env *env, state->frameno = frameno; state->subprogno = subprogno; init_reg_state(env, state); + mark_verifier_state_scratched(env); } /* Similar to push_stack(), but for async callbacks */ @@ -1626,52 +1685,168 @@ static int add_subprog(struct bpf_verifier_env *env, int off) return env->subprog_cnt - 1; } +#define MAX_KFUNC_DESCS 256 +#define MAX_KFUNC_BTFS 256 + struct bpf_kfunc_desc { struct btf_func_model func_model; u32 func_id; s32 imm; + u16 offset; +}; + +struct bpf_kfunc_btf { + struct btf *btf; + struct module *module; + u16 offset; }; -#define MAX_KFUNC_DESCS 256 struct bpf_kfunc_desc_tab { struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS]; u32 nr_descs; }; -static int kfunc_desc_cmp_by_id(const void *a, const void *b) +struct bpf_kfunc_btf_tab { + struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS]; + u32 nr_descs; +}; + +static int kfunc_desc_cmp_by_id_off(const void *a, const void *b) { const struct bpf_kfunc_desc *d0 = a; const struct bpf_kfunc_desc *d1 = b; /* func_id is not greater than BTF_MAX_TYPE */ - return d0->func_id - d1->func_id; + return d0->func_id - d1->func_id ?: d0->offset - d1->offset; +} + +static int kfunc_btf_cmp_by_off(const void *a, const void *b) +{ + const struct bpf_kfunc_btf *d0 = a; + const struct bpf_kfunc_btf *d1 = b; + + return d0->offset - d1->offset; } static const struct bpf_kfunc_desc * -find_kfunc_desc(const struct bpf_prog *prog, u32 func_id) +find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) { struct bpf_kfunc_desc desc = { .func_id = func_id, + .offset = offset, }; struct bpf_kfunc_desc_tab *tab; tab = prog->aux->kfunc_tab; return bsearch(&desc, tab->descs, tab->nr_descs, - sizeof(tab->descs[0]), kfunc_desc_cmp_by_id); + sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off); +} + +static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, + s16 offset, struct module **btf_modp) +{ + struct bpf_kfunc_btf kf_btf = { .offset = offset }; + struct bpf_kfunc_btf_tab *tab; + struct bpf_kfunc_btf *b; + struct module *mod; + struct btf *btf; + int btf_fd; + + tab = env->prog->aux->kfunc_btf_tab; + b = bsearch(&kf_btf, tab->descs, tab->nr_descs, + sizeof(tab->descs[0]), kfunc_btf_cmp_by_off); + if (!b) { + if (tab->nr_descs == MAX_KFUNC_BTFS) { + verbose(env, "too many different module BTFs\n"); + return ERR_PTR(-E2BIG); + } + + if (bpfptr_is_null(env->fd_array)) { + verbose(env, "kfunc offset > 0 without fd_array is invalid\n"); + return ERR_PTR(-EPROTO); + } + + if (copy_from_bpfptr_offset(&btf_fd, env->fd_array, + offset * sizeof(btf_fd), + sizeof(btf_fd))) + return ERR_PTR(-EFAULT); + + btf = btf_get_by_fd(btf_fd); + if (IS_ERR(btf)) { + verbose(env, "invalid module BTF fd specified\n"); + return btf; + } + + if (!btf_is_module(btf)) { + verbose(env, "BTF fd for kfunc is not a module BTF\n"); + btf_put(btf); + return ERR_PTR(-EINVAL); + } + + mod = btf_try_get_module(btf); + if (!mod) { + btf_put(btf); + return ERR_PTR(-ENXIO); + } + + b = &tab->descs[tab->nr_descs++]; + b->btf = btf; + b->module = mod; + b->offset = offset; + + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), + kfunc_btf_cmp_by_off, NULL); + } + if (btf_modp) + *btf_modp = b->module; + return b->btf; +} + +void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab) +{ + if (!tab) + return; + + while (tab->nr_descs--) { + module_put(tab->descs[tab->nr_descs].module); + btf_put(tab->descs[tab->nr_descs].btf); + } + kfree(tab); +} + +static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, + u32 func_id, s16 offset, + struct module **btf_modp) +{ + if (offset) { + if (offset < 0) { + /* In the future, this can be allowed to increase limit + * of fd index into fd_array, interpreted as u16. + */ + verbose(env, "negative offset disallowed for kernel module function call\n"); + return ERR_PTR(-EINVAL); + } + + return __find_kfunc_desc_btf(env, offset, btf_modp); + } + return btf_vmlinux ?: ERR_PTR(-ENOENT); } -static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) +static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) { const struct btf_type *func, *func_proto; + struct bpf_kfunc_btf_tab *btf_tab; struct bpf_kfunc_desc_tab *tab; struct bpf_prog_aux *prog_aux; struct bpf_kfunc_desc *desc; const char *func_name; + struct btf *desc_btf; unsigned long addr; int err; prog_aux = env->prog->aux; tab = prog_aux->kfunc_tab; + btf_tab = prog_aux->kfunc_btf_tab; if (!tab) { if (!btf_vmlinux) { verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n"); @@ -1699,7 +1874,29 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) prog_aux->kfunc_tab = tab; } - if (find_kfunc_desc(env->prog, func_id)) + /* func_id == 0 is always invalid, but instead of returning an error, be + * conservative and wait until the code elimination pass before returning + * error, so that invalid calls that get pruned out can be in BPF programs + * loaded from userspace. It is also required that offset be untouched + * for such calls. + */ + if (!func_id && !offset) + return 0; + + if (!btf_tab && offset) { + btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL); + if (!btf_tab) + return -ENOMEM; + prog_aux->kfunc_btf_tab = btf_tab; + } + + desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL); + if (IS_ERR(desc_btf)) { + verbose(env, "failed to find BTF for kernel function\n"); + return PTR_ERR(desc_btf); + } + + if (find_kfunc_desc(env->prog, func_id, offset)) return 0; if (tab->nr_descs == MAX_KFUNC_DESCS) { @@ -1707,20 +1904,20 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) return -E2BIG; } - func = btf_type_by_id(btf_vmlinux, func_id); + func = btf_type_by_id(desc_btf, func_id); if (!func || !btf_type_is_func(func)) { verbose(env, "kernel btf_id %u is not a function\n", func_id); return -EINVAL; } - func_proto = btf_type_by_id(btf_vmlinux, func->type); + func_proto = btf_type_by_id(desc_btf, func->type); if (!func_proto || !btf_type_is_func_proto(func_proto)) { verbose(env, "kernel function btf_id %u does not have a valid func_proto\n", func_id); return -EINVAL; } - func_name = btf_name_by_offset(btf_vmlinux, func->name_off); + func_name = btf_name_by_offset(desc_btf, func->name_off); addr = kallsyms_lookup_name(func_name); if (!addr) { verbose(env, "cannot find address for kernel function %s\n", @@ -1730,13 +1927,14 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) desc = &tab->descs[tab->nr_descs++]; desc->func_id = func_id; - desc->imm = BPF_CAST_CALL(addr) - __bpf_call_base; - err = btf_distill_func_proto(&env->log, btf_vmlinux, + desc->imm = BPF_CALL_IMM(addr); + desc->offset = offset; + err = btf_distill_func_proto(&env->log, desc_btf, func_proto, func_name, &desc->func_model); if (!err) sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), - kfunc_desc_cmp_by_id, NULL); + kfunc_desc_cmp_by_id_off, NULL); return err; } @@ -1807,16 +2005,10 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env) return -EPERM; } - if (bpf_pseudo_func(insn)) { + if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn)) ret = add_subprog(env, i + insn->imm + 1); - if (ret >= 0) - /* remember subprog */ - insn[1].imm = ret; - } else if (bpf_pseudo_call(insn)) { - ret = add_subprog(env, i + insn->imm + 1); - } else { - ret = add_kfunc_call(env, insn->imm); - } + else + ret = add_kfunc_call(env, insn->imm, insn->off); if (ret < 0) return ret; @@ -1899,7 +2091,7 @@ static int mark_reg_read(struct bpf_verifier_env *env, break; if (parent->live & REG_LIVE_DONE) { verbose(env, "verifier BUG type %s var_off %lld off %d\n", - reg_type_str[parent->type], + reg_type_str(env, parent->type), parent->var_off.value, parent->off); return -EFAULT; } @@ -2083,6 +2275,8 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, return -EINVAL; } + mark_reg_scratched(env, regno); + reg = ®s[regno]; rw64 = is_reg64(env, insn, regno, reg, t); if (t == SRC_OP) { @@ -2152,12 +2346,17 @@ static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) { const struct btf_type *func; + struct btf *desc_btf; if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) return NULL; - func = btf_type_by_id(btf_vmlinux, insn->imm); - return btf_name_by_offset(btf_vmlinux, func->name_off); + desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL); + if (IS_ERR(desc_btf)) + return "<error>"; + + func = btf_type_by_id(desc_btf, insn->imm); + return btf_name_by_offset(desc_btf, func->name_off); } /* For given verifier state backtrack_insn() is called from the last insn to @@ -2182,7 +2381,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, if (insn->code == 0) return 0; - if (env->log.level & BPF_LOG_LEVEL) { + if (env->log.level & BPF_LOG_LEVEL2) { verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask); verbose(env, "%d: ", idx); print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); @@ -2232,8 +2431,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, */ if (insn->src_reg != BPF_REG_FP) return 0; - if (BPF_SIZE(insn->code) != BPF_DW) - return 0; /* dreg = *(u64 *)[fp - off] was a fill from the stack. * that [fp - off] slot contains scalar that needs to be @@ -2256,8 +2453,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, /* scalars can only be spilled into stack */ if (insn->dst_reg != BPF_REG_FP) return 0; - if (BPF_SIZE(insn->code) != BPF_DW) - return 0; spi = (-insn->off - 1) / BPF_REG_SIZE; if (spi >= 64) { verbose(env, "BUG spi %d\n", spi); @@ -2373,7 +2568,7 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env, reg->precise = true; } for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { - if (func->stack[j].slot_type[0] != STACK_SPILL) + if (!is_spilled_reg(&func->stack[j])) continue; reg = &func->stack[j].spilled_ptr; if (reg->type != SCALAR_VALUE) @@ -2415,7 +2610,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, } while (spi >= 0) { - if (func->stack[spi].slot_type[0] != STACK_SPILL) { + if (!is_spilled_reg(&func->stack[spi])) { stack_mask = 0; break; } @@ -2440,7 +2635,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, DECLARE_BITMAP(mask, 64); u32 history = st->jmp_history_cnt; - if (env->log.level & BPF_LOG_LEVEL) + if (env->log.level & BPF_LOG_LEVEL2) verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx); for (i = last_idx;;) { if (skip_first) { @@ -2514,7 +2709,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, return 0; } - if (func->stack[i].slot_type[0] != STACK_SPILL) { + if (!is_spilled_reg(&func->stack[i])) { stack_mask &= ~(1ull << i); continue; } @@ -2527,11 +2722,11 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, new_marks = true; reg->precise = true; } - if (env->log.level & BPF_LOG_LEVEL) { - print_verifier_state(env, func); - verbose(env, "parent %s regs=%x stack=%llx marks\n", + if (env->log.level & BPF_LOG_LEVEL2) { + verbose(env, "parent %s regs=%x stack=%llx marks:", new_marks ? "didn't have" : "already had", reg_mask, stack_mask); + print_verifier_state(env, func, true); } if (!reg_mask && !stack_mask) @@ -2557,9 +2752,8 @@ static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi) static bool is_spillable_regtype(enum bpf_reg_type type) { - switch (type) { + switch (base_type(type)) { case PTR_TO_MAP_VALUE: - case PTR_TO_MAP_VALUE_OR_NULL: case PTR_TO_STACK: case PTR_TO_CTX: case PTR_TO_PACKET: @@ -2568,21 +2762,13 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_FLOW_KEYS: case CONST_PTR_TO_MAP: case PTR_TO_SOCKET: - case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: - case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: - case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: case PTR_TO_BTF_ID: - case PTR_TO_BTF_ID_OR_NULL: - case PTR_TO_RDONLY_BUF: - case PTR_TO_RDONLY_BUF_OR_NULL: - case PTR_TO_RDWR_BUF: - case PTR_TO_RDWR_BUF_OR_NULL: + case PTR_TO_BUF: case PTR_TO_PERCPU_BTF_ID: case PTR_TO_MEM: - case PTR_TO_MEM_OR_NULL: case PTR_TO_FUNC: case PTR_TO_MAP_KEY: return true; @@ -2626,15 +2812,21 @@ static bool __is_pointer_value(bool allow_ptr_leaks, } static void save_register_state(struct bpf_func_state *state, - int spi, struct bpf_reg_state *reg) + int spi, struct bpf_reg_state *reg, + int size) { int i; state->stack[spi].spilled_ptr = *reg; - state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; + if (size == BPF_REG_SIZE) + state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; - for (i = 0; i < BPF_REG_SIZE; i++) - state->stack[spi].slot_type[i] = STACK_SPILL; + for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--) + state->stack[spi].slot_type[i - 1] = STACK_SPILL; + + /* size < 8 bytes spill */ + for (; i; i--) + scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]); } /* check_stack_{read,write}_fixed_off functions track spill/fill of registers, @@ -2681,7 +2873,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, env->insn_aux_data[insn_idx].sanitize_stack_spill = true; } - if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) && + mark_stack_slot_scratched(env, spi); + if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && !register_is_null(reg) && env->bpf_capable) { if (dst_reg != BPF_REG_FP) { /* The backtracking logic can only recognize explicit @@ -2694,7 +2887,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, if (err) return err; } - save_register_state(state, spi, reg); + save_register_state(state, spi, reg, size); } else if (reg && is_spillable_regtype(reg->type)) { /* register containing pointer is being spilled into stack */ if (size != BPF_REG_SIZE) { @@ -2706,16 +2899,16 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); return -EINVAL; } - save_register_state(state, spi, reg); + save_register_state(state, spi, reg, size); } else { u8 type = STACK_MISC; /* regular write of data into stack destroys any spilled ptr */ state->stack[spi].spilled_ptr.type = NOT_INIT; /* Mark slots as STACK_MISC if they belonged to spilled ptr. */ - if (state->stack[spi].slot_type[0] == STACK_SPILL) + if (is_spilled_reg(&state->stack[spi])) for (i = 0; i < BPF_REG_SIZE; i++) - state->stack[spi].slot_type[i] = STACK_MISC; + scrub_spilled_slot(&state->stack[spi].slot_type[i]); /* only mark the slot as written if all 8 bytes were written * otherwise read propagation may incorrectly stop too soon @@ -2802,6 +2995,7 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env, slot = -i - 1; spi = slot / BPF_REG_SIZE; stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; + mark_stack_slot_scratched(env, spi); if (!env->allow_ptr_leaks && *stype != NOT_INIT @@ -2918,31 +3112,52 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, struct bpf_func_state *state = vstate->frame[vstate->curframe]; int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; struct bpf_reg_state *reg; - u8 *stype; + u8 *stype, type; stype = reg_state->stack[spi].slot_type; reg = ®_state->stack[spi].spilled_ptr; - if (stype[0] == STACK_SPILL) { - if (size != BPF_REG_SIZE) { + if (is_spilled_reg(®_state->stack[spi])) { + u8 spill_size = 1; + + for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--) + spill_size++; + + if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) { if (reg->type != SCALAR_VALUE) { verbose_linfo(env, env->insn_idx, "; "); verbose(env, "invalid size of register fill\n"); return -EACCES; } - if (dst_regno >= 0) { + + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); + if (dst_regno < 0) + return 0; + + if (!(off % BPF_REG_SIZE) && size == spill_size) { + /* The earlier check_reg_arg() has decided the + * subreg_def for this insn. Save it first. + */ + s32 subreg_def = state->regs[dst_regno].subreg_def; + + state->regs[dst_regno] = *reg; + state->regs[dst_regno].subreg_def = subreg_def; + } else { + for (i = 0; i < size; i++) { + type = stype[(slot - i) % BPF_REG_SIZE]; + if (type == STACK_SPILL) + continue; + if (type == STACK_MISC) + continue; + verbose(env, "invalid read from stack off %d+%d size %d\n", + off, i, size); + return -EACCES; + } mark_reg_unknown(env, state->regs, dst_regno); - state->regs[dst_regno].live |= REG_LIVE_WRITTEN; } - mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); + state->regs[dst_regno].live |= REG_LIVE_WRITTEN; return 0; } - for (i = 1; i < BPF_REG_SIZE; i++) { - if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { - verbose(env, "corrupted spill memory\n"); - return -EACCES; - } - } if (dst_regno >= 0) { /* restore register state from stack */ @@ -2965,8 +3180,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, } mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); } else { - u8 type; - for (i = 0; i < size; i++) { type = stype[(slot - i) % BPF_REG_SIZE]; if (type == STACK_MISC) @@ -3199,11 +3412,8 @@ static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno, /* We may have adjusted the register pointing to memory region, so we * need to try adding each of min_value and max_value to off * to make sure our theoretical access will be safe. - */ - if (env->log.level & BPF_LOG_LEVEL) - print_verifier_state(env, state); - - /* The minimum value is only important with signed + * + * The minimum value is only important with signed * comparisons where we can't assume the floor of a * value is 0. If we are using signed variables for our * index'es we need to make sure that whatever we use @@ -3398,7 +3608,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, */ *reg_type = info.reg_type; - if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL) { + if (base_type(*reg_type) == PTR_TO_BTF_ID) { *btf = info.btf; *btf_id = info.btf_id; } else { @@ -3466,7 +3676,7 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, } verbose(env, "R%d invalid %s access off=%d size=%d\n", - regno, reg_type_str[reg->type], off, size); + regno, reg_type_str(env, reg->type), off, size); return -EACCES; } @@ -3761,16 +3971,17 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env, } #endif -int check_ctx_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno) +static int __check_ptr_off_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno, + bool fixed_off_ok) { - /* Access to ctx or passing it to a helper is only allowed in - * its original, unmodified form. + /* Access to this pointer-typed register or passing it to a helper + * is only allowed in its original, unmodified form. */ - if (reg->off) { - verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n", - regno, reg->off); + if (!fixed_off_ok && reg->off) { + verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n", + reg_type_str(env, reg->type), regno, reg->off); return -EACCES; } @@ -3778,13 +3989,20 @@ int check_ctx_reg(struct bpf_verifier_env *env, char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf); + verbose(env, "variable %s access var_off=%s disallowed\n", + reg_type_str(env, reg->type), tn_buf); return -EACCES; } return 0; } +int check_ptr_off_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno) +{ + return __check_ptr_off_reg(env, reg, regno, false); +} + static int __check_buffer_access(struct bpf_verifier_env *env, const char *buf_info, const struct bpf_reg_state *reg, @@ -3884,7 +4102,22 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) static bool bpf_map_is_rdonly(const struct bpf_map *map) { - return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen; + /* A map is considered read-only if the following condition are true: + * + * 1) BPF program side cannot change any of the map content. The + * BPF_F_RDONLY_PROG flag is throughout the lifetime of a map + * and was set at map creation time. + * 2) The map value(s) have been initialized from user space by a + * loader and then "frozen", such that no new map update/delete + * operations from syscall side are possible for the rest of + * the map's lifetime from that point onwards. + * 3) Any parallel/pending map update/delete operations from syscall + * side have been completed. Only after that point, it's safe to + * assume that map value(s) are immutable. + */ + return (map->map_flags & BPF_F_RDONLY_PROG) && + READ_ONCE(map->frozen) && + !bpf_map_write_active(map); } static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) @@ -4178,15 +4411,30 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn mark_reg_unknown(env, regs, value_regno); } } - } else if (reg->type == PTR_TO_MEM) { + } else if (base_type(reg->type) == PTR_TO_MEM) { + bool rdonly_mem = type_is_rdonly_mem(reg->type); + + if (type_may_be_null(reg->type)) { + verbose(env, "R%d invalid mem access '%s'\n", regno, + reg_type_str(env, reg->type)); + return -EACCES; + } + + if (t == BPF_WRITE && rdonly_mem) { + verbose(env, "R%d cannot write into %s\n", + regno, reg_type_str(env, reg->type)); + return -EACCES; + } + if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose(env, "R%d leaks addr into mem\n", value_regno); return -EACCES; } + err = check_mem_region_access(env, regno, off, size, reg->mem_size, false); - if (!err && t == BPF_READ && value_regno >= 0) + if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem)) mark_reg_unknown(env, regs, value_regno); } else if (reg->type == PTR_TO_CTX) { enum bpf_reg_type reg_type = SCALAR_VALUE; @@ -4199,7 +4447,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn return -EACCES; } - err = check_ctx_reg(env, reg, regno); + err = check_ptr_off_reg(env, reg, regno); if (err < 0) return err; @@ -4216,7 +4464,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else { mark_reg_known_zero(env, regs, value_regno); - if (reg_type_may_be_null(reg_type)) + if (type_may_be_null(reg_type)) regs[value_regno].id = ++env->id_gen; /* A load of ctx field could have different * actual load size with the one encoded in the @@ -4224,8 +4472,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn * a sub-register. */ regs[value_regno].subreg_def = DEF_NOT_SUBREG; - if (reg_type == PTR_TO_BTF_ID || - reg_type == PTR_TO_BTF_ID_OR_NULL) { + if (base_type(reg_type) == PTR_TO_BTF_ID) { regs[value_regno].btf = btf; regs[value_regno].btf_id = btf_id; } @@ -4278,7 +4525,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else if (type_is_sk_pointer(reg->type)) { if (t == BPF_WRITE) { verbose(env, "R%d cannot write into %s\n", - regno, reg_type_str[reg->type]); + regno, reg_type_str(env, reg->type)); return -EACCES; } err = check_sock_access(env, insn_idx, regno, off, size, t); @@ -4294,26 +4541,32 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else if (reg->type == CONST_PTR_TO_MAP) { err = check_ptr_to_map_access(env, regs, regno, off, size, t, value_regno); - } else if (reg->type == PTR_TO_RDONLY_BUF) { - if (t == BPF_WRITE) { - verbose(env, "R%d cannot write into %s\n", - regno, reg_type_str[reg->type]); - return -EACCES; + } else if (base_type(reg->type) == PTR_TO_BUF) { + bool rdonly_mem = type_is_rdonly_mem(reg->type); + const char *buf_info; + u32 *max_access; + + if (rdonly_mem) { + if (t == BPF_WRITE) { + verbose(env, "R%d cannot write into %s\n", + regno, reg_type_str(env, reg->type)); + return -EACCES; + } + buf_info = "rdonly"; + max_access = &env->prog->aux->max_rdonly_access; + } else { + buf_info = "rdwr"; + max_access = &env->prog->aux->max_rdwr_access; } + err = check_buffer_access(env, reg, regno, off, size, false, - "rdonly", - &env->prog->aux->max_rdonly_access); - if (!err && value_regno >= 0) - mark_reg_unknown(env, regs, value_regno); - } else if (reg->type == PTR_TO_RDWR_BUF) { - err = check_buffer_access(env, reg, regno, off, size, false, - "rdwr", - &env->prog->aux->max_rdwr_access); - if (!err && t == BPF_READ && value_regno >= 0) + buf_info, max_access); + + if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ)) mark_reg_unknown(env, regs, value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, - reg_type_str[reg->type]); + reg_type_str(env, reg->type)); return -EACCES; } @@ -4364,9 +4617,16 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i if (insn->imm == BPF_CMPXCHG) { /* Check comparison of R0 with memory location */ - err = check_reg_arg(env, BPF_REG_0, SRC_OP); + const u32 aux_reg = BPF_REG_0; + + err = check_reg_arg(env, aux_reg, SRC_OP); if (err) return err; + + if (is_pointer_value(env, aux_reg)) { + verbose(env, "R%d leaks addr into mem\n", aux_reg); + return -EACCES; + } } if (is_pointer_value(env, insn->src_reg)) { @@ -4380,7 +4640,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i is_sk_reg(env, insn->dst_reg)) { verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", insn->dst_reg, - reg_type_str[reg_state(env, insn->dst_reg)->type]); + reg_type_str(env, reg_state(env, insn->dst_reg)->type)); return -EACCES; } @@ -4401,13 +4661,19 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i load_reg = -1; } - /* check whether we can read the memory */ + /* Check whether we can read the memory, with second call for fetch + * case to simulate the register fill. + */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_READ, load_reg, true); + BPF_SIZE(insn->code), BPF_READ, -1, true); + if (!err && load_reg >= 0) + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_READ, load_reg, + true); if (err) return err; - /* check whether we can write into the same memory */ + /* Check whether we can write into the same memory. */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1, true); if (err) @@ -4514,17 +4780,17 @@ static int check_stack_range_initialized( goto mark; } - if (state->stack[spi].slot_type[0] == STACK_SPILL && + if (is_spilled_reg(&state->stack[spi]) && state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID) goto mark; - if (state->stack[spi].slot_type[0] == STACK_SPILL && + if (is_spilled_reg(&state->stack[spi]) && (state->stack[spi].spilled_ptr.type == SCALAR_VALUE || env->allow_ptr_leaks)) { if (clobber) { __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); for (j = 0; j < BPF_REG_SIZE; j++) - state->stack[spi].slot_type[j] = STACK_MISC; + scrub_spilled_slot(&state->stack[spi].slot_type[j]); } goto mark; } @@ -4557,8 +4823,10 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, struct bpf_call_arg_meta *meta) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + const char *buf_info; + u32 *max_access; - switch (reg->type) { + switch (base_type(reg->type)) { case PTR_TO_PACKET: case PTR_TO_PACKET_META: return check_packet_access(env, regno, reg->off, access_size, @@ -4577,18 +4845,20 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return check_mem_region_access(env, regno, reg->off, access_size, reg->mem_size, zero_size_allowed); - case PTR_TO_RDONLY_BUF: - if (meta && meta->raw_mode) - return -EACCES; - return check_buffer_access(env, reg, regno, reg->off, - access_size, zero_size_allowed, - "rdonly", - &env->prog->aux->max_rdonly_access); - case PTR_TO_RDWR_BUF: + case PTR_TO_BUF: + if (type_is_rdonly_mem(reg->type)) { + if (meta && meta->raw_mode) + return -EACCES; + + buf_info = "rdonly"; + max_access = &env->prog->aux->max_rdonly_access; + } else { + buf_info = "rdwr"; + max_access = &env->prog->aux->max_rdwr_access; + } return check_buffer_access(env, reg, regno, reg->off, access_size, zero_size_allowed, - "rdwr", - &env->prog->aux->max_rdwr_access); + buf_info, max_access); case PTR_TO_STACK: return check_stack_range_initialized( env, @@ -4600,9 +4870,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, register_is_null(reg)) return 0; - verbose(env, "R%d type=%s expected=%s\n", regno, - reg_type_str[reg->type], - reg_type_str[PTR_TO_STACK]); + verbose(env, "R%d type=%s ", regno, + reg_type_str(env, reg->type)); + verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK)); return -EACCES; } } @@ -4613,7 +4883,7 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, if (register_is_null(reg)) return 0; - if (reg_type_may_be_null(reg->type)) { + if (type_may_be_null(reg->type)) { /* Assuming that the register contains a value check if the memory * access is safe. Temporarily save and restore the register's state as * the conversion shouldn't be visible to a caller. @@ -4761,9 +5031,8 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno, static bool arg_type_is_mem_ptr(enum bpf_arg_type type) { - return type == ARG_PTR_TO_MEM || - type == ARG_PTR_TO_MEM_OR_NULL || - type == ARG_PTR_TO_UNINIT_MEM; + return base_type(type) == ARG_PTR_TO_MEM || + base_type(type) == ARG_PTR_TO_UNINIT_MEM; } static bool arg_type_is_mem_size(enum bpf_arg_type type) @@ -4813,7 +5082,10 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env, return -EINVAL; } break; - + case BPF_MAP_TYPE_BLOOM_FILTER: + if (meta->func_id == BPF_FUNC_map_peek_elem) + *arg_type = ARG_PTR_TO_MAP_VALUE; + break; default: break; } @@ -4865,8 +5137,8 @@ static const struct bpf_reg_types mem_types = { PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, PTR_TO_MEM, - PTR_TO_RDONLY_BUF, - PTR_TO_RDWR_BUF, + PTR_TO_MEM | MEM_ALLOC, + PTR_TO_BUF, }, }; @@ -4883,7 +5155,7 @@ static const struct bpf_reg_types int_ptr_types = { static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } }; static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } }; static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } }; -static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } }; +static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } }; static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } }; static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; @@ -4897,31 +5169,26 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types, [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types, - [ARG_PTR_TO_MAP_VALUE_OR_NULL] = &map_key_value_types, [ARG_CONST_SIZE] = &scalar_types, [ARG_CONST_SIZE_OR_ZERO] = &scalar_types, [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types, [ARG_CONST_MAP_PTR] = &const_map_ptr_types, [ARG_PTR_TO_CTX] = &context_types, - [ARG_PTR_TO_CTX_OR_NULL] = &context_types, [ARG_PTR_TO_SOCK_COMMON] = &sock_types, #ifdef CONFIG_NET [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types, #endif [ARG_PTR_TO_SOCKET] = &fullsock_types, - [ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types, [ARG_PTR_TO_BTF_ID] = &btf_ptr_types, [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, [ARG_PTR_TO_MEM] = &mem_types, - [ARG_PTR_TO_MEM_OR_NULL] = &mem_types, [ARG_PTR_TO_UNINIT_MEM] = &mem_types, [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types, - [ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types, [ARG_PTR_TO_INT] = &int_ptr_types, [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, [ARG_PTR_TO_FUNC] = &func_ptr_types, - [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types, + [ARG_PTR_TO_STACK] = &stack_ptr_types, [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_TIMER] = &timer_types, }; @@ -4935,12 +5202,27 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, const struct bpf_reg_types *compatible; int i, j; - compatible = compatible_reg_types[arg_type]; + compatible = compatible_reg_types[base_type(arg_type)]; if (!compatible) { verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type); return -EFAULT; } + /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY, + * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY + * + * Same for MAYBE_NULL: + * + * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL, + * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL + * + * Therefore we fold these flags depending on the arg_type before comparison. + */ + if (arg_type & MEM_RDONLY) + type &= ~MEM_RDONLY; + if (arg_type & PTR_MAYBE_NULL) + type &= ~PTR_MAYBE_NULL; + for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { expected = compatible->types[i]; if (expected == NOT_INIT) @@ -4950,14 +5232,14 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, goto found; } - verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]); + verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type)); for (j = 0; j + 1 < i; j++) - verbose(env, "%s, ", reg_type_str[compatible->types[j]]); - verbose(env, "%s\n", reg_type_str[compatible->types[j]]); + verbose(env, "%s, ", reg_type_str(env, compatible->types[j])); + verbose(env, "%s\n", reg_type_str(env, compatible->types[j])); return -EACCES; found: - if (type == PTR_TO_BTF_ID) { + if (reg->type == PTR_TO_BTF_ID) { if (!arg_btf_id) { if (!compatible->btf_id) { verbose(env, "verifier internal error: missing arg compatible BTF ID\n"); @@ -4973,12 +5255,6 @@ found: kernel_type_name(btf_vmlinux, *arg_btf_id)); return -EACCES; } - - if (!tnum_is_const(reg->var_off) || reg->var_off.value) { - verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n", - regno); - return -EACCES; - } } return 0; @@ -5016,15 +5292,14 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, return -EACCES; } - if (arg_type == ARG_PTR_TO_MAP_VALUE || - arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || - arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { + if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || + base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) { err = resolve_map_arg_type(env, meta, &arg_type); if (err) return err; } - if (register_is_null(reg) && arg_type_may_be_null(arg_type)) + if (register_is_null(reg) && type_may_be_null(arg_type)) /* A NULL register has a SCALAR_VALUE type, so skip * type checking. */ @@ -5034,10 +5309,33 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, if (err) return err; - if (type == PTR_TO_CTX) { - err = check_ctx_reg(env, reg, regno); + switch ((u32)type) { + case SCALAR_VALUE: + /* Pointer types where reg offset is explicitly allowed: */ + case PTR_TO_PACKET: + case PTR_TO_PACKET_META: + case PTR_TO_MAP_KEY: + case PTR_TO_MAP_VALUE: + case PTR_TO_MEM: + case PTR_TO_MEM | MEM_RDONLY: + case PTR_TO_MEM | MEM_ALLOC: + case PTR_TO_BUF: + case PTR_TO_BUF | MEM_RDONLY: + case PTR_TO_STACK: + /* Some of the argument types nevertheless require a + * zero register offset. + */ + if (arg_type == ARG_PTR_TO_ALLOC_MEM) + goto force_off_check; + break; + /* All the rest must be rejected: */ + default: +force_off_check: + err = __check_ptr_off_reg(env, reg, regno, + type == PTR_TO_BTF_ID); if (err < 0) return err; + break; } skip_type_check: @@ -5093,10 +5391,11 @@ skip_type_check: err = check_helper_mem_access(env, regno, meta->map_ptr->key_size, false, NULL); - } else if (arg_type == ARG_PTR_TO_MAP_VALUE || - (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL && - !register_is_null(reg)) || - arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { + } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || + base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) { + if (type_may_be_null(arg_type) && register_is_null(reg)) + return 0; + /* bpf_map_xxx(..., map_ptr, ..., value) call: * check [value, value + map->value_size) validity */ @@ -5388,6 +5687,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_task_storage_delete) goto error; break; + case BPF_MAP_TYPE_BLOOM_FILTER: + if (func_id != BPF_FUNC_map_peek_elem && + func_id != BPF_FUNC_map_push_elem) + goto error; + break; default: break; } @@ -5455,13 +5759,18 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, map->map_type != BPF_MAP_TYPE_SOCKHASH) goto error; break; - case BPF_FUNC_map_peek_elem: case BPF_FUNC_map_pop_elem: - case BPF_FUNC_map_push_elem: if (map->map_type != BPF_MAP_TYPE_QUEUE && map->map_type != BPF_MAP_TYPE_STACK) goto error; break; + case BPF_FUNC_map_peek_elem: + case BPF_FUNC_map_push_elem: + if (map->map_type != BPF_MAP_TYPE_QUEUE && + map->map_type != BPF_MAP_TYPE_STACK && + map->map_type != BPF_MAP_TYPE_BLOOM_FILTER) + goto error; + break; case BPF_FUNC_sk_storage_get: case BPF_FUNC_sk_storage_delete: if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) @@ -5750,6 +6059,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn } if (insn->code == (BPF_JMP | BPF_CALL) && + insn->src_reg == 0 && insn->imm == BPF_FUNC_timer_set_callback) { struct bpf_verifier_state *async_cb; @@ -5808,9 +6118,9 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "caller:\n"); - print_verifier_state(env, caller); + print_verifier_state(env, caller, true); verbose(env, "callee:\n"); - print_verifier_state(env, callee); + print_verifier_state(env, callee, true); } return 0; } @@ -5901,6 +6211,27 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env, return 0; } +static int set_loop_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, + * u64 flags); + * callback_fn(u32 index, void *callback_ctx); + */ + callee->regs[BPF_REG_1].type = SCALAR_VALUE; + callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_3]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + + callee->in_callback_fn = true; + return 0; +} + static int set_timer_callback_state(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee, @@ -5930,6 +6261,33 @@ static int set_timer_callback_state(struct bpf_verifier_env *env, return 0; } +static int set_find_vma_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + /* bpf_find_vma(struct task_struct *task, u64 addr, + * void *callback_fn, void *callback_ctx, u64 flags) + * (callback_fn)(struct task_struct *task, + * struct vm_area_struct *vma, void *callback_ctx); + */ + callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1]; + + callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID; + __mark_reg_known_zero(&callee->regs[BPF_REG_2]); + callee->regs[BPF_REG_2].btf = btf_vmlinux; + callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA], + + /* pointer to stack or null */ + callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4]; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + callee->in_callback_fn = true; + return 0; +} + static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) { struct bpf_verifier_state *state = env->cur_state; @@ -5977,9 +6335,9 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) *insn_idx = callee->callsite + 1; if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "returning from callee:\n"); - print_verifier_state(env, callee); + print_verifier_state(env, callee, true); verbose(env, "to caller at %d:\n", *insn_idx); - print_verifier_state(env, caller); + print_verifier_state(env, caller, true); } /* clear everything in the callee */ free_func_state(callee); @@ -6145,13 +6503,11 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, static int check_get_func_ip(struct bpf_verifier_env *env) { - enum bpf_attach_type eatype = env->prog->expected_attach_type; enum bpf_prog_type type = resolve_prog_type(env->prog); int func_id = BPF_FUNC_get_func_ip; if (type == BPF_PROG_TYPE_TRACING) { - if (eatype != BPF_TRACE_FENTRY && eatype != BPF_TRACE_FEXIT && - eatype != BPF_MODIFY_RETURN) { + if (!bpf_prog_has_trampoline(env->prog)) { verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n", func_id_name(func_id), func_id); return -ENOTSUPP; @@ -6170,6 +6526,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn int *insn_idx_p) { const struct bpf_func_proto *fn = NULL; + enum bpf_return_type ret_type; + enum bpf_type_flag ret_flag; struct bpf_reg_state *regs; struct bpf_call_arg_meta meta; int insn_idx = *insn_idx_p; @@ -6247,13 +6605,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return err; } - if (func_id == BPF_FUNC_tail_call) { - err = check_reference_leak(env); - if (err) { - verbose(env, "tail_call would lead to reference leak\n"); - return err; - } - } else if (is_release_function(func_id)) { + if (is_release_function(func_id)) { err = release_reference(env, meta.ref_obj_id); if (err) { verbose(env, "func %s#%d reference has not been acquired before\n", @@ -6264,35 +6616,47 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs = cur_regs(env); - /* check that flags argument in get_local_storage(map, flags) is 0, - * this is required because get_local_storage() can't return an error. - */ - if (func_id == BPF_FUNC_get_local_storage && - !register_is_null(®s[BPF_REG_2])) { - verbose(env, "get_local_storage() doesn't support non-zero flags\n"); - return -EINVAL; - } - - if (func_id == BPF_FUNC_for_each_map_elem) { + switch (func_id) { + case BPF_FUNC_tail_call: + err = check_reference_leak(env); + if (err) { + verbose(env, "tail_call would lead to reference leak\n"); + return err; + } + break; + case BPF_FUNC_get_local_storage: + /* check that flags argument in get_local_storage(map, flags) is 0, + * this is required because get_local_storage() can't return an error. + */ + if (!register_is_null(®s[BPF_REG_2])) { + verbose(env, "get_local_storage() doesn't support non-zero flags\n"); + return -EINVAL; + } + break; + case BPF_FUNC_for_each_map_elem: err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_map_elem_callback_state); - if (err < 0) - return -EINVAL; - } - - if (func_id == BPF_FUNC_timer_set_callback) { + break; + case BPF_FUNC_timer_set_callback: err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_timer_callback_state); - if (err < 0) - return -EINVAL; - } - - if (func_id == BPF_FUNC_snprintf) { + break; + case BPF_FUNC_find_vma: + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_find_vma_callback_state); + break; + case BPF_FUNC_snprintf: err = check_bpf_snprintf_call(env, regs); - if (err < 0) - return err; + break; + case BPF_FUNC_loop: + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_loop_callback_state); + break; } + if (err) + return err; + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); @@ -6303,13 +6667,14 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; /* update return register (already marked as written above) */ - if (fn->ret_type == RET_INTEGER) { + ret_type = fn->ret_type; + ret_flag = type_flag(fn->ret_type); + if (ret_type == RET_INTEGER) { /* sets type to SCALAR_VALUE */ mark_reg_unknown(env, regs, BPF_REG_0); - } else if (fn->ret_type == RET_VOID) { + } else if (ret_type == RET_VOID) { regs[BPF_REG_0].type = NOT_INIT; - } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL || - fn->ret_type == RET_PTR_TO_MAP_VALUE) { + } else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) { /* There is no offset yet applied, variable or fixed */ mark_reg_known_zero(env, regs, BPF_REG_0); /* remember map_ptr, so that check_map_access() @@ -6323,28 +6688,25 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } regs[BPF_REG_0].map_ptr = meta.map_ptr; regs[BPF_REG_0].map_uid = meta.map_uid; - if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { - regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; - if (map_value_has_spin_lock(meta.map_ptr)) - regs[BPF_REG_0].id = ++env->id_gen; - } else { - regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; + regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag; + if (!type_may_be_null(ret_type) && + map_value_has_spin_lock(meta.map_ptr)) { + regs[BPF_REG_0].id = ++env->id_gen; } - } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { + } else if (base_type(ret_type) == RET_PTR_TO_SOCKET) { mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; - } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) { + regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag; + } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) { mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; - } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { + regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag; + } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) { mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; - } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) { + regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag; + } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) { mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; + regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; regs[BPF_REG_0].mem_size = meta.mem_size; - } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL || - fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) { + } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) { const struct btf_type *t; mark_reg_known_zero(env, regs, BPF_REG_0); @@ -6362,29 +6724,30 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn tname, PTR_ERR(ret)); return -EINVAL; } - regs[BPF_REG_0].type = - fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ? - PTR_TO_MEM : PTR_TO_MEM_OR_NULL; + regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; regs[BPF_REG_0].mem_size = tsize; } else { - regs[BPF_REG_0].type = - fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ? - PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL; + /* MEM_RDONLY may be carried from ret_flag, but it + * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise + * it will confuse the check of PTR_TO_BTF_ID in + * check_mem_access(). + */ + ret_flag &= ~MEM_RDONLY; + + regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; regs[BPF_REG_0].btf = meta.ret_btf; regs[BPF_REG_0].btf_id = meta.ret_btf_id; } - } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL || - fn->ret_type == RET_PTR_TO_BTF_ID) { + } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) { int ret_btf_id; mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ? - PTR_TO_BTF_ID : - PTR_TO_BTF_ID_OR_NULL; + regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; ret_btf_id = *fn->ret_btf_id; if (ret_btf_id == 0) { - verbose(env, "invalid return type %d of func %s#%d\n", - fn->ret_type, func_id_name(func_id), func_id); + verbose(env, "invalid return type %u of func %s#%d\n", + base_type(ret_type), func_id_name(func_id), + func_id); return -EINVAL; } /* current BPF helper definitions are only coming from @@ -6393,12 +6756,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].btf = btf_vmlinux; regs[BPF_REG_0].btf_id = ret_btf_id; } else { - verbose(env, "unknown return type %d of func %s#%d\n", - fn->ret_type, func_id_name(func_id), func_id); + verbose(env, "unknown return type %u of func %s#%d\n", + base_type(ret_type), func_id_name(func_id), func_id); return -EINVAL; } - if (reg_type_may_be_null(regs[BPF_REG_0].type)) + if (type_may_be_null(regs[BPF_REG_0].type)) regs[BPF_REG_0].id = ++env->id_gen; if (is_ptr_cast_function(func_id)) { @@ -6485,23 +6848,33 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) struct bpf_reg_state *regs = cur_regs(env); const char *func_name, *ptr_type_name; u32 i, nargs, func_id, ptr_type_id; + struct module *btf_mod = NULL; const struct btf_param *args; + struct btf *desc_btf; int err; + /* skip for now, but return error when we find this in fixup_kfunc_call */ + if (!insn->imm) + return 0; + + desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod); + if (IS_ERR(desc_btf)) + return PTR_ERR(desc_btf); + func_id = insn->imm; - func = btf_type_by_id(btf_vmlinux, func_id); - func_name = btf_name_by_offset(btf_vmlinux, func->name_off); - func_proto = btf_type_by_id(btf_vmlinux, func->type); + func = btf_type_by_id(desc_btf, func_id); + func_name = btf_name_by_offset(desc_btf, func->name_off); + func_proto = btf_type_by_id(desc_btf, func->type); if (!env->ops->check_kfunc_call || - !env->ops->check_kfunc_call(func_id)) { + !env->ops->check_kfunc_call(func_id, btf_mod)) { verbose(env, "calling kernel function %s is not allowed\n", func_name); return -EACCES; } /* Check the arguments */ - err = btf_check_kfunc_arg_match(env, btf_vmlinux, func_id, regs); + err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs); if (err) return err; @@ -6509,15 +6882,15 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) mark_reg_not_init(env, regs, caller_saved[i]); /* Check return type */ - t = btf_type_skip_modifiers(btf_vmlinux, func_proto->type, NULL); + t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); if (btf_type_is_scalar(t)) { mark_reg_unknown(env, regs, BPF_REG_0); mark_btf_func_reg_size(env, BPF_REG_0, t->size); } else if (btf_type_is_ptr(t)) { - ptr_type = btf_type_skip_modifiers(btf_vmlinux, t->type, + ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id); if (!btf_type_is_struct(ptr_type)) { - ptr_type_name = btf_name_by_offset(btf_vmlinux, + ptr_type_name = btf_name_by_offset(desc_btf, ptr_type->name_off); verbose(env, "kernel function %s returns pointer type %s %s is not supported\n", func_name, btf_type_str(ptr_type), @@ -6525,7 +6898,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EINVAL; } mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].btf = btf_vmlinux; + regs[BPF_REG_0].btf = desc_btf; regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); @@ -6536,7 +6909,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) for (i = 0; i < nargs; i++) { u32 regno = i + 1; - t = btf_type_skip_modifiers(btf_vmlinux, args[i].type, NULL); + t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL); if (btf_type_is_ptr(t)) mark_btf_func_reg_size(env, regno, sizeof(void *)); else @@ -6597,25 +6970,25 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env, if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { verbose(env, "math between %s pointer and %lld is not allowed\n", - reg_type_str[type], val); + reg_type_str(env, type), val); return false; } if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) { verbose(env, "%s pointer offset %d is not allowed\n", - reg_type_str[type], reg->off); + reg_type_str(env, type), reg->off); return false; } if (smin == S64_MIN) { verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n", - reg_type_str[type]); + reg_type_str(env, type)); return false; } if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) { verbose(env, "value %lld makes %s pointer be out of bounds\n", - smin, reg_type_str[type]); + smin, reg_type_str(env, type)); return false; } @@ -6992,11 +7365,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, return -EACCES; } - switch (ptr_reg->type) { - case PTR_TO_MAP_VALUE_OR_NULL: + if (ptr_reg->type & PTR_MAYBE_NULL) { verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", - dst, reg_type_str[ptr_reg->type]); + dst, reg_type_str(env, ptr_reg->type)); return -EACCES; + } + + switch (base_type(ptr_reg->type)) { case CONST_PTR_TO_MAP: /* smin_val represents the known value */ if (known && smin_val == 0 && opcode == BPF_ADD) @@ -7004,14 +7379,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, fallthrough; case PTR_TO_PACKET_END: case PTR_TO_SOCKET: - case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: - case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: - case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: verbose(env, "R%d pointer arithmetic on %s prohibited\n", - dst, reg_type_str[ptr_reg->type]); + dst, reg_type_str(env, ptr_reg->type)); return -EACCES; default: break; @@ -7984,12 +8356,12 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, /* Got here implies adding two SCALAR_VALUEs */ if (WARN_ON_ONCE(ptr_reg)) { - print_verifier_state(env, state); + print_verifier_state(env, state, true); verbose(env, "verifier internal error: unexpected ptr_reg\n"); return -EINVAL; } if (WARN_ON(!src_reg)) { - print_verifier_state(env, state); + print_verifier_state(env, state, true); verbose(env, "verifier internal error: no src_reg\n"); return -EINVAL; } @@ -8098,6 +8470,10 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->dst_reg); } zext_32_to_64(dst_reg); + + __update_reg_bounds(dst_reg); + __reg_deduce_bounds(dst_reg); + __reg_bound_offset(dst_reg); } } else { /* case: R = imm @@ -8212,7 +8588,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, new_range = dst_reg->off; if (range_right_open) - new_range--; + new_range++; /* Examples for register markings: * @@ -8730,17 +9106,17 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, struct bpf_reg_state *reg, u32 id, bool is_null) { - if (reg_type_may_be_null(reg->type) && reg->id == id && + if (type_may_be_null(reg->type) && reg->id == id && !WARN_ON_ONCE(!reg->id)) { - /* Old offset (both fixed and variable parts) should - * have been known-zero, because we don't allow pointer - * arithmetic on pointers that might be NULL. - */ if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0) || reg->off)) { - __mark_reg_known_zero(reg); - reg->off = 0; + /* Old offset (both fixed and variable parts) should + * have been known-zero, because we don't allow pointer + * arithmetic on pointers that might be NULL. If we + * see this happening, don't convert the register. + */ + return; } if (is_null) { reg->type = SCALAR_VALUE; @@ -9108,7 +9484,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, */ if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && - reg_type_may_be_null(dst_reg->type)) { + type_may_be_null(dst_reg->type)) { /* Mark all identical registers in each branch as either * safe or unknown depending R == 0 or R != 0 conditional. */ @@ -9124,7 +9500,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, return -EACCES; } if (env->log.level & BPF_LOG_LEVEL) - print_verifier_state(env, this_branch->frame[this_branch->curframe]); + print_insn_state(env, this_branch->frame[this_branch->curframe]); return 0; } @@ -9159,11 +9535,15 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } - if (insn->src_reg == BPF_PSEUDO_BTF_ID) { - mark_reg_known_zero(env, regs, insn->dst_reg); + /* All special src_reg cases are listed below. From this point onwards + * we either succeed and assign a corresponding dst_reg->type after + * zeroing the offset, or fail and reject the program. + */ + mark_reg_known_zero(env, regs, insn->dst_reg); + if (insn->src_reg == BPF_PSEUDO_BTF_ID) { dst_reg->type = aux->btf_var.reg_type; - switch (dst_reg->type) { + switch (base_type(dst_reg->type)) { case PTR_TO_MEM: dst_reg->mem_size = aux->btf_var.mem_size; break; @@ -9181,7 +9561,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) if (insn->src_reg == BPF_PSEUDO_FUNC) { struct bpf_prog_aux *aux = env->prog->aux; - u32 subprogno = insn[1].imm; + u32 subprogno = find_subprog(env, + env->insn_idx + insn->imm + 1); if (!aux->func_info) { verbose(env, "missing btf func_info\n"); @@ -9198,7 +9579,6 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) } map = env->used_maps[aux->map_index]; - mark_reg_known_zero(env, regs, insn->dst_reg); dst_reg->map_ptr = map; if (insn->src_reg == BPF_PSEUDO_MAP_VALUE || @@ -9302,7 +9682,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; } - err = check_ctx_reg(env, ®s[ctx_reg], ctx_reg); + err = check_ptr_off_reg(env, ®s[ctx_reg], ctx_reg); if (err < 0) return err; @@ -9361,7 +9741,7 @@ static int check_return_code(struct bpf_verifier_env *env) /* enforce return zero from async callbacks like timer */ if (reg->type != SCALAR_VALUE) { verbose(env, "In async callback the register R0 is not a known value (%s)\n", - reg_type_str[reg->type]); + reg_type_str(env, reg->type)); return -EINVAL; } @@ -9375,7 +9755,7 @@ static int check_return_code(struct bpf_verifier_env *env) if (is_subprog) { if (reg->type != SCALAR_VALUE) { verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n", - reg_type_str[reg->type]); + reg_type_str(env, reg->type)); return -EINVAL; } return 0; @@ -9439,7 +9819,7 @@ static int check_return_code(struct bpf_verifier_env *env) if (reg->type != SCALAR_VALUE) { verbose(env, "At program exit the register R0 is not a known value (%s)\n", - reg_type_str[reg->type]); + reg_type_str(env, reg->type)); return -EINVAL; } @@ -10022,6 +10402,78 @@ err_free: return err; } +#define MIN_CORE_RELO_SIZE sizeof(struct bpf_core_relo) +#define MAX_CORE_RELO_SIZE MAX_FUNCINFO_REC_SIZE + +static int check_core_relo(struct bpf_verifier_env *env, + const union bpf_attr *attr, + bpfptr_t uattr) +{ + u32 i, nr_core_relo, ncopy, expected_size, rec_size; + struct bpf_core_relo core_relo = {}; + struct bpf_prog *prog = env->prog; + const struct btf *btf = prog->aux->btf; + struct bpf_core_ctx ctx = { + .log = &env->log, + .btf = btf, + }; + bpfptr_t u_core_relo; + int err; + + nr_core_relo = attr->core_relo_cnt; + if (!nr_core_relo) + return 0; + if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo)) + return -EINVAL; + + rec_size = attr->core_relo_rec_size; + if (rec_size < MIN_CORE_RELO_SIZE || + rec_size > MAX_CORE_RELO_SIZE || + rec_size % sizeof(u32)) + return -EINVAL; + + u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel); + expected_size = sizeof(struct bpf_core_relo); + ncopy = min_t(u32, expected_size, rec_size); + + /* Unlike func_info and line_info, copy and apply each CO-RE + * relocation record one at a time. + */ + for (i = 0; i < nr_core_relo; i++) { + /* future proofing when sizeof(bpf_core_relo) changes */ + err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size); + if (err) { + if (err == -E2BIG) { + verbose(env, "nonzero tailing record in core_relo"); + if (copy_to_bpfptr_offset(uattr, + offsetof(union bpf_attr, core_relo_rec_size), + &expected_size, sizeof(expected_size))) + err = -EFAULT; + } + break; + } + + if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) { + err = -EFAULT; + break; + } + + if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) { + verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n", + i, core_relo.insn_off, prog->len); + err = -EINVAL; + break; + } + + err = bpf_core_apply(&ctx, &core_relo, i, + &prog->insnsi[core_relo.insn_off / 8]); + if (err) + break; + bpfptr_add(&u_core_relo, rec_size); + } + return err; +} + static int check_btf_info(struct bpf_verifier_env *env, const union bpf_attr *attr, bpfptr_t uattr) @@ -10052,6 +10504,10 @@ static int check_btf_info(struct bpf_verifier_env *env, if (err) return err; + err = check_core_relo(env, attr, uattr); + if (err) + return err; + return 0; } @@ -10220,7 +10676,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, return true; if (rcur->type == NOT_INIT) return false; - switch (rold->type) { + switch (base_type(rold->type)) { case SCALAR_VALUE: if (env->explore_alu_limits) return false; @@ -10242,6 +10698,22 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, } case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: + /* a PTR_TO_MAP_VALUE could be safe to use as a + * PTR_TO_MAP_VALUE_OR_NULL into the same map. + * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- + * checked, doing so could have affected others with the same + * id, and we can't check for that because we lost the id when + * we converted to a PTR_TO_MAP_VALUE. + */ + if (type_may_be_null(rold->type)) { + if (!type_may_be_null(rcur->type)) + return false; + if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) + return false; + /* Check our ids match any regs they're supposed to */ + return check_ids(rold->id, rcur->id, idmap); + } + /* If the new min/max/var_off satisfy the old ones and * everything else matches, we are OK. * 'id' is not compared, since it's only used for maps with @@ -10253,20 +10725,6 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off); - case PTR_TO_MAP_VALUE_OR_NULL: - /* a PTR_TO_MAP_VALUE could be safe to use as a - * PTR_TO_MAP_VALUE_OR_NULL into the same map. - * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- - * checked, doing so could have affected others with the same - * id, and we can't check for that because we lost the id when - * we converted to a PTR_TO_MAP_VALUE. - */ - if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL) - return false; - if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) - return false; - /* Check our ids match any regs they're supposed to */ - return check_ids(rold->id, rcur->id, idmap); case PTR_TO_PACKET_META: case PTR_TO_PACKET: if (rcur->type != rold->type) @@ -10295,11 +10753,8 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, case PTR_TO_PACKET_END: case PTR_TO_FLOW_KEYS: case PTR_TO_SOCKET: - case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: - case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: - case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: /* Only valid matches are exact, which memcmp() above * would have accepted @@ -10356,9 +10811,9 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, * return false to continue verification of this path */ return false; - if (i % BPF_REG_SIZE) + if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1) continue; - if (old->stack[spi].slot_type[0] != STACK_SPILL) + if (!is_spilled_reg(&old->stack[spi])) continue; if (!regsafe(env, &old->stack[spi].spilled_ptr, &cur->stack[spi].spilled_ptr, idmap)) @@ -10565,7 +11020,7 @@ static int propagate_precision(struct bpf_verifier_env *env, } for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { - if (state->stack[i].slot_type[0] != STACK_SPILL) + if (!is_spilled_reg(&state->stack[i])) continue; state_reg = &state->stack[i].spilled_ptr; if (state_reg->type != SCALAR_VALUE || @@ -10825,17 +11280,13 @@ next: /* Return true if it's OK to have the same insn return a different type. */ static bool reg_type_mismatch_ok(enum bpf_reg_type type) { - switch (type) { + switch (base_type(type)) { case PTR_TO_CTX: case PTR_TO_SOCKET: - case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: - case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: - case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: case PTR_TO_BTF_ID: - case PTR_TO_BTF_ID_OR_NULL: return false; default: return true; @@ -10915,16 +11366,12 @@ static int do_check(struct bpf_verifier_env *env) if (need_resched()) cond_resched(); - if (env->log.level & BPF_LOG_LEVEL2 || - (env->log.level & BPF_LOG_LEVEL && do_print_state)) { - if (env->log.level & BPF_LOG_LEVEL2) - verbose(env, "%d:", env->insn_idx); - else - verbose(env, "\nfrom %d to %d%s:", - env->prev_insn_idx, env->insn_idx, - env->cur_state->speculative ? - " (speculative execution)" : ""); - print_verifier_state(env, state->frame[state->curframe]); + if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) { + verbose(env, "\nfrom %d to %d%s:", + env->prev_insn_idx, env->insn_idx, + env->cur_state->speculative ? + " (speculative execution)" : ""); + print_verifier_state(env, state->frame[state->curframe], true); do_print_state = false; } @@ -10935,9 +11382,15 @@ static int do_check(struct bpf_verifier_env *env) .private_data = env, }; + if (verifier_state_scratched(env)) + print_insn_state(env, state->frame[state->curframe]); + verbose_linfo(env, env->insn_idx, "; "); + env->prev_log_len = env->log.len_used; verbose(env, "%d: ", env->insn_idx); print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); + env->prev_insn_print_len = env->log.len_used - env->prev_log_len; + env->prev_log_len = env->log.len_used; } if (bpf_prog_is_dev_bound(env->prog->aux)) { @@ -11059,7 +11512,7 @@ static int do_check(struct bpf_verifier_env *env) if (is_ctx_reg(env, insn->dst_reg)) { verbose(env, "BPF_ST stores into R%d %s is not allowed\n", insn->dst_reg, - reg_type_str[reg_state(env, insn->dst_reg)->type]); + reg_type_str(env, reg_state(env, insn->dst_reg)->type)); return -EACCES; } @@ -11076,7 +11529,8 @@ static int do_check(struct bpf_verifier_env *env) env->jmps_processed++; if (opcode == BPF_CALL) { if (BPF_SRC(insn->code) != BPF_K || - insn->off != 0 || + (insn->src_reg != BPF_PSEUDO_KFUNC_CALL + && insn->off != 0) || (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL && insn->src_reg != BPF_PSEUDO_KFUNC_CALL) || @@ -11145,6 +11599,7 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; process_bpf_exit: + mark_verifier_state_scratched(env); update_branch_counts(env, env->cur_state); err = pop_stack(env, &prev_insn_idx, &env->insn_idx, pop_log); @@ -11310,7 +11765,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, err = -EINVAL; goto err_put; } - aux->btf_var.reg_type = PTR_TO_MEM; + aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; aux->btf_var.mem_size = tsize; } else { aux->btf_var.reg_type = PTR_TO_BTF_ID; @@ -11435,6 +11890,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, } } + if (map_value_has_timer(map)) { + if (is_tracing_prog_type(prog_type)) { + verbose(env, "tracing progs cannot use bpf_timer yet\n"); + return -EINVAL; + } + } + if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && !bpf_offload_prog_map_match(prog, map)) { verbose(env, "offload device mismatch between prog and map\n"); @@ -11463,6 +11925,9 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, } break; case BPF_MAP_TYPE_RINGBUF: + case BPF_MAP_TYPE_INODE_STORAGE: + case BPF_MAP_TYPE_SK_STORAGE: + case BPF_MAP_TYPE_TASK_STORAGE: break; default: verbose(env, @@ -12350,14 +12815,9 @@ static int jit_subprogs(struct bpf_verifier_env *env) return 0; for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { - if (bpf_pseudo_func(insn)) { - env->insn_aux_data[i].call_imm = insn->imm; - /* subprog is encoded in insn[1].imm */ + if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn)) continue; - } - if (!bpf_pseudo_call(insn)) - continue; /* Upon error here we cannot fall back to interpreter but * need a hard reject of the program. Thus -EFAULT is * propagated in any case. @@ -12378,6 +12838,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) env->insn_aux_data[i].call_imm = insn->imm; /* point imm to __bpf_call_base+1 from JITs point of view */ insn->imm = 1; + if (bpf_pseudo_func(insn)) + /* jit (e.g. x86_64) may emit fewer instructions + * if it learns a u32 imm is the same as a u64 imm. + * Force a non zero here. + */ + insn[1].imm = 1; } err = bpf_prog_alloc_jited_linfo(prog); @@ -12432,6 +12898,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; func[i]->aux->kfunc_tab = prog->aux->kfunc_tab; + func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab; func[i]->aux->linfo = prog->aux->linfo; func[i]->aux->nr_linfo = prog->aux->nr_linfo; func[i]->aux->jited_linfo = prog->aux->jited_linfo; @@ -12461,7 +12928,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) insn = func[i]->insnsi; for (j = 0; j < func[i]->len; j++, insn++) { if (bpf_pseudo_func(insn)) { - subprog = insn[1].imm; + subprog = insn->off; insn[0].imm = (u32)(long)func[subprog]->bpf_func; insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32; continue; @@ -12469,8 +12936,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) if (!bpf_pseudo_call(insn)) continue; subprog = insn->off; - insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) - - __bpf_call_base; + insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func); } /* we use the aux data to keep a list of the start addresses @@ -12513,7 +12979,8 @@ static int jit_subprogs(struct bpf_verifier_env *env) for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { if (bpf_pseudo_func(insn)) { insn[0].imm = env->insn_aux_data[i].call_imm; - insn[1].imm = find_subprog(env, i + insn[0].imm + 1); + insn[1].imm = insn->off; + insn->off = 0; continue; } if (!bpf_pseudo_call(insn)) @@ -12618,10 +13085,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, { const struct bpf_kfunc_desc *desc; + if (!insn->imm) { + verbose(env, "invalid kernel function call not eliminated in verifier pass\n"); + return -EINVAL; + } + /* insn->imm has the btf func_id. Replace it with * an address (relative to __bpf_base_call). */ - desc = find_kfunc_desc(env->prog, insn->imm); + desc = find_kfunc_desc(env->prog, insn->imm, insn->off); if (!desc) { verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n", insn->imm); @@ -12639,6 +13111,7 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, static int do_misc_fixups(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; + enum bpf_attach_type eatype = prog->expected_attach_type; bool expect_blinding = bpf_jit_blinding_enabled(prog); enum bpf_prog_type prog_type = resolve_prog_type(prog); struct bpf_insn *insn = prog->insnsi; @@ -12902,7 +13375,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env) insn->imm == BPF_FUNC_map_push_elem || insn->imm == BPF_FUNC_map_pop_elem || insn->imm == BPF_FUNC_map_peek_elem || - insn->imm == BPF_FUNC_redirect_map)) { + insn->imm == BPF_FUNC_redirect_map || + insn->imm == BPF_FUNC_for_each_map_elem)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; @@ -12946,36 +13420,37 @@ static int do_misc_fixups(struct bpf_verifier_env *env) (int (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_redirect, (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_for_each_callback, + (int (*)(struct bpf_map *map, + bpf_callback_t callback_fn, + void *callback_ctx, + u64 flags))NULL)); patch_map_ops_generic: switch (insn->imm) { case BPF_FUNC_map_lookup_elem: - insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - - __bpf_call_base; + insn->imm = BPF_CALL_IMM(ops->map_lookup_elem); continue; case BPF_FUNC_map_update_elem: - insn->imm = BPF_CAST_CALL(ops->map_update_elem) - - __bpf_call_base; + insn->imm = BPF_CALL_IMM(ops->map_update_elem); continue; case BPF_FUNC_map_delete_elem: - insn->imm = BPF_CAST_CALL(ops->map_delete_elem) - - __bpf_call_base; + insn->imm = BPF_CALL_IMM(ops->map_delete_elem); continue; case BPF_FUNC_map_push_elem: - insn->imm = BPF_CAST_CALL(ops->map_push_elem) - - __bpf_call_base; + insn->imm = BPF_CALL_IMM(ops->map_push_elem); continue; case BPF_FUNC_map_pop_elem: - insn->imm = BPF_CAST_CALL(ops->map_pop_elem) - - __bpf_call_base; + insn->imm = BPF_CALL_IMM(ops->map_pop_elem); continue; case BPF_FUNC_map_peek_elem: - insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - - __bpf_call_base; + insn->imm = BPF_CALL_IMM(ops->map_peek_elem); continue; case BPF_FUNC_redirect_map: - insn->imm = BPF_CAST_CALL(ops->map_redirect) - - __bpf_call_base; + insn->imm = BPF_CALL_IMM(ops->map_redirect); + continue; + case BPF_FUNC_for_each_map_elem: + insn->imm = BPF_CALL_IMM(ops->map_for_each_callback); continue; } @@ -13007,11 +13482,79 @@ patch_map_ops_generic: continue; } + /* Implement bpf_get_func_arg inline. */ + if (prog_type == BPF_PROG_TYPE_TRACING && + insn->imm == BPF_FUNC_get_func_arg) { + /* Load nr_args from ctx - 8 */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6); + insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3); + insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1); + insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0); + insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); + insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0); + insn_buf[7] = BPF_JMP_A(1); + insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL); + cnt = 9; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + + /* Implement bpf_get_func_ret inline. */ + if (prog_type == BPF_PROG_TYPE_TRACING && + insn->imm == BPF_FUNC_get_func_ret) { + if (eatype == BPF_TRACE_FEXIT || + eatype == BPF_MODIFY_RETURN) { + /* Load nr_args from ctx - 8 */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3); + insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1); + insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); + insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0); + insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0); + cnt = 6; + } else { + insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP); + cnt = 1; + } + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + + /* Implement get_func_arg_cnt inline. */ + if (prog_type == BPF_PROG_TYPE_TRACING && + insn->imm == BPF_FUNC_get_func_arg_cnt) { + /* Load nr_args from ctx - 8 */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); + if (!new_prog) + return -ENOMEM; + + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + /* Implement bpf_get_func_ip inline. */ if (prog_type == BPF_PROG_TYPE_TRACING && insn->imm == BPF_FUNC_get_func_ip) { - /* Load IP address from ctx - 8 */ - insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + /* Load IP address from ctx - 16 */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16); new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); if (!new_prog) @@ -13125,7 +13668,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) mark_reg_known_zero(env, regs, i); else if (regs[i].type == SCALAR_VALUE) mark_reg_unknown(env, regs, i); - else if (regs[i].type == PTR_TO_MEM_OR_NULL) { + else if (base_type(regs[i].type) == PTR_TO_MEM) { const u32 mem_size = regs[i].mem_size; mark_reg_known_zero(env, regs, i); @@ -13319,7 +13862,7 @@ BTF_SET_START(btf_non_sleepable_error_inject) /* Three functions below can be called from sleepable and non-sleepable context. * Assume non-sleepable from bpf safety point of view. */ -BTF_ID(func, __add_to_page_cache_locked) +BTF_ID(func, __filemap_add_folio) BTF_ID(func, should_fail_alloc_page) BTF_ID(func, should_failslab) BTF_SET_END(btf_non_sleepable_error_inject) @@ -13713,13 +14256,15 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) log->ubuf = (char __user *) (unsigned long) attr->log_buf; log->len_total = attr->log_size; - ret = -EINVAL; /* log attributes have to be sane */ - if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 || - !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK) + if (!bpf_verifier_log_attr_valid(log)) { + ret = -EINVAL; goto err_unlock; + } } + mark_verifier_state_clean(env); + if (IS_ERR(btf_vmlinux)) { /* Either gcc or pahole or kernel are broken. */ verbose(env, "in-kernel BTF is malformed\n"); @@ -13826,6 +14371,7 @@ skip_full_check: env->verification_time = ktime_get_ns() - start_time; print_verification_stats(env); + env->prog->aux->verified_insns = env->insn_processed; if (log->level && bpf_verifier_log_full(log)) ret = -ENOSPC; |
