From 18688de203b47e5d8d9d0953385bf30b5949324f Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 14 Jan 2022 22:09:44 +0530 Subject: bpf: Fix UAF due to race between btf_try_get_module and load_module While working on code to populate kfunc BTF ID sets for module BTF from its initcall, I noticed that by the time the initcall is invoked, the module BTF can already be seen by userspace (and the BPF verifier). The existing btf_try_get_module calls try_module_get which only fails if mod->state == MODULE_STATE_GOING, i.e. it can increment module reference when module initcall is happening in parallel. Currently, BTF parsing happens from MODULE_STATE_COMING notifier callback. At this point, the module initcalls have not been invoked. The notifier callback parses and prepares the module BTF, allocates an ID, which publishes it to userspace, and then adds it to the btf_modules list allowing the kernel to invoke btf_try_get_module for the BTF. However, at this point, the module has not been fully initialized (i.e. its initcalls have not finished). The code in module.c can still fail and free the module, without caring for other users. However, nothing stops btf_try_get_module from succeeding between the state transition from MODULE_STATE_COMING to MODULE_STATE_LIVE. This leads to a use-after-free issue when BPF program loads successfully in the state transition, load_module's do_init_module call fails and frees the module, and BPF program fd on close calls module_put for the freed module. Future patch has test case to verify we don't regress in this area in future. There are multiple points after prepare_coming_module (in load_module) where failure can occur and module loading can return error. We illustrate and test for the race using the last point where it can practically occur (in module __init function). An illustration of the race: CPU 0 CPU 1 load_module notifier_call(MODULE_STATE_COMING) btf_parse_module btf_alloc_id // Published to userspace list_add(&btf_mod->list, btf_modules) mod->init(...) ... ^ bpf_check | check_pseudo_btf_id | btf_try_get_module | returns true | ... ... | module __init in progress return prog_fd | ... ... V if (ret < 0) free_module(mod) ... close(prog_fd) ... bpf_prog_free_deferred module_put(used_btf.mod) // use-after-free We fix this issue by setting a flag BTF_MODULE_F_LIVE, from the notifier callback when MODULE_STATE_LIVE state is reached for the module, so that we return NULL from btf_try_get_module for modules that are not fully formed. Since try_module_get already checks that module is not in MODULE_STATE_GOING state, and that is the only transition a live module can make before being removed from btf_modules list, this is enough to close the race and prevent the bug. A later selftest patch crafts the race condition artifically to verify that it has been fixed, and that verifier fails to load program (with ENXIO). Lastly, a couple of comments: 1. Even if this race didn't exist, it seems more appropriate to only access resources (ksyms and kfuncs) of a fully formed module which has been initialized completely. 2. This patch was born out of need for synchronization against module initcall for the next patch, so it is needed for correctness even without the aforementioned race condition. The BTF resources initialized by module initcall are set up once and then only looked up, so just waiting until the initcall has finished ensures correct behavior. Fixes: 541c3bad8dc5 ("bpf: Support BPF ksym variables in kernel modules") Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220114163953.1455836-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 33bb8ae4a804..f25bca59909d 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6200,12 +6200,17 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id) return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; } +enum { + BTF_MODULE_F_LIVE = (1 << 0), +}; + #ifdef CONFIG_DEBUG_INFO_BTF_MODULES struct btf_module { struct list_head list; struct module *module; struct btf *btf; struct bin_attribute *sysfs_attr; + int flags; }; static LIST_HEAD(btf_modules); @@ -6233,7 +6238,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, int err = 0; if (mod->btf_data_size == 0 || - (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING)) + (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE && + op != MODULE_STATE_GOING)) goto out; switch (op) { @@ -6291,6 +6297,17 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, btf_mod->sysfs_attr = attr; } + break; + case MODULE_STATE_LIVE: + mutex_lock(&btf_module_mutex); + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { + if (btf_mod->module != module) + continue; + + btf_mod->flags |= BTF_MODULE_F_LIVE; + break; + } + mutex_unlock(&btf_module_mutex); break; case MODULE_STATE_GOING: mutex_lock(&btf_module_mutex); @@ -6338,7 +6355,12 @@ struct module *btf_try_get_module(const struct btf *btf) if (btf_mod->btf != btf) continue; - if (try_module_get(btf_mod->module)) + /* We must only consider module whose __init routine has + * finished, hence we must check for BTF_MODULE_F_LIVE flag, + * which is set from the notifier callback for + * MODULE_STATE_LIVE. + */ + if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module)) res = btf_mod->module; break; -- cgit v1.2.3 From dee872e124e8d5de22b68c58f6f6c3f5e8889160 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 14 Jan 2022 22:09:45 +0530 Subject: bpf: Populate kfunc BTF ID sets in struct btf This patch prepares the kernel to support putting all kinds of kfunc BTF ID sets in the struct btf itself. The various kernel subsystems will make register_btf_kfunc_id_set call in the initcalls (for built-in code and modules). The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS, STRUCT_OPS, and 'types' are check (allowed or not), acquire, release, and ret_null (with PTR_TO_BTF_ID_OR_NULL return type). A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a set of certain hook and type for vmlinux sets, since they are allocated on demand, and otherwise set as NULL. Module sets can only be registered once per hook and type, hence they are directly assigned. A new btf_kfunc_id_set_contains function is exposed for use in verifier, this new method is faster than the existing list searching method, and is also automatic. It also lets other code not care whether the set is unallocated or not. Note that module code can only do single register_btf_kfunc_id_set call per hook. This is why sorting is only done for in-kernel vmlinux sets, because there might be multiple sets for the same hook and type that must be concatenated, hence sorting them is required to ensure bsearch in btf_id_set_contains continues to work correctly. Next commit will update the kernel users to make use of this infrastructure. Finally, add __maybe_unused annotation for BTF ID macros for the !CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during build time. The previous patch is also needed to provide synchronization against initialization for module BTF's kfunc_set_tab introduced here, as described below: The kfunc_set_tab pointer in struct btf is write-once (if we consider the registration phase (comprised of multiple register_btf_kfunc_id_set calls) as a single operation). In this sense, once it has been fully prepared, it isn't modified, only used for lookup (from the verifier context). For btf_vmlinux, it is initialized fully during the do_initcalls phase, which happens fairly early in the boot process, before any processes are present. This also eliminates the possibility of bpf_check being called at that point, thus relieving us of ensuring any synchronization between the registration and lookup function (btf_kfunc_id_set_contains). However, the case for module BTF is a bit tricky. The BTF is parsed, prepared, and published from the MODULE_STATE_COMING notifier callback. After this, the module initcalls are invoked, where our registration function will be called to populate the kfunc_set_tab for module BTF. At this point, BTF may be available to userspace while its corresponding module is still intializing. A BTF fd can then be passed to verifier using bpf syscall (e.g. for kfunc call insn). Hence, there is a race window where verifier may concurrently try to lookup the kfunc_set_tab. To prevent this race, we must ensure the operations are serialized, or waiting for the __init functions to complete. In the earlier registration API, this race was alleviated as verifier bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added by the registration function (called usually at the end of module __init function after all module resources have been initialized). If the verifier made the check_kfunc_call before kfunc BTF ID was added to the list, it would fail verification (saying call isn't allowed). The access to list was protected using a mutex. Now, it would still fail verification, but for a different reason (returning ENXIO due to the failed btf_try_get_module call in add_kfunc_call), because if the __init call is in progress the module will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE transition, and the BTF_MODULE_LIVE flag for btf_module instance will not be set, so the btf_try_get_module call will fail. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 244 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 243 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index f25bca59909d..74037bd65d17 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -198,6 +198,21 @@ DEFINE_IDR(btf_idr); DEFINE_SPINLOCK(btf_idr_lock); +enum btf_kfunc_hook { + BTF_KFUNC_HOOK_XDP, + BTF_KFUNC_HOOK_TC, + BTF_KFUNC_HOOK_STRUCT_OPS, + BTF_KFUNC_HOOK_MAX, +}; + +enum { + BTF_KFUNC_SET_MAX_CNT = 32, +}; + +struct btf_kfunc_set_tab { + struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX]; +}; + struct btf { void *data; struct btf_type **types; @@ -212,6 +227,7 @@ struct btf { refcount_t refcnt; u32 id; struct rcu_head rcu; + struct btf_kfunc_set_tab *kfunc_set_tab; /* split BTF support */ struct btf *base_btf; @@ -1531,8 +1547,30 @@ static void btf_free_id(struct btf *btf) spin_unlock_irqrestore(&btf_idr_lock, flags); } +static void btf_free_kfunc_set_tab(struct btf *btf) +{ + struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab; + int hook, type; + + if (!tab) + return; + /* For module BTF, we directly assign the sets being registered, so + * there is nothing to free except kfunc_set_tab. + */ + if (btf_is_module(btf)) + goto free_tab; + for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) { + for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++) + kfree(tab->sets[hook][type]); + } +free_tab: + kfree(tab); + btf->kfunc_set_tab = NULL; +} + static void btf_free(struct btf *btf) { + btf_free_kfunc_set_tab(btf); kvfree(btf->types); kvfree(btf->resolved_sizes); kvfree(btf->resolved_ids); @@ -6371,6 +6409,36 @@ struct module *btf_try_get_module(const struct btf *btf) return res; } +/* Returns struct btf corresponding to the struct module + * + * This function can return NULL or ERR_PTR. Note that caller must + * release reference for struct btf iff btf_is_module is true. + */ +static struct btf *btf_get_module_btf(const struct module *module) +{ + struct btf *btf = NULL; +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + struct btf_module *btf_mod, *tmp; +#endif + + if (!module) + return bpf_get_btf_vmlinux(); +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + mutex_lock(&btf_module_mutex); + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { + if (btf_mod->module != module) + continue; + + btf_get(btf_mod->btf); + btf = btf_mod->btf; + break; + } + mutex_unlock(&btf_module_mutex); +#endif + + return btf; +} + BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags) { struct btf *btf; @@ -6438,7 +6506,181 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE) BTF_TRACING_TYPE_xxx #undef BTF_TRACING_TYPE -/* BTF ID set registration API for modules */ +/* Kernel Function (kfunc) BTF ID set registration API */ + +static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, + enum btf_kfunc_type type, + struct btf_id_set *add_set, bool vmlinux_set) +{ + struct btf_kfunc_set_tab *tab; + struct btf_id_set *set; + u32 set_cnt; + int ret; + + if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) { + ret = -EINVAL; + goto end; + } + + if (!add_set->cnt) + return 0; + + tab = btf->kfunc_set_tab; + if (!tab) { + tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN); + if (!tab) + return -ENOMEM; + btf->kfunc_set_tab = tab; + } + + set = tab->sets[hook][type]; + /* Warn when register_btf_kfunc_id_set is called twice for the same hook + * for module sets. + */ + if (WARN_ON_ONCE(set && !vmlinux_set)) { + ret = -EINVAL; + goto end; + } + + /* We don't need to allocate, concatenate, and sort module sets, because + * only one is allowed per hook. Hence, we can directly assign the + * pointer and return. + */ + if (!vmlinux_set) { + tab->sets[hook][type] = add_set; + return 0; + } + + /* In case of vmlinux sets, there may be more than one set being + * registered per hook. To create a unified set, we allocate a new set + * and concatenate all individual sets being registered. While each set + * is individually sorted, they may become unsorted when concatenated, + * hence re-sorting the final set again is required to make binary + * searching the set using btf_id_set_contains function work. + */ + set_cnt = set ? set->cnt : 0; + + if (set_cnt > U32_MAX - add_set->cnt) { + ret = -EOVERFLOW; + goto end; + } + + if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) { + ret = -E2BIG; + goto end; + } + + /* Grow set */ + set = krealloc(tab->sets[hook][type], + offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]), + GFP_KERNEL | __GFP_NOWARN); + if (!set) { + ret = -ENOMEM; + goto end; + } + + /* For newly allocated set, initialize set->cnt to 0 */ + if (!tab->sets[hook][type]) + set->cnt = 0; + tab->sets[hook][type] = set; + + /* Concatenate the two sets */ + memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0])); + set->cnt += add_set->cnt; + + sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL); + + return 0; +end: + btf_free_kfunc_set_tab(btf); + return ret; +} + +static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, + const struct btf_kfunc_id_set *kset) +{ + bool vmlinux_set = !btf_is_module(btf); + int type, ret; + + for (type = 0; type < ARRAY_SIZE(kset->sets); type++) { + if (!kset->sets[type]) + continue; + + ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set); + if (ret) + break; + } + return ret; +} + +static bool __btf_kfunc_id_set_contains(const struct btf *btf, + enum btf_kfunc_hook hook, + enum btf_kfunc_type type, + u32 kfunc_btf_id) +{ + struct btf_id_set *set; + + if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) + return false; + if (!btf->kfunc_set_tab) + return false; + set = btf->kfunc_set_tab->sets[hook][type]; + if (!set) + return false; + return btf_id_set_contains(set, kfunc_btf_id); +} + +static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) +{ + switch (prog_type) { + case BPF_PROG_TYPE_XDP: + return BTF_KFUNC_HOOK_XDP; + case BPF_PROG_TYPE_SCHED_CLS: + return BTF_KFUNC_HOOK_TC; + case BPF_PROG_TYPE_STRUCT_OPS: + return BTF_KFUNC_HOOK_STRUCT_OPS; + default: + return BTF_KFUNC_HOOK_MAX; + } +} + +/* Caution: + * Reference to the module (obtained using btf_try_get_module) corresponding to + * the struct btf *MUST* be held when calling this function from verifier + * context. This is usually true as we stash references in prog's kfunc_btf_tab; + * keeping the reference for the duration of the call provides the necessary + * protection for looking up a well-formed btf->kfunc_set_tab. + */ +bool btf_kfunc_id_set_contains(const struct btf *btf, + enum bpf_prog_type prog_type, + enum btf_kfunc_type type, u32 kfunc_btf_id) +{ + enum btf_kfunc_hook hook; + + hook = bpf_prog_type_to_kfunc_hook(prog_type); + return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id); +} + +/* This function must be invoked only from initcalls/module init functions */ +int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, + const struct btf_kfunc_id_set *kset) +{ + enum btf_kfunc_hook hook; + struct btf *btf; + int ret; + + btf = btf_get_module_btf(kset->owner); + if (IS_ERR_OR_NULL(btf)) + return btf ? PTR_ERR(btf) : -ENOENT; + + hook = bpf_prog_type_to_kfunc_hook(prog_type); + ret = btf_populate_kfunc_set(btf, hook, kset); + /* reference is only taken for module BTF */ + if (btf_is_module(btf)) + btf_put(btf); + return ret; +} +EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set); #ifdef CONFIG_DEBUG_INFO_BTF_MODULES -- cgit v1.2.3 From b202d84422223b7222cba5031d182f20b37e146e Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 14 Jan 2022 22:09:46 +0530 Subject: bpf: Remove check_kfunc_call callback and old kfunc BTF ID API Completely remove the old code for check_kfunc_call to help it work with modules, and also the callback itself. The previous commit adds infrastructure to register all sets and put them in vmlinux or module BTF, and concatenates all related sets organized by the hook and the type. Once populated, these sets remain immutable for the lifetime of the struct btf. Also, since we don't need the 'owner' module anywhere when doing check_kfunc_call, drop the 'btf_modp' module parameter from find_kfunc_desc_btf. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220114163953.1455836-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 46 ---------------------------------------------- kernel/bpf/verifier.c | 20 ++++++++------------ 2 files changed, 8 insertions(+), 58 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 74037bd65d17..4be5cf629ca9 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6682,52 +6682,6 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, } EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set); -#ifdef CONFIG_DEBUG_INFO_BTF_MODULES - -void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) -{ - mutex_lock(&l->mutex); - list_add(&s->list, &l->list); - mutex_unlock(&l->mutex); -} -EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set); - -void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, - struct kfunc_btf_id_set *s) -{ - mutex_lock(&l->mutex); - list_del_init(&s->list); - mutex_unlock(&l->mutex); -} -EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set); - -bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, - struct module *owner) -{ - struct kfunc_btf_id_set *s; - - mutex_lock(&klist->mutex); - list_for_each_entry(s, &klist->list, list) { - if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) { - mutex_unlock(&klist->mutex); - return true; - } - } - mutex_unlock(&klist->mutex); - return false; -} - -#define DEFINE_KFUNC_BTF_ID_LIST(name) \ - struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \ - __MUTEX_INITIALIZER(name.mutex) }; \ - EXPORT_SYMBOL_GPL(name) - -DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list); -DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list); - -#endif - int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bfb45381fb3f..72802c1eb5ac 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1741,7 +1741,7 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) } static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, - s16 offset, struct module **btf_modp) + s16 offset) { struct bpf_kfunc_btf kf_btf = { .offset = offset }; struct bpf_kfunc_btf_tab *tab; @@ -1795,8 +1795,6 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), kfunc_btf_cmp_by_off, NULL); } - if (btf_modp) - *btf_modp = b->module; return b->btf; } @@ -1813,8 +1811,7 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab) } static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, - u32 func_id, s16 offset, - struct module **btf_modp) + u32 func_id, s16 offset) { if (offset) { if (offset < 0) { @@ -1825,7 +1822,7 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, return ERR_PTR(-EINVAL); } - return __find_kfunc_desc_btf(env, offset, btf_modp); + return __find_kfunc_desc_btf(env, offset); } return btf_vmlinux ?: ERR_PTR(-ENOENT); } @@ -1888,7 +1885,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) prog_aux->kfunc_btf_tab = btf_tab; } - desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL); + desc_btf = find_kfunc_desc_btf(env, func_id, offset); if (IS_ERR(desc_btf)) { verbose(env, "failed to find BTF for kernel function\n"); return PTR_ERR(desc_btf); @@ -2349,7 +2346,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) return NULL; - desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL); + desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off); if (IS_ERR(desc_btf)) return ""; @@ -6820,7 +6817,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) struct bpf_reg_state *regs = cur_regs(env); const char *func_name, *ptr_type_name; u32 i, nargs, func_id, ptr_type_id; - struct module *btf_mod = NULL; const struct btf_param *args; struct btf *desc_btf; int err; @@ -6829,7 +6825,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) if (!insn->imm) return 0; - desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod); + desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off); if (IS_ERR(desc_btf)) return PTR_ERR(desc_btf); @@ -6838,8 +6834,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) func_name = btf_name_by_offset(desc_btf, func->name_off); func_proto = btf_type_by_id(desc_btf, func->type); - if (!env->ops->check_kfunc_call || - !env->ops->check_kfunc_call(func_id, btf_mod)) { + if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), + BTF_KFUNC_TYPE_CHECK, func_id)) { verbose(env, "calling kernel function %s is not allowed\n", func_name); return -EACCES; -- cgit v1.2.3 From d583691c47dc0424ebe926000339a6d6cd590ff7 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 14 Jan 2022 22:09:47 +0530 Subject: bpf: Introduce mem, size argument pair support for kfunc BPF helpers can associate two adjacent arguments together to pass memory of certain size, using ARG_PTR_TO_MEM and ARG_CONST_SIZE arguments. Since we don't use bpf_func_proto for kfunc, we need to leverage BTF to implement similar support. The ARG_CONST_SIZE processing for helpers is refactored into a common check_mem_size_reg helper that is shared with kfunc as well. kfunc ptr_to_mem support follows logic similar to global functions, where verification is done as if pointer is not null, even when it may be null. This leads to a simple to follow rule for writing kfunc: always check the argument pointer for NULL, except when it is PTR_TO_CTX. Also, the PTR_TO_CTX case is also only safe when the helper expecting pointer to program ctx is not exposed to other programs where same struct is not ctx type. In that case, the type check will fall through to other cases and would permit passing other types of pointers, possibly NULL at runtime. Currently, we require the size argument to be suffixed with "__sz" in the parameter name. This information is then recorded in kernel BTF and verified during function argument checking. In the future we can use BTF tagging instead, and modify the kernel function definitions. This will be a purely kernel-side change. This allows us to have some form of backwards compatibility for structures that are passed in to the kernel function with their size, and allow variable length structures to be passed in if they are accompanied by a size parameter. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220114163953.1455836-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 48 +++++++++++++++++-- kernel/bpf/verifier.c | 124 ++++++++++++++++++++++++++++++++------------------ 2 files changed, 124 insertions(+), 48 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 4be5cf629ca9..cf46694cb266 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5654,6 +5654,32 @@ static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log, return true; } +static bool is_kfunc_arg_mem_size(const struct btf *btf, + const struct btf_param *arg, + const struct bpf_reg_state *reg) +{ + int len, sfx_len = sizeof("__sz") - 1; + const struct btf_type *t; + const char *param_name; + + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) + return false; + + /* In the future, this can be ported to use BTF tagging */ + param_name = btf_name_by_offset(btf, arg->name_off); + if (str_is_empty(param_name)) + return false; + len = strlen(param_name); + if (len < sfx_len) + return false; + param_name += len - sfx_len; + if (strncmp(param_name, "__sz", sfx_len)) + return false; + + return true; +} + static int btf_check_func_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, @@ -5765,17 +5791,33 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, u32 type_size; if (is_kfunc) { + bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], ®s[regno + 1]); + /* Permit pointer to mem, but only when argument * type is pointer to scalar, or struct composed * (recursively) of scalars. + * When arg_mem_size is true, the pointer can be + * void *. */ if (!btf_type_is_scalar(ref_t) && - !__btf_type_is_scalar_struct(log, btf, ref_t, 0)) { + !__btf_type_is_scalar_struct(log, btf, ref_t, 0) && + (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) { bpf_log(log, - "arg#%d pointer type %s %s must point to scalar or struct with scalar\n", - i, btf_type_str(ref_t), ref_tname); + "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n", + i, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : ""); return -EINVAL; } + + /* Check for mem, len pair */ + if (arg_mem_size) { + if (check_kfunc_mem_size_reg(env, ®s[regno + 1], regno + 1)) { + bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", + i, i + 1); + return -EINVAL; + } + i++; + continue; + } } resolve_ret = btf_resolve_size(btf, ref_t, &type_size); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 72802c1eb5ac..2b186185b6b2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4864,6 +4864,62 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, } } +static int check_mem_size_reg(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno, + bool zero_size_allowed, + struct bpf_call_arg_meta *meta) +{ + int err; + + /* This is used to refine r0 return value bounds for helpers + * that enforce this value as an upper bound on return values. + * See do_refine_retval_range() for helpers that can refine + * the return value. C type of helper is u32 so we pull register + * bound from umax_value however, if negative verifier errors + * out. Only upper bounds can be learned because retval is an + * int type and negative retvals are allowed. + */ + if (meta) + meta->msize_max_value = reg->umax_value; + + /* The register is SCALAR_VALUE; the access check + * happens using its boundaries. + */ + if (!tnum_is_const(reg->var_off)) + /* For unprivileged variable accesses, disable raw + * mode so that the program is required to + * initialize all the memory that the helper could + * just partially fill up. + */ + meta = NULL; + + if (reg->smin_value < 0) { + verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", + regno); + return -EACCES; + } + + if (reg->umin_value == 0) { + err = check_helper_mem_access(env, regno - 1, 0, + zero_size_allowed, + meta); + if (err) + return err; + } + + if (reg->umax_value >= BPF_MAX_VAR_SIZ) { + verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", + regno); + return -EACCES; + } + err = check_helper_mem_access(env, regno - 1, + reg->umax_value, + zero_size_allowed, meta); + if (!err) + err = mark_chain_precision(env, regno); + return err; +} + int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size) { @@ -4887,6 +4943,28 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, return check_helper_mem_access(env, regno, mem_size, true, NULL); } +int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + u32 regno) +{ + struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1]; + bool may_be_null = type_may_be_null(mem_reg->type); + struct bpf_reg_state saved_reg; + int err; + + WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5); + + if (may_be_null) { + saved_reg = *mem_reg; + mark_ptr_not_null_reg(mem_reg); + } + + err = check_mem_size_reg(env, reg, regno, true, NULL); + + if (may_be_null) + *mem_reg = saved_reg; + return err; +} + /* Implementation details: * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL * Two bpf_map_lookups (even with the same key) will have different reg->id. @@ -5408,51 +5486,7 @@ skip_type_check: } else if (arg_type_is_mem_size(arg_type)) { bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); - /* This is used to refine r0 return value bounds for helpers - * that enforce this value as an upper bound on return values. - * See do_refine_retval_range() for helpers that can refine - * the return value. C type of helper is u32 so we pull register - * bound from umax_value however, if negative verifier errors - * out. Only upper bounds can be learned because retval is an - * int type and negative retvals are allowed. - */ - meta->msize_max_value = reg->umax_value; - - /* The register is SCALAR_VALUE; the access check - * happens using its boundaries. - */ - if (!tnum_is_const(reg->var_off)) - /* For unprivileged variable accesses, disable raw - * mode so that the program is required to - * initialize all the memory that the helper could - * just partially fill up. - */ - meta = NULL; - - if (reg->smin_value < 0) { - verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", - regno); - return -EACCES; - } - - if (reg->umin_value == 0) { - err = check_helper_mem_access(env, regno - 1, 0, - zero_size_allowed, - meta); - if (err) - return err; - } - - if (reg->umax_value >= BPF_MAX_VAR_SIZ) { - verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", - regno); - return -EACCES; - } - err = check_helper_mem_access(env, regno - 1, - reg->umax_value, - zero_size_allowed, meta); - if (!err) - err = mark_chain_precision(env, regno); + err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta); } else if (arg_type_is_alloc_size(arg_type)) { if (!tnum_is_const(reg->var_off)) { verbose(env, "R%d is not a known constant'\n", -- cgit v1.2.3 From 5c073f26f9dc78a6c8194b23eac7537c9692c7d7 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 14 Jan 2022 22:09:48 +0530 Subject: bpf: Add reference tracking support to kfunc This patch adds verifier support for PTR_TO_BTF_ID return type of kfunc to be a reference, by reusing acquire_reference_state/release_reference support for existing in-kernel bpf helpers. We make use of the three kfunc types: - BTF_KFUNC_TYPE_ACQUIRE Return true if kfunc_btf_id is an acquire kfunc. This will acquire_reference_state for the returned PTR_TO_BTF_ID (this is the only allow return value). Note that acquire kfunc must always return a PTR_TO_BTF_ID{_OR_NULL}, otherwise the program is rejected. - BTF_KFUNC_TYPE_RELEASE Return true if kfunc_btf_id is a release kfunc. This will release the reference to the passed in PTR_TO_BTF_ID which has a reference state (from earlier acquire kfunc). The btf_check_func_arg_match returns the regno (of argument register, hence > 0) if the kfunc is a release kfunc, and a proper referenced PTR_TO_BTF_ID is being passed to it. This is similar to how helper call check uses bpf_call_arg_meta to store the ref_obj_id that is later used to release the reference. Similar to in-kernel helper, we only allow passing one referenced PTR_TO_BTF_ID as an argument. It can also be passed in to normal kfunc, but in case of release kfunc there must always be one PTR_TO_BTF_ID argument that is referenced. - BTF_KFUNC_TYPE_RET_NULL For kfunc returning PTR_TO_BTF_ID, tells if it can be NULL, hence force caller to mark the pointer not null (using check) before accessing it. Note that taking into account the case fixed by commit 93c230e3f5bd ("bpf: Enforce id generation for all may-be-null register type") we assign a non-zero id for mark_ptr_or_null_reg logic. Later, if more return types are supported by kfunc, which have a _OR_NULL variant, it might be better to move this id generation under a common reg_type_may_be_null check, similar to the case in the commit. Referenced PTR_TO_BTF_ID is currently only limited to kfunc, but can be extended in the future to other BPF helpers as well. For now, we can rely on the btf_struct_ids_match check to ensure we get the pointer to the expected struct type. In the future, care needs to be taken to avoid ambiguity for reference PTR_TO_BTF_ID passed to release function, in case multiple candidates can release same BTF ID. e.g. there might be two release kfuncs (or kfunc and helper): foo(struct abc *p); bar(struct abc *p); ... such that both release a PTR_TO_BTF_ID with btf_id of struct abc. In this case we would need to track the acquire function corresponding to the release function to avoid type confusion, and store this information in the register state so that an incorrect program can be rejected. This is not a problem right now, hence it is left as an exercise for the future patch introducing such a case in the kernel. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220114163953.1455836-6-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 32 +++++++++++++++++++++++++++++-- kernel/bpf/verifier.c | 52 +++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 72 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index cf46694cb266..57f5fd5af2f9 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5686,11 +5686,13 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, bool ptr_to_mem_ok) { struct bpf_verifier_log *log = &env->log; + u32 i, nargs, ref_id, ref_obj_id = 0; bool is_kfunc = btf_is_kernel(btf); const char *func_name, *ref_tname; const struct btf_type *t, *ref_t; const struct btf_param *args; - u32 i, nargs, ref_id; + int ref_regno = 0; + bool rel = false; t = btf_type_by_id(btf, func_id); if (!t || !btf_type_is_func(t)) { @@ -5768,6 +5770,16 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, if (reg->type == PTR_TO_BTF_ID) { reg_btf = reg->btf; reg_ref_id = reg->btf_id; + /* Ensure only one argument is referenced PTR_TO_BTF_ID */ + if (reg->ref_obj_id) { + if (ref_obj_id) { + bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", + regno, reg->ref_obj_id, ref_obj_id); + return -EFAULT; + } + ref_regno = regno; + ref_obj_id = reg->ref_obj_id; + } } else { reg_btf = btf_vmlinux; reg_ref_id = *reg2btf_ids[reg->type]; @@ -5838,7 +5850,23 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, } } - return 0; + /* Either both are set, or neither */ + WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno)); + if (is_kfunc) { + rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog), + BTF_KFUNC_TYPE_RELEASE, func_id); + /* We already made sure ref_obj_id is set only for one argument */ + if (rel && !ref_obj_id) { + bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n", + func_name); + return -EINVAL; + } + /* Allow (!rel && ref_obj_id), so that passing such referenced PTR_TO_BTF_ID to + * other kfuncs works + */ + } + /* returns argument register number > 0 in case of reference release kfunc */ + return rel ? ref_regno : 0; } /* Compare BTF of a function with given bpf_reg_state. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2b186185b6b2..8c5a46d41f28 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -452,7 +452,8 @@ static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) { return base_type(type) == PTR_TO_SOCKET || base_type(type) == PTR_TO_TCP_SOCK || - base_type(type) == PTR_TO_MEM; + base_type(type) == PTR_TO_MEM || + base_type(type) == PTR_TO_BTF_ID; } static bool type_is_rdonly_mem(u32 type) @@ -3493,11 +3494,6 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, #define MAX_PACKET_OFF 0xffff -static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) -{ - return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type; -} - static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta, enum bpf_access_type t) @@ -6845,15 +6841,17 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, } } -static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) +static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx_p) { const struct btf_type *t, *func, *func_proto, *ptr_type; struct bpf_reg_state *regs = cur_regs(env); const char *func_name, *ptr_type_name; u32 i, nargs, func_id, ptr_type_id; + int err, insn_idx = *insn_idx_p; const struct btf_param *args; struct btf *desc_btf; - int err; + bool acq; /* skip for now, but return error when we find this in fixup_kfunc_call */ if (!insn->imm) @@ -6875,16 +6873,36 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EACCES; } + acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), + BTF_KFUNC_TYPE_ACQUIRE, func_id); + /* Check the arguments */ err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs); - if (err) + if (err < 0) return err; + /* In case of release function, we get register number of refcounted + * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now + */ + if (err) { + err = release_reference(env, regs[err].ref_obj_id); + if (err) { + verbose(env, "kfunc %s#%d reference has not been acquired before\n", + func_name, func_id); + return err; + } + } for (i = 0; i < CALLER_SAVED_REGS; i++) mark_reg_not_init(env, regs, caller_saved[i]); /* Check return type */ t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); + + if (acq && !btf_type_is_ptr(t)) { + verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); + return -EINVAL; + } + if (btf_type_is_scalar(t)) { mark_reg_unknown(env, regs, BPF_REG_0); mark_btf_func_reg_size(env, BPF_REG_0, t->size); @@ -6903,7 +6921,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) regs[BPF_REG_0].btf = desc_btf; regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; + if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), + BTF_KFUNC_TYPE_RET_NULL, func_id)) { + regs[BPF_REG_0].type |= PTR_MAYBE_NULL; + /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */ + regs[BPF_REG_0].id = ++env->id_gen; + } mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); + if (acq) { + int id = acquire_reference_state(env, insn_idx); + + if (id < 0) + return id; + regs[BPF_REG_0].id = id; + regs[BPF_REG_0].ref_obj_id = id; + } } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ nargs = btf_type_vlen(func_proto); @@ -11548,7 +11580,7 @@ static int do_check(struct bpf_verifier_env *env) if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) - err = check_kfunc_call(env, insn); + err = check_kfunc_call(env, insn, &env->insn_idx); else err = check_helper_call(env, insn, &env->insn_idx); if (err) -- cgit v1.2.3