summaryrefslogtreecommitdiff
path: root/kernel/bpf
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/bpf_local_storage.c4
-rw-r--r--kernel/bpf/btf.c199
-rw-r--r--kernel/bpf/core.c13
-rw-r--r--kernel/bpf/hashtab.c4
-rw-r--r--kernel/bpf/helpers.c94
-rw-r--r--kernel/bpf/memalloc.c5
-rw-r--r--kernel/bpf/syscall.c48
-rw-r--r--kernel/bpf/verifier.c606
8 files changed, 795 insertions, 178 deletions
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 373c3c2c75bc..35f4138a54dc 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -568,8 +568,8 @@ static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_att
nbuckets = max_t(u32, 2, nbuckets);
smap->bucket_log = ilog2(nbuckets);
- smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
- GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
+ smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
+ nbuckets, GFP_USER | __GFP_NOWARN);
if (!smap->buckets) {
bpf_map_area_free(smap);
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 740bdb045b14..fa22ec79ac0e 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3324,12 +3324,14 @@ static const char *btf_find_decl_tag_value(const struct btf *btf,
return NULL;
}
-static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt,
- const struct btf_type *t, int comp_idx,
- u32 off, int sz, struct btf_field_info *info)
+static int
+btf_find_graph_root(const struct btf *btf, const struct btf_type *pt,
+ const struct btf_type *t, int comp_idx, u32 off,
+ int sz, struct btf_field_info *info,
+ enum btf_field_type head_type)
{
+ const char *node_field_name;
const char *value_type;
- const char *list_node;
s32 id;
if (!__btf_type_is_struct(t))
@@ -3339,26 +3341,32 @@ static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt,
value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:");
if (!value_type)
return -EINVAL;
- list_node = strstr(value_type, ":");
- if (!list_node)
+ node_field_name = strstr(value_type, ":");
+ if (!node_field_name)
return -EINVAL;
- value_type = kstrndup(value_type, list_node - value_type, GFP_KERNEL | __GFP_NOWARN);
+ value_type = kstrndup(value_type, node_field_name - value_type, GFP_KERNEL | __GFP_NOWARN);
if (!value_type)
return -ENOMEM;
id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT);
kfree(value_type);
if (id < 0)
return id;
- list_node++;
- if (str_is_empty(list_node))
+ node_field_name++;
+ if (str_is_empty(node_field_name))
return -EINVAL;
- info->type = BPF_LIST_HEAD;
+ info->type = head_type;
info->off = off;
info->graph_root.value_btf_id = id;
- info->graph_root.node_name = list_node;
+ info->graph_root.node_name = node_field_name;
return BTF_FIELD_FOUND;
}
+#define field_mask_test_name(field_type, field_type_str) \
+ if (field_mask & field_type && !strcmp(name, field_type_str)) { \
+ type = field_type; \
+ goto end; \
+ }
+
static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
int *align, int *sz)
{
@@ -3382,18 +3390,11 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
goto end;
}
}
- if (field_mask & BPF_LIST_HEAD) {
- if (!strcmp(name, "bpf_list_head")) {
- type = BPF_LIST_HEAD;
- goto end;
- }
- }
- if (field_mask & BPF_LIST_NODE) {
- if (!strcmp(name, "bpf_list_node")) {
- type = BPF_LIST_NODE;
- goto end;
- }
- }
+ field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
+ field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
+ field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root");
+ field_mask_test_name(BPF_RB_NODE, "bpf_rb_node");
+
/* Only return BPF_KPTR when all other types with matchable names fail */
if (field_mask & BPF_KPTR) {
type = BPF_KPTR_REF;
@@ -3406,6 +3407,8 @@ end:
return type;
}
+#undef field_mask_test_name
+
static int btf_find_struct_field(const struct btf *btf,
const struct btf_type *t, u32 field_mask,
struct btf_field_info *info, int info_cnt)
@@ -3438,6 +3441,7 @@ static int btf_find_struct_field(const struct btf *btf,
case BPF_SPIN_LOCK:
case BPF_TIMER:
case BPF_LIST_NODE:
+ case BPF_RB_NODE:
ret = btf_find_struct(btf, member_type, off, sz, field_type,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3451,8 +3455,11 @@ static int btf_find_struct_field(const struct btf *btf,
return ret;
break;
case BPF_LIST_HEAD:
- ret = btf_find_list_head(btf, t, member_type, i, off, sz,
- idx < info_cnt ? &info[idx] : &tmp);
+ case BPF_RB_ROOT:
+ ret = btf_find_graph_root(btf, t, member_type,
+ i, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp,
+ field_type);
if (ret < 0)
return ret;
break;
@@ -3499,6 +3506,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
case BPF_SPIN_LOCK:
case BPF_TIMER:
case BPF_LIST_NODE:
+ case BPF_RB_NODE:
ret = btf_find_struct(btf, var_type, off, sz, field_type,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3512,8 +3520,11 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
return ret;
break;
case BPF_LIST_HEAD:
- ret = btf_find_list_head(btf, var, var_type, -1, off, sz,
- idx < info_cnt ? &info[idx] : &tmp);
+ case BPF_RB_ROOT:
+ ret = btf_find_graph_root(btf, var, var_type,
+ -1, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp,
+ field_type);
if (ret < 0)
return ret;
break;
@@ -3615,8 +3626,11 @@ end_btf:
return ret;
}
-static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
- struct btf_field_info *info)
+static int btf_parse_graph_root(const struct btf *btf,
+ struct btf_field *field,
+ struct btf_field_info *info,
+ const char *node_type_name,
+ size_t node_type_align)
{
const struct btf_type *t, *n = NULL;
const struct btf_member *member;
@@ -3638,13 +3652,13 @@ static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
n = btf_type_by_id(btf, member->type);
if (!__btf_type_is_struct(n))
return -EINVAL;
- if (strcmp("bpf_list_node", __btf_name_by_offset(btf, n->name_off)))
+ if (strcmp(node_type_name, __btf_name_by_offset(btf, n->name_off)))
return -EINVAL;
offset = __btf_member_bit_offset(n, member);
if (offset % 8)
return -EINVAL;
offset /= 8;
- if (offset % __alignof__(struct bpf_list_node))
+ if (offset % node_type_align)
return -EINVAL;
field->graph_root.btf = (struct btf *)btf;
@@ -3656,6 +3670,20 @@ static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
return 0;
}
+static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
+ struct btf_field_info *info)
+{
+ return btf_parse_graph_root(btf, field, info, "bpf_list_node",
+ __alignof__(struct bpf_list_node));
+}
+
+static int btf_parse_rb_root(const struct btf *btf, struct btf_field *field,
+ struct btf_field_info *info)
+{
+ return btf_parse_graph_root(btf, field, info, "bpf_rb_node",
+ __alignof__(struct bpf_rb_node));
+}
+
struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
u32 field_mask, u32 value_size)
{
@@ -3718,7 +3746,13 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
if (ret < 0)
goto end;
break;
+ case BPF_RB_ROOT:
+ ret = btf_parse_rb_root(btf, &rec->fields[i], &info_arr[i]);
+ if (ret < 0)
+ goto end;
+ break;
case BPF_LIST_NODE:
+ case BPF_RB_NODE:
break;
default:
ret = -EFAULT;
@@ -3727,8 +3761,33 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
rec->cnt++;
}
- /* bpf_list_head requires bpf_spin_lock */
- if (btf_record_has_field(rec, BPF_LIST_HEAD) && rec->spin_lock_off < 0) {
+ /* bpf_{list_head, rb_node} require bpf_spin_lock */
+ if ((btf_record_has_field(rec, BPF_LIST_HEAD) ||
+ btf_record_has_field(rec, BPF_RB_ROOT)) && rec->spin_lock_off < 0) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ /* need collection identity for non-owning refs before allowing this
+ *
+ * Consider a node type w/ both list and rb_node fields:
+ * struct node {
+ * struct bpf_list_node l;
+ * struct bpf_rb_node r;
+ * }
+ *
+ * Used like so:
+ * struct node *n = bpf_obj_new(....);
+ * bpf_list_push_front(&list_head, &n->l);
+ * bpf_rbtree_remove(&rb_root, &n->r);
+ *
+ * It should not be possible to rbtree_remove the node since it hasn't
+ * been added to a tree. But push_front converts n to a non-owning
+ * reference, and rbtree_remove accepts the non-owning reference to
+ * a type w/ bpf_rb_node field.
+ */
+ if (btf_record_has_field(rec, BPF_LIST_NODE) &&
+ btf_record_has_field(rec, BPF_RB_NODE)) {
ret = -EINVAL;
goto end;
}
@@ -3739,22 +3798,28 @@ end:
return ERR_PTR(ret);
}
+#define GRAPH_ROOT_MASK (BPF_LIST_HEAD | BPF_RB_ROOT)
+#define GRAPH_NODE_MASK (BPF_LIST_NODE | BPF_RB_NODE)
+
int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
{
int i;
- /* There are two owning types, kptr_ref and bpf_list_head. The former
- * only supports storing kernel types, which can never store references
- * to program allocated local types, atleast not yet. Hence we only need
- * to ensure that bpf_list_head ownership does not form cycles.
+ /* There are three types that signify ownership of some other type:
+ * kptr_ref, bpf_list_head, bpf_rb_root.
+ * kptr_ref only supports storing kernel types, which can't store
+ * references to program allocated local types.
+ *
+ * Hence we only need to ensure that bpf_{list_head,rb_root} ownership
+ * does not form cycles.
*/
- if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_LIST_HEAD))
+ if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & GRAPH_ROOT_MASK))
return 0;
for (i = 0; i < rec->cnt; i++) {
struct btf_struct_meta *meta;
u32 btf_id;
- if (!(rec->fields[i].type & BPF_LIST_HEAD))
+ if (!(rec->fields[i].type & GRAPH_ROOT_MASK))
continue;
btf_id = rec->fields[i].graph_root.value_btf_id;
meta = btf_find_struct_meta(btf, btf_id);
@@ -3762,39 +3827,47 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
return -EFAULT;
rec->fields[i].graph_root.value_rec = meta->record;
- if (!(rec->field_mask & BPF_LIST_NODE))
+ /* We need to set value_rec for all root types, but no need
+ * to check ownership cycle for a type unless it's also a
+ * node type.
+ */
+ if (!(rec->field_mask & GRAPH_NODE_MASK))
continue;
/* We need to ensure ownership acyclicity among all types. The
* proper way to do it would be to topologically sort all BTF
* IDs based on the ownership edges, since there can be multiple
- * bpf_list_head in a type. Instead, we use the following
- * reasoning:
+ * bpf_{list_head,rb_node} in a type. Instead, we use the
+ * following resaoning:
*
* - A type can only be owned by another type in user BTF if it
- * has a bpf_list_node.
+ * has a bpf_{list,rb}_node. Let's call these node types.
* - A type can only _own_ another type in user BTF if it has a
- * bpf_list_head.
+ * bpf_{list_head,rb_root}. Let's call these root types.
*
- * We ensure that if a type has both bpf_list_head and
- * bpf_list_node, its element types cannot be owning types.
+ * We ensure that if a type is both a root and node, its
+ * element types cannot be root types.
*
* To ensure acyclicity:
*
- * When A only has bpf_list_head, ownership chain can be:
+ * When A is an root type but not a node, its ownership
+ * chain can be:
* A -> B -> C
* Where:
- * - B has both bpf_list_head and bpf_list_node.
- * - C only has bpf_list_node.
+ * - A is an root, e.g. has bpf_rb_root.
+ * - B is both a root and node, e.g. has bpf_rb_node and
+ * bpf_list_head.
+ * - C is only an root, e.g. has bpf_list_node
*
- * When A has both bpf_list_head and bpf_list_node, some other
- * type already owns it in the BTF domain, hence it can not own
- * another owning type through any of the bpf_list_head edges.
+ * When A is both a root and node, some other type already
+ * owns it in the BTF domain, hence it can not own
+ * another root type through any of the ownership edges.
* A -> B
* Where:
- * - B only has bpf_list_node.
+ * - A is both an root and node.
+ * - B is only an node.
*/
- if (meta->record->field_mask & BPF_LIST_HEAD)
+ if (meta->record->field_mask & GRAPH_ROOT_MASK)
return -ELOOP;
}
return 0;
@@ -5256,6 +5329,8 @@ static const char *alloc_obj_fields[] = {
"bpf_spin_lock",
"bpf_list_head",
"bpf_list_node",
+ "bpf_rb_root",
+ "bpf_rb_node",
};
static struct btf_struct_metas *
@@ -5329,7 +5404,8 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
type = &tab->types[tab->cnt];
type->btf_id = i;
- record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE, t->size);
+ record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
+ BPF_RB_ROOT | BPF_RB_NODE, t->size);
/* The record cannot be unset, treat it as an error if so */
if (IS_ERR_OR_NULL(record)) {
ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
@@ -5593,6 +5669,7 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
if (!ctx_struct)
/* should not happen */
return NULL;
+again:
ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_struct->name_off);
if (!ctx_tname) {
/* should not happen */
@@ -5606,8 +5683,16 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
* int socket_filter_bpf_prog(struct __sk_buff *skb)
* { // no fields of skb are ever used }
*/
- if (strcmp(ctx_tname, tname))
- return NULL;
+ if (strcmp(ctx_tname, tname)) {
+ /* bpf_user_pt_regs_t is a typedef, so resolve it to
+ * underlying struct and check name again
+ */
+ if (!btf_type_is_modifier(ctx_struct))
+ return NULL;
+ while (btf_type_is_modifier(ctx_struct))
+ ctx_struct = btf_type_by_id(btf_vmlinux, ctx_struct->type);
+ goto again;
+ }
return ctx_type;
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 16da51093aff..3390961c4e10 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -35,6 +35,7 @@
#include <linux/bpf_verifier.h>
#include <linux/nodemask.h>
#include <linux/bpf_mem_alloc.h>
+#include <linux/memcontrol.h>
#include <asm/barrier.h>
#include <asm/unaligned.h>
@@ -87,7 +88,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
+ gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
struct bpf_prog_aux *aux;
struct bpf_prog *fp;
@@ -96,12 +97,12 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
if (fp == NULL)
return NULL;
- aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT | gfp_extra_flags);
+ aux = kzalloc(sizeof(*aux), bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
if (aux == NULL) {
vfree(fp);
return NULL;
}
- fp->active = alloc_percpu_gfp(int, GFP_KERNEL_ACCOUNT | gfp_extra_flags);
+ fp->active = alloc_percpu_gfp(int, bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
if (!fp->active) {
vfree(fp);
kfree(aux);
@@ -126,7 +127,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
+ gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
struct bpf_prog *prog;
int cpu;
@@ -159,7 +160,7 @@ int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
prog->aux->jited_linfo = kvcalloc(prog->aux->nr_linfo,
sizeof(*prog->aux->jited_linfo),
- GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+ bpf_memcg_flags(GFP_KERNEL | __GFP_NOWARN));
if (!prog->aux->jited_linfo)
return -ENOMEM;
@@ -234,7 +235,7 @@ void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
+ gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
struct bpf_prog *fp;
u32 pages;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 66bded144377..5dfcb5ad0d06 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1004,8 +1004,6 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
l_new = ERR_PTR(-ENOMEM);
goto dec_count;
}
- check_and_init_map_value(&htab->map,
- l_new->key + round_up(key_size, 8));
}
memcpy(l_new->key, key, key_size);
@@ -1592,6 +1590,7 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
else
copy_map_value(map, value, l->key +
roundup_key_size);
+ /* Zeroing special fields in the temp buffer */
check_and_init_map_value(map, value);
}
@@ -1792,6 +1791,7 @@ again_nocopy:
true);
else
copy_map_value(map, dst_val, value);
+ /* Zeroing special fields in the temp buffer */
check_and_init_map_value(map, dst_val);
}
if (do_delete) {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 2dae44581922..5b278a38ae58 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1772,6 +1772,46 @@ unlock:
}
}
+/* Like rbtree_postorder_for_each_entry_safe, but 'pos' and 'n' are
+ * 'rb_node *', so field name of rb_node within containing struct is not
+ * needed.
+ *
+ * Since bpf_rb_tree's node type has a corresponding struct btf_field with
+ * graph_root.node_offset, it's not necessary to know field name
+ * or type of node struct
+ */
+#define bpf_rbtree_postorder_for_each_entry_safe(pos, n, root) \
+ for (pos = rb_first_postorder(root); \
+ pos && ({ n = rb_next_postorder(pos); 1; }); \
+ pos = n)
+
+void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
+ struct bpf_spin_lock *spin_lock)
+{
+ struct rb_root_cached orig_root, *root = rb_root;
+ struct rb_node *pos, *n;
+ void *obj;
+
+ BUILD_BUG_ON(sizeof(struct rb_root_cached) > sizeof(struct bpf_rb_root));
+ BUILD_BUG_ON(__alignof__(struct rb_root_cached) > __alignof__(struct bpf_rb_root));
+
+ __bpf_spin_lock_irqsave(spin_lock);
+ orig_root = *root;
+ *root = RB_ROOT_CACHED;
+ __bpf_spin_unlock_irqrestore(spin_lock);
+
+ bpf_rbtree_postorder_for_each_entry_safe(pos, n, &orig_root.rb_root) {
+ obj = pos;
+ obj -= field->graph_root.node_offset;
+
+ bpf_obj_free_fields(field->graph_root.value_rec, obj);
+
+ migrate_disable();
+ bpf_mem_free(&bpf_global_ma, obj);
+ migrate_enable();
+ }
+}
+
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
"Global functions as their definitions will be in vmlinux BTF");
@@ -1844,6 +1884,56 @@ __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
return __bpf_list_del(head, true);
}
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
+ struct bpf_rb_node *node)
+{
+ struct rb_root_cached *r = (struct rb_root_cached *)root;
+ struct rb_node *n = (struct rb_node *)node;
+
+ rb_erase_cached(n, r);
+ RB_CLEAR_NODE(n);
+ return (struct bpf_rb_node *)n;
+}
+
+/* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF
+ * program
+ */
+static void __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ void *less)
+{
+ struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node;
+ bpf_callback_t cb = (bpf_callback_t)less;
+ struct rb_node *parent = NULL;
+ bool leftmost = true;
+
+ while (*link) {
+ parent = *link;
+ if (cb((uintptr_t)node, (uintptr_t)parent, 0, 0, 0)) {
+ link = &parent->rb_left;
+ } else {
+ link = &parent->rb_right;
+ leftmost = false;
+ }
+ }
+
+ rb_link_node((struct rb_node *)node, parent, link);
+ rb_insert_color_cached((struct rb_node *)node,
+ (struct rb_root_cached *)root, leftmost);
+}
+
+__bpf_kfunc void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b))
+{
+ __bpf_rbtree_add(root, node, (void *)less);
+}
+
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
+{
+ struct rb_root_cached *r = (struct rb_root_cached *)root;
+
+ return (struct bpf_rb_node *)rb_first_cached(r);
+}
+
/**
* bpf_task_acquire - Acquire a reference to a task. A task acquired by this
* kfunc which is not stored in a map as a kptr, must be released by calling
@@ -2068,6 +2158,10 @@ BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE)
+BTF_ID_FLAGS(func, bpf_rbtree_add)
+BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
+
#ifdef CONFIG_CGROUPS
BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 1db156405b68..5fcdacbb8439 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -143,7 +143,7 @@ static void *__alloc(struct bpf_mem_cache *c, int node)
return obj;
}
- return kmalloc_node(c->unit_size, flags, node);
+ return kmalloc_node(c->unit_size, flags | __GFP_ZERO, node);
}
static struct mem_cgroup *get_memcg(const struct bpf_mem_cache *c)
@@ -395,7 +395,8 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
unit_size = size;
#ifdef CONFIG_MEMCG_KMEM
- objcg = get_obj_cgroup_from_current();
+ if (memcg_bpf_enabled())
+ objcg = get_obj_cgroup_from_current();
#endif
for_each_possible_cpu(cpu) {
c = per_cpu_ptr(pc, cpu);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bcc97613de76..e3fcdc9836a6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -309,7 +309,7 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
* __GFP_RETRY_MAYFAIL to avoid such situations.
*/
- const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_ACCOUNT;
+ gfp_t gfp = bpf_memcg_flags(__GFP_NOWARN | __GFP_ZERO);
unsigned int flags = 0;
unsigned long align = 1;
void *area;
@@ -418,7 +418,8 @@ static void bpf_map_save_memcg(struct bpf_map *map)
* So we have to check map->objcg for being NULL each time it's
* being used.
*/
- map->objcg = get_obj_cgroup_from_current();
+ if (memcg_bpf_enabled())
+ map->objcg = get_obj_cgroup_from_current();
}
static void bpf_map_release_memcg(struct bpf_map *map)
@@ -464,6 +465,21 @@ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
return ptr;
}
+void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
+ gfp_t flags)
+{
+ struct mem_cgroup *memcg, *old_memcg;
+ void *ptr;
+
+ memcg = bpf_map_get_memcg(map);
+ old_memcg = set_active_memcg(memcg);
+ ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT);
+ set_active_memcg(old_memcg);
+ mem_cgroup_put(memcg);
+
+ return ptr;
+}
+
void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
size_t align, gfp_t flags)
{
@@ -521,9 +537,6 @@ void btf_record_free(struct btf_record *rec)
return;
for (i = 0; i < rec->cnt; i++) {
switch (rec->fields[i].type) {
- case BPF_SPIN_LOCK:
- case BPF_TIMER:
- break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
if (rec->fields[i].kptr.module)
@@ -532,7 +545,11 @@ void btf_record_free(struct btf_record *rec)
break;
case BPF_LIST_HEAD:
case BPF_LIST_NODE:
- /* Nothing to release for bpf_list_head */
+ case BPF_RB_ROOT:
+ case BPF_RB_NODE:
+ case BPF_SPIN_LOCK:
+ case BPF_TIMER:
+ /* Nothing to release */
break;
default:
WARN_ON_ONCE(1);
@@ -565,9 +582,6 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
new_rec->cnt = 0;
for (i = 0; i < rec->cnt; i++) {
switch (fields[i].type) {
- case BPF_SPIN_LOCK:
- case BPF_TIMER:
- break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
btf_get(fields[i].kptr.btf);
@@ -578,7 +592,11 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
break;
case BPF_LIST_HEAD:
case BPF_LIST_NODE:
- /* Nothing to acquire for bpf_list_head */
+ case BPF_RB_ROOT:
+ case BPF_RB_NODE:
+ case BPF_SPIN_LOCK:
+ case BPF_TIMER:
+ /* Nothing to acquire */
break;
default:
ret = -EFAULT;
@@ -658,7 +676,13 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
continue;
bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off);
break;
+ case BPF_RB_ROOT:
+ if (WARN_ON_ONCE(rec->spin_lock_off < 0))
+ continue;
+ bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off);
+ break;
case BPF_LIST_NODE:
+ case BPF_RB_NODE:
break;
default:
WARN_ON_ONCE(1);
@@ -994,7 +1018,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
return -EINVAL;
map->record = btf_parse_fields(btf, value_type,
- BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD,
+ BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
+ BPF_RB_ROOT,
map->value_size);
if (!IS_ERR_OR_NULL(map->record)) {
int i;
@@ -1042,6 +1067,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
}
break;
case BPF_LIST_HEAD:
+ case BPF_RB_ROOT:
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 388245e8826e..272563a0b770 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -190,6 +190,10 @@ struct bpf_verifier_stack_elem {
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
+static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
+static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
+static int ref_set_non_owning(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg);
static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
@@ -457,6 +461,11 @@ static bool type_is_ptr_alloc_obj(u32 type)
return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
}
+static bool type_is_non_owning_ref(u32 type)
+{
+ return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF;
+}
+
static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
{
struct btf_record *rec = NULL;
@@ -1073,6 +1082,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
verbose_a("id=%d", reg->id);
if (reg->ref_obj_id)
verbose_a("ref_obj_id=%d", reg->ref_obj_id);
+ if (type_is_non_owning_ref(reg->type))
+ verbose_a("%s", "non_own_ref");
if (t != SCALAR_VALUE)
verbose_a("off=%d", reg->off);
if (type_is_pkt_pointer(t))
@@ -1632,6 +1643,16 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
reg->type &= ~PTR_MAYBE_NULL;
}
+static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
+ struct btf_field_graph_root *ds_head)
+{
+ __mark_reg_known_zero(&regs[regno]);
+ regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
+ regs[regno].btf = ds_head->btf;
+ regs[regno].btf_id = ds_head->value_btf_id;
+ regs[regno].off = ds_head->node_offset;
+}
+
static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
return type_is_pkt_pointer(reg->type);
@@ -3452,6 +3473,11 @@ static void save_register_state(struct bpf_func_state *state,
scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
}
+static bool is_bpf_st_mem(struct bpf_insn *insn)
+{
+ return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
+}
+
/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
@@ -3463,8 +3489,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
{
struct bpf_func_state *cur; /* state of the current function */
int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
- u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
+ struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
struct bpf_reg_state *reg = NULL;
+ u32 dst_reg = insn->dst_reg;
err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
if (err)
@@ -3517,6 +3544,13 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
return err;
}
save_register_state(state, spi, reg, size);
+ } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
+ insn->imm != 0 && env->bpf_capable) {
+ struct bpf_reg_state fake_reg = {};
+
+ __mark_reg_known(&fake_reg, (u32)insn->imm);
+ fake_reg.type = SCALAR_VALUE;
+ save_register_state(state, spi, &fake_reg, size);
} else if (reg && is_spillable_regtype(reg->type)) {
/* register containing pointer is being spilled into stack */
if (size != BPF_REG_SIZE) {
@@ -3551,7 +3585,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
/* when we zero initialize stack slots mark them as such */
- if (reg && register_is_null(reg)) {
+ if ((reg && register_is_null(reg)) ||
+ (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
/* backtracking doesn't work for STACK_ZERO yet. */
err = mark_chain_precision(env, value_regno);
if (err)
@@ -3596,6 +3631,7 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
int min_off, max_off;
int i, err;
struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
+ struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
bool writing_zero = false;
/* set if the fact that we're writing a zero is used to let any
* stack slots remain STACK_ZERO
@@ -3608,7 +3644,8 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
max_off = ptr_reg->smax_value + off + size;
if (value_regno >= 0)
value_reg = &cur->regs[value_regno];
- if (value_reg && register_is_null(value_reg))
+ if ((value_reg && register_is_null(value_reg)) ||
+ (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
writing_zero = true;
err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
@@ -5052,7 +5089,8 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
return -EACCES;
}
- if (type_is_alloc(reg->type) && !reg->ref_obj_id) {
+ if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
+ !reg->ref_obj_id) {
verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
return -EFAULT;
}
@@ -6042,9 +6080,7 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
cur->active_lock.ptr = btf;
cur->active_lock.id = reg->id;
} else {
- struct bpf_func_state *fstate = cur_func(env);
void *ptr;
- int i;
if (map)
ptr = map;
@@ -6060,25 +6096,11 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
verbose(env, "bpf_spin_unlock of different lock\n");
return -EINVAL;
}
- cur->active_lock.ptr = NULL;
- cur->active_lock.id = 0;
- for (i = fstate->acquired_refs - 1; i >= 0; i--) {
- int err;
+ invalidate_non_owning_refs(env);
- /* Complain on error because this reference state cannot
- * be freed before this point, as bpf_spin_lock critical
- * section does not allow functions that release the
- * allocated object immediately.
- */
- if (!fstate->refs[i].release_on_unlock)
- continue;
- err = release_reference(env, fstate->refs[i].id);
- if (err) {
- verbose(env, "failed to release release_on_unlock reference");
- return err;
- }
- }
+ cur->active_lock.ptr = NULL;
+ cur->active_lock.id = 0;
}
return 0;
}
@@ -6546,6 +6568,23 @@ found:
return 0;
}
+static struct btf_field *
+reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
+{
+ struct btf_field *field;
+ struct btf_record *rec;
+
+ rec = reg_btf_record(reg);
+ if (!rec)
+ return NULL;
+
+ field = btf_record_find(rec, off, fields);
+ if (!field)
+ return NULL;
+
+ return field;
+}
+
int check_func_arg_reg_off(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno,
enum bpf_arg_type arg_type)
@@ -6567,6 +6606,18 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
*/
if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
return 0;
+
+ if ((type_is_ptr_alloc_obj(type) || type_is_non_owning_ref(type)) && reg->off) {
+ if (reg_find_field_offset(reg, reg->off, BPF_GRAPH_NODE_OR_ROOT))
+ return __check_ptr_off_reg(env, reg, regno, true);
+
+ verbose(env, "R%d must have zero offset when passed to release func\n",
+ regno);
+ verbose(env, "No graph node or root found at R%d type:%s off:%d\n", regno,
+ kernel_type_name(reg->btf, reg->btf_id), reg->off);
+ return -EINVAL;
+ }
+
/* Doing check_ptr_off_reg check for the offset will catch this
* because fixed_off_ok is false, but checking here allows us
* to give the user a better error message.
@@ -6601,6 +6652,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_BTF_ID | PTR_TRUSTED:
case PTR_TO_BTF_ID | MEM_RCU:
case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
+ case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
/* When referenced PTR_TO_BTF_ID is passed to release function,
* its fixed offset must be 0. In the other cases, fixed offset
* can be non-zero. This was already checked above. So pass
@@ -6812,6 +6864,10 @@ skip_type_check:
meta->ret_btf_id = reg->btf_id;
break;
case ARG_PTR_TO_SPIN_LOCK:
+ if (in_rbtree_lock_required_cb(env)) {
+ verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
+ return -EACCES;
+ }
if (meta->func_id == BPF_FUNC_spin_lock) {
err = process_spin_lock(env, regno, true);
if (err)
@@ -7363,6 +7419,17 @@ static int release_reference(struct bpf_verifier_env *env,
return 0;
}
+static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
+{
+ struct bpf_func_state *unused;
+ struct bpf_reg_state *reg;
+
+ bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
+ if (type_is_non_owning_ref(reg->type))
+ __mark_reg_unknown(env, reg);
+ }));
+}
+
static void clear_caller_saved_regs(struct bpf_verifier_env *env,
struct bpf_reg_state *regs)
{
@@ -7384,6 +7451,8 @@ static int set_callee_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee, int insn_idx);
+static bool is_callback_calling_kfunc(u32 btf_id);
+
static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx, int subprog,
set_callee_state_fn set_callee_state_cb)
@@ -7438,10 +7507,18 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
* interested in validating only BPF helpers that can call subprogs as
* callbacks
*/
- if (set_callee_state_cb != set_callee_state && !is_callback_calling_function(insn->imm)) {
- verbose(env, "verifier bug: helper %s#%d is not marked as callback-calling\n",
- func_id_name(insn->imm), insn->imm);
- return -EFAULT;
+ if (set_callee_state_cb != set_callee_state) {
+ if (bpf_pseudo_kfunc_call(insn) &&
+ !is_callback_calling_kfunc(insn->imm)) {
+ verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
+ func_id_name(insn->imm), insn->imm);
+ return -EFAULT;
+ } else if (!bpf_pseudo_kfunc_call(insn) &&
+ !is_callback_calling_function(insn->imm)) { /* helper */
+ verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
+ func_id_name(insn->imm), insn->imm);
+ return -EFAULT;
+ }
}
if (insn->code == (BPF_JMP | BPF_CALL) &&
@@ -7706,6 +7783,63 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
return 0;
}
+static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
+ struct bpf_func_state *caller,
+ struct bpf_func_state *callee,
+ int insn_idx)
+{
+ /* void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ * bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
+ *
+ * 'struct bpf_rb_node *node' arg to bpf_rbtree_add is the same PTR_TO_BTF_ID w/ offset
+ * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
+ * by this point, so look at 'root'
+ */
+ struct btf_field *field;
+
+ field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off,
+ BPF_RB_ROOT);
+ if (!field || !field->graph_root.value_btf_id)
+ return -EFAULT;
+
+ mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
+ ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
+ mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
+ ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
+
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+ callee->in_callback_fn = true;
+ callee->callback_ret_range = tnum_range(0, 1);
+ return 0;
+}
+
+static bool is_rbtree_lock_required_kfunc(u32 btf_id);
+
+/* Are we currently verifying the callback for a rbtree helper that must
+ * be called with lock held? If so, no need to complain about unreleased
+ * lock
+ */
+static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
+{
+ struct bpf_verifier_state *state = env->cur_state;
+ struct bpf_insn *insn = env->prog->insnsi;
+ struct bpf_func_state *callee;
+ int kfunc_btf_id;
+
+ if (!state->curframe)
+ return false;
+
+ callee = state->frame[state->curframe];
+
+ if (!callee->in_callback_fn)
+ return false;
+
+ kfunc_btf_id = insn[callee->callsite].imm;
+ return is_rbtree_lock_required_kfunc(kfunc_btf_id);
+}
+
static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
{
struct bpf_verifier_state *state = env->cur_state;
@@ -8474,6 +8608,7 @@ struct bpf_kfunc_call_arg_meta {
bool r0_rdonly;
u32 ret_btf_id;
u64 r0_size;
+ u32 subprogno;
struct {
u64 value;
bool found;
@@ -8485,6 +8620,9 @@ struct bpf_kfunc_call_arg_meta {
struct {
struct btf_field *field;
} arg_list_head;
+ struct {
+ struct btf_field *field;
+ } arg_rbtree_root;
};
static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
@@ -8596,12 +8734,16 @@ enum {
KF_ARG_DYNPTR_ID,
KF_ARG_LIST_HEAD_ID,
KF_ARG_LIST_NODE_ID,
+ KF_ARG_RB_ROOT_ID,
+ KF_ARG_RB_NODE_ID,
};
BTF_ID_LIST(kf_arg_btf_ids)
BTF_ID(struct, bpf_dynptr_kern)
BTF_ID(struct, bpf_list_head)
BTF_ID(struct, bpf_list_node)
+BTF_ID(struct, bpf_rb_root)
+BTF_ID(struct, bpf_rb_node)
static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
const struct btf_param *arg, int type)
@@ -8635,6 +8777,28 @@ static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param
return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
}
+static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
+{
+ return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
+}
+
+static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
+{
+ return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
+}
+
+static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
+ const struct btf_param *arg)
+{
+ const struct btf_type *t;
+
+ t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
+ if (!t)
+ return false;
+
+ return true;
+}
+
/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
const struct btf *btf,
@@ -8694,6 +8858,9 @@ enum kfunc_ptr_arg_type {
KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */
KF_ARG_PTR_TO_MEM,
KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */
+ KF_ARG_PTR_TO_CALLBACK,
+ KF_ARG_PTR_TO_RB_ROOT,
+ KF_ARG_PTR_TO_RB_NODE,
};
enum special_kfunc_type {
@@ -8707,6 +8874,9 @@ enum special_kfunc_type {
KF_bpf_rdonly_cast,
KF_bpf_rcu_read_lock,
KF_bpf_rcu_read_unlock,
+ KF_bpf_rbtree_remove,
+ KF_bpf_rbtree_add,
+ KF_bpf_rbtree_first,
};
BTF_SET_START(special_kfunc_set)
@@ -8718,6 +8888,9 @@ BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
+BTF_ID(func, bpf_rbtree_remove)
+BTF_ID(func, bpf_rbtree_add)
+BTF_ID(func, bpf_rbtree_first)
BTF_SET_END(special_kfunc_set)
BTF_ID_LIST(special_kfunc_list)
@@ -8731,6 +8904,9 @@ BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rcu_read_lock)
BTF_ID(func, bpf_rcu_read_unlock)
+BTF_ID(func, bpf_rbtree_remove)
+BTF_ID(func, bpf_rbtree_add)
+BTF_ID(func, bpf_rbtree_first)
static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -8792,6 +8968,12 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_LIST_NODE;
+ if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_RB_ROOT;
+
+ if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_RB_NODE;
+
if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
if (!btf_type_is_struct(ref_t)) {
verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
@@ -8801,6 +8983,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
return KF_ARG_PTR_TO_BTF_ID;
}
+ if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_CALLBACK;
+
if (argno + 1 < nargs && is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]))
arg_mem_size = true;
@@ -8915,38 +9100,54 @@ static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env,
return 0;
}
-static int ref_set_release_on_unlock(struct bpf_verifier_env *env, u32 ref_obj_id)
+static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
- struct bpf_func_state *state = cur_func(env);
+ struct bpf_verifier_state *state = env->cur_state;
+
+ if (!state->active_lock.ptr) {
+ verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
+ return -EFAULT;
+ }
+
+ if (type_flag(reg->type) & NON_OWN_REF) {
+ verbose(env, "verifier internal error: NON_OWN_REF already set\n");
+ return -EFAULT;
+ }
+
+ reg->type |= NON_OWN_REF;
+ return 0;
+}
+
+static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
+{
+ struct bpf_func_state *state, *unused;
struct bpf_reg_state *reg;
int i;
- /* bpf_spin_lock only allows calling list_push and list_pop, no BPF
- * subprogs, no global functions. This means that the references would
- * not be released inside the critical section but they may be added to
- * the reference state, and the acquired_refs are never copied out for a
- * different frame as BPF to BPF calls don't work in bpf_spin_lock
- * critical sections.
- */
+ state = cur_func(env);
+
if (!ref_obj_id) {
- verbose(env, "verifier internal error: ref_obj_id is zero for release_on_unlock\n");
+ verbose(env, "verifier internal error: ref_obj_id is zero for "
+ "owning -> non-owning conversion\n");
return -EFAULT;
}
+
for (i = 0; i < state->acquired_refs; i++) {
- if (state->refs[i].id == ref_obj_id) {
- if (state->refs[i].release_on_unlock) {
- verbose(env, "verifier internal error: expected false release_on_unlock");
- return -EFAULT;
+ if (state->refs[i].id != ref_obj_id)
+ continue;
+
+ /* Clear ref_obj_id here so release_reference doesn't clobber
+ * the whole reg
+ */
+ bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
+ if (reg->ref_obj_id == ref_obj_id) {
+ reg->ref_obj_id = 0;
+ ref_set_non_owning(env, reg);
}
- state->refs[i].release_on_unlock = true;
- /* Now mark everyone sharing same ref_obj_id as untrusted */
- bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
- if (reg->ref_obj_id == ref_obj_id)
- reg->type |= PTR_UNTRUSTED;
- }));
- return 0;
- }
+ }));
+ return 0;
}
+
verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
return -EFAULT;
}
@@ -9032,102 +9233,226 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
btf_id == special_kfunc_list[KF_bpf_list_pop_back];
}
-static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg, u32 regno,
- struct bpf_kfunc_call_arg_meta *meta)
+static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
+{
+ return btf_id == special_kfunc_list[KF_bpf_rbtree_add] ||
+ btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
+ btf_id == special_kfunc_list[KF_bpf_rbtree_first];
+}
+
+static bool is_bpf_graph_api_kfunc(u32 btf_id)
+{
+ return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id);
+}
+
+static bool is_callback_calling_kfunc(u32 btf_id)
+{
+ return btf_id == special_kfunc_list[KF_bpf_rbtree_add];
+}
+
+static bool is_rbtree_lock_required_kfunc(u32 btf_id)
+{
+ return is_bpf_rbtree_api_kfunc(btf_id);
+}
+
+static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
+ enum btf_field_type head_field_type,
+ u32 kfunc_btf_id)
+{
+ bool ret;
+
+ switch (head_field_type) {
+ case BPF_LIST_HEAD:
+ ret = is_bpf_list_api_kfunc(kfunc_btf_id);
+ break;
+ case BPF_RB_ROOT:
+ ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
+ break;
+ default:
+ verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
+ btf_field_type_name(head_field_type));
+ return false;
+ }
+
+ if (!ret)
+ verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
+ btf_field_type_name(head_field_type));
+ return ret;
+}
+
+static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
+ enum btf_field_type node_field_type,
+ u32 kfunc_btf_id)
+{
+ bool ret;
+
+ switch (node_field_type) {
+ case BPF_LIST_NODE:
+ ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front] ||
+ kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back]);
+ break;
+ case BPF_RB_NODE:
+ ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
+ kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add]);
+ break;
+ default:
+ verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
+ btf_field_type_name(node_field_type));
+ return false;
+ }
+
+ if (!ret)
+ verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
+ btf_field_type_name(node_field_type));
+ return ret;
+}
+
+static int
+__process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta,
+ enum btf_field_type head_field_type,
+ struct btf_field **head_field)
{
+ const char *head_type_name;
struct btf_field *field;
struct btf_record *rec;
- u32 list_head_off;
+ u32 head_off;
- if (meta->btf != btf_vmlinux || !is_bpf_list_api_kfunc(meta->func_id)) {
- verbose(env, "verifier internal error: bpf_list_head argument for unknown kfunc\n");
+ if (meta->btf != btf_vmlinux) {
+ verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
return -EFAULT;
}
+ if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
+ return -EFAULT;
+
+ head_type_name = btf_field_type_name(head_field_type);
if (!tnum_is_const(reg->var_off)) {
verbose(env,
- "R%d doesn't have constant offset. bpf_list_head has to be at the constant offset\n",
- regno);
+ "R%d doesn't have constant offset. %s has to be at the constant offset\n",
+ regno, head_type_name);
return -EINVAL;
}
rec = reg_btf_record(reg);
- list_head_off = reg->off + reg->var_off.value;
- field = btf_record_find(rec, list_head_off, BPF_LIST_HEAD);
+ head_off = reg->off + reg->var_off.value;
+ field = btf_record_find(rec, head_off, head_field_type);
if (!field) {
- verbose(env, "bpf_list_head not found at offset=%u\n", list_head_off);
+ verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
return -EINVAL;
}
/* All functions require bpf_list_head to be protected using a bpf_spin_lock */
if (check_reg_allocation_locked(env, reg)) {
- verbose(env, "bpf_spin_lock at off=%d must be held for bpf_list_head\n",
- rec->spin_lock_off);
+ verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
+ rec->spin_lock_off, head_type_name);
return -EINVAL;
}
- if (meta->arg_list_head.field) {
- verbose(env, "verifier internal error: repeating bpf_list_head arg\n");
+ if (*head_field) {
+ verbose(env, "verifier internal error: repeating %s arg\n", head_type_name);
return -EFAULT;
}
- meta->arg_list_head.field = field;
+ *head_field = field;
return 0;
}
-static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
+static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, u32 regno,
struct bpf_kfunc_call_arg_meta *meta)
{
+ return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD,
+ &meta->arg_list_head.field);
+}
+
+static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT,
+ &meta->arg_rbtree_root.field);
+}
+
+static int
+__process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta,
+ enum btf_field_type head_field_type,
+ enum btf_field_type node_field_type,
+ struct btf_field **node_field)
+{
+ const char *node_type_name;
const struct btf_type *et, *t;
struct btf_field *field;
- struct btf_record *rec;
- u32 list_node_off;
+ u32 node_off;
- if (meta->btf != btf_vmlinux ||
- (meta->func_id != special_kfunc_list[KF_bpf_list_push_front] &&
- meta->func_id != special_kfunc_list[KF_bpf_list_push_back])) {
- verbose(env, "verifier internal error: bpf_list_node argument for unknown kfunc\n");
+ if (meta->btf != btf_vmlinux) {
+ verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
return -EFAULT;
}
+ if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
+ return -EFAULT;
+
+ node_type_name = btf_field_type_name(node_field_type);
if (!tnum_is_const(reg->var_off)) {
verbose(env,
- "R%d doesn't have constant offset. bpf_list_node has to be at the constant offset\n",
- regno);
+ "R%d doesn't have constant offset. %s has to be at the constant offset\n",
+ regno, node_type_name);
return -EINVAL;
}
- rec = reg_btf_record(reg);
- list_node_off = reg->off + reg->var_off.value;
- field = btf_record_find(rec, list_node_off, BPF_LIST_NODE);
- if (!field || field->offset != list_node_off) {
- verbose(env, "bpf_list_node not found at offset=%u\n", list_node_off);
+ node_off = reg->off + reg->var_off.value;
+ field = reg_find_field_offset(reg, node_off, node_field_type);
+ if (!field || field->offset != node_off) {
+ verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
return -EINVAL;
}
- field = meta->arg_list_head.field;
+ field = *node_field;
et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
t = btf_type_by_id(reg->btf, reg->btf_id);
if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
field->graph_root.value_btf_id, true)) {
- verbose(env, "operation on bpf_list_head expects arg#1 bpf_list_node at offset=%d "
+ verbose(env, "operation on %s expects arg#1 %s at offset=%d "
"in struct %s, but arg is at offset=%d in struct %s\n",
+ btf_field_type_name(head_field_type),
+ btf_field_type_name(node_field_type),
field->graph_root.node_offset,
btf_name_by_offset(field->graph_root.btf, et->name_off),
- list_node_off, btf_name_by_offset(reg->btf, t->name_off));
+ node_off, btf_name_by_offset(reg->btf, t->name_off));
return -EINVAL;
}
- if (list_node_off != field->graph_root.node_offset) {
- verbose(env, "arg#1 offset=%d, but expected bpf_list_node at offset=%d in struct %s\n",
- list_node_off, field->graph_root.node_offset,
+ if (node_off != field->graph_root.node_offset) {
+ verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
+ node_off, btf_field_type_name(node_field_type),
+ field->graph_root.node_offset,
btf_name_by_offset(field->graph_root.btf, et->name_off));
return -EINVAL;
}
- /* Set arg#1 for expiration after unlock */
- return ref_set_release_on_unlock(env, reg->ref_obj_id);
+
+ return 0;
+}
+
+static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
+ BPF_LIST_HEAD, BPF_LIST_NODE,
+ &meta->arg_list_head.field);
+}
+
+static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
+ BPF_RB_ROOT, BPF_RB_NODE,
+ &meta->arg_rbtree_root.field);
}
static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta)
@@ -9264,8 +9589,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_DYNPTR:
case KF_ARG_PTR_TO_LIST_HEAD:
case KF_ARG_PTR_TO_LIST_NODE:
+ case KF_ARG_PTR_TO_RB_ROOT:
+ case KF_ARG_PTR_TO_RB_NODE:
case KF_ARG_PTR_TO_MEM:
case KF_ARG_PTR_TO_MEM_SIZE:
+ case KF_ARG_PTR_TO_CALLBACK:
/* Trusted by default */
break;
default:
@@ -9342,6 +9670,20 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (ret < 0)
return ret;
break;
+ case KF_ARG_PTR_TO_RB_ROOT:
+ if (reg->type != PTR_TO_MAP_VALUE &&
+ reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
+ return -EINVAL;
+ }
+ if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
+ verbose(env, "allocated object must be referenced\n");
+ return -EINVAL;
+ }
+ ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
+ if (ret < 0)
+ return ret;
+ break;
case KF_ARG_PTR_TO_LIST_NODE:
if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
verbose(env, "arg#%d expected pointer to allocated object\n", i);
@@ -9355,6 +9697,31 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (ret < 0)
return ret;
break;
+ case KF_ARG_PTR_TO_RB_NODE:
+ if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
+ if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
+ verbose(env, "rbtree_remove node input must be non-owning ref\n");
+ return -EINVAL;
+ }
+ if (in_rbtree_lock_required_cb(env)) {
+ verbose(env, "rbtree_remove not allowed in rbtree cb\n");
+ return -EINVAL;
+ }
+ } else {
+ if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ verbose(env, "arg#%d expected pointer to allocated object\n", i);
+ return -EINVAL;
+ }
+ if (!reg->ref_obj_id) {
+ verbose(env, "allocated object must be referenced\n");
+ return -EINVAL;
+ }
+ }
+
+ ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
+ if (ret < 0)
+ return ret;
+ break;
case KF_ARG_PTR_TO_BTF_ID:
/* Only base_type is checked, further checks are done here */
if ((base_type(reg->type) != PTR_TO_BTF_ID ||
@@ -9390,6 +9757,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
/* Skip next '__sz' argument */
i++;
break;
+ case KF_ARG_PTR_TO_CALLBACK:
+ meta->subprogno = reg->subprogno;
+ break;
}
}
@@ -9406,11 +9776,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
const struct btf_type *t, *func, *func_proto, *ptr_type;
+ u32 i, nargs, func_id, ptr_type_id, release_ref_obj_id;
struct bpf_reg_state *regs = cur_regs(env);
const char *func_name, *ptr_type_name;
bool sleepable, rcu_lock, rcu_unlock;
struct bpf_kfunc_call_arg_meta meta;
- u32 i, nargs, func_id, ptr_type_id;
int err, insn_idx = *insn_idx_p;
const struct btf_param *args;
const struct btf_type *ret_t;
@@ -9505,6 +9875,35 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
}
+ if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front] ||
+ meta.func_id == special_kfunc_list[KF_bpf_list_push_back] ||
+ meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) {
+ release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
+ err = ref_convert_owning_non_owning(env, release_ref_obj_id);
+ if (err) {
+ verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
+ func_name, func_id);
+ return err;
+ }
+
+ err = release_reference(env, release_ref_obj_id);
+ if (err) {
+ verbose(env, "kfunc %s#%d reference has not been acquired before\n",
+ func_name, func_id);
+ return err;
+ }
+ }
+
+ if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) {
+ err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+ set_rbtree_add_callback_state);
+ if (err) {
+ verbose(env, "kfunc %s#%d failed callback verification\n",
+ func_name, func_id);
+ return err;
+ }
+ }
+
for (i = 0; i < CALLER_SAVED_REGS; i++)
mark_reg_not_init(env, regs, caller_saved[i]);
@@ -9569,11 +9968,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
struct btf_field *field = meta.arg_list_head.field;
- mark_reg_known_zero(env, regs, BPF_REG_0);
- regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
- regs[BPF_REG_0].btf = field->graph_root.btf;
- regs[BPF_REG_0].btf_id = field->graph_root.value_btf_id;
- regs[BPF_REG_0].off = field->graph_root.node_offset;
+ mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
+ } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
+ meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
+ struct btf_field *field = meta.arg_rbtree_root.field;
+
+ mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
@@ -9639,7 +10039,13 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (is_kfunc_ret_null(&meta))
regs[BPF_REG_0].id = id;
regs[BPF_REG_0].ref_obj_id = id;
+ } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
+ ref_set_non_owning(env, &regs[BPF_REG_0]);
}
+
+ if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove])
+ invalidate_non_owning_refs(env);
+
if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
regs[BPF_REG_0].id = ++env->id_gen;
} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
@@ -11825,8 +12231,10 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
*/
if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
return;
- if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL) && WARN_ON_ONCE(reg->off))
+ if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
+ WARN_ON_ONCE(reg->off))
return;
+
if (is_null) {
reg->type = SCALAR_VALUE;
/* We don't need id and ref_obj_id from this point
@@ -14335,7 +14743,7 @@ static int do_check(struct bpf_verifier_env *env)
if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
(insn->src_reg == BPF_PSEUDO_CALL) ||
(insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
- (insn->off != 0 || !is_bpf_list_api_kfunc(insn->imm)))) {
+ (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) {
verbose(env, "function calls are not allowed while holding a lock\n");
return -EINVAL;
}
@@ -14371,7 +14779,8 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL;
}
- if (env->cur_state->active_lock.ptr) {
+ if (env->cur_state->active_lock.ptr &&
+ !in_rbtree_lock_required_cb(env)) {
verbose(env, "bpf_spin_unlock is missing\n");
return -EINVAL;
}
@@ -14633,9 +15042,10 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
{
enum bpf_prog_type prog_type = resolve_prog_type(prog);
- if (btf_record_has_field(map->record, BPF_LIST_HEAD)) {
+ if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
+ btf_record_has_field(map->record, BPF_RB_ROOT)) {
if (is_tracing_prog_type(prog_type)) {
- verbose(env, "tracing progs cannot use bpf_list_head yet\n");
+ verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
return -EINVAL;
}
}