summary | refs | log | tree | commit | diff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/bpf/bpf_struct_ops.c 75
-rw-r--r-- kernel/bpf/btf.c 310
-rw-r--r-- kernel/bpf/helpers.c 119
-rw-r--r-- kernel/bpf/syscall.c 34
-rw-r--r-- kernel/bpf/verifier.c 4
5 files changed, 416 insertions, 126 deletions
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 86c7884abaf8..a2cf31b14be4 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -12,6 +12,7 @@
#include <linux/mutex.h>
#include <linux/btf_ids.h>
#include <linux/rcupdate_wait.h>
+#include <linux/poll.h>
struct bpf_struct_ops_value {
struct bpf_struct_ops_common_value common;
@@ -56,6 +57,7 @@ struct bpf_struct_ops_map {
struct bpf_struct_ops_link {
struct bpf_link link;
struct bpf_map __rcu *map;
+ wait_queue_head_t wait_hup;
};
static DEFINE_MUTEX(update_mutex);
@@ -757,7 +759,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
goto unlock;
}
- err = st_ops->reg(kdata);
+ err = st_ops->reg(kdata, NULL);
if (likely(!err)) {
/* This refcnt increment on the map here after
* 'st_ops->reg()' is secure since the state of the
@@ -805,7 +807,7 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
BPF_STRUCT_OPS_STATE_TOBEFREE);
switch (prev_state) {
case BPF_STRUCT_OPS_STATE_INUSE:
- st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data);
+ st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, NULL);
bpf_map_put(map);
return 0;
case BPF_STRUCT_OPS_STATE_TOBEFREE:
@@ -1057,10 +1059,7 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
st_map = (struct bpf_struct_ops_map *)
rcu_dereference_protected(st_link->map, true);
if (st_map) {
- /* st_link->map can be NULL if
- * bpf_struct_ops_link_create() fails to register.
- */
- st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data);
+ st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
bpf_map_put(&st_map->map);
}
kfree(st_link);
@@ -1075,7 +1074,8 @@ static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
st_link = container_of(link, struct bpf_struct_ops_link, link);
rcu_read_lock();
map = rcu_dereference(st_link->map);
- seq_printf(seq, "map_id:\t%d\n", map->id);
+ if (map)
+ seq_printf(seq, "map_id:\t%d\n", map->id);
rcu_read_unlock();
}
@@ -1088,7 +1088,8 @@ static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
st_link = container_of(link, struct bpf_struct_ops_link, link);
rcu_read_lock();
map = rcu_dereference(st_link->map);
- info->struct_ops.map_id = map->id;
+ if (map)
+ info->struct_ops.map_id = map->id;
rcu_read_unlock();
return 0;
}
@@ -1113,6 +1114,10 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
mutex_lock(&update_mutex);
old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
+ if (!old_map) {
+ err = -ENOLINK;
+ goto err_out;
+ }
if (expected_old_map && old_map != expected_old_map) {
err = -EPERM;
goto err_out;
@@ -1125,7 +1130,7 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
goto err_out;
}
- err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data);
+ err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data, link);
if (err)
goto err_out;
@@ -1139,11 +1144,53 @@ err_out:
return err;
}
+/* Detach a struct_ops link from the map it is currently bound to.
+ *
+ * With update_mutex held, the map's struct_ops value is unregistered,
+ * st_link->map is cleared and the map reference held by the link is dropped.
+ * After the mutex is released, waiters on st_link->wait_hup are woken with
+ * EPOLLHUP. A link that has no map attached is left untouched and nobody is
+ * woken. Always returns 0.
+ */
+static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
+{
+	struct bpf_struct_ops_link *st_link;
+	struct bpf_struct_ops_map *st_map;
+	struct bpf_map *cur_map;
+
+	st_link = container_of(link, struct bpf_struct_ops_link, link);
+
+	mutex_lock(&update_mutex);
+	cur_map = rcu_dereference_protected(st_link->map,
+					    lockdep_is_held(&update_mutex));
+	if (cur_map) {
+		st_map = container_of(cur_map, struct bpf_struct_ops_map, map);
+		st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
+
+		RCU_INIT_POINTER(st_link->map, NULL);
+		/* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
+		 * bpf_map_inc() in bpf_struct_ops_map_link_update().
+		 */
+		bpf_map_put(&st_map->map);
+	}
+	mutex_unlock(&update_mutex);
+
+	/* Nothing was attached, so there is nothing to signal. */
+	if (!cur_map)
+		return 0;
+
+	wake_up_interruptible_poll(&st_link->wait_hup, EPOLLHUP);
+	return 0;
+}
+
+/* Poll callback for struct_ops link files.
+ *
+ * Registers the caller on the link's wait_hup queue and reports EPOLLHUP
+ * once the link no longer has a map attached (st_link->map is NULL);
+ * otherwise reports no events.
+ */
+static __poll_t bpf_struct_ops_map_link_poll(struct file *file,
+					     struct poll_table_struct *pts)
+{
+	struct bpf_link *link = file->private_data;
+	struct bpf_struct_ops_link *st_link;
+
+	/* private_data holds the generic bpf_link; derive the containing
+	 * struct_ops link explicitly instead of relying on 'link' being the
+	 * first member of struct bpf_struct_ops_link.
+	 */
+	st_link = container_of(link, struct bpf_struct_ops_link, link);
+
+	poll_wait(file, &st_link->wait_hup, pts);
+
+	return rcu_access_pointer(st_link->map) ? 0 : EPOLLHUP;
+}
+
static const struct bpf_link_ops bpf_struct_ops_map_lops = {
.dealloc = bpf_struct_ops_map_link_dealloc,
+ .detach = bpf_struct_ops_map_link_detach,
.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
.update_map = bpf_struct_ops_map_link_update,
+ .poll = bpf_struct_ops_map_link_poll,
};
int bpf_struct_ops_link_create(union bpf_attr *attr)
@@ -1176,13 +1223,21 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
if (err)
goto err_out;
- err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data);
+ init_waitqueue_head(&link->wait_hup);
+
+ /* Hold the update_mutex such that the subsystem cannot
+ * do link->ops->detach() before the link is fully initialized.
+ */
+ mutex_lock(&update_mutex);
+ err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
if (err) {
+ mutex_unlock(&update_mutex);
bpf_link_cleanup(&link_primer);
link = NULL;
goto err_out;
}
RCU_INIT_POINTER(link->map, map);
+ mutex_unlock(&update_mutex);
return bpf_link_settle(&link_primer);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 821063660d9f..7928d920056f 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3442,10 +3442,12 @@ btf_find_graph_root(const struct btf *btf, const struct btf_type *pt,
goto end; \
}
-static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
+static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_type,
+ u32 field_mask, u32 *seen_mask,
int *align, int *sz)
{
int type = 0;
+ const char *name = __btf_name_by_offset(btf, var_type->name_off);
if (field_mask & BPF_SPIN_LOCK) {
if (!strcmp(name, "bpf_spin_lock")) {
@@ -3481,7 +3483,7 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
field_mask_test_name(BPF_REFCOUNT, "bpf_refcount");
/* Only return BPF_KPTR when all other types with matchable names fail */
- if (field_mask & BPF_KPTR) {
+ if (field_mask & BPF_KPTR && !__btf_type_is_struct(var_type)) {
type = BPF_KPTR_REF;
goto end;
}
@@ -3494,140 +3496,232 @@ end:
#undef field_mask_test_name
+/* Replicate the first @field_cnt entries of @info @repeat_cnt more times.
+ *
+ * Copy number r (counting from 1) is placed directly after the previous
+ * copy and has every offset advanced by
+ *   r * elem_size
+ * relative to the original entry, where elem_size is the size of one
+ * element.
+ *
+ * Only kptr, list head and rb root fields may be repeated; any other field
+ * type among the first @field_cnt entries yields -EINVAL before anything is
+ * written. Returns 0 on success.
+ */
+static int btf_repeat_fields(struct btf_field_info *info,
+			     u32 field_cnt, u32 repeat_cnt, u32 elem_size)
+{
+	struct btf_field_info *dst = info + field_cnt;
+	u32 rep, k;
+
+	/* Refuse to repeat field types that must not be repeated. */
+	for (k = 0; k < field_cnt; k++) {
+		if (info[k].type != BPF_KPTR_UNREF &&
+		    info[k].type != BPF_KPTR_REF &&
+		    info[k].type != BPF_KPTR_PERCPU &&
+		    info[k].type != BPF_LIST_HEAD &&
+		    info[k].type != BPF_RB_ROOT)
+			return -EINVAL;
+	}
+
+	for (rep = 0; rep < repeat_cnt; rep++) {
+		u32 shift = (rep + 1) * elem_size;
+
+		memcpy(dst, info, field_cnt * sizeof(*info));
+		for (k = 0; k < field_cnt; k++)
+			dst[k].off += shift;
+		dst += field_cnt;
+	}
+
+	return 0;
+}
+
static int btf_find_struct_field(const struct btf *btf,
const struct btf_type *t, u32 field_mask,
- struct btf_field_info *info, int info_cnt)
+ struct btf_field_info *info, int info_cnt,
+ u32 level);
+
+/* Find special fields in the struct type of a field.
+ *
+ * This function is used to find fields of special types that are not a
+ * global variable or a direct field of a struct type. It also handles the
+ * repetition if it is the element type of an array.
+ *
+ * @off is added to the offset of every field found in @t so that offsets
+ * are relative to the container. When @nelems > 1 the found fields are
+ * repeated for each further array element.
+ *
+ * Returns the number of entries written to @info, 0 if nothing was found,
+ * or a negative error: -E2BIG when the nesting depth reaches
+ * MAX_RESOLVE_DEPTH or when the repeated fields would not fit in @info_cnt
+ * entries, otherwise whatever btf_find_struct_field() or
+ * btf_repeat_fields() reported.
+ */
+static int btf_find_nested_struct(const struct btf *btf, const struct btf_type *t,
+				  u32 off, u32 nelems,
+				  u32 field_mask, struct btf_field_info *info,
+				  int info_cnt, u32 level)
{
- int ret, idx = 0, align, sz, field_type;
- const struct btf_member *member;
+	int ret, err, i;
+
+	level++;
+	if (level >= MAX_RESOLVE_DEPTH)
+		return -E2BIG;
+
+	ret = btf_find_struct_field(btf, t, field_mask, info, info_cnt, level);
+
+	if (ret <= 0)
+		return ret;
+
+	/* Shift the offsets of the nested struct fields to the offsets
+	 * related to the container.
+	 */
+	for (i = 0; i < ret; i++)
+		info[i].off += off;
+
+	if (nelems > 1) {
+		/* btf_repeat_fields() writes ret * (nelems - 1) entries after
+		 * the first ret ones without knowing the array capacity, so
+		 * make sure all ret * nelems entries fit before repeating.
+		 */
+		if (info_cnt < 0 || (u64)ret * nelems > (u64)info_cnt)
+			return -E2BIG;
+
+		err = btf_repeat_fields(info, ret, nelems - 1, t->size);
+		if (err)
+			return err;
+		ret *= nelems;
+	}
+
+	return ret;
+}
+
+/* Examine one struct member or datasec variable for special fields.
+ *
+ * Array types are unwrapped to their element type first, and the element
+ * counts of all array levels are multiplied into nelems. The element type
+ * is then classified by btf_get_field_type(); a struct type that does not
+ * match any special field is searched recursively through
+ * btf_find_nested_struct().
+ *
+ * @expected_size, when non-zero, must equal the element size times the
+ * number of elements, otherwise the field is skipped. The callers in this
+ * file pass vsi->size for datasec variables and 0 for struct members.
+ *
+ * Returns the number of @info entries filled in, 0 when the field is not a
+ * special field or is skipped, or a negative error code.
+ */
+static int btf_find_field_one(const struct btf *btf,
+ const struct btf_type *var,
+ const struct btf_type *var_type,
+ int var_idx,
+ u32 off, u32 expected_size,
+ u32 field_mask, u32 *seen_mask,
+ struct btf_field_info *info, int info_cnt,
+ u32 level)
+{
+ int ret, align, sz, field_type;
 struct btf_field_info tmp;
+ const struct btf_array *array;
+ u32 i, nelems = 1;
+
+ /* Walk into array types to find the element type and the number of
+ * elements in the (flattened) array.
+ */
+ for (i = 0; i < MAX_RESOLVE_DEPTH && btf_type_is_array(var_type); i++) {
+ array = btf_array(var_type);
+ nelems *= array->nelems;
+ var_type = btf_type_by_id(btf, array->type);
+ }
+ /* The loop ran out of depth budget before leaving the array types. */
+ if (i == MAX_RESOLVE_DEPTH)
+ return -E2BIG;
+ /* A zero-length array holds no fields. */
+ if (nelems == 0)
+ return 0;
+
+ field_type = btf_get_field_type(btf, var_type,
+ field_mask, seen_mask, &align, &sz);
+ /* Look into variables of struct types */
+ if (!field_type && __btf_type_is_struct(var_type)) {
+ sz = var_type->size;
+ if (expected_size && expected_size != sz * nelems)
+ return 0;
+ ret = btf_find_nested_struct(btf, var_type, off, nelems, field_mask,
+ &info[0], info_cnt, level);
+ return ret;
+ }
+
+ if (field_type == 0)
+ return 0;
+ if (field_type < 0)
+ return field_type;
+
+ /* Size or alignment mismatch: skip the field instead of failing. */
+ if (expected_size && expected_size != sz * nelems)
+ return 0;
+ if (off % align)
+ return 0;
+
+ /* The helpers below fill info[0] when there is room for at least one
+ * entry and the scratch entry tmp otherwise.
+ */
+ switch (field_type) {
+ case BPF_SPIN_LOCK:
+ case BPF_TIMER:
+ case BPF_WORKQUEUE:
+ case BPF_LIST_NODE:
+ case BPF_RB_NODE:
+ case BPF_REFCOUNT:
+ ret = btf_find_struct(btf, var_type, off, sz, field_type,
+ info_cnt ? &info[0] : &tmp);
+ if (ret < 0)
+ return ret;
+ break;
+ case BPF_KPTR_UNREF:
+ case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
+ ret = btf_find_kptr(btf, var_type, off, sz,
+ info_cnt ? &info[0] : &tmp);
+ if (ret < 0)
+ return ret;
+ break;
+ case BPF_LIST_HEAD:
+ case BPF_RB_ROOT:
+ ret = btf_find_graph_root(btf, var, var_type,
+ var_idx, off, sz,
+ info_cnt ? &info[0] : &tmp,
+ field_type);
+ if (ret < 0)
+ return ret;
+ break;
+ default:
+ return -EFAULT;
+ }
+
+ if (ret == BTF_FIELD_IGNORE)
+ return 0;
+ /* One entry per array element is needed. */
+ if (nelems > info_cnt)
+ return -E2BIG;
+ if (nelems > 1) {
+ ret = btf_repeat_fields(info, 1, nelems - 1, sz);
+ if (ret < 0)
+ return ret;
+ }
+ return nelems;
+}
+
+static int btf_find_struct_field(const struct btf *btf,
+ const struct btf_type *t, u32 field_mask,
+ struct btf_field_info *info, int info_cnt,
+ u32 level)
+{
+ int ret, idx = 0;
+ const struct btf_member *member;
u32 i, off, seen_mask = 0;
for_each_member(i, t, member) {
const struct btf_type *member_type = btf_type_by_id(btf,
member->type);
- field_type = btf_get_field_type(__btf_name_by_offset(btf, member_type->name_off),
- field_mask, &seen_mask, &align, &sz);
- if (field_type == 0)
- continue;
- if (field_type < 0)
- return field_type;
-
off = __btf_member_bit_offset(t, member);
if (off % 8)
/* valid C code cannot generate such BTF */
return -EINVAL;
off /= 8;
- if (off % align)
- continue;
-
- switch (field_type) {
- case BPF_SPIN_LOCK:
- case BPF_TIMER:
- case BPF_WORKQUEUE:
- case BPF_LIST_NODE:
- case BPF_RB_NODE:
- case BPF_REFCOUNT:
- ret = btf_find_struct(btf, member_type, off, sz, field_type,
- idx < info_cnt ? &info[idx] : &tmp);
- if (ret < 0)
- return ret;
- break;
- case BPF_KPTR_UNREF:
- case BPF_KPTR_REF:
- case BPF_KPTR_PERCPU:
- ret = btf_find_kptr(btf, member_type, off, sz,
- idx < info_cnt ? &info[idx] : &tmp);
- if (ret < 0)
- return ret;
- break;
- case BPF_LIST_HEAD:
- case BPF_RB_ROOT:
- ret = btf_find_graph_root(btf, t, member_type,
- i, off, sz,
- idx < info_cnt ? &info[idx] : &tmp,
- field_type);
- if (ret < 0)
- return ret;
- break;
- default:
- return -EFAULT;
- }
- if (ret == BTF_FIELD_IGNORE)
- continue;
- if (idx >= info_cnt)
- return -E2BIG;
- ++idx;
+ ret = btf_find_field_one(btf, t, member_type, i,
+ off, 0,
+ field_mask, &seen_mask,
+ &info[idx], info_cnt - idx, level);
+ if (ret < 0)
+ return ret;
+ idx += ret;
}
return idx;
}
static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
u32 field_mask, struct btf_field_info *info,
- int info_cnt)
+ int info_cnt, u32 level)
{
- int ret, idx = 0, align, sz, field_type;
+ int ret, idx = 0;
const struct btf_var_secinfo *vsi;
- struct btf_field_info tmp;
u32 i, off, seen_mask = 0;
for_each_vsi(i, t, vsi) {
const struct btf_type *var = btf_type_by_id(btf, vsi->type);
const struct btf_type *var_type = btf_type_by_id(btf, var->type);
- field_type = btf_get_field_type(__btf_name_by_offset(btf, var_type->name_off),
- field_mask, &seen_mask, &align, &sz);
- if (field_type == 0)
- continue;
- if (field_type < 0)
- return field_type;
-
off = vsi->offset;
- if (vsi->size != sz)
- continue;
- if (off % align)
- continue;
-
- switch (field_type) {
- case BPF_SPIN_LOCK:
- case BPF_TIMER:
- case BPF_WORKQUEUE:
- case BPF_LIST_NODE:
- case BPF_RB_NODE:
- case BPF_REFCOUNT:
- ret = btf_find_struct(btf, var_type, off, sz, field_type,
- idx < info_cnt ? &info[idx] : &tmp);
- if (ret < 0)
- return ret;
- break;
- case BPF_KPTR_UNREF:
- case BPF_KPTR_REF:
- case BPF_KPTR_PERCPU:
- ret = btf_find_kptr(btf, var_type, off, sz,
- idx < info_cnt ? &info[idx] : &tmp);
- if (ret < 0)
- return ret;
- break;
- case BPF_LIST_HEAD:
- case BPF_RB_ROOT:
- ret = btf_find_graph_root(btf, var, var_type,
- -1, off, sz,
- idx < info_cnt ? &info[idx] : &tmp,
- field_type);
- if (ret < 0)
- return ret;
- break;
- default:
- return -EFAULT;
- }
-
- if (ret == BTF_FIELD_IGNORE)
- continue;
- if (idx >= info_cnt)
- return -E2BIG;
- ++idx;
+ ret = btf_find_field_one(btf, var, var_type, -1, off, vsi->size,
+ field_mask, &seen_mask,
+ &info[idx], info_cnt - idx,
+ level);
+ if (ret < 0)
+ return ret;
+ idx += ret;
}
return idx;
}
@@ -3637,9 +3731,9 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
int info_cnt)
{
if (__btf_type_is_struct(t))
- return btf_find_struct_field(btf, t, field_mask, info, info_cnt);
+ return btf_find_struct_field(btf, t, field_mask, info, info_cnt, 0);
else if (btf_type_is_datasec(t))
- return btf_find_datasec_var(btf, t, field_mask, info, info_cnt);
+ return btf_find_datasec_var(btf, t, field_mask, info, info_cnt, 0);
return -EINVAL;
}
@@ -6693,7 +6787,7 @@ int btf_struct_access(struct bpf_verifier_log *log,
for (i = 0; i < rec->cnt; i++) {
struct btf_field *field = &rec->fields[i];
u32 offset = field->offset;
- if (off < offset + btf_field_type_size(field->type) && offset < off + size) {
+ if (off < offset + field->size && offset < off + size) {
bpf_log(log,
"direct access to %s is disallowed\n",
btf_field_type_name(field->type));
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 2a69a9a36c0f..6f1abcb4b084 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2744,6 +2744,122 @@ __bpf_kfunc void bpf_preempt_enable(void)
preempt_enable();
}
+/* Opaque bits iterator state. The kfuncs in this file cast it to
+ * struct bpf_iter_bits_kern, and bpf_iter_bits_new() asserts at build time
+ * that both structs have the same size and alignment.
+ */
+struct bpf_iter_bits {
+ __u64 __opaque[2];
+} __aligned(8);
+
+/* Kernel-side layout of struct bpf_iter_bits. */
+struct bpf_iter_bits_kern {
+ union {
+ unsigned long *bits; /* bpf_mem_alloc()ed copy, used when nr_bits != 64 */
+ unsigned long bits_copy; /* inline copy, used when nr_bits == 64 */
+ };
+ u32 nr_bits; /* number of bits to iterate; 0 means nothing to iterate */
+ int bit; /* index last returned by bpf_iter_bits_next(); -1 initially */
+} __aligned(8);
+
+/**
+ * bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area
+ * @it: The new bpf_iter_bits to be created
+ * @unsafe_ptr__ign: A pointer pointing to a memory area to be iterated over
+ * @nr_words: The size of the specified memory area, measured in 8-byte units.
+ * Due to the limitation of memalloc, it can't be greater than 512.
+ *
+ * This function initializes a new bpf_iter_bits structure for iterating over
+ * a memory area which is specified by the @unsafe_ptr__ign and @nr_words. It
+ * copies the data of the memory area to the newly created bpf_iter_bits @it for
+ * subsequent iteration operations.
+ *
+ * On success, 0 is returned. On failure, ERR is returned.
+ */
+__bpf_kfunc int
+bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words)
+{
+ struct bpf_iter_bits_kern *kit = (void *)it;
+ u32 nr_bytes = nr_words * sizeof(u64);
+ u32 nr_bits = BYTES_TO_BITS(nr_bytes);
+ int err;
+
+ BUILD_BUG_ON(sizeof(struct bpf_iter_bits_kern) != sizeof(struct bpf_iter_bits));
+ BUILD_BUG_ON(__alignof__(struct bpf_iter_bits_kern) !=
+ __alignof__(struct bpf_iter_bits));
+
+ kit->nr_bits = 0;
+ kit->bits_copy = 0;
+ kit->bit = -1;
+
+ if (!unsafe_ptr__ign || !nr_words)
+ return -EINVAL;
+
+ /* Optimization for u64 mask */
+ if (nr_bits == 64) {
+ err = bpf_probe_read_kernel_common(&kit->bits_copy, nr_bytes, unsafe_ptr__ign);
+ if (err)
+ return -EFAULT;
+
+ kit->nr_bits = nr_bits;
+ return 0;
+ }
+
+ /* Fallback to memalloc */
+ kit->bits = bpf_mem_alloc(&bpf_global_ma, nr_bytes);
+ if (!kit->bits)
+ return -ENOMEM;
+
+ err = bpf_probe_read_kernel_common(kit->bits, nr_bytes, unsafe_ptr__ign);
+ if (err) {
+ bpf_mem_free(&bpf_global_ma, kit->bits);
+ return err;
+ }
+
+ kit->nr_bits = nr_bits;
+ return 0;
+}
+
+/**
+ * bpf_iter_bits_next() - Get the next bit in a bpf_iter_bits
+ * @it: The bpf_iter_bits to be checked
+ *
+ * This function returns a pointer to a number holding the index of the next
+ * set bit, searching from just after the previously returned index.
+ *
+ * If there are no further bits available, it returns NULL, and keeps
+ * returning NULL on every later call.
+ */
+__bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it)
+{
+	struct bpf_iter_bits_kern *kit = (void *)it;
+	int bit = kit->bit, nr_bits = kit->nr_bits;
+	const unsigned long *bits;
+
+	/* Nothing to iterate, or a previous call already ran off the end. */
+	if (!nr_bits || bit >= nr_bits)
+		return NULL;
+
+	bits = nr_bits == 64 ? &kit->bits_copy : kit->bits;
+	bit = find_next_bit(bits, nr_bits, bit + 1);
+	if (bit >= nr_bits) {
+		/* Record exhaustion in kit->bit and leave kit->nr_bits alone:
+		 * bpf_iter_bits_destroy() uses nr_bits to decide whether
+		 * kit->bits has to be freed, so clearing it here would leak
+		 * the buffer.
+		 */
+		kit->bit = bit;
+		return NULL;
+	}
+
+	kit->bit = bit;
+	return &kit->bit;
+}
+
+/**
+ * bpf_iter_bits_destroy() - Destroy a bpf_iter_bits
+ * @it: The bpf_iter_bits to be destroyed
+ *
+ * Release the buffer held by the bpf_iter_bits, if it holds one.
+ */
+__bpf_kfunc void bpf_iter_bits_destroy(struct bpf_iter_bits *it)
+{
+	struct bpf_iter_bits_kern *kit = (void *)it;
+
+	/* Only iterators recorded as wider than 64 bits hold a buffer. */
+	if (kit->nr_bits > 64)
+		bpf_mem_free(&bpf_global_ma, kit->bits);
+}
+
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(generic_btf_ids)
@@ -2826,6 +2942,9 @@ BTF_ID_FLAGS(func, bpf_wq_set_callback_impl)
BTF_ID_FLAGS(func, bpf_wq_start)
BTF_ID_FLAGS(func, bpf_preempt_disable)
BTF_ID_FLAGS(func, bpf_preempt_enable)
+BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
+BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f45ed6adc092..869265852d51 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3151,6 +3151,13 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
}
#endif
+/* ->poll handler for bpf_link files: forwards to the poll callback of the
+ * link's ops and returns its result.
+ */
+static __poll_t bpf_link_poll(struct file *file, struct poll_table_struct *pts)
+{
+	struct bpf_link *lnk = file->private_data;
+	__poll_t events;
+
+	events = lnk->ops->poll(file, pts);
+	return events;
+}
+
static const struct file_operations bpf_link_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = bpf_link_show_fdinfo,
@@ -3160,6 +3167,16 @@ static const struct file_operations bpf_link_fops = {
.write = bpf_dummy_write,
};
+/* File operations for links whose ops provide a poll callback: the same
+ * handlers as bpf_link_fops, plus .poll. Selected instead of bpf_link_fops
+ * when link->ops->poll is set.
+ */
+static const struct file_operations bpf_link_fops_poll = {
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = bpf_link_show_fdinfo,
+#endif
+ .release = bpf_link_release,
+ .read = bpf_dummy_read,
+ .write = bpf_dummy_write,
+ .poll = bpf_link_poll, /* forwards to link->ops->poll */
+};
+
static int bpf_link_alloc_id(struct bpf_link *link)
{
int id;
@@ -3202,7 +3219,9 @@ int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
return id;
}
- file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
+ file = anon_inode_getfile("bpf_link",
+ link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops,
+ link, O_CLOEXEC);
if (IS_ERR(file)) {
bpf_link_free_id(id);
put_unused_fd(fd);
@@ -3230,7 +3249,9 @@ int bpf_link_settle(struct bpf_link_primer *primer)
int bpf_link_new_fd(struct bpf_link *link)
{
- return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
+ return anon_inode_getfd("bpf-link",
+ link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops,
+ link, O_CLOEXEC);
}
struct bpf_link *bpf_link_get_from_fd(u32 ufd)
@@ -3240,7 +3261,7 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd)
if (!f.file)
return ERR_PTR(-EBADF);
- if (f.file->f_op != &bpf_link_fops) {
+ if (f.file->f_op != &bpf_link_fops && f.file->f_op != &bpf_link_fops_poll) {
fdput(f);
return ERR_PTR(-EINVAL);
}
@@ -4972,7 +4993,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
uattr);
else if (f.file->f_op == &btf_fops)
err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
- else if (f.file->f_op == &bpf_link_fops)
+ else if (f.file->f_op == &bpf_link_fops || f.file->f_op == &bpf_link_fops_poll)
err = bpf_link_get_info_by_fd(f.file, f.file->private_data,
attr, uattr);
else
@@ -5107,7 +5128,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
if (!file)
return -EBADF;
- if (file->f_op == &bpf_link_fops) {
+ if (file->f_op == &bpf_link_fops || file->f_op == &bpf_link_fops_poll) {
struct bpf_link *link = file->private_data;
if (link->ops == &bpf_raw_tp_link_lops) {
@@ -5417,10 +5438,11 @@ static int link_detach(union bpf_attr *attr)
return ret;
}
-static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
+struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
{
return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT);
}
+EXPORT_SYMBOL(bpf_link_inc_not_zero);
struct bpf_link *bpf_link_by_id(u32 id)
{
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 36ef8e96787e..20ac9cfd54dd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5448,7 +5448,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
* this program. To check that [x1, x2) overlaps with [y1, y2),
* it is sufficient to check x1 < y2 && y1 < x2.
*/
- if (reg->smin_value + off < p + btf_field_type_size(field->type) &&
+ if (reg->smin_value + off < p + field->size &&
p < reg->umax_value + off + size) {
switch (field->type) {
case BPF_KPTR_UNREF:
@@ -11648,7 +11648,7 @@ __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
node_off = reg->off + reg->var_off.value;
field = reg_find_field_offset(reg, node_off, node_field_type);
- if (!field || field->offset != node_off) {
+ if (!field) {
verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
return -EINVAL;
}