summaryrefslogtreecommitdiff
path: root/kernel/bpf
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/Kconfig6
-rw-r--r--kernel/bpf/Makefile2
-rw-r--r--kernel/bpf/arraymap.c65
-rw-r--r--kernel/bpf/bloom_filter.c6
-rw-r--r--kernel/bpf/bpf_inode_storage.c19
-rw-r--r--kernel/bpf/bpf_iter.c52
-rw-r--r--kernel/bpf/bpf_local_storage.c89
-rw-r--r--kernel/bpf/bpf_lru_list.h1
-rw-r--r--kernel/bpf/bpf_lsm.c38
-rw-r--r--kernel/bpf/bpf_struct_ops.c81
-rw-r--r--kernel/bpf/bpf_task_storage.c19
-rw-r--r--kernel/bpf/btf.c1311
-rw-r--r--kernel/bpf/cgroup.c265
-rw-r--r--kernel/bpf/core.c399
-rw-r--r--kernel/bpf/cpumap.c14
-rw-r--r--kernel/bpf/devmap.c13
-rw-r--r--kernel/bpf/hashtab.c135
-rw-r--r--kernel/bpf/helpers.c268
-rw-r--r--kernel/bpf/inode.c39
-rw-r--r--kernel/bpf/link_iter.c107
-rw-r--r--kernel/bpf/local_storage.c9
-rw-r--r--kernel/bpf/lpm_trie.c6
-rw-r--r--kernel/bpf/map_in_map.c5
-rw-r--r--kernel/bpf/preload/Kconfig7
-rw-r--r--kernel/bpf/preload/Makefile41
-rw-r--r--kernel/bpf/preload/bpf_preload.h8
-rw-r--r--kernel/bpf/preload/bpf_preload_kern.c126
-rw-r--r--kernel/bpf/preload/bpf_preload_umd_blob.S7
-rw-r--r--kernel/bpf/preload/iterators/Makefile6
-rw-r--r--kernel/bpf/preload/iterators/bpf_preload_common.h13
-rw-r--r--kernel/bpf/preload/iterators/iterators.c94
-rw-r--r--kernel/bpf/preload/iterators/iterators.lskel.h425
-rw-r--r--kernel/bpf/preload/iterators/iterators.skel.h412
-rw-r--r--kernel/bpf/queue_stack_maps.c10
-rw-r--r--kernel/bpf/reuseport_array.c8
-rw-r--r--kernel/bpf/ringbuf.c88
-rw-r--r--kernel/bpf/stackmap.c75
-rw-r--r--kernel/bpf/syscall.c591
-rw-r--r--kernel/bpf/task_iter.c1
-rw-r--r--kernel/bpf/trampoline.c146
-rw-r--r--kernel/bpf/verifier.c1211
41 files changed, 4549 insertions, 1669 deletions
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index d24d518ddd63..2dfe1079f772 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -27,9 +27,11 @@ config BPF_SYSCALL
bool "Enable bpf() system call"
select BPF
select IRQ_WORK
+ select TASKS_RCU if PREEMPTION
select TASKS_TRACE_RCU
select BINARY_PRINTF
select NET_SOCK_MSG if NET
+ select PAGE_POOL if NET
default n
help
Enable the bpf() system call that allows to manipulate BPF programs
@@ -58,6 +60,10 @@ config BPF_JIT_ALWAYS_ON
Enables BPF JIT and removes BPF interpreter to avoid speculative
execution of BPF instructions by the interpreter.
+ When CONFIG_BPF_JIT_ALWAYS_ON is enabled, /proc/sys/net/core/bpf_jit_enable
+ is permanently set to 1 and setting any other value than that will
+ return failure.
+
config BPF_JIT_DEFAULT_ON
def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
depends on HAVE_EBPF_JIT && BPF_JIT
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index c1a9be6a4b9f..057ba8e01e70 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
endif
CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index c7a5be3bf8be..fe40d3b9458f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -11,6 +11,7 @@
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
+#include <linux/btf_ids.h>
#include "map_in_map.h"
@@ -242,6 +243,20 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}
+static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ u32 index = *(u32 *)key;
+
+ if (cpu >= nr_cpu_ids)
+ return NULL;
+
+ if (unlikely(index >= array->map.max_entries))
+ return NULL;
+
+ return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
+}
+
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
@@ -287,10 +302,12 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key
return 0;
}
-static void check_and_free_timer_in_array(struct bpf_array *arr, void *val)
+static void check_and_free_fields(struct bpf_array *arr, void *val)
{
- if (unlikely(map_value_has_timer(&arr->map)))
+ if (map_value_has_timer(&arr->map))
bpf_timer_cancel_and_free(val + arr->map.timer_off);
+ if (map_value_has_kptrs(&arr->map))
+ bpf_map_free_kptrs(&arr->map, val);
}
/* Called from syscall or from eBPF program */
@@ -327,7 +344,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
copy_map_value_locked(map, val, value, false);
else
copy_map_value(map, val, value);
- check_and_free_timer_in_array(array, val);
+ check_and_free_fields(array, val);
}
return 0;
}
@@ -386,7 +403,8 @@ static void array_map_free_timers(struct bpf_map *map)
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
- if (likely(!map_value_has_timer(map)))
+ /* We don't reset or free kptr on uref dropping to zero. */
+ if (!map_value_has_timer(map))
return;
for (i = 0; i < array->map.max_entries; i++)
@@ -398,6 +416,13 @@ static void array_map_free_timers(struct bpf_map *map)
static void array_map_free(struct bpf_map *map)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
+ int i;
+
+ if (map_value_has_kptrs(map)) {
+ for (i = 0; i < array->map.max_entries; i++)
+ bpf_map_free_kptrs(map, array->value + array->elem_size * i);
+ bpf_map_free_kptr_off_tab(map);
+ }
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
bpf_array_free_percpu(array);
@@ -680,7 +705,7 @@ static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_
return num_elems;
}
-static int array_map_btf_id;
+BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array)
const struct bpf_map_ops array_map_ops = {
.map_meta_equal = array_map_meta_equal,
.map_alloc_check = array_map_alloc_check,
@@ -701,12 +726,10 @@ const struct bpf_map_ops array_map_ops = {
.map_update_batch = generic_map_update_batch,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_array_elem,
- .map_btf_name = "bpf_array",
- .map_btf_id = &array_map_btf_id,
+ .map_btf_id = &array_map_btf_ids[0],
.iter_seq_info = &iter_seq_info,
};
-static int percpu_array_map_btf_id;
const struct bpf_map_ops percpu_array_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = array_map_alloc_check,
@@ -716,14 +739,14 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_lookup_elem = percpu_array_map_lookup_elem,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
+ .map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
.map_seq_show_elem = percpu_array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
.map_lookup_batch = generic_map_lookup_batch,
.map_update_batch = generic_map_update_batch,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_array_elem,
- .map_btf_name = "bpf_array",
- .map_btf_id = &percpu_array_map_btf_id,
+ .map_btf_id = &array_map_btf_ids[0],
.iter_seq_info = &iter_seq_info,
};
@@ -837,13 +860,12 @@ static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
static void *prog_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file, int fd)
{
- struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_prog *prog = bpf_prog_get(fd);
if (IS_ERR(prog))
return prog;
- if (!bpf_prog_array_compatible(array, prog)) {
+ if (!bpf_prog_map_compatible(map, prog)) {
bpf_prog_put(prog);
return ERR_PTR(-EINVAL);
}
@@ -1071,7 +1093,6 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
INIT_WORK(&aux->work, prog_array_map_clear_deferred);
INIT_LIST_HEAD(&aux->poke_progs);
mutex_init(&aux->poke_mutex);
- spin_lock_init(&aux->owner.lock);
map = array_map_alloc(attr);
if (IS_ERR(map)) {
@@ -1104,7 +1125,6 @@ static void prog_array_map_free(struct bpf_map *map)
* Thus, prog_array_map cannot be used as an inner_map
* and map_meta_equal is not implemented.
*/
-static int prog_array_map_btf_id;
const struct bpf_map_ops prog_array_map_ops = {
.map_alloc_check = fd_array_map_alloc_check,
.map_alloc = prog_array_map_alloc,
@@ -1120,8 +1140,7 @@ const struct bpf_map_ops prog_array_map_ops = {
.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
.map_release_uref = prog_array_map_clear,
.map_seq_show_elem = prog_array_map_seq_show_elem,
- .map_btf_name = "bpf_array",
- .map_btf_id = &prog_array_map_btf_id,
+ .map_btf_id = &array_map_btf_ids[0],
};
static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
@@ -1210,7 +1229,6 @@ static void perf_event_fd_array_map_free(struct bpf_map *map)
fd_array_map_free(map);
}
-static int perf_event_array_map_btf_id;
const struct bpf_map_ops perf_event_array_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = fd_array_map_alloc_check,
@@ -1223,8 +1241,7 @@ const struct bpf_map_ops perf_event_array_map_ops = {
.map_fd_put_ptr = perf_event_fd_array_put_ptr,
.map_release = perf_event_fd_array_release,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "bpf_array",
- .map_btf_id = &perf_event_array_map_btf_id,
+ .map_btf_id = &array_map_btf_ids[0],
};
#ifdef CONFIG_CGROUPS
@@ -1247,7 +1264,6 @@ static void cgroup_fd_array_free(struct bpf_map *map)
fd_array_map_free(map);
}
-static int cgroup_array_map_btf_id;
const struct bpf_map_ops cgroup_array_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = fd_array_map_alloc_check,
@@ -1259,8 +1275,7 @@ const struct bpf_map_ops cgroup_array_map_ops = {
.map_fd_get_ptr = cgroup_fd_array_get_ptr,
.map_fd_put_ptr = cgroup_fd_array_put_ptr,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "bpf_array",
- .map_btf_id = &cgroup_array_map_btf_id,
+ .map_btf_id = &array_map_btf_ids[0],
};
#endif
@@ -1334,7 +1349,6 @@ static int array_of_map_gen_lookup(struct bpf_map *map,
return insn - insn_buf;
}
-static int array_of_maps_map_btf_id;
const struct bpf_map_ops array_of_maps_map_ops = {
.map_alloc_check = fd_array_map_alloc_check,
.map_alloc = array_of_map_alloc,
@@ -1346,7 +1360,8 @@ const struct bpf_map_ops array_of_maps_map_ops = {
.map_fd_put_ptr = bpf_map_fd_put_ptr,
.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
.map_gen_lookup = array_of_map_gen_lookup,
+ .map_lookup_batch = generic_map_lookup_batch,
+ .map_update_batch = generic_map_update_batch,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "bpf_array",
- .map_btf_id = &array_of_maps_map_btf_id,
+ .map_btf_id = &array_map_btf_ids[0],
};
diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
index b141a1346f72..b9ea539a5561 100644
--- a/kernel/bpf/bloom_filter.c
+++ b/kernel/bpf/bloom_filter.c
@@ -7,6 +7,7 @@
#include <linux/err.h>
#include <linux/jhash.h>
#include <linux/random.h>
+#include <linux/btf_ids.h>
#define BLOOM_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_ZERO_SEED | BPF_F_ACCESS_MASK)
@@ -192,7 +193,7 @@ static int bloom_map_check_btf(const struct bpf_map *map,
return btf_type_is_void(key_type) ? 0 : -EINVAL;
}
-static int bpf_bloom_map_btf_id;
+BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter)
const struct bpf_map_ops bloom_filter_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc = bloom_map_alloc,
@@ -205,6 +206,5 @@ const struct bpf_map_ops bloom_filter_map_ops = {
.map_update_elem = bloom_map_update_elem,
.map_delete_elem = bloom_map_delete_elem,
.map_check_btf = bloom_map_check_btf,
- .map_btf_name = "bpf_bloom_filter",
- .map_btf_id = &bpf_bloom_map_btf_id,
+ .map_btf_id = &bpf_bloom_map_btf_ids[0],
};
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index e29d9e3d853e..5f7683b19199 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -90,7 +90,7 @@ void bpf_inode_storage_free(struct inode *inode)
*/
bpf_selem_unlink_map(selem);
free_inode_storage = bpf_selem_unlink_storage_nolock(
- local_storage, selem, false);
+ local_storage, selem, false, false);
}
raw_spin_unlock_bh(&local_storage->lock);
rcu_read_unlock();
@@ -136,7 +136,7 @@ static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
sdata = bpf_local_storage_update(f->f_inode,
(struct bpf_local_storage_map *)map,
- value, map_flags);
+ value, map_flags, GFP_ATOMIC);
fput(f);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -149,7 +149,7 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata));
+ bpf_selem_unlink(SELEM(sdata), true);
return 0;
}
@@ -169,8 +169,9 @@ static int bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
return err;
}
-BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
- void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
+ void *, value, u64, flags, gfp_t, gfp_flags)
{
struct bpf_local_storage_data *sdata;
@@ -196,7 +197,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
sdata = bpf_local_storage_update(
inode, (struct bpf_local_storage_map *)map, value,
- BPF_NOEXIST);
+ BPF_NOEXIST, gfp_flags);
return IS_ERR(sdata) ? (unsigned long)NULL :
(unsigned long)sdata->data;
}
@@ -244,7 +245,8 @@ static void inode_storage_map_free(struct bpf_map *map)
bpf_local_storage_map_free(smap, NULL);
}
-static int inode_storage_map_btf_id;
+BTF_ID_LIST_SINGLE(inode_storage_map_btf_ids, struct,
+ bpf_local_storage_map)
const struct bpf_map_ops inode_storage_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = bpf_local_storage_map_alloc_check,
@@ -255,8 +257,7 @@ const struct bpf_map_ops inode_storage_map_ops = {
.map_update_elem = bpf_fd_inode_storage_update_elem,
.map_delete_elem = bpf_fd_inode_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
- .map_btf_name = "bpf_local_storage_map",
- .map_btf_id = &inode_storage_map_btf_id,
+ .map_btf_id = &inode_storage_map_btf_ids[0],
.map_owner_storage_ptr = inode_storage_ptr,
};
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index b7aef5b3416d..d5d96ceca105 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -5,6 +5,7 @@
#include <linux/anon_inodes.h>
#include <linux/filter.h>
#include <linux/bpf.h>
+#include <linux/rcupdate_trace.h>
struct bpf_iter_target_info {
struct list_head list;
@@ -329,35 +330,34 @@ static void cache_btf_id(struct bpf_iter_target_info *tinfo,
bool bpf_iter_prog_supported(struct bpf_prog *prog)
{
const char *attach_fname = prog->aux->attach_func_name;
+ struct bpf_iter_target_info *tinfo = NULL, *iter;
u32 prog_btf_id = prog->aux->attach_btf_id;
const char *prefix = BPF_ITER_FUNC_PREFIX;
- struct bpf_iter_target_info *tinfo;
int prefix_len = strlen(prefix);
- bool supported = false;
if (strncmp(attach_fname, prefix, prefix_len))
return false;
mutex_lock(&targets_mutex);
- list_for_each_entry(tinfo, &targets, list) {
- if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) {
- supported = true;
+ list_for_each_entry(iter, &targets, list) {
+ if (iter->btf_id && iter->btf_id == prog_btf_id) {
+ tinfo = iter;
break;
}
- if (!strcmp(attach_fname + prefix_len, tinfo->reg_info->target)) {
- cache_btf_id(tinfo, prog);
- supported = true;
+ if (!strcmp(attach_fname + prefix_len, iter->reg_info->target)) {
+ cache_btf_id(iter, prog);
+ tinfo = iter;
break;
}
}
mutex_unlock(&targets_mutex);
- if (supported) {
+ if (tinfo) {
prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size;
prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info;
}
- return supported;
+ return tinfo != NULL;
}
const struct bpf_func_proto *
@@ -498,12 +498,11 @@ bool bpf_link_is_iter(struct bpf_link *link)
int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
struct bpf_prog *prog)
{
+ struct bpf_iter_target_info *tinfo = NULL, *iter;
struct bpf_link_primer link_primer;
- struct bpf_iter_target_info *tinfo;
union bpf_iter_link_info linfo;
struct bpf_iter_link *link;
u32 prog_btf_id, linfo_len;
- bool existed = false;
bpfptr_t ulinfo;
int err;
@@ -529,14 +528,14 @@ int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
prog_btf_id = prog->aux->attach_btf_id;
mutex_lock(&targets_mutex);
- list_for_each_entry(tinfo, &targets, list) {
- if (tinfo->btf_id == prog_btf_id) {
- existed = true;
+ list_for_each_entry(iter, &targets, list) {
+ if (iter->btf_id == prog_btf_id) {
+ tinfo = iter;
break;
}
}
mutex_unlock(&targets_mutex);
- if (!existed)
+ if (!tinfo)
return -ENOENT;
link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
@@ -546,7 +545,7 @@ int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog);
link->tinfo = tinfo;
- err = bpf_link_prime(&link->link, &link_primer);
+ err = bpf_link_prime(&link->link, &link_primer);
if (err) {
kfree(link);
return err;
@@ -684,11 +683,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
{
int ret;
- rcu_read_lock();
- migrate_disable();
- ret = bpf_prog_run(prog, ctx);
- migrate_enable();
- rcu_read_unlock();
+ if (prog->aux->sleepable) {
+ rcu_read_lock_trace();
+ migrate_disable();
+ might_fault();
+ ret = bpf_prog_run(prog, ctx);
+ migrate_enable();
+ rcu_read_unlock_trace();
+ } else {
+ rcu_read_lock();
+ migrate_disable();
+ ret = bpf_prog_run(prog, ctx);
+ migrate_enable();
+ rcu_read_unlock();
+ }
/* bpf program can only return 0 or 1:
* 0 : okay
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 71de2a89869c..8ce40fd869f6 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -63,7 +63,7 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
- void *value, bool charge_mem)
+ void *value, bool charge_mem, gfp_t gfp_flags)
{
struct bpf_local_storage_elem *selem;
@@ -71,7 +71,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
return NULL;
selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
- GFP_ATOMIC | __GFP_NOWARN);
+ gfp_flags | __GFP_NOWARN);
if (selem) {
if (value)
memcpy(SDATA(selem)->data, value, smap->map.value_size);
@@ -106,7 +106,7 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
*/
bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem,
- bool uncharge_mem)
+ bool uncharge_mem, bool use_trace_rcu)
{
struct bpf_local_storage_map *smap;
bool free_local_storage;
@@ -136,7 +136,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
* will be done by the caller.
*
* Although the unlock will be done under
- * rcu_read_lock(), it is more intutivie to
+ * rcu_read_lock(), it is more intuitive to
* read if the freeing of the storage is done
* after the raw_spin_unlock_bh(&local_storage->lock).
*
@@ -150,11 +150,16 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
SDATA(selem))
RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
- call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
+ if (use_trace_rcu)
+ call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
+ else
+ kfree_rcu(selem, rcu);
+
return free_local_storage;
}
-static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
+static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
+ bool use_trace_rcu)
{
struct bpf_local_storage *local_storage;
bool free_local_storage = false;
@@ -169,12 +174,16 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
raw_spin_lock_irqsave(&local_storage->lock, flags);
if (likely(selem_linked_to_storage(selem)))
free_local_storage = bpf_selem_unlink_storage_nolock(
- local_storage, selem, true);
+ local_storage, selem, true, use_trace_rcu);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
- if (free_local_storage)
- call_rcu_tasks_trace(&local_storage->rcu,
+ if (free_local_storage) {
+ if (use_trace_rcu)
+ call_rcu_tasks_trace(&local_storage->rcu,
bpf_local_storage_free_rcu);
+ else
+ kfree_rcu(local_storage, rcu);
+ }
}
void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
@@ -214,14 +223,14 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap,
raw_spin_unlock_irqrestore(&b->lock, flags);
}
-void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu)
{
/* Always unlink from map before unlinking from local_storage
* because selem will be freed after successfully unlinked from
* the local_storage.
*/
bpf_selem_unlink_map(selem);
- __bpf_selem_unlink_storage(selem);
+ __bpf_selem_unlink_storage(selem, use_trace_rcu);
}
struct bpf_local_storage_data *
@@ -282,7 +291,8 @@ static int check_flags(const struct bpf_local_storage_data *old_sdata,
int bpf_local_storage_alloc(void *owner,
struct bpf_local_storage_map *smap,
- struct bpf_local_storage_elem *first_selem)
+ struct bpf_local_storage_elem *first_selem,
+ gfp_t gfp_flags)
{
struct bpf_local_storage *prev_storage, *storage;
struct bpf_local_storage **owner_storage_ptr;
@@ -293,7 +303,7 @@ int bpf_local_storage_alloc(void *owner,
return err;
storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
- GFP_ATOMIC | __GFP_NOWARN);
+ gfp_flags | __GFP_NOWARN);
if (!storage) {
err = -ENOMEM;
goto uncharge;
@@ -350,10 +360,10 @@ uncharge:
*/
struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
- void *value, u64 map_flags)
+ void *value, u64 map_flags, gfp_t gfp_flags)
{
struct bpf_local_storage_data *old_sdata = NULL;
- struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage_elem *selem = NULL;
struct bpf_local_storage *local_storage;
unsigned long flags;
int err;
@@ -365,6 +375,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
!map_value_has_spin_lock(&smap->map)))
return ERR_PTR(-EINVAL);
+ if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
+ return ERR_PTR(-EINVAL);
+
local_storage = rcu_dereference_check(*owner_storage(smap, owner),
bpf_rcu_lock_held());
if (!local_storage || hlist_empty(&local_storage->list)) {
@@ -373,11 +386,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
if (err)
return ERR_PTR(err);
- selem = bpf_selem_alloc(smap, owner, value, true);
+ selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
if (!selem)
return ERR_PTR(-ENOMEM);
- err = bpf_local_storage_alloc(owner, smap, selem);
+ err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
if (err) {
kfree(selem);
mem_uncharge(smap, owner, smap->elem_size);
@@ -404,6 +417,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
}
}
+ if (gfp_flags == GFP_KERNEL) {
+ selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
+ if (!selem)
+ return ERR_PTR(-ENOMEM);
+ }
+
raw_spin_lock_irqsave(&local_storage->lock, flags);
/* Recheck local_storage->list under local_storage->lock */
@@ -429,19 +448,21 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
goto unlock;
}
- /* local_storage->lock is held. Hence, we are sure
- * we can unlink and uncharge the old_sdata successfully
- * later. Hence, instead of charging the new selem now
- * and then uncharge the old selem later (which may cause
- * a potential but unnecessary charge failure), avoid taking
- * a charge at all here (the "!old_sdata" check) and the
- * old_sdata will not be uncharged later during
- * bpf_selem_unlink_storage_nolock().
- */
- selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
- if (!selem) {
- err = -ENOMEM;
- goto unlock_err;
+ if (gfp_flags != GFP_KERNEL) {
+ /* local_storage->lock is held. Hence, we are sure
+ * we can unlink and uncharge the old_sdata successfully
+ * later. Hence, instead of charging the new selem now
+ * and then uncharge the old selem later (which may cause
+ * a potential but unnecessary charge failure), avoid taking
+ * a charge at all here (the "!old_sdata" check) and the
+ * old_sdata will not be uncharged later during
+ * bpf_selem_unlink_storage_nolock().
+ */
+ selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
+ if (!selem) {
+ err = -ENOMEM;
+ goto unlock_err;
+ }
}
/* First, link the new selem to the map */
@@ -454,7 +475,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
if (old_sdata) {
bpf_selem_unlink_map(SELEM(old_sdata));
bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
- false);
+ false, true);
}
unlock:
@@ -463,6 +484,10 @@ unlock:
unlock_err:
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+ if (selem) {
+ mem_uncharge(smap, owner, smap->elem_size);
+ kfree(selem);
+ }
return ERR_PTR(err);
}
@@ -532,7 +557,7 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
migrate_disable();
__this_cpu_inc(*busy_counter);
}
- bpf_selem_unlink(selem);
+ bpf_selem_unlink(selem, false);
if (busy_counter) {
__this_cpu_dec(*busy_counter);
migrate_enable();
diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
index 6b12f06ee18c..4ea227c9c1ad 100644
--- a/kernel/bpf/bpf_lru_list.h
+++ b/kernel/bpf/bpf_lru_list.h
@@ -4,6 +4,7 @@
#ifndef __BPF_LRU_LIST_H_
#define __BPF_LRU_LIST_H_
+#include <linux/cache.h>
#include <linux/list.h>
#include <linux/spinlock_types.h>
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 9e4ecc990647..c1351df9f7ee 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -99,6 +99,39 @@ static const struct bpf_func_proto bpf_ima_inode_hash_proto = {
.allowed = bpf_ima_inode_hash_allowed,
};
+BPF_CALL_3(bpf_ima_file_hash, struct file *, file, void *, dst, u32, size)
+{
+ return ima_file_hash(file, dst, size);
+}
+
+BTF_ID_LIST_SINGLE(bpf_ima_file_hash_btf_ids, struct, file)
+
+static const struct bpf_func_proto bpf_ima_file_hash_proto = {
+ .func = bpf_ima_file_hash,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_ima_file_hash_btf_ids[0],
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .allowed = bpf_ima_inode_hash_allowed,
+};
+
+BPF_CALL_1(bpf_get_attach_cookie, void *, ctx)
+{
+ struct bpf_trace_run_ctx *run_ctx;
+
+ run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
+ return run_ctx->bpf_cookie;
+}
+
+static const struct bpf_func_proto bpf_get_attach_cookie_proto = {
+ .func = bpf_get_attach_cookie,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
static const struct bpf_func_proto *
bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -121,6 +154,10 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_bprm_opts_set_proto;
case BPF_FUNC_ima_inode_hash:
return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL;
+ case BPF_FUNC_ima_file_hash:
+ return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL;
+ case BPF_FUNC_get_attach_cookie:
+ return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto : NULL;
default:
return tracing_prog_func_proto(func_id, prog);
}
@@ -167,6 +204,7 @@ BTF_ID(func, bpf_lsm_inode_setxattr)
BTF_ID(func, bpf_lsm_inode_symlink)
BTF_ID(func, bpf_lsm_inode_unlink)
BTF_ID(func, bpf_lsm_kernel_module_request)
+BTF_ID(func, bpf_lsm_kernel_read_file)
BTF_ID(func, bpf_lsm_kernfs_init_security)
#ifdef CONFIG_KEYS
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 21069dbe9138..d9a3c9207240 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -10,6 +10,7 @@
#include <linux/seq_file.h>
#include <linux/refcount.h>
#include <linux/mutex.h>
+#include <linux/btf_ids.h>
enum bpf_struct_ops_state {
BPF_STRUCT_OPS_STATE_INIT,
@@ -32,15 +33,15 @@ struct bpf_struct_ops_map {
const struct bpf_struct_ops *st_ops;
/* protect map_update */
struct mutex lock;
- /* progs has all the bpf_prog that is populated
+ /* link has all the bpf_links that is populated
* to the func ptr of the kernel's struct
* (in kvalue.data).
*/
- struct bpf_prog **progs;
+ struct bpf_link **links;
/* image is a page that has all the trampolines
* that stores the func args before calling the bpf_prog.
* A PAGE_SIZE "image" is enough to store all trampoline for
- * "progs[]".
+ * "links[]".
*/
void *image;
/* uvalue->data stores the kernel struct
@@ -263,7 +264,7 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
/* No lock is needed. state and refcnt do not need
* to be updated together under atomic context.
*/
- uvalue = (struct bpf_struct_ops_value *)value;
+ uvalue = value;
memcpy(uvalue, st_map->uvalue, map->value_size);
uvalue->state = state;
refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt));
@@ -282,9 +283,9 @@ static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
u32 i;
for (i = 0; i < btf_type_vlen(t); i++) {
- if (st_map->progs[i]) {
- bpf_prog_put(st_map->progs[i]);
- st_map->progs[i] = NULL;
+ if (st_map->links[i]) {
+ bpf_link_put(st_map->links[i]);
+ st_map->links[i] = NULL;
}
}
}
@@ -315,18 +316,34 @@ static int check_zero_holes(const struct btf_type *t, void *data)
return 0;
}
-int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs,
- struct bpf_prog *prog,
+static void bpf_struct_ops_link_release(struct bpf_link *link)
+{
+}
+
+static void bpf_struct_ops_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_tramp_link *tlink = container_of(link, struct bpf_tramp_link, link);
+
+ kfree(tlink);
+}
+
+const struct bpf_link_ops bpf_struct_ops_link_lops = {
+ .release = bpf_struct_ops_link_release,
+ .dealloc = bpf_struct_ops_link_dealloc,
+};
+
+int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_link *link,
const struct btf_func_model *model,
void *image, void *image_end)
{
u32 flags;
- tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
- tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
+ tlinks[BPF_TRAMP_FENTRY].links[0] = link;
+ tlinks[BPF_TRAMP_FENTRY].nr_links = 1;
flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0;
return arch_prepare_bpf_trampoline(NULL, image, image_end,
- model, flags, tprogs, NULL);
+ model, flags, tlinks, NULL);
}
static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
@@ -337,7 +354,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
struct bpf_struct_ops_value *uvalue, *kvalue;
const struct btf_member *member;
const struct btf_type *t = st_ops->type;
- struct bpf_tramp_progs *tprogs = NULL;
+ struct bpf_tramp_links *tlinks = NULL;
void *udata, *kdata;
int prog_fd, err = 0;
void *image, *image_end;
@@ -353,7 +370,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
if (err)
return err;
- uvalue = (struct bpf_struct_ops_value *)value;
+ uvalue = value;
err = check_zero_holes(t, uvalue->data);
if (err)
return err;
@@ -361,8 +378,8 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
if (uvalue->state || refcount_read(&uvalue->refcnt))
return -EINVAL;
- tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
- if (!tprogs)
+ tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
+ if (!tlinks)
return -ENOMEM;
uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
@@ -385,6 +402,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
for_each_member(i, t, member) {
const struct btf_type *mtype, *ptype;
struct bpf_prog *prog;
+ struct bpf_tramp_link *link;
u32 moff;
moff = __btf_member_bit_offset(t, member) / 8;
@@ -438,16 +456,26 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
err = PTR_ERR(prog);
goto reset_unlock;
}
- st_map->progs[i] = prog;
if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
prog->aux->attach_btf_id != st_ops->type_id ||
prog->expected_attach_type != i) {
+ bpf_prog_put(prog);
err = -EINVAL;
goto reset_unlock;
}
- err = bpf_struct_ops_prepare_trampoline(tprogs, prog,
+ link = kzalloc(sizeof(*link), GFP_USER);
+ if (!link) {
+ bpf_prog_put(prog);
+ err = -ENOMEM;
+ goto reset_unlock;
+ }
+ bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS,
+ &bpf_struct_ops_link_lops, prog);
+ st_map->links[i] = &link->link;
+
+ err = bpf_struct_ops_prepare_trampoline(tlinks, link,
&st_ops->func_models[i],
image, image_end);
if (err < 0)
@@ -490,7 +518,7 @@ reset_unlock:
memset(uvalue, 0, map->value_size);
memset(kvalue, 0, map->value_size);
unlock:
- kfree(tprogs);
+ kfree(tlinks);
mutex_unlock(&st_map->lock);
return err;
}
@@ -545,9 +573,9 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
{
struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
- if (st_map->progs)
+ if (st_map->links)
bpf_struct_ops_map_put_progs(st_map);
- bpf_map_area_free(st_map->progs);
+ bpf_map_area_free(st_map->links);
bpf_jit_free_exec(st_map->image);
bpf_map_area_free(st_map->uvalue);
bpf_map_area_free(st_map);
@@ -596,11 +624,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
map = &st_map->map;
st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
- st_map->progs =
- bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_prog *),
+ st_map->links =
+ bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_links *),
NUMA_NO_NODE);
st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
- if (!st_map->uvalue || !st_map->progs || !st_map->image) {
+ if (!st_map->uvalue || !st_map->links || !st_map->image) {
bpf_struct_ops_map_free(map);
return ERR_PTR(-ENOMEM);
}
@@ -612,7 +640,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
return map;
}
-static int bpf_struct_ops_map_btf_id;
+BTF_ID_LIST_SINGLE(bpf_struct_ops_map_btf_ids, struct, bpf_struct_ops_map)
const struct bpf_map_ops bpf_struct_ops_map_ops = {
.map_alloc_check = bpf_struct_ops_map_alloc_check,
.map_alloc = bpf_struct_ops_map_alloc,
@@ -622,8 +650,7 @@ const struct bpf_map_ops bpf_struct_ops_map_ops = {
.map_delete_elem = bpf_struct_ops_map_delete_elem,
.map_update_elem = bpf_struct_ops_map_update_elem,
.map_seq_show_elem = bpf_struct_ops_map_seq_show_elem,
- .map_btf_name = "bpf_struct_ops_map",
- .map_btf_id = &bpf_struct_ops_map_btf_id,
+ .map_btf_id = &bpf_struct_ops_map_btf_ids[0],
};
/* "const void *" because some subsystem is
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 5da7bed0f5f6..e9014dc62682 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -102,7 +102,7 @@ void bpf_task_storage_free(struct task_struct *task)
*/
bpf_selem_unlink_map(selem);
free_task_storage = bpf_selem_unlink_storage_nolock(
- local_storage, selem, false);
+ local_storage, selem, false, false);
}
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
bpf_task_storage_unlock();
@@ -174,7 +174,8 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
bpf_task_storage_lock();
sdata = bpf_local_storage_update(
- task, (struct bpf_local_storage_map *)map, value, map_flags);
+ task, (struct bpf_local_storage_map *)map, value, map_flags,
+ GFP_ATOMIC);
bpf_task_storage_unlock();
err = PTR_ERR_OR_ZERO(sdata);
@@ -191,7 +192,7 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata));
+ bpf_selem_unlink(SELEM(sdata), true);
return 0;
}
@@ -226,8 +227,9 @@ out:
return err;
}
-BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
- task, void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+ task, void *, value, u64, flags, gfp_t, gfp_flags)
{
struct bpf_local_storage_data *sdata;
@@ -250,7 +252,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value,
- BPF_NOEXIST);
+ BPF_NOEXIST, gfp_flags);
unlock:
bpf_task_storage_unlock();
@@ -305,7 +307,7 @@ static void task_storage_map_free(struct bpf_map *map)
bpf_local_storage_map_free(smap, &bpf_task_storage_busy);
}
-static int task_storage_map_btf_id;
+BTF_ID_LIST_SINGLE(task_storage_map_btf_ids, struct, bpf_local_storage_map)
const struct bpf_map_ops task_storage_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = bpf_local_storage_map_alloc_check,
@@ -316,8 +318,7 @@ const struct bpf_map_ops task_storage_map_ops = {
.map_update_elem = bpf_pid_task_storage_update_elem,
.map_delete_elem = bpf_pid_task_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
- .map_btf_name = "bpf_local_storage_map",
- .map_btf_id = &task_storage_map_btf_id,
+ .map_btf_id = &task_storage_map_btf_ids[0],
.map_owner_storage_ptr = task_storage_ptr,
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 3e23b3fa79ff..eb12d4f705cc 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Facebook */
#include <uapi/linux/btf.h>
@@ -198,6 +198,29 @@
DEFINE_IDR(btf_idr);
DEFINE_SPINLOCK(btf_idr_lock);
+enum btf_kfunc_hook {
+ BTF_KFUNC_HOOK_XDP,
+ BTF_KFUNC_HOOK_TC,
+ BTF_KFUNC_HOOK_STRUCT_OPS,
+ BTF_KFUNC_HOOK_TRACING,
+ BTF_KFUNC_HOOK_SYSCALL,
+ BTF_KFUNC_HOOK_MAX,
+};
+
+enum {
+ BTF_KFUNC_SET_MAX_CNT = 32,
+ BTF_DTOR_KFUNC_MAX_CNT = 256,
+};
+
+struct btf_kfunc_set_tab {
+ struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX];
+};
+
+struct btf_id_dtor_kfunc_tab {
+ u32 cnt;
+ struct btf_id_dtor_kfunc dtors[];
+};
+
struct btf {
void *data;
struct btf_type **types;
@@ -212,6 +235,8 @@ struct btf {
refcount_t refcnt;
u32 id;
struct rcu_head rcu;
+ struct btf_kfunc_set_tab *kfunc_set_tab;
+ struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab;
/* split BTF support */
struct btf *base_btf;
@@ -403,6 +428,9 @@ static struct btf_type btf_void;
static int btf_resolve(struct btf_verifier_env *env,
const struct btf_type *t, u32 type_id);
+static int btf_func_check(struct btf_verifier_env *env,
+ const struct btf_type *t);
+
static bool btf_type_is_modifier(const struct btf_type *t)
{
/* Some of them is not strictly a C modifier
@@ -506,6 +534,50 @@ s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
return -ENOENT;
}
+static s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p)
+{
+ struct btf *btf;
+ s32 ret;
+ int id;
+
+ btf = bpf_get_btf_vmlinux();
+ if (IS_ERR(btf))
+ return PTR_ERR(btf);
+ if (!btf)
+ return -EINVAL;
+
+ ret = btf_find_by_name_kind(btf, name, kind);
+ /* ret is never zero, since btf_find_by_name_kind returns
+ * positive btf_id or negative error.
+ */
+ if (ret > 0) {
+ btf_get(btf);
+ *btf_p = btf;
+ return ret;
+ }
+
+ /* If name is not found in vmlinux's BTF then search in module's BTFs */
+ spin_lock_bh(&btf_idr_lock);
+ idr_for_each_entry(&btf_idr, btf, id) {
+ if (!btf_is_module(btf))
+ continue;
+ /* linear search could be slow hence unlock/lock
+ * the IDR to avoiding holding it for too long
+ */
+ btf_get(btf);
+ spin_unlock_bh(&btf_idr_lock);
+ ret = btf_find_by_name_kind(btf, name, kind);
+ if (ret > 0) {
+ *btf_p = btf;
+ return ret;
+ }
+ spin_lock_bh(&btf_idr_lock);
+ btf_put(btf);
+ }
+ spin_unlock_bh(&btf_idr_lock);
+ return ret;
+}
+
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
u32 id, u32 *res_id)
{
@@ -579,6 +651,7 @@ static bool btf_type_needs_resolve(const struct btf_type *t)
btf_type_is_struct(t) ||
btf_type_is_array(t) ||
btf_type_is_var(t) ||
+ btf_type_is_func(t) ||
btf_type_is_decl_tag(t) ||
btf_type_is_datasec(t);
}
@@ -1531,8 +1604,41 @@ static void btf_free_id(struct btf *btf)
spin_unlock_irqrestore(&btf_idr_lock, flags);
}
+static void btf_free_kfunc_set_tab(struct btf *btf)
+{
+ struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab;
+ int hook, type;
+
+ if (!tab)
+ return;
+ /* For module BTF, we directly assign the sets being registered, so
+ * there is nothing to free except kfunc_set_tab.
+ */
+ if (btf_is_module(btf))
+ goto free_tab;
+ for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) {
+ for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++)
+ kfree(tab->sets[hook][type]);
+ }
+free_tab:
+ kfree(tab);
+ btf->kfunc_set_tab = NULL;
+}
+
+static void btf_free_dtor_kfunc_tab(struct btf *btf)
+{
+ struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab;
+
+ if (!tab)
+ return;
+ kfree(tab);
+ btf->dtor_kfunc_tab = NULL;
+}
+
static void btf_free(struct btf *btf)
{
+ btf_free_dtor_kfunc_tab(btf);
+ btf_free_kfunc_set_tab(btf);
kvfree(btf->types);
kvfree(btf->resolved_sizes);
kvfree(btf->resolved_ids);
@@ -2505,7 +2611,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
*
* We now need to continue from the last-resolved-ptr to
* ensure the last-resolved-ptr will not referring back to
- * the currenct ptr (t).
+ * the current ptr (t).
*/
if (btf_type_is_modifier(next_type)) {
const struct btf_type *resolved_type;
@@ -3077,24 +3183,86 @@ static void btf_struct_log(struct btf_verifier_env *env,
btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
}
+enum btf_field_type {
+ BTF_FIELD_SPIN_LOCK,
+ BTF_FIELD_TIMER,
+ BTF_FIELD_KPTR,
+};
+
+enum {
+ BTF_FIELD_IGNORE = 0,
+ BTF_FIELD_FOUND = 1,
+};
+
+struct btf_field_info {
+ u32 type_id;
+ u32 off;
+ enum bpf_kptr_type type;
+};
+
+static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
+ u32 off, int sz, struct btf_field_info *info)
+{
+ if (!__btf_type_is_struct(t))
+ return BTF_FIELD_IGNORE;
+ if (t->size != sz)
+ return BTF_FIELD_IGNORE;
+ info->off = off;
+ return BTF_FIELD_FOUND;
+}
+
+static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
+ u32 off, int sz, struct btf_field_info *info)
+{
+ enum bpf_kptr_type type;
+ u32 res_id;
+
+ /* For PTR, sz is always == 8 */
+ if (!btf_type_is_ptr(t))
+ return BTF_FIELD_IGNORE;
+ t = btf_type_by_id(btf, t->type);
+
+ if (!btf_type_is_type_tag(t))
+ return BTF_FIELD_IGNORE;
+ /* Reject extra tags */
+ if (btf_type_is_type_tag(btf_type_by_id(btf, t->type)))
+ return -EINVAL;
+ if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
+ type = BPF_KPTR_UNREF;
+ else if (!strcmp("kptr_ref", __btf_name_by_offset(btf, t->name_off)))
+ type = BPF_KPTR_REF;
+ else
+ return -EINVAL;
+
+ /* Get the base type */
+ t = btf_type_skip_modifiers(btf, t->type, &res_id);
+ /* Only pointer to struct is allowed */
+ if (!__btf_type_is_struct(t))
+ return -EINVAL;
+
+ info->type_id = res_id;
+ info->off = off;
+ info->type = type;
+ return BTF_FIELD_FOUND;
+}
+
static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t,
- const char *name, int sz, int align)
+ const char *name, int sz, int align,
+ enum btf_field_type field_type,
+ struct btf_field_info *info, int info_cnt)
{
const struct btf_member *member;
- u32 i, off = -ENOENT;
+ struct btf_field_info tmp;
+ int ret, idx = 0;
+ u32 i, off;
for_each_member(i, t, member) {
const struct btf_type *member_type = btf_type_by_id(btf,
member->type);
- if (!__btf_type_is_struct(member_type))
- continue;
- if (member_type->size != sz)
- continue;
- if (strcmp(__btf_name_by_offset(btf, member_type->name_off), name))
+
+ if (name && strcmp(__btf_name_by_offset(btf, member_type->name_off), name))
continue;
- if (off != -ENOENT)
- /* only one such field is allowed */
- return -E2BIG;
+
off = __btf_member_bit_offset(t, member);
if (off % 8)
/* valid C code cannot generate such BTF */
@@ -3102,46 +3270,115 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t
off /= 8;
if (off % align)
return -EINVAL;
+
+ switch (field_type) {
+ case BTF_FIELD_SPIN_LOCK:
+ case BTF_FIELD_TIMER:
+ ret = btf_find_struct(btf, member_type, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp);
+ if (ret < 0)
+ return ret;
+ break;
+ case BTF_FIELD_KPTR:
+ ret = btf_find_kptr(btf, member_type, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp);
+ if (ret < 0)
+ return ret;
+ break;
+ default:
+ return -EFAULT;
+ }
+
+ if (ret == BTF_FIELD_IGNORE)
+ continue;
+ if (idx >= info_cnt)
+ return -E2BIG;
+ ++idx;
}
- return off;
+ return idx;
}
static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
- const char *name, int sz, int align)
+ const char *name, int sz, int align,
+ enum btf_field_type field_type,
+ struct btf_field_info *info, int info_cnt)
{
const struct btf_var_secinfo *vsi;
- u32 i, off = -ENOENT;
+ struct btf_field_info tmp;
+ int ret, idx = 0;
+ u32 i, off;
for_each_vsi(i, t, vsi) {
const struct btf_type *var = btf_type_by_id(btf, vsi->type);
const struct btf_type *var_type = btf_type_by_id(btf, var->type);
- if (!__btf_type_is_struct(var_type))
- continue;
- if (var_type->size != sz)
+ off = vsi->offset;
+
+ if (name && strcmp(__btf_name_by_offset(btf, var_type->name_off), name))
continue;
if (vsi->size != sz)
continue;
- if (strcmp(__btf_name_by_offset(btf, var_type->name_off), name))
- continue;
- if (off != -ENOENT)
- /* only one such field is allowed */
- return -E2BIG;
- off = vsi->offset;
if (off % align)
return -EINVAL;
+
+ switch (field_type) {
+ case BTF_FIELD_SPIN_LOCK:
+ case BTF_FIELD_TIMER:
+ ret = btf_find_struct(btf, var_type, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp);
+ if (ret < 0)
+ return ret;
+ break;
+ case BTF_FIELD_KPTR:
+ ret = btf_find_kptr(btf, var_type, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp);
+ if (ret < 0)
+ return ret;
+ break;
+ default:
+ return -EFAULT;
+ }
+
+ if (ret == BTF_FIELD_IGNORE)
+ continue;
+ if (idx >= info_cnt)
+ return -E2BIG;
+ ++idx;
}
- return off;
+ return idx;
}
static int btf_find_field(const struct btf *btf, const struct btf_type *t,
- const char *name, int sz, int align)
+ enum btf_field_type field_type,
+ struct btf_field_info *info, int info_cnt)
{
+ const char *name;
+ int sz, align;
+
+ switch (field_type) {
+ case BTF_FIELD_SPIN_LOCK:
+ name = "bpf_spin_lock";
+ sz = sizeof(struct bpf_spin_lock);
+ align = __alignof__(struct bpf_spin_lock);
+ break;
+ case BTF_FIELD_TIMER:
+ name = "bpf_timer";
+ sz = sizeof(struct bpf_timer);
+ align = __alignof__(struct bpf_timer);
+ break;
+ case BTF_FIELD_KPTR:
+ name = NULL;
+ sz = sizeof(u64);
+ align = 8;
+ break;
+ default:
+ return -EFAULT;
+ }
if (__btf_type_is_struct(t))
- return btf_find_struct_field(btf, t, name, sz, align);
+ return btf_find_struct_field(btf, t, name, sz, align, field_type, info, info_cnt);
else if (btf_type_is_datasec(t))
- return btf_find_datasec_var(btf, t, name, sz, align);
+ return btf_find_datasec_var(btf, t, name, sz, align, field_type, info, info_cnt);
return -EINVAL;
}
@@ -3151,16 +3388,130 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
*/
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
{
- return btf_find_field(btf, t, "bpf_spin_lock",
- sizeof(struct bpf_spin_lock),
- __alignof__(struct bpf_spin_lock));
+ struct btf_field_info info;
+ int ret;
+
+ ret = btf_find_field(btf, t, BTF_FIELD_SPIN_LOCK, &info, 1);
+ if (ret < 0)
+ return ret;
+ if (!ret)
+ return -ENOENT;
+ return info.off;
}
int btf_find_timer(const struct btf *btf, const struct btf_type *t)
{
- return btf_find_field(btf, t, "bpf_timer",
- sizeof(struct bpf_timer),
- __alignof__(struct bpf_timer));
+ struct btf_field_info info;
+ int ret;
+
+ ret = btf_find_field(btf, t, BTF_FIELD_TIMER, &info, 1);
+ if (ret < 0)
+ return ret;
+ if (!ret)
+ return -ENOENT;
+ return info.off;
+}
+
+struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
+ const struct btf_type *t)
+{
+ struct btf_field_info info_arr[BPF_MAP_VALUE_OFF_MAX];
+ struct bpf_map_value_off *tab;
+ struct btf *kernel_btf = NULL;
+ struct module *mod = NULL;
+ int ret, i, nr_off;
+
+ ret = btf_find_field(btf, t, BTF_FIELD_KPTR, info_arr, ARRAY_SIZE(info_arr));
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (!ret)
+ return NULL;
+
+ nr_off = ret;
+ tab = kzalloc(offsetof(struct bpf_map_value_off, off[nr_off]), GFP_KERNEL | __GFP_NOWARN);
+ if (!tab)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < nr_off; i++) {
+ const struct btf_type *t;
+ s32 id;
+
+ /* Find type in map BTF, and use it to look up the matching type
+ * in vmlinux or module BTFs, by name and kind.
+ */
+ t = btf_type_by_id(btf, info_arr[i].type_id);
+ id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info),
+ &kernel_btf);
+ if (id < 0) {
+ ret = id;
+ goto end;
+ }
+
+ /* Find and stash the function pointer for the destruction function that
+ * needs to be eventually invoked from the map free path.
+ */
+ if (info_arr[i].type == BPF_KPTR_REF) {
+ const struct btf_type *dtor_func;
+ const char *dtor_func_name;
+ unsigned long addr;
+ s32 dtor_btf_id;
+
+ /* This call also serves as a whitelist of allowed objects that
+ * can be used as a referenced pointer and be stored in a map at
+ * the same time.
+ */
+ dtor_btf_id = btf_find_dtor_kfunc(kernel_btf, id);
+ if (dtor_btf_id < 0) {
+ ret = dtor_btf_id;
+ goto end_btf;
+ }
+
+ dtor_func = btf_type_by_id(kernel_btf, dtor_btf_id);
+ if (!dtor_func) {
+ ret = -ENOENT;
+ goto end_btf;
+ }
+
+ if (btf_is_module(kernel_btf)) {
+ mod = btf_try_get_module(kernel_btf);
+ if (!mod) {
+ ret = -ENXIO;
+ goto end_btf;
+ }
+ }
+
+ /* We already verified dtor_func to be btf_type_is_func
+ * in register_btf_id_dtor_kfuncs.
+ */
+ dtor_func_name = __btf_name_by_offset(kernel_btf, dtor_func->name_off);
+ addr = kallsyms_lookup_name(dtor_func_name);
+ if (!addr) {
+ ret = -EINVAL;
+ goto end_mod;
+ }
+ tab->off[i].kptr.dtor = (void *)addr;
+ }
+
+ tab->off[i].offset = info_arr[i].off;
+ tab->off[i].type = info_arr[i].type;
+ tab->off[i].kptr.btf_id = id;
+ tab->off[i].kptr.btf = kernel_btf;
+ tab->off[i].kptr.module = mod;
+ }
+ tab->nr_off = nr_off;
+ return tab;
+end_mod:
+ module_put(mod);
+end_btf:
+ btf_put(kernel_btf);
+end:
+ while (i--) {
+ btf_put(tab->off[i].kptr.btf);
+ if (tab->off[i].kptr.module)
+ module_put(tab->off[i].kptr.module);
+ }
+ kfree(tab);
+ return ERR_PTR(ret);
}
static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
@@ -3533,9 +3884,24 @@ static s32 btf_func_check_meta(struct btf_verifier_env *env,
return 0;
}
+static int btf_func_resolve(struct btf_verifier_env *env,
+ const struct resolve_vertex *v)
+{
+ const struct btf_type *t = v->t;
+ u32 next_type_id = t->type;
+ int err;
+
+ err = btf_func_check(env, t);
+ if (err)
+ return err;
+
+ env_stack_pop_resolved(env, next_type_id, 0);
+ return 0;
+}
+
static struct btf_kind_operations func_ops = {
.check_meta = btf_func_check_meta,
- .resolve = btf_df_resolve,
+ .resolve = btf_func_resolve,
.check_member = btf_df_check_member,
.check_kflag_member = btf_df_check_kflag_member,
.log_details = btf_ref_type_log,
@@ -4156,7 +4522,7 @@ static bool btf_resolve_valid(struct btf_verifier_env *env,
return !btf_resolved_type_id(btf, type_id) &&
!btf_resolved_type_size(btf, type_id);
- if (btf_type_is_decl_tag(t))
+ if (btf_type_is_decl_tag(t) || btf_type_is_func(t))
return btf_resolved_type_id(btf, type_id) &&
!btf_resolved_type_size(btf, type_id);
@@ -4246,12 +4612,6 @@ static int btf_check_all_types(struct btf_verifier_env *env)
if (err)
return err;
}
-
- if (btf_type_is_func(t)) {
- err = btf_func_check(env, t);
- if (err)
- return err;
- }
}
return 0;
@@ -4387,8 +4747,7 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
btf = env->btf;
btf_data_size = btf->data_size;
- if (btf_data_size <
- offsetof(struct btf_header, hdr_len) + sizeof(hdr->hdr_len)) {
+ if (btf_data_size < offsetofend(struct btf_header, hdr_len)) {
btf_verifier_log(env, "hdr_len not found");
return -EINVAL;
}
@@ -4447,6 +4806,53 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
return 0;
}
+static int btf_check_type_tags(struct btf_verifier_env *env,
+ struct btf *btf, int start_id)
+{
+ int i, n, good_id = start_id - 1;
+ bool in_tags;
+
+ n = btf_nr_types(btf);
+ for (i = start_id; i < n; i++) {
+ const struct btf_type *t;
+ int chain_limit = 32;
+ u32 cur_id = i;
+
+ t = btf_type_by_id(btf, i);
+ if (!t)
+ return -EINVAL;
+ if (!btf_type_is_modifier(t))
+ continue;
+
+ cond_resched();
+
+ in_tags = btf_type_is_type_tag(t);
+ while (btf_type_is_modifier(t)) {
+ if (!chain_limit--) {
+ btf_verifier_log(env, "Max chain length or cycle detected");
+ return -ELOOP;
+ }
+ if (btf_type_is_type_tag(t)) {
+ if (!in_tags) {
+ btf_verifier_log(env, "Type tags don't precede modifiers");
+ return -EINVAL;
+ }
+ } else if (in_tags) {
+ in_tags = false;
+ }
+ if (cur_id <= good_id)
+ break;
+ /* Move to next type */
+ cur_id = t->type;
+ t = btf_type_by_id(btf, cur_id);
+ if (!t)
+ return -EINVAL;
+ }
+ good_id = i;
+ }
+ return 0;
+}
+
static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size,
u32 log_level, char __user *log_ubuf, u32 log_size)
{
@@ -4514,6 +4920,10 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size,
if (err)
goto errout;
+ err = btf_check_type_tags(env, btf, 1);
+ if (err)
+ goto errout;
+
if (log->level && bpf_verifier_log_full(log)) {
err = -ENOSPC;
goto errout;
@@ -4622,41 +5032,6 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
return ctx_type;
}
-static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = {
-#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
-#define BPF_LINK_TYPE(_id, _name)
-#define BPF_MAP_TYPE(_id, _ops) \
- [_id] = &_ops,
-#include <linux/bpf_types.h>
-#undef BPF_PROG_TYPE
-#undef BPF_LINK_TYPE
-#undef BPF_MAP_TYPE
-};
-
-static int btf_vmlinux_map_ids_init(const struct btf *btf,
- struct bpf_verifier_log *log)
-{
- const struct bpf_map_ops *ops;
- int i, btf_id;
-
- for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) {
- ops = btf_vmlinux_map_ops[i];
- if (!ops || (!ops->map_btf_name && !ops->map_btf_id))
- continue;
- if (!ops->map_btf_name || !ops->map_btf_id) {
- bpf_log(log, "map type %d is misconfigured\n", i);
- return -EINVAL;
- }
- btf_id = btf_find_by_name_kind(btf, ops->map_btf_name,
- BTF_KIND_STRUCT);
- if (btf_id < 0)
- return btf_id;
- *ops->map_btf_id = btf_id;
- }
-
- return 0;
-}
-
static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
struct btf *btf,
const struct btf_type *t,
@@ -4715,14 +5090,13 @@ struct btf *btf_parse_vmlinux(void)
if (err)
goto errout;
+ err = btf_check_type_tags(env, btf, 1);
+ if (err)
+ goto errout;
+
/* btf_parse_vmlinux() runs under bpf_verifier_lock */
bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]);
- /* find bpf map structs for map_ptr access checking */
- err = btf_vmlinux_map_ids_init(btf, log);
- if (err < 0)
- goto errout;
-
bpf_struct_ops_init(btf, log);
refcount_set(&btf->refcnt, 1);
@@ -4800,6 +5174,10 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, u
if (err)
goto errout;
+ err = btf_check_type_tags(env, btf, btf_nr_types(base_btf));
+ if (err)
+ goto errout;
+
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
return btf;
@@ -4848,6 +5226,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const char *tname = prog->aux->attach_func_name;
struct bpf_verifier_log *log = info->log;
const struct btf_param *args;
+ const char *tag_value;
u32 nr_args, arg;
int i, ret;
@@ -5000,6 +5379,15 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
info->btf = btf;
info->btf_id = t->type;
t = btf_type_by_id(btf, t->type);
+
+ if (btf_type_is_type_tag(t)) {
+ tag_value = __btf_name_by_offset(btf, t->name_off);
+ if (strcmp(tag_value, "user") == 0)
+ info->reg_type |= MEM_USER;
+ if (strcmp(tag_value, "percpu") == 0)
+ info->reg_type |= MEM_PERCPU;
+ }
+
/* skip modifiers */
while (btf_type_is_modifier(t)) {
info->btf_id = t->type;
@@ -5026,12 +5414,12 @@ enum bpf_struct_walk_result {
static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, int off, int size,
- u32 *next_btf_id)
+ u32 *next_btf_id, enum bpf_type_flag *flag)
{
u32 i, moff, mtrue_end, msize = 0, total_nelems = 0;
const struct btf_type *mtype, *elem_type = NULL;
const struct btf_member *member;
- const char *tname, *mname;
+ const char *tname, *mname, *tag_value;
u32 vlen, elem_id, mid;
again:
@@ -5215,7 +5603,8 @@ error:
}
if (btf_type_is_ptr(mtype)) {
- const struct btf_type *stype;
+ const struct btf_type *stype, *t;
+ enum bpf_type_flag tmp_flag = 0;
u32 id;
if (msize != size || off != moff) {
@@ -5224,9 +5613,23 @@ error:
mname, moff, tname, off, size);
return -EACCES;
}
+
+ /* check type tag */
+ t = btf_type_by_id(btf, mtype->type);
+ if (btf_type_is_type_tag(t)) {
+ tag_value = __btf_name_by_offset(btf, t->name_off);
+ /* check __user tag */
+ if (strcmp(tag_value, "user") == 0)
+ tmp_flag = MEM_USER;
+ /* check __percpu tag */
+ if (strcmp(tag_value, "percpu") == 0)
+ tmp_flag = MEM_PERCPU;
+ }
+
stype = btf_type_skip_modifiers(btf, mtype->type, &id);
if (btf_type_is_struct(stype)) {
*next_btf_id = id;
+ *flag = tmp_flag;
return WALK_PTR;
}
}
@@ -5253,13 +5656,14 @@ error:
int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, int off, int size,
enum bpf_access_type atype __maybe_unused,
- u32 *next_btf_id)
+ u32 *next_btf_id, enum bpf_type_flag *flag)
{
+ enum bpf_type_flag tmp_flag = 0;
int err;
u32 id;
do {
- err = btf_struct_walk(log, btf, t, off, size, &id);
+ err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag);
switch (err) {
case WALK_PTR:
@@ -5267,6 +5671,7 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
* we're done.
*/
*next_btf_id = id;
+ *flag = tmp_flag;
return PTR_TO_BTF_ID;
case WALK_SCALAR:
return SCALAR_VALUE;
@@ -5308,20 +5713,27 @@ static bool btf_types_are_same(const struct btf *btf1, u32 id1,
bool btf_struct_ids_match(struct bpf_verifier_log *log,
const struct btf *btf, u32 id, int off,
- const struct btf *need_btf, u32 need_type_id)
+ const struct btf *need_btf, u32 need_type_id,
+ bool strict)
{
const struct btf_type *type;
+ enum bpf_type_flag flag;
int err;
/* Are we already done? */
if (off == 0 && btf_types_are_same(btf, id, need_btf, need_type_id))
return true;
-
+ /* In case of strict type match, we do not walk struct, the top level
+ * type match must succeed. When strict is true, off should have already
+ * been 0.
+ */
+ if (strict)
+ return false;
again:
type = btf_type_by_id(btf, id);
if (!type)
return false;
- err = btf_struct_walk(log, btf, type, off, 1, &id);
+ err = btf_struct_walk(log, btf, type, off, 1, &id, &flag);
if (err != WALK_STRUCT)
return false;
@@ -5385,7 +5797,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
}
args = (const struct btf_param *)(func + 1);
nargs = btf_type_vlen(func);
- if (nargs >= MAX_BPF_FUNC_ARGS) {
+ if (nargs > MAX_BPF_FUNC_ARGS) {
bpf_log(log,
"The function %s has %d arguments. Too many.\n",
tname, nargs);
@@ -5616,17 +6028,46 @@ static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
return true;
}
+static bool is_kfunc_arg_mem_size(const struct btf *btf,
+ const struct btf_param *arg,
+ const struct bpf_reg_state *reg)
+{
+ int len, sfx_len = sizeof("__sz") - 1;
+ const struct btf_type *t;
+ const char *param_name;
+
+ t = btf_type_skip_modifiers(btf, arg->type, NULL);
+ if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
+ return false;
+
+ /* In the future, this can be ported to use BTF tagging */
+ param_name = btf_name_by_offset(btf, arg->name_off);
+ if (str_is_empty(param_name))
+ return false;
+ len = strlen(param_name);
+ if (len < sfx_len)
+ return false;
+ param_name += len - sfx_len;
+ if (strncmp(param_name, "__sz", sfx_len))
+ return false;
+
+ return true;
+}
+
static int btf_check_func_arg_match(struct bpf_verifier_env *env,
const struct btf *btf, u32 func_id,
struct bpf_reg_state *regs,
bool ptr_to_mem_ok)
{
+ enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
struct bpf_verifier_log *log = &env->log;
+ u32 i, nargs, ref_id, ref_obj_id = 0;
bool is_kfunc = btf_is_kernel(btf);
+ bool rel = false, kptr_get = false;
const char *func_name, *ref_tname;
const struct btf_type *t, *ref_t;
const struct btf_param *args;
- u32 i, nargs, ref_id;
+ int ref_regno = 0, ret;
t = btf_type_by_id(btf, func_id);
if (!t || !btf_type_is_func(t)) {
@@ -5652,10 +6093,19 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
return -EINVAL;
}
+ if (is_kfunc) {
+ /* Only kfunc can be release func */
+ rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_RELEASE, func_id);
+ kptr_get = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_KPTR_ACQUIRE, func_id);
+ }
+
/* check that BTF function arguments match actual types that the
* verifier sees.
*/
for (i = 0; i < nargs; i++) {
+ enum bpf_arg_type arg_type = ARG_DONTCARE;
u32 regno = i + 1;
struct bpf_reg_state *reg = &regs[regno];
@@ -5675,8 +6125,59 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
ref_tname = btf_name_by_offset(btf, ref_t->name_off);
- if (btf_get_prog_ctx_type(log, btf, t,
- env->prog->type, i)) {
+
+ if (rel && reg->ref_obj_id)
+ arg_type |= OBJ_RELEASE;
+ ret = check_func_arg_reg_off(env, reg, regno, arg_type);
+ if (ret < 0)
+ return ret;
+
+ /* kptr_get is only true for kfunc */
+ if (i == 0 && kptr_get) {
+ struct bpf_map_value_off_desc *off_desc;
+
+ if (reg->type != PTR_TO_MAP_VALUE) {
+ bpf_log(log, "arg#0 expected pointer to map value\n");
+ return -EINVAL;
+ }
+
+ /* check_func_arg_reg_off allows var_off for
+ * PTR_TO_MAP_VALUE, but we need fixed offset to find
+ * off_desc.
+ */
+ if (!tnum_is_const(reg->var_off)) {
+ bpf_log(log, "arg#0 must have constant offset\n");
+ return -EINVAL;
+ }
+
+ off_desc = bpf_map_kptr_off_contains(reg->map_ptr, reg->off + reg->var_off.value);
+ if (!off_desc || off_desc->type != BPF_KPTR_REF) {
+ bpf_log(log, "arg#0 no referenced kptr at map value offset=%llu\n",
+ reg->off + reg->var_off.value);
+ return -EINVAL;
+ }
+
+ if (!btf_type_is_ptr(ref_t)) {
+ bpf_log(log, "arg#0 BTF type must be a double pointer\n");
+ return -EINVAL;
+ }
+
+ ref_t = btf_type_skip_modifiers(btf, ref_t->type, &ref_id);
+ ref_tname = btf_name_by_offset(btf, ref_t->name_off);
+
+ if (!btf_type_is_struct(ref_t)) {
+ bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n",
+ func_name, i, btf_type_str(ref_t), ref_tname);
+ return -EINVAL;
+ }
+ if (!btf_struct_ids_match(log, btf, ref_id, 0, off_desc->kptr.btf,
+ off_desc->kptr.btf_id, true)) {
+ bpf_log(log, "kernel function %s args#%d expected pointer to %s %s\n",
+ func_name, i, btf_type_str(ref_t), ref_tname);
+ return -EINVAL;
+ }
+ /* rest of the arguments can be anything, like normal kfunc */
+ } else if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) {
/* If function expects ctx type in BTF check that caller
* is passing PTR_TO_CTX.
*/
@@ -5686,8 +6187,6 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
i, btf_type_str(t));
return -EINVAL;
}
- if (check_ptr_off_reg(env, reg, regno))
- return -EINVAL;
} else if (is_kfunc && (reg->type == PTR_TO_BTF_ID ||
(reg2btf_ids[base_type(reg->type)] && !type_flag(reg->type)))) {
const struct btf_type *reg_ref_t;
@@ -5705,6 +6204,16 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
if (reg->type == PTR_TO_BTF_ID) {
reg_btf = reg->btf;
reg_ref_id = reg->btf_id;
+ /* Ensure only one argument is referenced PTR_TO_BTF_ID */
+ if (reg->ref_obj_id) {
+ if (ref_obj_id) {
+ bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+ regno, reg->ref_obj_id, ref_obj_id);
+ return -EFAULT;
+ }
+ ref_regno = regno;
+ ref_obj_id = reg->ref_obj_id;
+ }
} else {
reg_btf = btf_vmlinux;
reg_ref_id = *reg2btf_ids[base_type(reg->type)];
@@ -5715,7 +6224,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
reg_ref_tname = btf_name_by_offset(reg_btf,
reg_ref_t->name_off);
if (!btf_struct_ids_match(log, reg_btf, reg_ref_id,
- reg->off, btf, ref_id)) {
+ reg->off, btf, ref_id, rel && reg->ref_obj_id)) {
bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
func_name, i,
btf_type_str(ref_t), ref_tname,
@@ -5728,17 +6237,33 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
u32 type_size;
if (is_kfunc) {
+ bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]);
+
/* Permit pointer to mem, but only when argument
* type is pointer to scalar, or struct composed
* (recursively) of scalars.
+ * When arg_mem_size is true, the pointer can be
+ * void *.
*/
if (!btf_type_is_scalar(ref_t) &&
- !__btf_type_is_scalar_struct(log, btf, ref_t, 0)) {
+ !__btf_type_is_scalar_struct(log, btf, ref_t, 0) &&
+ (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
bpf_log(log,
- "arg#%d pointer type %s %s must point to scalar or struct with scalar\n",
- i, btf_type_str(ref_t), ref_tname);
+ "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
+ i, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
return -EINVAL;
}
+
+ /* Check for mem, len pair */
+ if (arg_mem_size) {
+ if (check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1)) {
+ bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n",
+ i, i + 1);
+ return -EINVAL;
+ }
+ i++;
+ continue;
+ }
}
resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
@@ -5759,7 +6284,20 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
}
}
- return 0;
+ /* Either both are set, or neither */
+ WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno));
+ /* We already made sure ref_obj_id is set only for one argument. We do
+ * allow (!rel && ref_obj_id), so that passing such referenced
+ * PTR_TO_BTF_ID to other kfuncs works. Note that rel is only true when
+ * is_kfunc is true.
+ */
+ if (rel && !ref_obj_id) {
+ bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
+ func_name);
+ return -EINVAL;
+ }
+ /* returns argument register number > 0 in case of reference release kfunc */
+ return rel ? ref_regno : 0;
}
/* Compare BTF of a function with given bpf_reg_state.
@@ -6005,7 +6543,7 @@ int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj,
btf_type_show(btf, type_id, obj, (struct btf_show *)&ssnprintf);
- /* If we encontered an error, return it. */
+ /* If we encountered an error, return it. */
if (ssnprintf.show.state.status)
return ssnprintf.show.state.status;
@@ -6201,12 +6739,17 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
}
+enum {
+ BTF_MODULE_F_LIVE = (1 << 0),
+};
+
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
struct btf_module {
struct list_head list;
struct module *module;
struct btf *btf;
struct bin_attribute *sysfs_attr;
+ int flags;
};
static LIST_HEAD(btf_modules);
@@ -6234,7 +6777,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
int err = 0;
if (mod->btf_data_size == 0 ||
- (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
+ (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE &&
+ op != MODULE_STATE_GOING))
goto out;
switch (op) {
@@ -6249,7 +6793,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
pr_warn("failed to validate module [%s] BTF: %ld\n",
mod->name, PTR_ERR(btf));
kfree(btf_mod);
- err = PTR_ERR(btf);
+ if (!IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH))
+ err = PTR_ERR(btf);
goto out;
}
err = btf_alloc_id(btf);
@@ -6293,6 +6838,17 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
}
break;
+ case MODULE_STATE_LIVE:
+ mutex_lock(&btf_module_mutex);
+ list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+ if (btf_mod->module != module)
+ continue;
+
+ btf_mod->flags |= BTF_MODULE_F_LIVE;
+ break;
+ }
+ mutex_unlock(&btf_module_mutex);
+ break;
case MODULE_STATE_GOING:
mutex_lock(&btf_module_mutex);
list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
@@ -6339,7 +6895,12 @@ struct module *btf_try_get_module(const struct btf *btf)
if (btf_mod->btf != btf)
continue;
- if (try_module_get(btf_mod->module))
+ /* We must only consider module whose __init routine has
+ * finished, hence we must check for BTF_MODULE_F_LIVE flag,
+ * which is set from the notifier callback for
+ * MODULE_STATE_LIVE.
+ */
+ if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module))
res = btf_mod->module;
break;
@@ -6350,9 +6911,43 @@ struct module *btf_try_get_module(const struct btf *btf)
return res;
}
+/* Returns struct btf corresponding to the struct module.
+ * This function can return NULL or ERR_PTR.
+ */
+static struct btf *btf_get_module_btf(const struct module *module)
+{
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+ struct btf_module *btf_mod, *tmp;
+#endif
+ struct btf *btf = NULL;
+
+ if (!module) {
+ btf = bpf_get_btf_vmlinux();
+ if (!IS_ERR_OR_NULL(btf))
+ btf_get(btf);
+ return btf;
+ }
+
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+ mutex_lock(&btf_module_mutex);
+ list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+ if (btf_mod->module != module)
+ continue;
+
+ btf_get(btf_mod->btf);
+ btf = btf_mod->btf;
+ break;
+ }
+ mutex_unlock(&btf_module_mutex);
+#endif
+
+ return btf;
+}
+
BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags)
{
- struct btf *btf;
+ struct btf *btf = NULL;
+ int btf_obj_fd = 0;
long ret;
if (flags)
@@ -6361,44 +6956,17 @@ BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int
if (name_sz <= 1 || name[name_sz - 1])
return -EINVAL;
- btf = bpf_get_btf_vmlinux();
- if (IS_ERR(btf))
- return PTR_ERR(btf);
-
- ret = btf_find_by_name_kind(btf, name, kind);
- /* ret is never zero, since btf_find_by_name_kind returns
- * positive btf_id or negative error.
- */
- if (ret < 0) {
- struct btf *mod_btf;
- int id;
-
- /* If name is not found in vmlinux's BTF then search in module's BTFs */
- spin_lock_bh(&btf_idr_lock);
- idr_for_each_entry(&btf_idr, mod_btf, id) {
- if (!btf_is_module(mod_btf))
- continue;
- /* linear search could be slow hence unlock/lock
- * the IDR to avoiding holding it for too long
- */
- btf_get(mod_btf);
- spin_unlock_bh(&btf_idr_lock);
- ret = btf_find_by_name_kind(mod_btf, name, kind);
- if (ret > 0) {
- int btf_obj_fd;
-
- btf_obj_fd = __btf_new_fd(mod_btf);
- if (btf_obj_fd < 0) {
- btf_put(mod_btf);
- return btf_obj_fd;
- }
- return ret | (((u64)btf_obj_fd) << 32);
- }
- spin_lock_bh(&btf_idr_lock);
- btf_put(mod_btf);
+ ret = bpf_find_btf_id(name, kind, &btf);
+ if (ret > 0 && btf_is_module(btf)) {
+ btf_obj_fd = __btf_new_fd(btf);
+ if (btf_obj_fd < 0) {
+ btf_put(btf);
+ return btf_obj_fd;
}
- spin_unlock_bh(&btf_idr_lock);
+ return ret | (((u64)btf_obj_fd) << 32);
}
+ if (ret > 0)
+ btf_put(btf);
return ret;
}
@@ -6417,58 +6985,434 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE)
BTF_TRACING_TYPE_xxx
#undef BTF_TRACING_TYPE
-/* BTF ID set registration API for modules */
+/* Kernel Function (kfunc) BTF ID set registration API */
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+ enum btf_kfunc_type type,
+ struct btf_id_set *add_set, bool vmlinux_set)
+{
+ struct btf_kfunc_set_tab *tab;
+ struct btf_id_set *set;
+ u32 set_cnt;
+ int ret;
+
+ if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ if (!add_set->cnt)
+ return 0;
+
+ tab = btf->kfunc_set_tab;
+ if (!tab) {
+ tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN);
+ if (!tab)
+ return -ENOMEM;
+ btf->kfunc_set_tab = tab;
+ }
+
+ set = tab->sets[hook][type];
+ /* Warn when register_btf_kfunc_id_set is called twice for the same hook
+ * for module sets.
+ */
+ if (WARN_ON_ONCE(set && !vmlinux_set)) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ /* We don't need to allocate, concatenate, and sort module sets, because
+ * only one is allowed per hook. Hence, we can directly assign the
+ * pointer and return.
+ */
+ if (!vmlinux_set) {
+ tab->sets[hook][type] = add_set;
+ return 0;
+ }
+
+ /* In case of vmlinux sets, there may be more than one set being
+ * registered per hook. To create a unified set, we allocate a new set
+ * and concatenate all individual sets being registered. While each set
+ * is individually sorted, they may become unsorted when concatenated,
+ * hence re-sorting the final set again is required to make binary
+ * searching the set using btf_id_set_contains function work.
+ */
+ set_cnt = set ? set->cnt : 0;
+
+ if (set_cnt > U32_MAX - add_set->cnt) {
+ ret = -EOVERFLOW;
+ goto end;
+ }
+
+ if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) {
+ ret = -E2BIG;
+ goto end;
+ }
+
+ /* Grow set */
+ set = krealloc(tab->sets[hook][type],
+ offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!set) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ /* For newly allocated set, initialize set->cnt to 0 */
+ if (!tab->sets[hook][type])
+ set->cnt = 0;
+ tab->sets[hook][type] = set;
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s)
+ /* Concatenate the two sets */
+ memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0]));
+ set->cnt += add_set->cnt;
+
+ sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL);
+
+ return 0;
+end:
+ btf_free_kfunc_set_tab(btf);
+ return ret;
+}
+
+static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+ const struct btf_kfunc_id_set *kset)
+{
+ bool vmlinux_set = !btf_is_module(btf);
+ int type, ret = 0;
+
+ for (type = 0; type < ARRAY_SIZE(kset->sets); type++) {
+ if (!kset->sets[type])
+ continue;
+
+ ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
+static bool __btf_kfunc_id_set_contains(const struct btf *btf,
+ enum btf_kfunc_hook hook,
+ enum btf_kfunc_type type,
+ u32 kfunc_btf_id)
{
- mutex_lock(&l->mutex);
- list_add(&s->list, &l->list);
- mutex_unlock(&l->mutex);
+ struct btf_id_set *set;
+
+ if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX)
+ return false;
+ if (!btf->kfunc_set_tab)
+ return false;
+ set = btf->kfunc_set_tab->sets[hook][type];
+ if (!set)
+ return false;
+ return btf_id_set_contains(set, kfunc_btf_id);
+}
+
+static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
+{
+ switch (prog_type) {
+ case BPF_PROG_TYPE_XDP:
+ return BTF_KFUNC_HOOK_XDP;
+ case BPF_PROG_TYPE_SCHED_CLS:
+ return BTF_KFUNC_HOOK_TC;
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ return BTF_KFUNC_HOOK_STRUCT_OPS;
+ case BPF_PROG_TYPE_TRACING:
+ return BTF_KFUNC_HOOK_TRACING;
+ case BPF_PROG_TYPE_SYSCALL:
+ return BTF_KFUNC_HOOK_SYSCALL;
+ default:
+ return BTF_KFUNC_HOOK_MAX;
+ }
}
-EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set);
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
- struct kfunc_btf_id_set *s)
+/* Caution:
+ * Reference to the module (obtained using btf_try_get_module) corresponding to
+ * the struct btf *MUST* be held when calling this function from verifier
+ * context. This is usually true as we stash references in prog's kfunc_btf_tab;
+ * keeping the reference for the duration of the call provides the necessary
+ * protection for looking up a well-formed btf->kfunc_set_tab.
+ */
+bool btf_kfunc_id_set_contains(const struct btf *btf,
+ enum bpf_prog_type prog_type,
+ enum btf_kfunc_type type, u32 kfunc_btf_id)
{
- mutex_lock(&l->mutex);
- list_del_init(&s->list);
- mutex_unlock(&l->mutex);
+ enum btf_kfunc_hook hook;
+
+ hook = bpf_prog_type_to_kfunc_hook(prog_type);
+ return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id);
}
-EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set);
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
- struct module *owner)
+/* This function must be invoked only from initcalls/module init functions */
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+ const struct btf_kfunc_id_set *kset)
{
- struct kfunc_btf_id_set *s;
+ enum btf_kfunc_hook hook;
+ struct btf *btf;
+ int ret;
- mutex_lock(&klist->mutex);
- list_for_each_entry(s, &klist->list, list) {
- if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) {
- mutex_unlock(&klist->mutex);
- return true;
+ btf = btf_get_module_btf(kset->owner);
+ if (!btf) {
+ if (!kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
+ pr_err("missing vmlinux BTF, cannot register kfuncs\n");
+ return -ENOENT;
}
+ if (kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) {
+ pr_err("missing module BTF, cannot register kfuncs\n");
+ return -ENOENT;
+ }
+ return 0;
}
- mutex_unlock(&klist->mutex);
- return false;
+ if (IS_ERR(btf))
+ return PTR_ERR(btf);
+
+ hook = bpf_prog_type_to_kfunc_hook(prog_type);
+ ret = btf_populate_kfunc_set(btf, hook, kset);
+ btf_put(btf);
+ return ret;
}
+EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
-#define DEFINE_KFUNC_BTF_ID_LIST(name) \
- struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \
- __MUTEX_INITIALIZER(name.mutex) }; \
- EXPORT_SYMBOL_GPL(name)
+s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id)
+{
+ struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab;
+ struct btf_id_dtor_kfunc *dtor;
-DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
-DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list);
+ if (!tab)
+ return -ENOENT;
+ /* Even though the size of tab->dtors[0] is > sizeof(u32), we only need
+ * to compare the first u32 with btf_id, so we can reuse btf_id_cmp_func.
+ */
+ BUILD_BUG_ON(offsetof(struct btf_id_dtor_kfunc, btf_id) != 0);
+ dtor = bsearch(&btf_id, tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func);
+ if (!dtor)
+ return -ENOENT;
+ return dtor->kfunc_btf_id;
+}
-#endif
+static int btf_check_dtor_kfuncs(struct btf *btf, const struct btf_id_dtor_kfunc *dtors, u32 cnt)
+{
+ const struct btf_type *dtor_func, *dtor_func_proto, *t;
+ const struct btf_param *args;
+ s32 dtor_btf_id;
+ u32 nr_args, i;
+
+ for (i = 0; i < cnt; i++) {
+ dtor_btf_id = dtors[i].kfunc_btf_id;
+
+ dtor_func = btf_type_by_id(btf, dtor_btf_id);
+ if (!dtor_func || !btf_type_is_func(dtor_func))
+ return -EINVAL;
+
+ dtor_func_proto = btf_type_by_id(btf, dtor_func->type);
+ if (!dtor_func_proto || !btf_type_is_func_proto(dtor_func_proto))
+ return -EINVAL;
+
+ /* Make sure the prototype of the destructor kfunc is 'void func(type *)' */
+ t = btf_type_by_id(btf, dtor_func_proto->type);
+ if (!t || !btf_type_is_void(t))
+ return -EINVAL;
+
+ nr_args = btf_type_vlen(dtor_func_proto);
+ if (nr_args != 1)
+ return -EINVAL;
+ args = btf_params(dtor_func_proto);
+ t = btf_type_by_id(btf, args[0].type);
+ /* Allow any pointer type, as width on targets Linux supports
+ * will be same for all pointer types (i.e. sizeof(void *))
+ */
+ if (!t || !btf_type_is_ptr(t))
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* This function must be invoked only from initcalls/module init functions */
+int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
+ struct module *owner)
+{
+ struct btf_id_dtor_kfunc_tab *tab;
+ struct btf *btf;
+ u32 tab_cnt;
+ int ret;
+ btf = btf_get_module_btf(owner);
+ if (!btf) {
+ if (!owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
+ pr_err("missing vmlinux BTF, cannot register dtor kfuncs\n");
+ return -ENOENT;
+ }
+ if (owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) {
+ pr_err("missing module BTF, cannot register dtor kfuncs\n");
+ return -ENOENT;
+ }
+ return 0;
+ }
+ if (IS_ERR(btf))
+ return PTR_ERR(btf);
+
+ if (add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) {
+ pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT);
+ ret = -E2BIG;
+ goto end;
+ }
+
+ /* Ensure that the prototype of dtor kfuncs being registered is sane */
+ ret = btf_check_dtor_kfuncs(btf, dtors, add_cnt);
+ if (ret < 0)
+ goto end;
+
+ tab = btf->dtor_kfunc_tab;
+ /* Only one call allowed for modules */
+ if (WARN_ON_ONCE(tab && btf_is_module(btf))) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ tab_cnt = tab ? tab->cnt : 0;
+ if (tab_cnt > U32_MAX - add_cnt) {
+ ret = -EOVERFLOW;
+ goto end;
+ }
+ if (tab_cnt + add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) {
+ pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT);
+ ret = -E2BIG;
+ goto end;
+ }
+
+ tab = krealloc(btf->dtor_kfunc_tab,
+ offsetof(struct btf_id_dtor_kfunc_tab, dtors[tab_cnt + add_cnt]),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!tab) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ if (!btf->dtor_kfunc_tab)
+ tab->cnt = 0;
+ btf->dtor_kfunc_tab = tab;
+
+ memcpy(tab->dtors + tab->cnt, dtors, add_cnt * sizeof(tab->dtors[0]));
+ tab->cnt += add_cnt;
+
+ sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL);
+
+ return 0;
+end:
+ btf_free_dtor_kfunc_tab(btf);
+ btf_put(btf);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(register_btf_id_dtor_kfuncs);
+
+#define MAX_TYPES_ARE_COMPAT_DEPTH 2
+
+static
+int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id,
+ int level)
+{
+ const struct btf_type *local_type, *targ_type;
+ int depth = 32; /* max recursion depth */
+
+ /* caller made sure that names match (ignoring flavor suffix) */
+ local_type = btf_type_by_id(local_btf, local_id);
+ targ_type = btf_type_by_id(targ_btf, targ_id);
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+
+recur:
+ depth--;
+ if (depth < 0)
+ return -EINVAL;
+
+ local_type = btf_type_skip_modifiers(local_btf, local_id, &local_id);
+ targ_type = btf_type_skip_modifiers(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+
+ switch (btf_kind(local_type)) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_FWD:
+ return 1;
+ case BTF_KIND_INT:
+ /* just reject deprecated bitfield-like integers; all other
+ * integers are by default compatible between each other
+ */
+ return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
+ case BTF_KIND_PTR:
+ local_id = local_type->type;
+ targ_id = targ_type->type;
+ goto recur;
+ case BTF_KIND_ARRAY:
+ local_id = btf_array(local_type)->type;
+ targ_id = btf_array(targ_type)->type;
+ goto recur;
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *local_p = btf_params(local_type);
+ struct btf_param *targ_p = btf_params(targ_type);
+ __u16 local_vlen = btf_vlen(local_type);
+ __u16 targ_vlen = btf_vlen(targ_type);
+ int i, err;
+
+ if (local_vlen != targ_vlen)
+ return 0;
+
+ for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+ if (level <= 0)
+ return -EINVAL;
+
+ btf_type_skip_modifiers(local_btf, local_p->type, &local_id);
+ btf_type_skip_modifiers(targ_btf, targ_p->type, &targ_id);
+ err = __bpf_core_types_are_compat(local_btf, local_id,
+ targ_btf, targ_id,
+ level - 1);
+ if (err <= 0)
+ return err;
+ }
+
+ /* tail recurse for return type check */
+ btf_type_skip_modifiers(local_btf, local_type->type, &local_id);
+ btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id);
+ goto recur;
+ }
+ default:
+ return 0;
+ }
+}
+
+/* Check local and target types for compatibility. This check is used for
+ * type-based CO-RE relocations and follow slightly different rules than
+ * field-based relocations. This function assumes that root types were already
+ * checked for name match. Beyond that initial root-level name check, names
+ * are completely ignored. Compatibility rules are as follows:
+ * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
+ * kind should match for local and target types (i.e., STRUCT is not
+ * compatible with UNION);
+ * - for ENUMs, the size is ignored;
+ * - for INT, size and signedness are ignored;
+ * - for ARRAY, dimensionality is ignored, element types are checked for
+ * compatibility recursively;
+ * - CONST/VOLATILE/RESTRICT modifiers are ignored;
+ * - TYPEDEFs/PTRs are compatible if types they pointing to are compatible;
+ * - FUNC_PROTOs are compatible if they have compatible signature: same
+ * number of input args and compatible return and argument types.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
const struct btf *targ_btf, __u32 targ_id)
{
- return -EOPNOTSUPP;
+ return __bpf_core_types_are_compat(local_btf, local_id,
+ targ_btf, targ_id,
+ MAX_TYPES_ARE_COMPAT_DEPTH);
}
static bool bpf_core_is_flavor_sep(const char *s)
@@ -6711,6 +7655,8 @@ bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id)
main_btf = bpf_get_btf_vmlinux();
if (IS_ERR(main_btf))
return ERR_CAST(main_btf);
+ if (!main_btf)
+ return ERR_PTR(-EINVAL);
local_type = btf_type_by_id(local_btf, local_type_id);
if (!local_type)
@@ -6789,6 +7735,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
{
bool need_cands = relo->kind != BPF_CORE_TYPE_ID_LOCAL;
struct bpf_core_cand_list cands = {};
+ struct bpf_core_relo_res targ_res;
struct bpf_core_spec *specs;
int err;
@@ -6828,13 +7775,19 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
cands.len = cc->cnt;
/* cand_cache_mutex needs to span the cache lookup and
* copy of btf pointer into bpf_core_cand_list,
- * since module can be unloaded while bpf_core_apply_relo_insn
+ * since module can be unloaded while bpf_core_calc_relo_insn
* is working with module's btf.
*/
}
- err = bpf_core_apply_relo_insn((void *)ctx->log, insn, relo->insn_off / 8,
- relo, relo_idx, ctx->btf, &cands, specs);
+ err = bpf_core_calc_relo_insn((void *)ctx->log, relo, relo_idx, ctx->btf, &cands, specs,
+ &targ_res);
+ if (err)
+ goto out;
+
+ err = bpf_core_patch_insn((void *)ctx->log, insn, relo->insn_off / 8, relo, relo_idx,
+ &targ_res);
+
out:
kfree(specs);
if (need_cands) {
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 514b4681a90a..afb414b26d01 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -22,6 +22,45 @@
DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
+/* __always_inline is necessary to prevent indirect call through run_prog
+ * function pointer.
+ */
+static __always_inline int
+bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
+ enum cgroup_bpf_attach_type atype,
+ const void *ctx, bpf_prog_run_fn run_prog,
+ int retval, u32 *ret_flags)
+{
+ const struct bpf_prog_array_item *item;
+ const struct bpf_prog *prog;
+ const struct bpf_prog_array *array;
+ struct bpf_run_ctx *old_run_ctx;
+ struct bpf_cg_run_ctx run_ctx;
+ u32 func_ret;
+
+ run_ctx.retval = retval;
+ migrate_disable();
+ rcu_read_lock();
+ array = rcu_dereference(cgrp->effective[atype]);
+ item = &array->items[0];
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ while ((prog = READ_ONCE(item->prog))) {
+ run_ctx.prog_item = item;
+ func_ret = run_prog(prog, ctx);
+ if (ret_flags) {
+ *(ret_flags) |= (func_ret >> 1);
+ func_ret &= 1;
+ }
+ if (!func_ret && !IS_ERR_VALUE((long)run_ctx.retval))
+ run_ctx.retval = -EPERM;
+ item++;
+ }
+ bpf_reset_run_ctx(old_run_ctx);
+ rcu_read_unlock();
+ migrate_enable();
+ return run_ctx.retval;
+}
+
void cgroup_bpf_offline(struct cgroup *cgrp)
{
cgroup_get(cgrp);
@@ -1031,7 +1070,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
* __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
* @sk: The socket sending or receiving traffic
* @skb: The skb that is being sent or received
- * @type: The type of program to be exectuted
+ * @type: The type of program to be executed
*
* If no socket is passed, or the socket is not of type INET or INET6,
* this function does nothing and returns 0.
@@ -1044,7 +1083,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
* NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr
* NET_XMIT_CN (2) - continue with packet output and notify TCP
* to call cwr
- * -EPERM - drop packet
+ * -err - drop packet
*
* For ingress packets, this function will return -EPERM if any
* attached program was found and if it returned != 1 during execution.
@@ -1075,12 +1114,40 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
bpf_compute_and_save_data_end(skb, &saved_data_end);
if (atype == CGROUP_INET_EGRESS) {
- ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
- cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
+ u32 flags = 0;
+ bool cn;
+
+ ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, skb,
+ __bpf_prog_run_save_cb, 0, &flags);
+
+ /* Return values of CGROUP EGRESS BPF programs are:
+ * 0: drop packet
+ * 1: keep packet
+ * 2: drop packet and cn
+ * 3: keep packet and cn
+ *
+ * The returned value is then converted to one of the NET_XMIT
+ * or an error code that is then interpreted as drop packet
+ * (and no cn):
+ * 0: NET_XMIT_SUCCESS skb should be transmitted
+ * 1: NET_XMIT_DROP skb should be dropped and cn
+ * 2: NET_XMIT_CN skb should be transmitted and cn
+ * 3: -err skb should be dropped
+ */
+
+ cn = flags & BPF_RET_SET_CN;
+ if (ret && !IS_ERR_VALUE((long)ret))
+ ret = -EFAULT;
+ if (!ret)
+ ret = (cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);
+ else
+ ret = (cn ? NET_XMIT_DROP : ret);
} else {
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
- __bpf_prog_run_save_cb);
- ret = (ret == 1 ? 0 : -EPERM);
+ ret = bpf_prog_run_array_cg(&cgrp->bpf, atype,
+ skb, __bpf_prog_run_save_cb, 0,
+ NULL);
+ if (ret && !IS_ERR_VALUE((long)ret))
+ ret = -EFAULT;
}
bpf_restore_data_end(skb, saved_data_end);
__skb_pull(skb, offset);
@@ -1093,7 +1160,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
/**
* __cgroup_bpf_run_filter_sk() - Run a program on a sock
* @sk: sock structure to manipulate
- * @type: The type of program to be exectuted
+ * @type: The type of program to be executed
*
* socket is passed is expected to be of type INET or INET6.
*
@@ -1107,10 +1174,9 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
enum cgroup_bpf_attach_type atype)
{
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- int ret;
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, bpf_prog_run);
- return ret == 1 ? 0 : -EPERM;
+ return bpf_prog_run_array_cg(&cgrp->bpf, atype, sk, bpf_prog_run, 0,
+ NULL);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
@@ -1119,7 +1185,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
* provided by user sockaddr
* @sk: sock struct that will use sockaddr
* @uaddr: sockaddr struct provided by user
- * @type: The type of program to be exectuted
+ * @type: The type of program to be executed
* @t_ctx: Pointer to attach type specific context
* @flags: Pointer to u32 which contains higher bits of BPF program
* return value (OR'ed together).
@@ -1142,7 +1208,6 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
};
struct sockaddr_storage unspec;
struct cgroup *cgrp;
- int ret;
/* Check socket family since not all sockets represent network
* endpoint (e.g. AF_UNIX).
@@ -1156,10 +1221,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
}
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
- bpf_prog_run, flags);
-
- return ret == 1 ? 0 : -EPERM;
+ return bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run,
+ 0, flags);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
@@ -1169,7 +1232,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
* @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
* sk with connection information (IP addresses, etc.) May not contain
* cgroup info if it is a req sock.
- * @type: The type of program to be exectuted
+ * @type: The type of program to be executed
*
* socket passed is expected to be of type INET or INET6.
*
@@ -1184,11 +1247,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
enum cgroup_bpf_attach_type atype)
{
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- int ret;
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
- bpf_prog_run);
- return ret == 1 ? 0 : -EPERM;
+ return bpf_prog_run_array_cg(&cgrp->bpf, atype, sock_ops, bpf_prog_run,
+ 0, NULL);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
@@ -1201,17 +1262,47 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
.major = major,
.minor = minor,
};
- int allow;
+ int ret;
rcu_read_lock();
cgrp = task_dfl_cgroup(current);
- allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
- bpf_prog_run);
+ ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0,
+ NULL);
rcu_read_unlock();
- return !allow;
+ return ret;
}
+BPF_CALL_0(bpf_get_retval)
+{
+ struct bpf_cg_run_ctx *ctx =
+ container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+
+ return ctx->retval;
+}
+
+static const struct bpf_func_proto bpf_get_retval_proto = {
+ .func = bpf_get_retval,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+};
+
+BPF_CALL_1(bpf_set_retval, int, retval)
+{
+ struct bpf_cg_run_ctx *ctx =
+ container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+
+ ctx->retval = retval;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_set_retval_proto = {
+ .func = bpf_set_retval,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *
cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -1224,6 +1315,10 @@ cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_current_cgroup_id_proto;
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
+ case BPF_FUNC_get_retval:
+ return &bpf_get_retval_proto;
+ case BPF_FUNC_set_retval:
+ return &bpf_set_retval_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -1337,7 +1432,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
rcu_read_lock();
cgrp = task_dfl_cgroup(current);
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run);
+ ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0,
+ NULL);
rcu_read_unlock();
kfree(ctx.cur_val);
@@ -1350,24 +1446,10 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
kfree(ctx.new_val);
}
- return ret == 1 ? 0 : -EPERM;
+ return ret;
}
#ifdef CONFIG_NET
-static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
- enum cgroup_bpf_attach_type attach_type)
-{
- struct bpf_prog_array *prog_array;
- bool empty;
-
- rcu_read_lock();
- prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]);
- empty = bpf_prog_array_is_empty(prog_array);
- rcu_read_unlock();
-
- return empty;
-}
-
static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen,
struct bpf_sockopt_buf *buf)
{
@@ -1426,19 +1508,11 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
};
int ret, max_optlen;
- /* Opportunistic check to see whether we have any BPF program
- * attached to the hook so we don't waste time allocating
- * memory and locking the socket.
- */
- if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_SETSOCKOPT))
- return 0;
-
/* Allocate a bit more than the initial user buffer for
* BPF program. The canonical use case is overriding
* TCP_CONGESTION(nv) to TCP_CONGESTION(cubic).
*/
max_optlen = max_t(int, 16, *optlen);
-
max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
if (max_optlen < 0)
return max_optlen;
@@ -1451,14 +1525,12 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
}
lock_sock(sk);
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
- &ctx, bpf_prog_run);
+ ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_SETSOCKOPT,
+ &ctx, bpf_prog_run, 0, NULL);
release_sock(sk);
- if (!ret) {
- ret = -EPERM;
+ if (ret)
goto out;
- }
if (ctx.optlen == -1) {
/* optlen set to -1, bypass kernel */
@@ -1518,19 +1590,11 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
.sk = sk,
.level = level,
.optname = optname,
- .retval = retval,
+ .current_task = current,
};
int ret;
- /* Opportunistic check to see whether we have any BPF program
- * attached to the hook so we don't waste time allocating
- * memory and locking the socket.
- */
- if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_GETSOCKOPT))
- return retval;
-
ctx.optlen = max_optlen;
-
max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
if (max_optlen < 0)
return max_optlen;
@@ -1561,28 +1625,18 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
}
lock_sock(sk);
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
- &ctx, bpf_prog_run);
+ ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
+ &ctx, bpf_prog_run, retval, NULL);
release_sock(sk);
- if (!ret) {
- ret = -EPERM;
+ if (ret < 0)
goto out;
- }
if (ctx.optlen > max_optlen || ctx.optlen < 0) {
ret = -EFAULT;
goto out;
}
- /* BPF programs only allowed to set retval to 0, not some
- * arbitrary value.
- */
- if (ctx.retval != 0 && ctx.retval != retval) {
- ret = -EFAULT;
- goto out;
- }
-
if (ctx.optlen != 0) {
if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
put_user(ctx.optlen, optlen)) {
@@ -1591,8 +1645,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
}
}
- ret = ctx.retval;
-
out:
sockopt_free_buf(&ctx, &buf);
return ret;
@@ -1607,10 +1659,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
.sk = sk,
.level = level,
.optname = optname,
- .retval = retval,
.optlen = *optlen,
.optval = optval,
.optval_end = optval + *optlen,
+ .current_task = current,
};
int ret;
@@ -1622,26 +1674,20 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
* be called if that data shouldn't be "exported".
*/
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
- &ctx, bpf_prog_run);
- if (!ret)
- return -EPERM;
+ ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
+ &ctx, bpf_prog_run, retval, NULL);
+ if (ret < 0)
+ return ret;
if (ctx.optlen > *optlen)
return -EFAULT;
- /* BPF programs only allowed to set retval to 0, not some
- * arbitrary value.
- */
- if (ctx.retval != 0 && ctx.retval != retval)
- return -EFAULT;
-
/* BPF programs can shrink the buffer, export the modifications.
*/
if (ctx.optlen != 0)
*optlen = ctx.optlen;
- return ctx.retval;
+ return ret;
}
#endif
@@ -2057,10 +2103,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
break;
case offsetof(struct bpf_sockopt, retval):
- if (type == BPF_WRITE)
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
- else
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
+ BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
+
+ if (type == BPF_WRITE) {
+ int treg = BPF_REG_9;
+
+ if (si->src_reg == treg || si->dst_reg == treg)
+ --treg;
+ if (si->src_reg == treg || si->dst_reg == treg)
+ --treg;
+ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
+ offsetof(struct bpf_sockopt_kern, tmp_reg));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+ treg, si->dst_reg,
+ offsetof(struct bpf_sockopt_kern, current_task));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+ treg, treg,
+ offsetof(struct task_struct, bpf_ctx));
+ *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+ treg, si->src_reg,
+ offsetof(struct bpf_cg_run_ctx, retval));
+ *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
+ offsetof(struct bpf_sockopt_kern, tmp_reg));
+ } else {
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sockopt_kern, current_task));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct task_struct, bpf_ctx));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct bpf_cg_run_ctx, retval));
+ }
break;
case offsetof(struct bpf_sockopt, optval):
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index de3e5bc6781f..5f6f3f829b36 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -33,6 +33,7 @@
#include <linux/extable.h>
#include <linux/log2.h>
#include <linux/bpf_verifier.h>
+#include <linux/nodemask.h>
#include <asm/barrier.h>
#include <asm/unaligned.h>
@@ -105,6 +106,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
fp->aux = aux;
fp->aux->prog = fp;
fp->jit_requested = ebpf_jit_enabled();
+ fp->blinding_requested = bpf_jit_blinding_enabled(fp);
INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
mutex_init(&fp->aux->used_maps_mutex);
@@ -537,13 +539,10 @@ long bpf_jit_limit_max __read_mostly;
static void
bpf_prog_ksym_set_addr(struct bpf_prog *prog)
{
- const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
- unsigned long addr = (unsigned long)hdr;
-
WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
prog->aux->ksym.start = (unsigned long) prog->bpf_func;
- prog->aux->ksym.end = addr + hdr->pages * PAGE_SIZE;
+ prog->aux->ksym.end = prog->aux->ksym.start + prog->jited_len;
}
static void
@@ -808,6 +807,181 @@ int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
return slot;
}
+/*
+ * BPF program pack allocator.
+ *
+ * Most BPF programs are pretty small. Allocating a hole page for each
+ * program is sometime a waste. Many small bpf program also adds pressure
+ * to instruction TLB. To solve this issue, we introduce a BPF program pack
+ * allocator. The prog_pack allocator uses HPAGE_PMD_SIZE page (2MB on x86)
+ * to host BPF programs.
+ */
+#define BPF_PROG_CHUNK_SHIFT 6
+#define BPF_PROG_CHUNK_SIZE (1 << BPF_PROG_CHUNK_SHIFT)
+#define BPF_PROG_CHUNK_MASK (~(BPF_PROG_CHUNK_SIZE - 1))
+
+struct bpf_prog_pack {
+ struct list_head list;
+ void *ptr;
+ unsigned long bitmap[];
+};
+
+#define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE)
+
+static size_t bpf_prog_pack_size = -1;
+static size_t bpf_prog_pack_mask = -1;
+
+static int bpf_prog_chunk_count(void)
+{
+ WARN_ON_ONCE(bpf_prog_pack_size == -1);
+ return bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE;
+}
+
+static DEFINE_MUTEX(pack_mutex);
+static LIST_HEAD(pack_list);
+
+/* PMD_SIZE is not available in some special config, e.g. ARCH=arm with
+ * CONFIG_MMU=n. Use PAGE_SIZE in these cases.
+ */
+#ifdef PMD_SIZE
+#define BPF_HPAGE_SIZE PMD_SIZE
+#define BPF_HPAGE_MASK PMD_MASK
+#else
+#define BPF_HPAGE_SIZE PAGE_SIZE
+#define BPF_HPAGE_MASK PAGE_MASK
+#endif
+
+static size_t select_bpf_prog_pack_size(void)
+{
+ size_t size;
+ void *ptr;
+
+ size = BPF_HPAGE_SIZE * num_online_nodes();
+ ptr = module_alloc(size);
+
+ /* Test whether we can get huge pages. If not just use PAGE_SIZE
+ * packs.
+ */
+ if (!ptr || !is_vm_area_hugepages(ptr)) {
+ size = PAGE_SIZE;
+ bpf_prog_pack_mask = PAGE_MASK;
+ } else {
+ bpf_prog_pack_mask = BPF_HPAGE_MASK;
+ }
+
+ vfree(ptr);
+ return size;
+}
+
+static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_insns)
+{
+ struct bpf_prog_pack *pack;
+
+ pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(bpf_prog_chunk_count())),
+ GFP_KERNEL);
+ if (!pack)
+ return NULL;
+ pack->ptr = module_alloc(bpf_prog_pack_size);
+ if (!pack->ptr) {
+ kfree(pack);
+ return NULL;
+ }
+ bpf_fill_ill_insns(pack->ptr, bpf_prog_pack_size);
+ bitmap_zero(pack->bitmap, bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE);
+ list_add_tail(&pack->list, &pack_list);
+
+ set_vm_flush_reset_perms(pack->ptr);
+ set_memory_ro((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
+ set_memory_x((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
+ return pack;
+}
+
+static void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
+{
+ unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size);
+ struct bpf_prog_pack *pack;
+ unsigned long pos;
+ void *ptr = NULL;
+
+ mutex_lock(&pack_mutex);
+ if (bpf_prog_pack_size == -1)
+ bpf_prog_pack_size = select_bpf_prog_pack_size();
+
+ if (size > bpf_prog_pack_size) {
+ size = round_up(size, PAGE_SIZE);
+ ptr = module_alloc(size);
+ if (ptr) {
+ bpf_fill_ill_insns(ptr, size);
+ set_vm_flush_reset_perms(ptr);
+ set_memory_ro((unsigned long)ptr, size / PAGE_SIZE);
+ set_memory_x((unsigned long)ptr, size / PAGE_SIZE);
+ }
+ goto out;
+ }
+ list_for_each_entry(pack, &pack_list, list) {
+ pos = bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0,
+ nbits, 0);
+ if (pos < bpf_prog_chunk_count())
+ goto found_free_area;
+ }
+
+ pack = alloc_new_pack(bpf_fill_ill_insns);
+ if (!pack)
+ goto out;
+
+ pos = 0;
+
+found_free_area:
+ bitmap_set(pack->bitmap, pos, nbits);
+ ptr = (void *)(pack->ptr) + (pos << BPF_PROG_CHUNK_SHIFT);
+
+out:
+ mutex_unlock(&pack_mutex);
+ return ptr;
+}
+
+static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
+{
+ struct bpf_prog_pack *pack = NULL, *tmp;
+ unsigned int nbits;
+ unsigned long pos;
+ void *pack_ptr;
+
+ mutex_lock(&pack_mutex);
+ if (hdr->size > bpf_prog_pack_size) {
+ module_memfree(hdr);
+ goto out;
+ }
+
+ pack_ptr = (void *)((unsigned long)hdr & bpf_prog_pack_mask);
+
+ list_for_each_entry(tmp, &pack_list, list) {
+ if (tmp->ptr == pack_ptr) {
+ pack = tmp;
+ break;
+ }
+ }
+
+ if (WARN_ONCE(!pack, "bpf_prog_pack bug\n"))
+ goto out;
+
+ nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size);
+ pos = ((unsigned long)hdr - (unsigned long)pack_ptr) >> BPF_PROG_CHUNK_SHIFT;
+
+ WARN_ONCE(bpf_arch_text_invalidate(hdr, hdr->size),
+ "bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n");
+
+ bitmap_clear(pack->bitmap, pos, nbits);
+ if (bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0,
+ bpf_prog_chunk_count(), 0) == 0) {
+ list_del(&pack->list);
+ module_memfree(pack->ptr);
+ kfree(pack);
+ }
+out:
+ mutex_unlock(&pack_mutex);
+}
+
static atomic_long_t bpf_jit_current;
/* Can be overridden by an arch's JIT compiler if it has a custom,
@@ -833,12 +1007,11 @@ static int __init bpf_jit_charge_init(void)
}
pure_initcall(bpf_jit_charge_init);
-int bpf_jit_charge_modmem(u32 pages)
+int bpf_jit_charge_modmem(u32 size)
{
- if (atomic_long_add_return(pages, &bpf_jit_current) >
- (bpf_jit_limit >> PAGE_SHIFT)) {
+ if (atomic_long_add_return(size, &bpf_jit_current) > bpf_jit_limit) {
if (!bpf_capable()) {
- atomic_long_sub(pages, &bpf_jit_current);
+ atomic_long_sub(size, &bpf_jit_current);
return -EPERM;
}
}
@@ -846,9 +1019,9 @@ int bpf_jit_charge_modmem(u32 pages)
return 0;
}
-void bpf_jit_uncharge_modmem(u32 pages)
+void bpf_jit_uncharge_modmem(u32 size)
{
- atomic_long_sub(pages, &bpf_jit_current);
+ atomic_long_sub(size, &bpf_jit_current);
}
void *__weak bpf_jit_alloc_exec(unsigned long size)
@@ -867,7 +1040,7 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
struct bpf_binary_header *hdr;
- u32 size, hole, start, pages;
+ u32 size, hole, start;
WARN_ON_ONCE(!is_power_of_2(alignment) ||
alignment > BPF_IMAGE_ALIGNMENT);
@@ -877,20 +1050,19 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
* random section of illegal instructions.
*/
size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
- pages = size / PAGE_SIZE;
- if (bpf_jit_charge_modmem(pages))
+ if (bpf_jit_charge_modmem(size))
return NULL;
hdr = bpf_jit_alloc_exec(size);
if (!hdr) {
- bpf_jit_uncharge_modmem(pages);
+ bpf_jit_uncharge_modmem(size);
return NULL;
}
/* Fill space with illegal/arch-dep instructions. */
bpf_fill_ill_insns(hdr, size);
- hdr->pages = pages;
+ hdr->size = size;
hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
PAGE_SIZE - sizeof(*hdr));
start = (get_random_int() % hole) & ~(alignment - 1);
@@ -903,10 +1075,117 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
void bpf_jit_binary_free(struct bpf_binary_header *hdr)
{
- u32 pages = hdr->pages;
+ u32 size = hdr->size;
bpf_jit_free_exec(hdr);
- bpf_jit_uncharge_modmem(pages);
+ bpf_jit_uncharge_modmem(size);
+}
+
+/* Allocate jit binary from bpf_prog_pack allocator.
+ * Since the allocated memory is RO+X, the JIT engine cannot write directly
+ * to the memory. To solve this problem, a RW buffer is also allocated at
+ * as the same time. The JIT engine should calculate offsets based on the
+ * RO memory address, but write JITed program to the RW buffer. Once the
+ * JIT engine finishes, it calls bpf_jit_binary_pack_finalize, which copies
+ * the JITed program to the RO memory.
+ */
+struct bpf_binary_header *
+bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
+ unsigned int alignment,
+ struct bpf_binary_header **rw_header,
+ u8 **rw_image,
+ bpf_jit_fill_hole_t bpf_fill_ill_insns)
+{
+ struct bpf_binary_header *ro_header;
+ u32 size, hole, start;
+
+ WARN_ON_ONCE(!is_power_of_2(alignment) ||
+ alignment > BPF_IMAGE_ALIGNMENT);
+
+ /* add 16 bytes for a random section of illegal instructions */
+ size = round_up(proglen + sizeof(*ro_header) + 16, BPF_PROG_CHUNK_SIZE);
+
+ if (bpf_jit_charge_modmem(size))
+ return NULL;
+ ro_header = bpf_prog_pack_alloc(size, bpf_fill_ill_insns);
+ if (!ro_header) {
+ bpf_jit_uncharge_modmem(size);
+ return NULL;
+ }
+
+ *rw_header = kvmalloc(size, GFP_KERNEL);
+ if (!*rw_header) {
+ bpf_arch_text_copy(&ro_header->size, &size, sizeof(size));
+ bpf_prog_pack_free(ro_header);
+ bpf_jit_uncharge_modmem(size);
+ return NULL;
+ }
+
+ /* Fill space with illegal/arch-dep instructions. */
+ bpf_fill_ill_insns(*rw_header, size);
+ (*rw_header)->size = size;
+
+ hole = min_t(unsigned int, size - (proglen + sizeof(*ro_header)),
+ BPF_PROG_CHUNK_SIZE - sizeof(*ro_header));
+ start = (get_random_int() % hole) & ~(alignment - 1);
+
+ *image_ptr = &ro_header->image[start];
+ *rw_image = &(*rw_header)->image[start];
+
+ return ro_header;
+}
+
+/* Copy JITed text from rw_header to its final location, the ro_header. */
+int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
+ struct bpf_binary_header *ro_header,
+ struct bpf_binary_header *rw_header)
+{
+ void *ptr;
+
+ ptr = bpf_arch_text_copy(ro_header, rw_header, rw_header->size);
+
+ kvfree(rw_header);
+
+ if (IS_ERR(ptr)) {
+ bpf_prog_pack_free(ro_header);
+ return PTR_ERR(ptr);
+ }
+ prog->aux->use_bpf_prog_pack = true;
+ return 0;
+}
+
+/* bpf_jit_binary_pack_free is called in two different scenarios:
+ * 1) when the program is freed after;
+ * 2) when the JIT engine fails (before bpf_jit_binary_pack_finalize).
+ * For case 2), we need to free both the RO memory and the RW buffer.
+ *
+ * bpf_jit_binary_pack_free requires proper ro_header->size. However,
+ * bpf_jit_binary_pack_alloc does not set it. Therefore, ro_header->size
+ * must be set with either bpf_jit_binary_pack_finalize (normal path) or
+ * bpf_arch_text_copy (when jit fails).
+ */
+void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
+ struct bpf_binary_header *rw_header)
+{
+ u32 size = ro_header->size;
+
+ bpf_prog_pack_free(ro_header);
+ kvfree(rw_header);
+ bpf_jit_uncharge_modmem(size);
+}
+
+static inline struct bpf_binary_header *
+bpf_jit_binary_hdr(const struct bpf_prog *fp)
+{
+ unsigned long real_start = (unsigned long)fp->bpf_func;
+ unsigned long addr;
+
+ if (fp->aux->use_bpf_prog_pack)
+ addr = real_start & BPF_PROG_CHUNK_MASK;
+ else
+ addr = real_start & PAGE_MASK;
+
+ return (void *)addr;
}
/* This symbol is only overridden by archs that have different
@@ -918,7 +1197,10 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
if (fp->jited) {
struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
- bpf_jit_binary_free(hdr);
+ if (fp->aux->use_bpf_prog_pack)
+ bpf_jit_binary_pack_free(hdr, NULL /* rw_buffer */);
+ else
+ bpf_jit_binary_free(hdr);
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
}
@@ -1146,7 +1428,7 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
struct bpf_insn *insn;
int i, rewritten;
- if (!bpf_jit_blinding_enabled(prog) || prog->blinded)
+ if (!prog->blinding_requested || prog->blinded)
return prog;
clone = bpf_prog_clone_create(prog, GFP_USER);
@@ -1157,6 +1439,16 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
insn = clone->insnsi;
for (i = 0; i < insn_cnt; i++, insn++) {
+ if (bpf_pseudo_func(insn)) {
+ /* ld_imm64 with an address of bpf subprog is not
+ * a user controlled constant. Don't randomize it,
+ * since it will conflict with jit_subprogs() logic.
+ */
+ insn++;
+ i++;
+ continue;
+ }
+
/* We temporarily need to hold the original ld64 insn
* so that we can still access the first part in the
* second blinding run.
@@ -1661,6 +1953,11 @@ out:
CONT; \
LDX_MEM_##SIZEOP: \
DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
+ CONT; \
+ LDX_PROBE_MEM_##SIZEOP: \
+ bpf_probe_read_kernel(&DST, sizeof(SIZE), \
+ (const void *)(long) (SRC + insn->off)); \
+ DST = *((SIZE *)&DST); \
CONT;
LDST(B, u8)
@@ -1668,15 +1965,6 @@ out:
LDST(W, u32)
LDST(DW, u64)
#undef LDST
-#define LDX_PROBE(SIZEOP, SIZE) \
- LDX_PROBE_MEM_##SIZEOP: \
- bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) (SRC + insn->off)); \
- CONT;
- LDX_PROBE(B, 1)
- LDX_PROBE(H, 2)
- LDX_PROBE(W, 4)
- LDX_PROBE(DW, 8)
-#undef LDX_PROBE
#define ATOMIC_ALU_OP(BOP, KOP) \
case BOP: \
@@ -1829,28 +2117,30 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx,
}
#endif
-bool bpf_prog_array_compatible(struct bpf_array *array,
- const struct bpf_prog *fp)
+bool bpf_prog_map_compatible(struct bpf_map *map,
+ const struct bpf_prog *fp)
{
bool ret;
if (fp->kprobe_override)
return false;
- spin_lock(&array->aux->owner.lock);
-
- if (!array->aux->owner.type) {
+ spin_lock(&map->owner.lock);
+ if (!map->owner.type) {
/* There's no owner yet where we could check for
* compatibility.
*/
- array->aux->owner.type = fp->type;
- array->aux->owner.jited = fp->jited;
+ map->owner.type = fp->type;
+ map->owner.jited = fp->jited;
+ map->owner.xdp_has_frags = fp->aux->xdp_has_frags;
ret = true;
} else {
- ret = array->aux->owner.type == fp->type &&
- array->aux->owner.jited == fp->jited;
+ ret = map->owner.type == fp->type &&
+ map->owner.jited == fp->jited &&
+ map->owner.xdp_has_frags == fp->aux->xdp_has_frags;
}
- spin_unlock(&array->aux->owner.lock);
+ spin_unlock(&map->owner.lock);
+
return ret;
}
@@ -1862,13 +2152,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
mutex_lock(&aux->used_maps_mutex);
for (i = 0; i < aux->used_map_cnt; i++) {
struct bpf_map *map = aux->used_maps[i];
- struct bpf_array *array;
- if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+ if (!map_type_contains_progs(map))
continue;
- array = container_of(map, struct bpf_array, map);
- if (!bpf_prog_array_compatible(array, fp)) {
+ if (!bpf_prog_map_compatible(map, fp)) {
ret = -EINVAL;
goto out;
}
@@ -1968,18 +2256,10 @@ static struct bpf_prog_dummy {
},
};
-/* to avoid allocating empty bpf_prog_array for cgroups that
- * don't have bpf program attached use one global 'empty_prog_array'
- * It will not be modified the caller of bpf_prog_array_alloc()
- * (since caller requested prog_cnt == 0)
- * that pointer should be 'freed' by bpf_prog_array_free()
- */
-static struct {
- struct bpf_prog_array hdr;
- struct bpf_prog *null_prog;
-} empty_prog_array = {
+struct bpf_empty_prog_array bpf_empty_prog_array = {
.null_prog = NULL,
};
+EXPORT_SYMBOL(bpf_empty_prog_array);
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
{
@@ -1989,12 +2269,12 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
(prog_cnt + 1),
flags);
- return &empty_prog_array.hdr;
+ return &bpf_empty_prog_array.hdr;
}
void bpf_prog_array_free(struct bpf_prog_array *progs)
{
- if (!progs || progs == &empty_prog_array.hdr)
+ if (!progs || progs == &bpf_empty_prog_array.hdr)
return;
kfree_rcu(progs, rcu);
}
@@ -2350,6 +2630,7 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
const struct bpf_func_proto bpf_map_push_elem_proto __weak;
const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
+const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto __weak;
const struct bpf_func_proto bpf_spin_lock_proto __weak;
const struct bpf_func_proto bpf_spin_unlock_proto __weak;
const struct bpf_func_proto bpf_jiffies64_proto __weak;
@@ -2453,6 +2734,16 @@ int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
return -ENOTSUPP;
}
+void * __weak bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+ return ERR_PTR(-ENOTSUPP);
+}
+
+int __weak bpf_arch_text_invalidate(void *dst, size_t len)
+{
+ return -ENOTSUPP;
+}
+
DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
EXPORT_SYMBOL(bpf_stats_enabled_key);
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index b3e6b9422238..f4860ac756cd 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -27,6 +27,7 @@
#include <linux/kthread.h>
#include <linux/capability.h>
#include <trace/events/xdp.h>
+#include <linux/btf_ids.h>
#include <linux/netdevice.h> /* netif_receive_skb_list */
#include <linux/etherdevice.h> /* eth_type_trans */
@@ -397,7 +398,8 @@ static int cpu_map_kthread_run(void *data)
return 0;
}
-static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
+static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu,
+ struct bpf_map *map, int fd)
{
struct bpf_prog *prog;
@@ -405,7 +407,8 @@ static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (prog->expected_attach_type != BPF_XDP_CPUMAP) {
+ if (prog->expected_attach_type != BPF_XDP_CPUMAP ||
+ !bpf_prog_map_compatible(map, prog)) {
bpf_prog_put(prog);
return -EINVAL;
}
@@ -457,7 +460,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
rcpu->map_id = map->id;
rcpu->value.qsize = value->qsize;
- if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
+ if (fd > 0 && __cpu_map_load_bpf_program(rcpu, map, fd))
goto free_ptr_ring;
/* Setup kthread */
@@ -671,7 +674,7 @@ static int cpu_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
__cpu_map_lookup_elem);
}
-static int cpu_map_btf_id;
+BTF_ID_LIST_SINGLE(cpu_map_btf_ids, struct, bpf_cpu_map)
const struct bpf_map_ops cpu_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc = cpu_map_alloc,
@@ -681,8 +684,7 @@ const struct bpf_map_ops cpu_map_ops = {
.map_lookup_elem = cpu_map_lookup_elem,
.map_get_next_key = cpu_map_get_next_key,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "bpf_cpu_map",
- .map_btf_id = &cpu_map_btf_id,
+ .map_btf_id = &cpu_map_btf_ids[0],
.map_redirect = cpu_map_redirect,
};
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index fe019dbdb3f0..c2867068e5bd 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -48,6 +48,7 @@
#include <net/xdp.h>
#include <linux/filter.h>
#include <trace/events/xdp.h>
+#include <linux/btf_ids.h>
#define DEV_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
@@ -858,7 +859,8 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
BPF_PROG_TYPE_XDP, false);
if (IS_ERR(prog))
goto err_put_dev;
- if (prog->expected_attach_type != BPF_XDP_DEVMAP)
+ if (prog->expected_attach_type != BPF_XDP_DEVMAP ||
+ !bpf_prog_map_compatible(&dtab->map, prog))
goto err_put_prog;
}
@@ -1004,7 +1006,7 @@ static int dev_hash_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
__dev_map_hash_lookup_elem);
}
-static int dev_map_btf_id;
+BTF_ID_LIST_SINGLE(dev_map_btf_ids, struct, bpf_dtab)
const struct bpf_map_ops dev_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc = dev_map_alloc,
@@ -1014,12 +1016,10 @@ const struct bpf_map_ops dev_map_ops = {
.map_update_elem = dev_map_update_elem,
.map_delete_elem = dev_map_delete_elem,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "bpf_dtab",
- .map_btf_id = &dev_map_btf_id,
+ .map_btf_id = &dev_map_btf_ids[0],
.map_redirect = dev_map_redirect,
};
-static int dev_map_hash_map_btf_id;
const struct bpf_map_ops dev_map_hash_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc = dev_map_alloc,
@@ -1029,8 +1029,7 @@ const struct bpf_map_ops dev_map_hash_ops = {
.map_update_elem = dev_map_hash_update_elem,
.map_delete_elem = dev_map_hash_delete_elem,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "bpf_dtab",
- .map_btf_id = &dev_map_hash_map_btf_id,
+ .map_btf_id = &dev_map_btf_ids[0],
.map_redirect = dev_hash_map_redirect,
};
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d29af9988f37..17fb69c0e0dc 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -10,6 +10,7 @@
#include <linux/random.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
+#include <linux/btf_ids.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
#include "map_in_map.h"
@@ -139,7 +140,7 @@ static inline bool htab_use_raw_lock(const struct bpf_htab *htab)
static void htab_init_buckets(struct bpf_htab *htab)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < htab->n_buckets; i++) {
INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
@@ -238,7 +239,7 @@ static void htab_free_prealloced_timers(struct bpf_htab *htab)
u32 num_entries = htab->map.max_entries;
int i;
- if (likely(!map_value_has_timer(&htab->map)))
+ if (!map_value_has_timer(&htab->map))
return;
if (htab_has_extra_elems(htab))
num_entries += num_possible_cpus();
@@ -254,6 +255,25 @@ static void htab_free_prealloced_timers(struct bpf_htab *htab)
}
}
+static void htab_free_prealloced_kptrs(struct bpf_htab *htab)
+{
+ u32 num_entries = htab->map.max_entries;
+ int i;
+
+ if (!map_value_has_kptrs(&htab->map))
+ return;
+ if (htab_has_extra_elems(htab))
+ num_entries += num_possible_cpus();
+
+ for (i = 0; i < num_entries; i++) {
+ struct htab_elem *elem;
+
+ elem = get_htab_elem(htab, i);
+ bpf_map_free_kptrs(&htab->map, elem->key + round_up(htab->map.key_size, 8));
+ cond_resched();
+ }
+}
+
static void htab_free_elems(struct bpf_htab *htab)
{
int i;
@@ -725,12 +745,15 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map,
return insn - insn_buf;
}
-static void check_and_free_timer(struct bpf_htab *htab, struct htab_elem *elem)
+static void check_and_free_fields(struct bpf_htab *htab,
+ struct htab_elem *elem)
{
- if (unlikely(map_value_has_timer(&htab->map)))
- bpf_timer_cancel_and_free(elem->key +
- round_up(htab->map.key_size, 8) +
- htab->map.timer_off);
+ void *map_value = elem->key + round_up(htab->map.key_size, 8);
+
+ if (map_value_has_timer(&htab->map))
+ bpf_timer_cancel_and_free(map_value + htab->map.timer_off);
+ if (map_value_has_kptrs(&htab->map))
+ bpf_map_free_kptrs(&htab->map, map_value);
}
/* It is called from the bpf_lru_list when the LRU needs to delete
@@ -738,7 +761,7 @@ static void check_and_free_timer(struct bpf_htab *htab, struct htab_elem *elem)
*/
static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
{
- struct bpf_htab *htab = (struct bpf_htab *)arg;
+ struct bpf_htab *htab = arg;
struct htab_elem *l = NULL, *tgt_l;
struct hlist_nulls_head *head;
struct hlist_nulls_node *n;
@@ -757,7 +780,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
if (l == tgt_l) {
hlist_nulls_del_rcu(&l->hash_node);
- check_and_free_timer(htab, l);
+ check_and_free_fields(htab, l);
break;
}
@@ -829,7 +852,7 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
- check_and_free_timer(htab, l);
+ check_and_free_fields(htab, l);
kfree(l);
}
@@ -857,7 +880,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
htab_put_fd_value(htab, l);
if (htab_is_prealloc(htab)) {
- check_and_free_timer(htab, l);
+ check_and_free_fields(htab, l);
__pcpu_freelist_push(&htab->freelist, &l->fnode);
} else {
atomic_dec(&htab->count);
@@ -1104,7 +1127,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
if (!htab_is_prealloc(htab))
free_htab_elem(htab, l_old);
else
- check_and_free_timer(htab, l_old);
+ check_and_free_fields(htab, l_old);
}
ret = 0;
err:
@@ -1114,7 +1137,7 @@ err:
static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
{
- check_and_free_timer(htab, elem);
+ check_and_free_fields(htab, elem);
bpf_lru_push_free(&htab->lru, &elem->lru_node);
}
@@ -1419,8 +1442,14 @@ static void htab_free_malloced_timers(struct bpf_htab *htab)
struct hlist_nulls_node *n;
struct htab_elem *l;
- hlist_nulls_for_each_entry(l, n, head, hash_node)
- check_and_free_timer(htab, l);
+ hlist_nulls_for_each_entry(l, n, head, hash_node) {
+ /* We don't reset or free kptr on uref dropping to zero,
+ * hence just free timer.
+ */
+ bpf_timer_cancel_and_free(l->key +
+ round_up(htab->map.key_size, 8) +
+ htab->map.timer_off);
+ }
cond_resched_rcu();
}
rcu_read_unlock();
@@ -1430,7 +1459,8 @@ static void htab_map_free_timers(struct bpf_map *map)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- if (likely(!map_value_has_timer(&htab->map)))
+ /* We don't reset or free kptr on uref dropping to zero. */
+ if (!map_value_has_timer(&htab->map))
return;
if (!htab_is_prealloc(htab))
htab_free_malloced_timers(htab);
@@ -1453,11 +1483,14 @@ static void htab_map_free(struct bpf_map *map)
* not have executed. Wait for them.
*/
rcu_barrier();
- if (!htab_is_prealloc(htab))
+ if (!htab_is_prealloc(htab)) {
delete_all_elements(htab);
- else
+ } else {
+ htab_free_prealloced_kptrs(htab);
prealloc_destroy(htab);
+ }
+ bpf_map_free_kptr_off_tab(map);
free_percpu(htab->extra_elems);
bpf_map_area_free(htab->buckets);
for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
@@ -1594,7 +1627,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
void __user *uvalues = u64_to_user_ptr(attr->batch.values);
void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
- u32 batch, max_count, size, bucket_size;
+ u32 batch, max_count, size, bucket_size, map_id;
struct htab_elem *node_to_free = NULL;
u64 elem_map_flags, map_flags;
struct hlist_nulls_head *head;
@@ -1636,7 +1669,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
value_size = size * num_possible_cpus();
total = 0;
/* while experimenting with hash tables with sizes ranging from 10 to
- * 1000, it was observed that a bucket can have upto 5 entries.
+ * 1000, it was observed that a bucket can have up to 5 entries.
*/
bucket_size = 5;
@@ -1719,6 +1752,14 @@ again_nocopy:
}
} else {
value = l->key + roundup_key_size;
+ if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+ struct bpf_map **inner_map = value;
+
+ /* Actual value is the id of the inner map */
+ map_id = map->ops->map_fd_sys_lookup_elem(*inner_map);
+ value = &map_id;
+ }
+
if (elem_map_flags & BPF_F_LOCK)
copy_map_value_locked(map, dst_val, value,
true);
@@ -2105,7 +2146,7 @@ out:
return num_elems;
}
-static int htab_map_btf_id;
+BTF_ID_LIST_SINGLE(htab_map_btf_ids, struct, bpf_htab)
const struct bpf_map_ops htab_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = htab_map_alloc_check,
@@ -2122,12 +2163,10 @@ const struct bpf_map_ops htab_map_ops = {
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab),
- .map_btf_name = "bpf_htab",
- .map_btf_id = &htab_map_btf_id,
+ .map_btf_id = &htab_map_btf_ids[0],
.iter_seq_info = &iter_seq_info,
};
-static int htab_lru_map_btf_id;
const struct bpf_map_ops htab_lru_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = htab_map_alloc_check,
@@ -2145,8 +2184,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab_lru),
- .map_btf_name = "bpf_htab",
- .map_btf_id = &htab_lru_map_btf_id,
+ .map_btf_id = &htab_map_btf_ids[0],
.iter_seq_info = &iter_seq_info,
};
@@ -2161,6 +2199,20 @@ static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
return NULL;
}
+static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
+{
+ struct htab_elem *l;
+
+ if (cpu >= nr_cpu_ids)
+ return NULL;
+
+ l = __htab_map_lookup_elem(map, key);
+ if (l)
+ return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu);
+ else
+ return NULL;
+}
+
static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
{
struct htab_elem *l = __htab_map_lookup_elem(map, key);
@@ -2173,6 +2225,22 @@ static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
return NULL;
}
+static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
+{
+ struct htab_elem *l;
+
+ if (cpu >= nr_cpu_ids)
+ return NULL;
+
+ l = __htab_map_lookup_elem(map, key);
+ if (l) {
+ bpf_lru_node_set_ref(&l->lru_node);
+ return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu);
+ }
+
+ return NULL;
+}
+
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
{
struct htab_elem *l;
@@ -2252,7 +2320,6 @@ static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
rcu_read_unlock();
}
-static int htab_percpu_map_btf_id;
const struct bpf_map_ops htab_percpu_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = htab_map_alloc_check,
@@ -2263,16 +2330,15 @@ const struct bpf_map_ops htab_percpu_map_ops = {
.map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
.map_update_elem = htab_percpu_map_update_elem,
.map_delete_elem = htab_map_delete_elem,
+ .map_lookup_percpu_elem = htab_percpu_map_lookup_percpu_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab_percpu),
- .map_btf_name = "bpf_htab",
- .map_btf_id = &htab_percpu_map_btf_id,
+ .map_btf_id = &htab_map_btf_ids[0],
.iter_seq_info = &iter_seq_info,
};
-static int htab_lru_percpu_map_btf_id;
const struct bpf_map_ops htab_lru_percpu_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = htab_map_alloc_check,
@@ -2283,12 +2349,12 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
.map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
.map_update_elem = htab_lru_percpu_map_update_elem,
.map_delete_elem = htab_lru_map_delete_elem,
+ .map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab_lru_percpu),
- .map_btf_name = "bpf_htab",
- .map_btf_id = &htab_lru_percpu_map_btf_id,
+ .map_btf_id = &htab_map_btf_ids[0],
.iter_seq_info = &iter_seq_info,
};
@@ -2412,7 +2478,6 @@ static void htab_of_map_free(struct bpf_map *map)
fd_htab_map_free(map);
}
-static int htab_of_maps_map_btf_id;
const struct bpf_map_ops htab_of_maps_map_ops = {
.map_alloc_check = fd_htab_map_alloc_check,
.map_alloc = htab_of_map_alloc,
@@ -2425,6 +2490,6 @@ const struct bpf_map_ops htab_of_maps_map_ops = {
.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
.map_gen_lookup = htab_of_map_gen_lookup,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "bpf_htab",
- .map_btf_id = &htab_of_maps_map_btf_id,
+ BATCH_OPS(htab),
+ .map_btf_id = &htab_map_btf_ids[0],
};
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 55c084251fab..225806a02efb 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -17,6 +17,7 @@
#include <linux/pid_namespace.h>
#include <linux/proc_ns.h>
#include <linux/security.h>
+#include <linux/btf_ids.h>
#include "../../lib/kstrtox.h"
@@ -102,7 +103,7 @@ const struct bpf_func_proto bpf_map_pop_elem_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE,
+ .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
};
BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
@@ -115,7 +116,23 @@ const struct bpf_func_proto bpf_map_peek_elem_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE,
+ .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
+};
+
+BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+ return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu);
+}
+
+const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = {
+ .func = bpf_map_lookup_percpu_elem,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_MAP_KEY,
+ .arg3_type = ARG_ANYTHING,
};
const struct bpf_func_proto bpf_get_prandom_u32_proto = {
@@ -224,13 +241,8 @@ BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
if (unlikely(!task))
goto err_clear;
- strncpy(buf, task->comm, size);
-
- /* Verifier guarantees that size > 0. For task->comm exceeding
- * size, guarantee that buf is %NUL-terminated. Unconditionally
- * done here to save the size test.
- */
- buf[size - 1] = 0;
+ /* Verifier guarantees that size > 0 */
+ strscpy(buf, task->comm, size);
return 0;
err_clear:
memset(buf, 0, size);
@@ -672,6 +684,39 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
.arg3_type = ARG_ANYTHING,
};
+BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
+ const void __user *, user_ptr, struct task_struct *, tsk, u64, flags)
+{
+ int ret;
+
+ /* flags is not used yet */
+ if (unlikely(flags))
+ return -EINVAL;
+
+ if (unlikely(!size))
+ return 0;
+
+ ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
+ if (ret == size)
+ return 0;
+
+ memset(dst, 0, size);
+ /* Return -EFAULT for partial read */
+ return ret < 0 ? ret : -EFAULT;
+}
+
+const struct bpf_func_proto bpf_copy_from_user_task_proto = {
+ .func = bpf_copy_from_user_task,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_BTF_ID,
+ .arg4_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+ .arg5_type = ARG_ANYTHING
+};
+
BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
{
if (cpu >= nr_cpu_ids)
@@ -1059,7 +1104,7 @@ struct bpf_hrtimer {
struct bpf_timer_kern {
struct bpf_hrtimer *timer;
/* bpf_spin_lock is used here instead of spinlock_t to make
- * sure that it always fits into space resereved by struct bpf_timer
+ * sure that it always fits into space reserved by struct bpf_timer
* regardless of LOCKDEP and spinlock debug flags.
*/
struct bpf_spin_lock lock;
@@ -1345,6 +1390,191 @@ out:
kfree(t);
}
+BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
+{
+ unsigned long *kptr = map_value;
+
+ return xchg(kptr, (unsigned long)ptr);
+}
+
+/* Unlike other PTR_TO_BTF_ID helpers the btf_id in bpf_kptr_xchg()
+ * helper is determined dynamically by the verifier.
+ */
+#define BPF_PTR_POISON ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA))
+
+const struct bpf_func_proto bpf_kptr_xchg_proto = {
+ .func = bpf_kptr_xchg,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .ret_btf_id = BPF_PTR_POISON,
+ .arg1_type = ARG_PTR_TO_KPTR,
+ .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
+ .arg2_btf_id = BPF_PTR_POISON,
+};
+
+/* Since the upper 8 bits of dynptr->size is reserved, the
+ * maximum supported size is 2^24 - 1.
+ */
+#define DYNPTR_MAX_SIZE ((1UL << 24) - 1)
+#define DYNPTR_TYPE_SHIFT 28
+#define DYNPTR_SIZE_MASK 0xFFFFFF
+#define DYNPTR_RDONLY_BIT BIT(31)
+
+static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
+{
+ return ptr->size & DYNPTR_RDONLY_BIT;
+}
+
+static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
+{
+ ptr->size |= type << DYNPTR_TYPE_SHIFT;
+}
+
+static u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr)
+{
+ return ptr->size & DYNPTR_SIZE_MASK;
+}
+
+int bpf_dynptr_check_size(u32 size)
+{
+ return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
+}
+
+void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
+ enum bpf_dynptr_type type, u32 offset, u32 size)
+{
+ ptr->data = data;
+ ptr->offset = offset;
+ ptr->size = size;
+ bpf_dynptr_set_type(ptr, type);
+}
+
+void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
+{
+ memset(ptr, 0, sizeof(*ptr));
+}
+
+static int bpf_dynptr_check_off_len(struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
+{
+ u32 size = bpf_dynptr_get_size(ptr);
+
+ if (len > size || offset > size - len)
+ return -E2BIG;
+
+ return 0;
+}
+
+BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr)
+{
+ int err;
+
+ err = bpf_dynptr_check_size(size);
+ if (err)
+ goto error;
+
+ /* flags is currently unsupported */
+ if (flags) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size);
+
+ return 0;
+
+error:
+ bpf_dynptr_set_null(ptr);
+ return err;
+}
+
+const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
+ .func = bpf_dynptr_from_mem,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
+};
+
+BPF_CALL_4(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, u32, offset)
+{
+ int err;
+
+ if (!src->data)
+ return -EINVAL;
+
+ err = bpf_dynptr_check_off_len(src, offset, len);
+ if (err)
+ return err;
+
+ memcpy(dst, src->data + src->offset + offset, len);
+
+ return 0;
+}
+
+const struct bpf_func_proto bpf_dynptr_read_proto = {
+ .func = bpf_dynptr_read,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_PTR_TO_DYNPTR,
+ .arg4_type = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, u32, len)
+{
+ int err;
+
+ if (!dst->data || bpf_dynptr_is_rdonly(dst))
+ return -EINVAL;
+
+ err = bpf_dynptr_check_off_len(dst, offset, len);
+ if (err)
+ return err;
+
+ memcpy(dst->data + dst->offset + offset, src, len);
+
+ return 0;
+}
+
+const struct bpf_func_proto bpf_dynptr_write_proto = {
+ .func = bpf_dynptr_write,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_DYNPTR,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg4_type = ARG_CONST_SIZE_OR_ZERO,
+};
+
+BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
+{
+ int err;
+
+ if (!ptr->data)
+ return 0;
+
+ err = bpf_dynptr_check_off_len(ptr, offset, len);
+ if (err)
+ return 0;
+
+ if (bpf_dynptr_is_rdonly(ptr))
+ return 0;
+
+ return (unsigned long)(ptr->data + ptr->offset + offset);
+}
+
+const struct bpf_func_proto bpf_dynptr_data_proto = {
+ .func = bpf_dynptr_data,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL,
+ .arg1_type = ARG_PTR_TO_DYNPTR,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO,
+};
+
const struct bpf_func_proto bpf_get_current_task_proto __weak;
const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
@@ -1369,6 +1599,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_map_pop_elem_proto;
case BPF_FUNC_map_peek_elem:
return &bpf_map_peek_elem_proto;
+ case BPF_FUNC_map_lookup_percpu_elem:
+ return &bpf_map_lookup_percpu_elem_proto;
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_get_smp_processor_id:
@@ -1391,12 +1623,26 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_ringbuf_discard_proto;
case BPF_FUNC_ringbuf_query:
return &bpf_ringbuf_query_proto;
+ case BPF_FUNC_ringbuf_reserve_dynptr:
+ return &bpf_ringbuf_reserve_dynptr_proto;
+ case BPF_FUNC_ringbuf_submit_dynptr:
+ return &bpf_ringbuf_submit_dynptr_proto;
+ case BPF_FUNC_ringbuf_discard_dynptr:
+ return &bpf_ringbuf_discard_dynptr_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
case BPF_FUNC_loop:
return &bpf_loop_proto;
case BPF_FUNC_strncmp:
return &bpf_strncmp_proto;
+ case BPF_FUNC_dynptr_from_mem:
+ return &bpf_dynptr_from_mem_proto;
+ case BPF_FUNC_dynptr_read:
+ return &bpf_dynptr_read_proto;
+ case BPF_FUNC_dynptr_write:
+ return &bpf_dynptr_write_proto;
+ case BPF_FUNC_dynptr_data:
+ return &bpf_dynptr_data_proto;
default:
break;
}
@@ -1423,6 +1669,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_timer_start_proto;
case BPF_FUNC_timer_cancel:
return &bpf_timer_cancel_proto;
+ case BPF_FUNC_kptr_xchg:
+ return &bpf_kptr_xchg_proto;
default:
break;
}
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 5a8d9f7467bf..4f841e16779e 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -710,11 +710,10 @@ static DEFINE_MUTEX(bpf_preload_lock);
static int populate_bpffs(struct dentry *parent)
{
struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {};
- struct bpf_link *links[BPF_PRELOAD_LINKS] = {};
int err = 0, i;
/* grab the mutex to make sure the kernel interactions with bpf_preload
- * UMD are serialized
+ * are serialized
*/
mutex_lock(&bpf_preload_lock);
@@ -722,40 +721,22 @@ static int populate_bpffs(struct dentry *parent)
if (!bpf_preload_mod_get())
goto out;
- if (!bpf_preload_ops->info.tgid) {
- /* preload() will start UMD that will load BPF iterator programs */
- err = bpf_preload_ops->preload(objs);
- if (err)
+ err = bpf_preload_ops->preload(objs);
+ if (err)
+ goto out_put;
+ for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
+ bpf_link_inc(objs[i].link);
+ err = bpf_iter_link_pin_kernel(parent,
+ objs[i].link_name, objs[i].link);
+ if (err) {
+ bpf_link_put(objs[i].link);
goto out_put;
- for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
- links[i] = bpf_link_by_id(objs[i].link_id);
- if (IS_ERR(links[i])) {
- err = PTR_ERR(links[i]);
- goto out_put;
- }
}
- for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
- err = bpf_iter_link_pin_kernel(parent,
- objs[i].link_name, links[i]);
- if (err)
- goto out_put;
- /* do not unlink successfully pinned links even
- * if later link fails to pin
- */
- links[i] = NULL;
- }
- /* finish() will tell UMD process to exit */
- err = bpf_preload_ops->finish();
- if (err)
- goto out_put;
}
out_put:
bpf_preload_mod_put();
out:
mutex_unlock(&bpf_preload_lock);
- for (i = 0; i < BPF_PRELOAD_LINKS && err; i++)
- if (!IS_ERR_OR_NULL(links[i]))
- bpf_link_put(links[i]);
return err;
}
diff --git a/kernel/bpf/link_iter.c b/kernel/bpf/link_iter.c
new file mode 100644
index 000000000000..fec8005a121c
--- /dev/null
+++ b/kernel/bpf/link_iter.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2022 Red Hat, Inc. */
+#include <linux/bpf.h>
+#include <linux/fs.h>
+#include <linux/filter.h>
+#include <linux/kernel.h>
+#include <linux/btf_ids.h>
+
+struct bpf_iter_seq_link_info {
+ u32 link_id;
+};
+
+static void *bpf_link_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct bpf_iter_seq_link_info *info = seq->private;
+ struct bpf_link *link;
+
+ link = bpf_link_get_curr_or_next(&info->link_id);
+ if (!link)
+ return NULL;
+
+ if (*pos == 0)
+ ++*pos;
+ return link;
+}
+
+static void *bpf_link_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_iter_seq_link_info *info = seq->private;
+
+ ++*pos;
+ ++info->link_id;
+ bpf_link_put((struct bpf_link *)v);
+ return bpf_link_get_curr_or_next(&info->link_id);
+}
+
+struct bpf_iter__bpf_link {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct bpf_link *, link);
+};
+
+DEFINE_BPF_ITER_FUNC(bpf_link, struct bpf_iter_meta *meta, struct bpf_link *link)
+
+static int __bpf_link_seq_show(struct seq_file *seq, void *v, bool in_stop)
+{
+ struct bpf_iter__bpf_link ctx;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ int ret = 0;
+
+ ctx.meta = &meta;
+ ctx.link = v;
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, in_stop);
+ if (prog)
+ ret = bpf_iter_run_prog(prog, &ctx);
+
+ return ret;
+}
+
+static int bpf_link_seq_show(struct seq_file *seq, void *v)
+{
+ return __bpf_link_seq_show(seq, v, false);
+}
+
+static void bpf_link_seq_stop(struct seq_file *seq, void *v)
+{
+ if (!v)
+ (void)__bpf_link_seq_show(seq, v, true);
+ else
+ bpf_link_put((struct bpf_link *)v);
+}
+
+static const struct seq_operations bpf_link_seq_ops = {
+ .start = bpf_link_seq_start,
+ .next = bpf_link_seq_next,
+ .stop = bpf_link_seq_stop,
+ .show = bpf_link_seq_show,
+};
+
+BTF_ID_LIST(btf_bpf_link_id)
+BTF_ID(struct, bpf_link)
+
+static const struct bpf_iter_seq_info bpf_link_seq_info = {
+ .seq_ops = &bpf_link_seq_ops,
+ .init_seq_private = NULL,
+ .fini_seq_private = NULL,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_link_info),
+};
+
+static struct bpf_iter_reg bpf_link_reg_info = {
+ .target = "bpf_link",
+ .ctx_arg_info_size = 1,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__bpf_link, link),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+ .seq_info = &bpf_link_seq_info,
+};
+
+static int __init bpf_link_iter_init(void)
+{
+ bpf_link_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_link_id;
+ return bpf_iter_reg_target(&bpf_link_reg_info);
+}
+
+late_initcall(bpf_link_iter_init);
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 23f7f9d08a62..8654fc97f5fe 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -1,4 +1,4 @@
-//SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/bpf_local_storage.h>
@@ -9,6 +9,7 @@
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <uapi/linux/btf.h>
+#include <linux/btf_ids.h>
#ifdef CONFIG_CGROUP_BPF
@@ -446,7 +447,8 @@ static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
rcu_read_unlock();
}
-static int cgroup_storage_map_btf_id;
+BTF_ID_LIST_SINGLE(cgroup_storage_map_btf_ids, struct,
+ bpf_cgroup_storage_map)
const struct bpf_map_ops cgroup_storage_map_ops = {
.map_alloc = cgroup_storage_map_alloc,
.map_free = cgroup_storage_map_free,
@@ -456,8 +458,7 @@ const struct bpf_map_ops cgroup_storage_map_ops = {
.map_delete_elem = cgroup_storage_delete_elem,
.map_check_btf = cgroup_storage_check_btf,
.map_seq_show_elem = cgroup_storage_seq_show_elem,
- .map_btf_name = "bpf_cgroup_storage_map",
- .map_btf_id = &cgroup_storage_map_btf_id,
+ .map_btf_id = &cgroup_storage_map_btf_ids[0],
};
int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *_map)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 5763cc7ac4f1..f0d05a3cc4b9 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -14,6 +14,7 @@
#include <linux/vmalloc.h>
#include <net/ipv6.h>
#include <uapi/linux/btf.h>
+#include <linux/btf_ids.h>
/* Intermediate node */
#define LPM_TREE_NODE_FLAG_IM BIT(0)
@@ -719,7 +720,7 @@ static int trie_check_btf(const struct bpf_map *map,
-EINVAL : 0;
}
-static int trie_map_btf_id;
+BTF_ID_LIST_SINGLE(trie_map_btf_ids, struct, lpm_trie)
const struct bpf_map_ops trie_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc = trie_alloc,
@@ -732,6 +733,5 @@ const struct bpf_map_ops trie_map_ops = {
.map_update_batch = generic_map_update_batch,
.map_delete_batch = generic_map_delete_batch,
.map_check_btf = trie_check_btf,
- .map_btf_name = "lpm_trie",
- .map_btf_id = &trie_map_btf_id,
+ .map_btf_id = &trie_map_btf_ids[0],
};
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 5cd8f5277279..135205d0d560 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -52,6 +52,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
inner_map_meta->max_entries = inner_map->max_entries;
inner_map_meta->spin_lock_off = inner_map->spin_lock_off;
inner_map_meta->timer_off = inner_map->timer_off;
+ inner_map_meta->kptr_off_tab = bpf_map_copy_kptr_off_tab(inner_map);
if (inner_map->btf) {
btf_get(inner_map->btf);
inner_map_meta->btf = inner_map->btf;
@@ -71,6 +72,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
void bpf_map_meta_free(struct bpf_map *map_meta)
{
+ bpf_map_free_kptr_off_tab(map_meta);
btf_put(map_meta->btf);
kfree(map_meta);
}
@@ -83,7 +85,8 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
meta0->key_size == meta1->key_size &&
meta0->value_size == meta1->value_size &&
meta0->timer_off == meta1->timer_off &&
- meta0->map_flags == meta1->map_flags;
+ meta0->map_flags == meta1->map_flags &&
+ bpf_map_equal_kptr_off_tab(meta0, meta1);
}
void *bpf_map_fd_get_ptr(struct bpf_map *map,
diff --git a/kernel/bpf/preload/Kconfig b/kernel/bpf/preload/Kconfig
index 26bced262473..c9d45c9d6918 100644
--- a/kernel/bpf/preload/Kconfig
+++ b/kernel/bpf/preload/Kconfig
@@ -18,10 +18,9 @@ menuconfig BPF_PRELOAD
if BPF_PRELOAD
config BPF_PRELOAD_UMD
- tristate "bpf_preload kernel module with user mode driver"
- depends on CC_CAN_LINK
- depends on m || CC_CAN_LINK_STATIC
+ tristate "bpf_preload kernel module"
default m
help
- This builds bpf_preload kernel module with embedded user mode driver.
+ This builds bpf_preload kernel module with embedded BPF programs for
+ introspection in bpffs.
endif
diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile
index 1400ac58178e..20f89cc0a0a6 100644
--- a/kernel/bpf/preload/Makefile
+++ b/kernel/bpf/preload/Makefile
@@ -1,42 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
-LIBBPF_SRCS = $(srctree)/tools/lib/bpf/
-LIBBPF_OUT = $(abspath $(obj))/libbpf
-LIBBPF_A = $(LIBBPF_OUT)/libbpf.a
-LIBBPF_DESTDIR = $(LIBBPF_OUT)
-LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include
-
-# Although not in use by libbpf's Makefile, set $(O) so that the "dummy" test
-# in tools/scripts/Makefile.include always succeeds when building the kernel
-# with $(O) pointing to a relative path, as in "make O=build bindeb-pkg".
-$(LIBBPF_A): | $(LIBBPF_OUT)
- $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ \
- DESTDIR=$(LIBBPF_DESTDIR) prefix= \
- $(LIBBPF_OUT)/libbpf.a install_headers
-
-libbpf_hdrs: $(LIBBPF_A)
-
-.PHONY: libbpf_hdrs
-
-$(LIBBPF_OUT):
- $(call msg,MKDIR,$@)
- $(Q)mkdir -p $@
-
-userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \
- -I $(LIBBPF_INCLUDE) -Wno-unused-result
-
-userprogs := bpf_preload_umd
-
-clean-files := libbpf/
-
-$(obj)/iterators/iterators.o: | libbpf_hdrs
-
-bpf_preload_umd-objs := iterators/iterators.o
-bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz
-
-$(obj)/bpf_preload_umd: $(LIBBPF_A)
-
-$(obj)/bpf_preload_umd_blob.o: $(obj)/bpf_preload_umd
+LIBBPF_INCLUDE = $(srctree)/tools/lib
obj-$(CONFIG_BPF_PRELOAD_UMD) += bpf_preload.o
-bpf_preload-objs += bpf_preload_kern.o bpf_preload_umd_blob.o
+CFLAGS_bpf_preload_kern.o += -I$(LIBBPF_INCLUDE)
+bpf_preload-objs += bpf_preload_kern.o
diff --git a/kernel/bpf/preload/bpf_preload.h b/kernel/bpf/preload/bpf_preload.h
index 2f9932276f2e..f065c91213a0 100644
--- a/kernel/bpf/preload/bpf_preload.h
+++ b/kernel/bpf/preload/bpf_preload.h
@@ -2,13 +2,13 @@
#ifndef _BPF_PRELOAD_H
#define _BPF_PRELOAD_H
-#include <linux/usermode_driver.h>
-#include "iterators/bpf_preload_common.h"
+struct bpf_preload_info {
+ char link_name[16];
+ struct bpf_link *link;
+};
struct bpf_preload_ops {
- struct umd_info info;
int (*preload)(struct bpf_preload_info *);
- int (*finish)(void);
struct module *owner;
};
extern struct bpf_preload_ops *bpf_preload_ops;
diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c
index 53736e52c1df..5106b5372f0c 100644
--- a/kernel/bpf/preload/bpf_preload_kern.c
+++ b/kernel/bpf/preload/bpf_preload_kern.c
@@ -2,101 +2,87 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/pid.h>
-#include <linux/fs.h>
-#include <linux/sched/signal.h>
#include "bpf_preload.h"
+#include "iterators/iterators.lskel.h"
-extern char bpf_preload_umd_start;
-extern char bpf_preload_umd_end;
+static struct bpf_link *maps_link, *progs_link;
+static struct iterators_bpf *skel;
-static int preload(struct bpf_preload_info *obj);
-static int finish(void);
+static void free_links_and_skel(void)
+{
+ if (!IS_ERR_OR_NULL(maps_link))
+ bpf_link_put(maps_link);
+ if (!IS_ERR_OR_NULL(progs_link))
+ bpf_link_put(progs_link);
+ iterators_bpf__destroy(skel);
+}
+
+static int preload(struct bpf_preload_info *obj)
+{
+ strlcpy(obj[0].link_name, "maps.debug", sizeof(obj[0].link_name));
+ obj[0].link = maps_link;
+ strlcpy(obj[1].link_name, "progs.debug", sizeof(obj[1].link_name));
+ obj[1].link = progs_link;
+ return 0;
+}
-static struct bpf_preload_ops umd_ops = {
- .info.driver_name = "bpf_preload",
+static struct bpf_preload_ops ops = {
.preload = preload,
- .finish = finish,
.owner = THIS_MODULE,
};
-static int preload(struct bpf_preload_info *obj)
+static int load_skel(void)
{
- int magic = BPF_PRELOAD_START;
- loff_t pos = 0;
- int i, err;
- ssize_t n;
+ int err;
- err = fork_usermode_driver(&umd_ops.info);
+ skel = iterators_bpf__open();
+ if (!skel)
+ return -ENOMEM;
+ err = iterators_bpf__load(skel);
if (err)
- return err;
-
- /* send the start magic to let UMD proceed with loading BPF progs */
- n = kernel_write(umd_ops.info.pipe_to_umh,
- &magic, sizeof(magic), &pos);
- if (n != sizeof(magic))
- return -EPIPE;
-
- /* receive bpf_link IDs and names from UMD */
- pos = 0;
- for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
- n = kernel_read(umd_ops.info.pipe_from_umh,
- &obj[i], sizeof(*obj), &pos);
- if (n != sizeof(*obj))
- return -EPIPE;
+ goto out;
+ err = iterators_bpf__attach(skel);
+ if (err)
+ goto out;
+ maps_link = bpf_link_get_from_fd(skel->links.dump_bpf_map_fd);
+ if (IS_ERR(maps_link)) {
+ err = PTR_ERR(maps_link);
+ goto out;
}
- return 0;
-}
-
-static int finish(void)
-{
- int magic = BPF_PRELOAD_END;
- struct pid *tgid;
- loff_t pos = 0;
- ssize_t n;
-
- /* send the last magic to UMD. It will do a normal exit. */
- n = kernel_write(umd_ops.info.pipe_to_umh,
- &magic, sizeof(magic), &pos);
- if (n != sizeof(magic))
- return -EPIPE;
-
- tgid = umd_ops.info.tgid;
- if (tgid) {
- wait_event(tgid->wait_pidfd, thread_group_exited(tgid));
- umd_cleanup_helper(&umd_ops.info);
+ progs_link = bpf_link_get_from_fd(skel->links.dump_bpf_prog_fd);
+ if (IS_ERR(progs_link)) {
+ err = PTR_ERR(progs_link);
+ goto out;
}
+ /* Avoid taking over stdin/stdout/stderr of init process. Zeroing out
+ * makes skel_closenz() a no-op later in iterators_bpf__destroy().
+ */
+ close_fd(skel->links.dump_bpf_map_fd);
+ skel->links.dump_bpf_map_fd = 0;
+ close_fd(skel->links.dump_bpf_prog_fd);
+ skel->links.dump_bpf_prog_fd = 0;
return 0;
+out:
+ free_links_and_skel();
+ return err;
}
-static int __init load_umd(void)
+static int __init load(void)
{
int err;
- err = umd_load_blob(&umd_ops.info, &bpf_preload_umd_start,
- &bpf_preload_umd_end - &bpf_preload_umd_start);
+ err = load_skel();
if (err)
return err;
- bpf_preload_ops = &umd_ops;
+ bpf_preload_ops = &ops;
return err;
}
-static void __exit fini_umd(void)
+static void __exit fini(void)
{
- struct pid *tgid;
-
bpf_preload_ops = NULL;
-
- /* kill UMD in case it's still there due to earlier error */
- tgid = umd_ops.info.tgid;
- if (tgid) {
- kill_pid(tgid, SIGKILL, 1);
-
- wait_event(tgid->wait_pidfd, thread_group_exited(tgid));
- umd_cleanup_helper(&umd_ops.info);
- }
- umd_unload_blob(&umd_ops.info);
+ free_links_and_skel();
}
-late_initcall(load_umd);
-module_exit(fini_umd);
+late_initcall(load);
+module_exit(fini);
MODULE_LICENSE("GPL");
diff --git a/kernel/bpf/preload/bpf_preload_umd_blob.S b/kernel/bpf/preload/bpf_preload_umd_blob.S
deleted file mode 100644
index f1f40223b5c3..000000000000
--- a/kernel/bpf/preload/bpf_preload_umd_blob.S
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
- .section .init.rodata, "a"
- .global bpf_preload_umd_start
-bpf_preload_umd_start:
- .incbin "kernel/bpf/preload/bpf_preload_umd"
- .global bpf_preload_umd_end
-bpf_preload_umd_end:
diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile
index b8bd60511227..bfe24f8c5a20 100644
--- a/kernel/bpf/preload/iterators/Makefile
+++ b/kernel/bpf/preload/iterators/Makefile
@@ -35,15 +35,15 @@ endif
.PHONY: all clean
-all: iterators.skel.h
+all: iterators.lskel.h
clean:
$(call msg,CLEAN)
$(Q)rm -rf $(OUTPUT) iterators
-iterators.skel.h: $(OUTPUT)/iterators.bpf.o | $(BPFTOOL)
+iterators.lskel.h: $(OUTPUT)/iterators.bpf.o | $(BPFTOOL)
$(call msg,GEN-SKEL,$@)
- $(Q)$(BPFTOOL) gen skeleton $< > $@
+ $(Q)$(BPFTOOL) gen skeleton -L $< > $@
$(OUTPUT)/iterators.bpf.o: iterators.bpf.c $(BPFOBJ) | $(OUTPUT)
diff --git a/kernel/bpf/preload/iterators/bpf_preload_common.h b/kernel/bpf/preload/iterators/bpf_preload_common.h
deleted file mode 100644
index 8464d1a48c05..000000000000
--- a/kernel/bpf/preload/iterators/bpf_preload_common.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _BPF_PRELOAD_COMMON_H
-#define _BPF_PRELOAD_COMMON_H
-
-#define BPF_PRELOAD_START 0x5555
-#define BPF_PRELOAD_END 0xAAAA
-
-struct bpf_preload_info {
- char link_name[16];
- int link_id;
-};
-
-#endif
diff --git a/kernel/bpf/preload/iterators/iterators.c b/kernel/bpf/preload/iterators/iterators.c
deleted file mode 100644
index 5d872a705470..000000000000
--- a/kernel/bpf/preload/iterators/iterators.c
+++ /dev/null
@@ -1,94 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2020 Facebook */
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/resource.h>
-#include <bpf/libbpf.h>
-#include <bpf/bpf.h>
-#include <sys/mount.h>
-#include "iterators.skel.h"
-#include "bpf_preload_common.h"
-
-int to_kernel = -1;
-int from_kernel = 0;
-
-static int send_link_to_kernel(struct bpf_link *link, const char *link_name)
-{
- struct bpf_preload_info obj = {};
- struct bpf_link_info info = {};
- __u32 info_len = sizeof(info);
- int err;
-
- err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &info, &info_len);
- if (err)
- return err;
- obj.link_id = info.id;
- if (strlen(link_name) >= sizeof(obj.link_name))
- return -E2BIG;
- strcpy(obj.link_name, link_name);
- if (write(to_kernel, &obj, sizeof(obj)) != sizeof(obj))
- return -EPIPE;
- return 0;
-}
-
-int main(int argc, char **argv)
-{
- struct rlimit rlim = { RLIM_INFINITY, RLIM_INFINITY };
- struct iterators_bpf *skel;
- int err, magic;
- int debug_fd;
-
- debug_fd = open("/dev/console", O_WRONLY | O_NOCTTY | O_CLOEXEC);
- if (debug_fd < 0)
- return 1;
- to_kernel = dup(1);
- close(1);
- dup(debug_fd);
- /* now stdin and stderr point to /dev/console */
-
- read(from_kernel, &magic, sizeof(magic));
- if (magic != BPF_PRELOAD_START) {
- printf("bad start magic %d\n", magic);
- return 1;
- }
- setrlimit(RLIMIT_MEMLOCK, &rlim);
- /* libbpf opens BPF object and loads it into the kernel */
- skel = iterators_bpf__open_and_load();
- if (!skel) {
- /* iterators.skel.h is little endian.
- * libbpf doesn't support automatic little->big conversion
- * of BPF bytecode yet.
- * The program load will fail in such case.
- */
- printf("Failed load could be due to wrong endianness\n");
- return 1;
- }
- err = iterators_bpf__attach(skel);
- if (err)
- goto cleanup;
-
- /* send two bpf_link IDs with names to the kernel */
- err = send_link_to_kernel(skel->links.dump_bpf_map, "maps.debug");
- if (err)
- goto cleanup;
- err = send_link_to_kernel(skel->links.dump_bpf_prog, "progs.debug");
- if (err)
- goto cleanup;
-
- /* The kernel will proceed with pinnging the links in bpffs.
- * UMD will wait on read from pipe.
- */
- read(from_kernel, &magic, sizeof(magic));
- if (magic != BPF_PRELOAD_END) {
- printf("bad final magic %d\n", magic);
- err = -EINVAL;
- }
-cleanup:
- iterators_bpf__destroy(skel);
-
- return err != 0;
-}
diff --git a/kernel/bpf/preload/iterators/iterators.lskel.h b/kernel/bpf/preload/iterators/iterators.lskel.h
new file mode 100644
index 000000000000..70f236a82fe1
--- /dev/null
+++ b/kernel/bpf/preload/iterators/iterators.lskel.h
@@ -0,0 +1,425 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* THIS FILE IS AUTOGENERATED! */
+#ifndef __ITERATORS_BPF_SKEL_H__
+#define __ITERATORS_BPF_SKEL_H__
+
+#include <bpf/skel_internal.h>
+
+struct iterators_bpf {
+ struct bpf_loader_ctx ctx;
+ struct {
+ struct bpf_map_desc rodata;
+ } maps;
+ struct {
+ struct bpf_prog_desc dump_bpf_map;
+ struct bpf_prog_desc dump_bpf_prog;
+ } progs;
+ struct {
+ int dump_bpf_map_fd;
+ int dump_bpf_prog_fd;
+ } links;
+ struct iterators_bpf__rodata {
+ } *rodata;
+};
+
+static inline int
+iterators_bpf__dump_bpf_map__attach(struct iterators_bpf *skel)
+{
+ int prog_fd = skel->progs.dump_bpf_map.prog_fd;
+ int fd = skel_link_create(prog_fd, 0, BPF_TRACE_ITER);
+
+ if (fd > 0)
+ skel->links.dump_bpf_map_fd = fd;
+ return fd;
+}
+
+static inline int
+iterators_bpf__dump_bpf_prog__attach(struct iterators_bpf *skel)
+{
+ int prog_fd = skel->progs.dump_bpf_prog.prog_fd;
+ int fd = skel_link_create(prog_fd, 0, BPF_TRACE_ITER);
+
+ if (fd > 0)
+ skel->links.dump_bpf_prog_fd = fd;
+ return fd;
+}
+
+static inline int
+iterators_bpf__attach(struct iterators_bpf *skel)
+{
+ int ret = 0;
+
+ ret = ret < 0 ? ret : iterators_bpf__dump_bpf_map__attach(skel);
+ ret = ret < 0 ? ret : iterators_bpf__dump_bpf_prog__attach(skel);
+ return ret < 0 ? ret : 0;
+}
+
+static inline void
+iterators_bpf__detach(struct iterators_bpf *skel)
+{
+ skel_closenz(skel->links.dump_bpf_map_fd);
+ skel_closenz(skel->links.dump_bpf_prog_fd);
+}
+static void
+iterators_bpf__destroy(struct iterators_bpf *skel)
+{
+ if (!skel)
+ return;
+ iterators_bpf__detach(skel);
+ skel_closenz(skel->progs.dump_bpf_map.prog_fd);
+ skel_closenz(skel->progs.dump_bpf_prog.prog_fd);
+ skel_free_map_data(skel->rodata, skel->maps.rodata.initial_value, 4096);
+ skel_closenz(skel->maps.rodata.map_fd);
+ skel_free(skel);
+}
+static inline struct iterators_bpf *
+iterators_bpf__open(void)
+{
+ struct iterators_bpf *skel;
+
+ skel = skel_alloc(sizeof(*skel));
+ if (!skel)
+ goto cleanup;
+ skel->ctx.sz = (void *)&skel->links - (void *)skel;
+ skel->rodata = skel_prep_map_data((void *)"\
+\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
+\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x0a\0\x25\x34\x75\x20\
+\x25\x2d\x31\x36\x73\x25\x36\x64\x0a\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\
+\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\
+\x64\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x0a\0", 4096, 98);
+ if (!skel->rodata)
+ goto cleanup;
+ skel->maps.rodata.initial_value = (__u64) (long) skel->rodata;
+ return skel;
+cleanup:
+ iterators_bpf__destroy(skel);
+ return NULL;
+}
+
+static inline int
+iterators_bpf__load(struct iterators_bpf *skel)
+{
+ struct bpf_load_and_run_opts opts = {};
+ int err;
+
+ opts.ctx = (struct bpf_loader_ctx *)skel;
+ opts.data_sz = 6056;
+ opts.data = (void *)"\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x9f\xeb\x01\0\
+\x18\0\0\0\0\0\0\0\x1c\x04\0\0\x1c\x04\0\0\xf9\x04\0\0\0\0\0\0\0\0\0\x02\x02\0\
+\0\0\x01\0\0\0\x02\0\0\x04\x10\0\0\0\x13\0\0\0\x03\0\0\0\0\0\0\0\x18\0\0\0\x04\
+\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x08\0\0\0\0\0\0\0\0\0\0\x02\x0d\0\0\0\0\0\0\
+\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x01\0\0\0\x20\0\0\0\0\0\0\x01\x04\0\0\0\x20\
+\0\0\x01\x24\0\0\0\x01\0\0\x0c\x05\0\0\0\xa3\0\0\0\x03\0\0\x04\x18\0\0\0\xb1\0\
+\0\0\x09\0\0\0\0\0\0\0\xb5\0\0\0\x0b\0\0\0\x40\0\0\0\xc0\0\0\0\x0b\0\0\0\x80\0\
+\0\0\0\0\0\0\0\0\0\x02\x0a\0\0\0\xc8\0\0\0\0\0\0\x07\0\0\0\0\xd1\0\0\0\0\0\0\
+\x08\x0c\0\0\0\xd7\0\0\0\0\0\0\x01\x08\0\0\0\x40\0\0\0\x94\x01\0\0\x03\0\0\x04\
+\x18\0\0\0\x9c\x01\0\0\x0e\0\0\0\0\0\0\0\x9f\x01\0\0\x11\0\0\0\x20\0\0\0\xa4\
+\x01\0\0\x0e\0\0\0\xa0\0\0\0\xb0\x01\0\0\0\0\0\x08\x0f\0\0\0\xb6\x01\0\0\0\0\0\
+\x01\x04\0\0\0\x20\0\0\0\xc3\x01\0\0\0\0\0\x01\x01\0\0\0\x08\0\0\x01\0\0\0\0\0\
+\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x10\0\0\0\xc8\x01\0\0\0\0\0\x01\x04\0\0\0\
+\x20\0\0\0\0\0\0\0\0\0\0\x02\x14\0\0\0\x2c\x02\0\0\x02\0\0\x04\x10\0\0\0\x13\0\
+\0\0\x03\0\0\0\0\0\0\0\x3f\x02\0\0\x15\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x18\0\
+\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x13\0\0\0\x44\x02\0\0\x01\0\0\x0c\
+\x16\0\0\0\x90\x02\0\0\x01\0\0\x04\x08\0\0\0\x99\x02\0\0\x19\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\x02\x1a\0\0\0\xea\x02\0\0\x06\0\0\x04\x38\0\0\0\x9c\x01\0\0\x0e\0\0\
+\0\0\0\0\0\x9f\x01\0\0\x11\0\0\0\x20\0\0\0\xf7\x02\0\0\x1b\0\0\0\xc0\0\0\0\x08\
+\x03\0\0\x15\0\0\0\0\x01\0\0\x11\x03\0\0\x1d\0\0\0\x40\x01\0\0\x1b\x03\0\0\x1e\
+\0\0\0\x80\x01\0\0\0\0\0\0\0\0\0\x02\x1c\0\0\0\0\0\0\0\0\0\0\x0a\x10\0\0\0\0\0\
+\0\0\0\0\0\x02\x1f\0\0\0\0\0\0\0\0\0\0\x02\x20\0\0\0\x65\x03\0\0\x02\0\0\x04\
+\x08\0\0\0\x73\x03\0\0\x0e\0\0\0\0\0\0\0\x7c\x03\0\0\x0e\0\0\0\x20\0\0\0\x1b\
+\x03\0\0\x03\0\0\x04\x18\0\0\0\x86\x03\0\0\x1b\0\0\0\0\0\0\0\x8e\x03\0\0\x21\0\
+\0\0\x40\0\0\0\x94\x03\0\0\x23\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x22\0\0\0\0\0\
+\0\0\0\0\0\x02\x24\0\0\0\x98\x03\0\0\x01\0\0\x04\x04\0\0\0\xa3\x03\0\0\x0e\0\0\
+\0\0\0\0\0\x0c\x04\0\0\x01\0\0\x04\x04\0\0\0\x15\x04\0\0\x0e\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x23\0\0\0\x8b\x04\0\0\0\0\0\x0e\x25\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x0e\0\0\0\x9f\x04\
+\0\0\0\0\0\x0e\x27\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\
+\x20\0\0\0\xb5\x04\0\0\0\0\0\x0e\x29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\
+\x1c\0\0\0\x12\0\0\0\x11\0\0\0\xca\x04\0\0\0\0\0\x0e\x2b\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x04\0\0\0\xe1\x04\0\0\0\0\0\x0e\x2d\0\0\
+\0\x01\0\0\0\xe9\x04\0\0\x04\0\0\x0f\x62\0\0\0\x26\0\0\0\0\0\0\0\x23\0\0\0\x28\
+\0\0\0\x23\0\0\0\x0e\0\0\0\x2a\0\0\0\x31\0\0\0\x20\0\0\0\x2c\0\0\0\x51\0\0\0\
+\x11\0\0\0\xf1\x04\0\0\x01\0\0\x0f\x04\0\0\0\x2e\0\0\0\0\0\0\0\x04\0\0\0\0\x62\
+\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x6d\x65\x74\
+\x61\0\x6d\x61\x70\0\x63\x74\x78\0\x69\x6e\x74\0\x64\x75\x6d\x70\x5f\x62\x70\
+\x66\x5f\x6d\x61\x70\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\x30\
+\x3a\x30\0\x2f\x77\x2f\x6e\x65\x74\x2d\x6e\x65\x78\x74\x2f\x6b\x65\x72\x6e\x65\
+\x6c\x2f\x62\x70\x66\x2f\x70\x72\x65\x6c\x6f\x61\x64\x2f\x69\x74\x65\x72\x61\
+\x74\x6f\x72\x73\x2f\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\
+\x63\0\x09\x73\x74\x72\x75\x63\x74\x20\x73\x65\x71\x5f\x66\x69\x6c\x65\x20\x2a\
+\x73\x65\x71\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\
+\x71\x3b\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x6d\x65\x74\x61\0\x73\x65\x71\0\
+\x73\x65\x73\x73\x69\x6f\x6e\x5f\x69\x64\0\x73\x65\x71\x5f\x6e\x75\x6d\0\x73\
+\x65\x71\x5f\x66\x69\x6c\x65\0\x5f\x5f\x75\x36\x34\0\x75\x6e\x73\x69\x67\x6e\
+\x65\x64\x20\x6c\x6f\x6e\x67\x20\x6c\x6f\x6e\x67\0\x30\x3a\x31\0\x09\x73\x74\
+\x72\x75\x63\x74\x20\x62\x70\x66\x5f\x6d\x61\x70\x20\x2a\x6d\x61\x70\x20\x3d\
+\x20\x63\x74\x78\x2d\x3e\x6d\x61\x70\x3b\0\x09\x69\x66\x20\x28\x21\x6d\x61\x70\
+\x29\0\x09\x5f\x5f\x75\x36\x34\x20\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x20\x63\
+\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x5f\x6e\x75\x6d\x3b\0\x30\
+\x3a\x32\0\x09\x69\x66\x20\x28\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x3d\x20\x30\
+\x29\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\
+\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\
+\x20\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\
+\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x6d\x61\x70\0\x69\x64\0\x6e\x61\x6d\x65\
+\0\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\0\x5f\x5f\x75\x33\x32\0\x75\x6e\
+\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x63\x68\x61\x72\0\x5f\x5f\x41\x52\
+\x52\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\x5f\0\x09\x42\x50\x46\
+\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\
+\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x5c\x6e\x22\x2c\x20\x6d\x61\x70\
+\x2d\x3e\x69\x64\x2c\x20\x6d\x61\x70\x2d\x3e\x6e\x61\x6d\x65\x2c\x20\x6d\x61\
+\x70\x2d\x3e\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x29\x3b\0\x7d\0\x62\
+\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x70\x72\
+\x6f\x67\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x69\x74\x65\
+\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x09\x73\x74\x72\x75\x63\x74\x20\x62\
+\x70\x66\x5f\x70\x72\x6f\x67\x20\x2a\x70\x72\x6f\x67\x20\x3d\x20\x63\x74\x78\
+\x2d\x3e\x70\x72\x6f\x67\x3b\0\x09\x69\x66\x20\x28\x21\x70\x72\x6f\x67\x29\0\
+\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x61\x75\x78\0\x09\x61\x75\x78\x20\x3d\x20\
+\x70\x72\x6f\x67\x2d\x3e\x61\x75\x78\x3b\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\
+\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\
+\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\
+\x74\x61\x63\x68\x65\x64\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x70\x72\x6f\x67\
+\x5f\x61\x75\x78\0\x61\x74\x74\x61\x63\x68\x5f\x66\x75\x6e\x63\x5f\x6e\x61\x6d\
+\x65\0\x64\x73\x74\x5f\x70\x72\x6f\x67\0\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\
+\x62\x74\x66\0\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\
+\x73\x65\x71\x2c\x20\x22\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\
+\x25\x73\x5c\x6e\x22\x2c\x20\x61\x75\x78\x2d\x3e\x69\x64\x2c\0\x30\x3a\x34\0\
+\x30\x3a\x35\0\x09\x69\x66\x20\x28\x21\x62\x74\x66\x29\0\x62\x70\x66\x5f\x66\
+\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x69\x6e\x73\x6e\x5f\x6f\x66\x66\0\x74\x79\
+\x70\x65\x5f\x69\x64\0\x30\0\x73\x74\x72\x69\x6e\x67\x73\0\x74\x79\x70\x65\x73\
+\0\x68\x64\x72\0\x62\x74\x66\x5f\x68\x65\x61\x64\x65\x72\0\x73\x74\x72\x5f\x6c\
+\x65\x6e\0\x09\x74\x79\x70\x65\x73\x20\x3d\x20\x62\x74\x66\x2d\x3e\x74\x79\x70\
+\x65\x73\x3b\0\x09\x62\x70\x66\x5f\x70\x72\x6f\x62\x65\x5f\x72\x65\x61\x64\x5f\
+\x6b\x65\x72\x6e\x65\x6c\x28\x26\x74\x2c\x20\x73\x69\x7a\x65\x6f\x66\x28\x74\
+\x29\x2c\x20\x74\x79\x70\x65\x73\x20\x2b\x20\x62\x74\x66\x5f\x69\x64\x29\x3b\0\
+\x09\x73\x74\x72\x20\x3d\x20\x62\x74\x66\x2d\x3e\x73\x74\x72\x69\x6e\x67\x73\
+\x3b\0\x62\x74\x66\x5f\x74\x79\x70\x65\0\x6e\x61\x6d\x65\x5f\x6f\x66\x66\0\x09\
+\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3d\x20\x42\x50\x46\x5f\x43\x4f\x52\x45\
+\x5f\x52\x45\x41\x44\x28\x74\x2c\x20\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x29\x3b\0\
+\x30\x3a\x32\x3a\x30\0\x09\x69\x66\x20\x28\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\
+\x3e\x3d\x20\x62\x74\x66\x2d\x3e\x68\x64\x72\x2e\x73\x74\x72\x5f\x6c\x65\x6e\
+\x29\0\x09\x72\x65\x74\x75\x72\x6e\x20\x73\x74\x72\x20\x2b\x20\x6e\x61\x6d\x65\
+\x5f\x6f\x66\x66\x3b\0\x30\x3a\x33\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
+\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
+\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\x64\x75\x6d\x70\x5f\x62\x70\x66\
+\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\
+\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x4c\x49\x43\x45\
+\x4e\x53\x45\0\x2e\x72\x6f\x64\x61\x74\x61\0\x6c\x69\x63\x65\x6e\x73\x65\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x2d\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x02\0\0\
+\0\x04\0\0\0\x62\0\0\0\x01\0\0\0\x80\x04\0\0\0\0\0\0\0\0\0\0\x69\x74\x65\x72\
+\x61\x74\x6f\x72\x2e\x72\x6f\x64\x61\x74\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\x2f\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\
+\x20\x20\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\
+\x73\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x0a\0\x20\x20\x69\
+\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
+\x61\x74\x74\x61\x63\x68\x65\x64\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\
+\x25\x73\x20\x25\x73\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x47\x50\x4c\0\0\0\0\0\
+\x79\x12\0\0\0\0\0\0\x79\x26\0\0\0\0\0\0\x79\x17\x08\0\0\0\0\0\x15\x07\x1b\0\0\
+\0\0\0\x79\x11\0\0\0\0\0\0\x79\x11\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\
+\0\0\0\0\0\0\x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\x23\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\
+\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\xb7\x01\0\0\x04\0\0\0\xbf\x72\0\
+\0\0\0\0\0\x0f\x12\0\0\0\0\0\0\x7b\x2a\xf0\xff\0\0\0\0\x61\x71\x14\0\0\0\0\0\
+\x7b\x1a\xf8\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xe8\xff\xff\xff\xbf\
+\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x23\0\0\0\xb7\x03\0\0\x0e\0\0\0\
+\xb7\x05\0\0\x18\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\
+\0\0\0\0\x07\0\0\0\0\0\0\0\x42\0\0\0\x7b\0\0\0\x1e\x3c\x01\0\x01\0\0\0\x42\0\0\
+\0\x7b\0\0\0\x24\x3c\x01\0\x02\0\0\0\x42\0\0\0\xee\0\0\0\x1d\x44\x01\0\x03\0\0\
+\0\x42\0\0\0\x0f\x01\0\0\x06\x4c\x01\0\x04\0\0\0\x42\0\0\0\x1a\x01\0\0\x17\x40\
+\x01\0\x05\0\0\0\x42\0\0\0\x1a\x01\0\0\x1d\x40\x01\0\x06\0\0\0\x42\0\0\0\x43\
+\x01\0\0\x06\x58\x01\0\x08\0\0\0\x42\0\0\0\x56\x01\0\0\x03\x5c\x01\0\x0f\0\0\0\
+\x42\0\0\0\xdc\x01\0\0\x02\x64\x01\0\x1f\0\0\0\x42\0\0\0\x2a\x02\0\0\x01\x6c\
+\x01\0\0\0\0\0\x02\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\0\0\
+\0\x10\0\0\0\x02\0\0\0\xea\0\0\0\0\0\0\0\x20\0\0\0\x02\0\0\0\x3e\0\0\0\0\0\0\0\
+\x28\0\0\0\x08\0\0\0\x3f\x01\0\0\0\0\0\0\x78\0\0\0\x0d\0\0\0\x3e\0\0\0\0\0\0\0\
+\x88\0\0\0\x0d\0\0\0\xea\0\0\0\0\0\0\0\xa8\0\0\0\x0d\0\0\0\x3f\x01\0\0\0\0\0\0\
+\x1a\0\0\0\x21\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\0\0\0\
+\0\0\0\0\x1c\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x10\0\0\0\0\0\0\
+\0\0\0\0\0\x0a\0\0\0\x01\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\x10\0\0\0\0\0\0\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x62\x70\x66\x5f\x6d\
+\x61\x70\0\0\0\0\0\0\0\0\x47\x50\x4c\0\0\0\0\0\x79\x12\0\0\0\0\0\0\x79\x26\0\0\
+\0\0\0\0\x79\x12\x08\0\0\0\0\0\x15\x02\x3c\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x79\
+\x27\0\0\0\0\0\0\x79\x11\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\
+\0\x07\x04\0\0\xd0\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\
+\x31\0\0\0\xb7\x03\0\0\x20\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x7b\
+\x6a\xc8\xff\0\0\0\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xb7\x03\0\0\
+\x04\0\0\0\xbf\x79\0\0\0\0\0\0\x0f\x39\0\0\0\0\0\0\x79\x71\x28\0\0\0\0\0\x79\
+\x78\x30\0\0\0\0\0\x15\x08\x18\0\0\0\0\0\xb7\x02\0\0\0\0\0\0\x0f\x21\0\0\0\0\0\
+\0\x61\x11\x04\0\0\0\0\0\x79\x83\x08\0\0\0\0\0\x67\x01\0\0\x03\0\0\0\x0f\x13\0\
+\0\0\0\0\0\x79\x86\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf8\xff\xff\xff\
+\xb7\x02\0\0\x08\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x01\0\0\0\0\0\0\x79\xa3\xf8\xff\
+\0\0\0\0\x0f\x13\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf4\xff\xff\xff\
+\xb7\x02\0\0\x04\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x03\0\0\x04\0\0\0\x61\xa1\xf4\
+\xff\0\0\0\0\x61\x82\x10\0\0\0\0\0\x3d\x21\x02\0\0\0\0\0\x0f\x16\0\0\0\0\0\0\
+\xbf\x69\0\0\0\0\0\0\x7b\x9a\xd8\xff\0\0\0\0\x79\x71\x18\0\0\0\0\0\x7b\x1a\xe0\
+\xff\0\0\0\0\x79\x71\x20\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x0f\x31\0\0\0\0\0\0\x7b\
+\x1a\xe8\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xd0\xff\xff\xff\x79\xa1\
+\xc8\xff\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x51\0\0\0\xb7\x03\0\0\x11\0\0\0\
+\xb7\x05\0\0\x20\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\
+\0\0\0\0\x17\0\0\0\0\0\0\0\x42\0\0\0\x7b\0\0\0\x1e\x80\x01\0\x01\0\0\0\x42\0\0\
+\0\x7b\0\0\0\x24\x80\x01\0\x02\0\0\0\x42\0\0\0\x60\x02\0\0\x1f\x88\x01\0\x03\0\
+\0\0\x42\0\0\0\x84\x02\0\0\x06\x94\x01\0\x04\0\0\0\x42\0\0\0\x1a\x01\0\0\x17\
+\x84\x01\0\x05\0\0\0\x42\0\0\0\x9d\x02\0\0\x0e\xa0\x01\0\x06\0\0\0\x42\0\0\0\
+\x1a\x01\0\0\x1d\x84\x01\0\x07\0\0\0\x42\0\0\0\x43\x01\0\0\x06\xa4\x01\0\x09\0\
+\0\0\x42\0\0\0\xaf\x02\0\0\x03\xa8\x01\0\x11\0\0\0\x42\0\0\0\x1f\x03\0\0\x02\
+\xb0\x01\0\x18\0\0\0\x42\0\0\0\x5a\x03\0\0\x06\x04\x01\0\x1b\0\0\0\x42\0\0\0\0\
+\0\0\0\0\0\0\0\x1c\0\0\0\x42\0\0\0\xab\x03\0\0\x0f\x10\x01\0\x1d\0\0\0\x42\0\0\
+\0\xc0\x03\0\0\x2d\x14\x01\0\x1f\0\0\0\x42\0\0\0\xf7\x03\0\0\x0d\x0c\x01\0\x21\
+\0\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x22\0\0\0\x42\0\0\0\xc0\x03\0\0\x02\x14\x01\0\
+\x25\0\0\0\x42\0\0\0\x1e\x04\0\0\x0d\x18\x01\0\x28\0\0\0\x42\0\0\0\0\0\0\0\0\0\
+\0\0\x29\0\0\0\x42\0\0\0\x1e\x04\0\0\x0d\x18\x01\0\x2c\0\0\0\x42\0\0\0\x1e\x04\
+\0\0\x0d\x18\x01\0\x2d\0\0\0\x42\0\0\0\x4c\x04\0\0\x1b\x1c\x01\0\x2e\0\0\0\x42\
+\0\0\0\x4c\x04\0\0\x06\x1c\x01\0\x2f\0\0\0\x42\0\0\0\x6f\x04\0\0\x0d\x24\x01\0\
+\x31\0\0\0\x42\0\0\0\x1f\x03\0\0\x02\xb0\x01\0\x40\0\0\0\x42\0\0\0\x2a\x02\0\0\
+\x01\xc0\x01\0\0\0\0\0\x14\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\
+\0\0\0\0\0\x10\0\0\0\x14\0\0\0\xea\0\0\0\0\0\0\0\x20\0\0\0\x14\0\0\0\x3e\0\0\0\
+\0\0\0\0\x28\0\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\x30\0\0\0\x08\0\0\0\x3f\x01\0\0\
+\0\0\0\0\x88\0\0\0\x1a\0\0\0\x3e\0\0\0\0\0\0\0\x98\0\0\0\x1a\0\0\0\xea\0\0\0\0\
+\0\0\0\xb0\0\0\0\x1a\0\0\0\x52\x03\0\0\0\0\0\0\xb8\0\0\0\x1a\0\0\0\x56\x03\0\0\
+\0\0\0\0\xc8\0\0\0\x1f\0\0\0\x84\x03\0\0\0\0\0\0\xe0\0\0\0\x20\0\0\0\xea\0\0\0\
+\0\0\0\0\xf8\0\0\0\x20\0\0\0\x3e\0\0\0\0\0\0\0\x20\x01\0\0\x24\0\0\0\x3e\0\0\0\
+\0\0\0\0\x58\x01\0\0\x1a\0\0\0\xea\0\0\0\0\0\0\0\x68\x01\0\0\x20\0\0\0\x46\x04\
+\0\0\0\0\0\0\x90\x01\0\0\x1a\0\0\0\x3f\x01\0\0\0\0\0\0\xa0\x01\0\0\x1a\0\0\0\
+\x87\x04\0\0\0\0\0\0\xa8\x01\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\x1a\0\0\0\x42\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\0\0\0\0\0\0\x1c\0\0\
+\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x10\0\0\0\0\0\0\0\0\0\0\0\x1a\0\
+\0\0\x01\0\0\0\0\0\0\0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x10\0\0\0\0\0\
+\0\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\0\0\
+\0\0\0\0";
+ opts.insns_sz = 2216;
+ opts.insns = (void *)"\
+\xbf\x16\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x78\xff\xff\xff\xb7\x02\0\
+\0\x88\0\0\0\xb7\x03\0\0\0\0\0\0\x85\0\0\0\x71\0\0\0\x05\0\x14\0\0\0\0\0\x61\
+\xa1\x78\xff\0\0\0\0\xd5\x01\x01\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa1\x7c\xff\
+\0\0\0\0\xd5\x01\x01\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa1\x80\xff\0\0\0\0\xd5\
+\x01\x01\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa1\x84\xff\0\0\0\0\xd5\x01\x01\0\0\
+\0\0\0\x85\0\0\0\xa8\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x61\x01\0\0\0\0\
+\0\0\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xbf\x70\0\0\
+\0\0\0\0\x95\0\0\0\0\0\0\0\x61\x60\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\
+\x48\x0e\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\
+\0\0\x44\x0e\0\0\x63\x01\0\0\0\0\0\0\x79\x60\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\
+\0\0\0\0\x38\x0e\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x05\0\0\
+\x18\x61\0\0\0\0\0\0\0\0\0\0\x30\x0e\0\0\x7b\x01\0\0\0\0\0\0\xb7\x01\0\0\x12\0\
+\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x30\x0e\0\0\xb7\x03\0\0\x1c\0\0\0\x85\0\0\0\
+\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\xd4\xff\0\0\0\0\x63\x7a\x78\xff\0\0\0\0\
+\x61\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x80\x0e\0\0\x63\x01\0\0\0\
+\0\0\0\x61\x60\x1c\0\0\0\0\0\x15\0\x03\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\
+\x5c\x0e\0\0\x63\x01\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\
+\0\x50\x0e\0\0\xb7\x03\0\0\x48\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\
+\xc5\x07\xc3\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x63\x71\0\0\0\0\0\
+\0\x79\x63\x20\0\0\0\0\0\x15\x03\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x98\
+\x0e\0\0\xb7\x02\0\0\x62\0\0\0\x61\x60\x04\0\0\0\0\0\x45\0\x02\0\x01\0\0\0\x85\
+\0\0\0\x94\0\0\0\x05\0\x01\0\0\0\0\0\x85\0\0\0\x71\0\0\0\x18\x62\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x08\x0f\0\0\x63\
+\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x0f\0\0\x18\x61\0\0\0\0\0\0\0\0\
+\0\0\x10\x0f\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x98\x0e\0\0\
+\x18\x61\0\0\0\0\0\0\0\0\0\0\x18\x0f\0\0\x7b\x01\0\0\0\0\0\0\xb7\x01\0\0\x02\0\
+\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x08\x0f\0\0\xb7\x03\0\0\x20\0\0\0\x85\0\0\0\
+\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\x9f\xff\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x28\x0f\0\0\x63\
+\x01\0\0\0\0\0\0\xb7\x01\0\0\x16\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x28\x0f\0\0\
+\xb7\x03\0\0\x04\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\x92\xff\
+\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x30\x0f\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\
+\x78\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x38\x0f\0\0\x18\
+\x61\0\0\0\0\0\0\0\0\0\0\x70\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\
+\0\0\0\x40\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb8\x11\0\0\x7b\x01\0\0\0\0\0\0\
+\x18\x60\0\0\0\0\0\0\0\0\0\0\x48\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xc8\x11\0\
+\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xe8\x10\0\0\x18\x61\0\0\0\0\
+\0\0\0\0\0\0\xe8\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xe0\x11\0\0\x7b\x01\0\0\0\0\0\0\x61\x60\x08\0\0\
+\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x80\x11\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\
+\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x84\x11\0\0\x63\x01\0\0\0\0\0\0\x79\x60\
+\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x88\x11\0\0\x7b\x01\0\0\0\0\0\0\x61\
+\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb0\x11\0\0\x63\x01\0\0\0\0\0\
+\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xf8\x11\0\0\xb7\x02\0\0\x11\0\0\0\xb7\x03\0\0\
+\x0c\0\0\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\
+\x5c\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x68\x11\0\0\x63\x70\x6c\0\0\0\0\0\
+\x77\x07\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\0\x05\0\0\0\x18\x62\0\0\
+\0\0\0\0\0\0\0\0\x68\x11\0\0\xb7\x03\0\0\x8c\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\
+\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xd8\x11\0\0\x61\x01\0\0\0\0\0\0\xd5\
+\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\x07\x4a\xff\0\0\
+\0\0\x63\x7a\x80\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x10\x12\0\0\x18\x61\0\
+\0\0\0\0\0\0\0\0\0\x10\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\
+\x18\x12\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x08\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\
+\x60\0\0\0\0\0\0\0\0\0\0\x28\x14\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x50\x17\0\0\
+\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x30\x14\0\0\x18\x61\0\0\0\0\0\
+\0\0\0\0\0\x60\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xd0\x15\
+\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x80\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x78\x17\0\0\x7b\x01\0\0\0\0\
+\0\0\x61\x60\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x18\x17\0\0\x63\x01\0\0\
+\0\0\0\0\x61\x60\x0c\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x1c\x17\0\0\x63\x01\
+\0\0\0\0\0\0\x79\x60\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x20\x17\0\0\x7b\
+\x01\0\0\0\0\0\0\x61\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x48\x17\0\
+\0\x63\x01\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x90\x17\0\0\xb7\x02\0\0\x12\
+\0\0\0\xb7\x03\0\0\x0c\0\0\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\
+\0\0\0\0\0\xc5\x07\x13\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\x63\
+\x70\x6c\0\0\0\0\0\x77\x07\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\0\x05\
+\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\xb7\x03\0\0\x8c\0\0\0\x85\0\0\0\
+\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x70\x17\0\0\x61\x01\
+\0\0\0\0\0\0\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\
+\x07\x01\xff\0\0\0\0\x63\x7a\x84\xff\0\0\0\0\x61\xa1\x78\xff\0\0\0\0\xd5\x01\
+\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa0\x80\xff\0\0\0\0\
+\x63\x06\x28\0\0\0\0\0\x61\xa0\x84\xff\0\0\0\0\x63\x06\x2c\0\0\0\0\0\x18\x61\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\x61\x10\0\0\0\0\0\0\x63\x06\x18\0\0\0\0\0\xb7\0\0\0\
+\0\0\0\0\x95\0\0\0\0\0\0\0";
+ err = bpf_load_and_run(&opts);
+ if (err < 0)
+ return err;
+ skel->rodata = skel_finalize_map_data(&skel->maps.rodata.initial_value,
+ 4096, PROT_READ, skel->maps.rodata.map_fd);
+ if (!skel->rodata)
+ return -ENOMEM;
+ return 0;
+}
+
+static inline struct iterators_bpf *
+iterators_bpf__open_and_load(void)
+{
+ struct iterators_bpf *skel;
+
+ skel = iterators_bpf__open();
+ if (!skel)
+ return NULL;
+ if (iterators_bpf__load(skel)) {
+ iterators_bpf__destroy(skel);
+ return NULL;
+ }
+ return skel;
+}
+
+#endif /* __ITERATORS_BPF_SKEL_H__ */
diff --git a/kernel/bpf/preload/iterators/iterators.skel.h b/kernel/bpf/preload/iterators/iterators.skel.h
deleted file mode 100644
index cf9a6a94b3a4..000000000000
--- a/kernel/bpf/preload/iterators/iterators.skel.h
+++ /dev/null
@@ -1,412 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-
-/* THIS FILE IS AUTOGENERATED! */
-#ifndef __ITERATORS_BPF_SKEL_H__
-#define __ITERATORS_BPF_SKEL_H__
-
-#include <stdlib.h>
-#include <bpf/libbpf.h>
-
-struct iterators_bpf {
- struct bpf_object_skeleton *skeleton;
- struct bpf_object *obj;
- struct {
- struct bpf_map *rodata;
- } maps;
- struct {
- struct bpf_program *dump_bpf_map;
- struct bpf_program *dump_bpf_prog;
- } progs;
- struct {
- struct bpf_link *dump_bpf_map;
- struct bpf_link *dump_bpf_prog;
- } links;
- struct iterators_bpf__rodata {
- char dump_bpf_map____fmt[35];
- char dump_bpf_map____fmt_1[14];
- char dump_bpf_prog____fmt[32];
- char dump_bpf_prog____fmt_2[17];
- } *rodata;
-};
-
-static void
-iterators_bpf__destroy(struct iterators_bpf *obj)
-{
- if (!obj)
- return;
- if (obj->skeleton)
- bpf_object__destroy_skeleton(obj->skeleton);
- free(obj);
-}
-
-static inline int
-iterators_bpf__create_skeleton(struct iterators_bpf *obj);
-
-static inline struct iterators_bpf *
-iterators_bpf__open_opts(const struct bpf_object_open_opts *opts)
-{
- struct iterators_bpf *obj;
-
- obj = (struct iterators_bpf *)calloc(1, sizeof(*obj));
- if (!obj)
- return NULL;
- if (iterators_bpf__create_skeleton(obj))
- goto err;
- if (bpf_object__open_skeleton(obj->skeleton, opts))
- goto err;
-
- return obj;
-err:
- iterators_bpf__destroy(obj);
- return NULL;
-}
-
-static inline struct iterators_bpf *
-iterators_bpf__open(void)
-{
- return iterators_bpf__open_opts(NULL);
-}
-
-static inline int
-iterators_bpf__load(struct iterators_bpf *obj)
-{
- return bpf_object__load_skeleton(obj->skeleton);
-}
-
-static inline struct iterators_bpf *
-iterators_bpf__open_and_load(void)
-{
- struct iterators_bpf *obj;
-
- obj = iterators_bpf__open();
- if (!obj)
- return NULL;
- if (iterators_bpf__load(obj)) {
- iterators_bpf__destroy(obj);
- return NULL;
- }
- return obj;
-}
-
-static inline int
-iterators_bpf__attach(struct iterators_bpf *obj)
-{
- return bpf_object__attach_skeleton(obj->skeleton);
-}
-
-static inline void
-iterators_bpf__detach(struct iterators_bpf *obj)
-{
- return bpf_object__detach_skeleton(obj->skeleton);
-}
-
-static inline int
-iterators_bpf__create_skeleton(struct iterators_bpf *obj)
-{
- struct bpf_object_skeleton *s;
-
- s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));
- if (!s)
- return -1;
- obj->skeleton = s;
-
- s->sz = sizeof(*s);
- s->name = "iterators_bpf";
- s->obj = &obj->obj;
-
- /* maps */
- s->map_cnt = 1;
- s->map_skel_sz = sizeof(*s->maps);
- s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);
- if (!s->maps)
- goto err;
-
- s->maps[0].name = "iterator.rodata";
- s->maps[0].map = &obj->maps.rodata;
- s->maps[0].mmaped = (void **)&obj->rodata;
-
- /* programs */
- s->prog_cnt = 2;
- s->prog_skel_sz = sizeof(*s->progs);
- s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);
- if (!s->progs)
- goto err;
-
- s->progs[0].name = "dump_bpf_map";
- s->progs[0].prog = &obj->progs.dump_bpf_map;
- s->progs[0].link = &obj->links.dump_bpf_map;
-
- s->progs[1].name = "dump_bpf_prog";
- s->progs[1].prog = &obj->progs.dump_bpf_prog;
- s->progs[1].link = &obj->links.dump_bpf_prog;
-
- s->data_sz = 7176;
- s->data = (void *)"\
-\x7f\x45\x4c\x46\x02\x01\x01\0\0\0\0\0\0\0\0\0\x01\0\xf7\0\x01\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\x48\x18\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0f\0\
-\x0e\0\x79\x12\0\0\0\0\0\0\x79\x26\0\0\0\0\0\0\x79\x17\x08\0\0\0\0\0\x15\x07\
-\x1a\0\0\0\0\0\x79\x21\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\0\
-\x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x02\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\xb7\x03\0\0\x23\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x61\x71\0\
-\0\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\xb7\x01\0\0\x04\0\0\0\xbf\x72\0\0\0\0\0\0\
-\x0f\x12\0\0\0\0\0\0\x7b\x2a\xf0\xff\0\0\0\0\x61\x71\x14\0\0\0\0\0\x7b\x1a\xf8\
-\xff\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xe8\xff\xff\xff\xbf\x61\0\0\0\0\0\
-\0\x18\x02\0\0\x23\0\0\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\x0e\0\0\0\xb7\x05\0\0\x18\
-\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\x79\x12\0\0\0\0\
-\0\0\x79\x26\0\0\0\0\0\0\x79\x11\x08\0\0\0\0\0\x15\x01\x3b\0\0\0\0\0\x79\x17\0\
-\0\0\0\0\0\x79\x21\x10\0\0\0\0\0\x55\x01\x08\0\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\
-\x04\0\0\xd0\xff\xff\xff\xbf\x61\0\0\0\0\0\0\x18\x02\0\0\x31\0\0\0\0\0\0\0\0\0\
-\0\0\xb7\x03\0\0\x20\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x7e\0\0\0\x7b\x6a\xc8\
-\xff\0\0\0\0\x61\x71\0\0\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xb7\x03\0\0\x04\0\0\0\
-\xbf\x79\0\0\0\0\0\0\x0f\x39\0\0\0\0\0\0\x79\x71\x28\0\0\0\0\0\x79\x78\x30\0\0\
-\0\0\0\x15\x08\x18\0\0\0\0\0\xb7\x02\0\0\0\0\0\0\x0f\x21\0\0\0\0\0\0\x61\x11\
-\x04\0\0\0\0\0\x79\x83\x08\0\0\0\0\0\x67\x01\0\0\x03\0\0\0\x0f\x13\0\0\0\0\0\0\
-\x79\x86\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf8\xff\xff\xff\xb7\x02\0\
-\0\x08\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x01\0\0\0\0\0\0\x79\xa3\xf8\xff\0\0\0\0\
-\x0f\x13\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\xf4\xff\xff\xff\xb7\x02\0\
-\0\x04\0\0\0\x85\0\0\0\x71\0\0\0\xb7\x03\0\0\x04\0\0\0\x61\xa1\xf4\xff\0\0\0\0\
-\x61\x82\x10\0\0\0\0\0\x3d\x21\x02\0\0\0\0\0\x0f\x16\0\0\0\0\0\0\xbf\x69\0\0\0\
-\0\0\0\x7b\x9a\xd8\xff\0\0\0\0\x79\x71\x18\0\0\0\0\0\x7b\x1a\xe0\xff\0\0\0\0\
-\x79\x71\x20\0\0\0\0\0\x79\x11\0\0\0\0\0\0\x0f\x31\0\0\0\0\0\0\x7b\x1a\xe8\xff\
-\0\0\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\xd0\xff\xff\xff\x79\xa1\xc8\xff\0\0\0\
-\0\x18\x02\0\0\x51\0\0\0\0\0\0\0\0\0\0\0\xb7\x03\0\0\x11\0\0\0\xb7\x05\0\0\x20\
-\0\0\0\x85\0\0\0\x7e\0\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\x20\x20\x69\x64\
-\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x6d\
-\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\
-\x73\x25\x36\x64\x0a\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\
-\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\x64\x0a\0\x25\x34\
-\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x0a\0\x47\x50\x4c\0\x9f\
-\xeb\x01\0\x18\0\0\0\0\0\0\0\x1c\x04\0\0\x1c\x04\0\0\x09\x05\0\0\0\0\0\0\0\0\0\
-\x02\x02\0\0\0\x01\0\0\0\x02\0\0\x04\x10\0\0\0\x13\0\0\0\x03\0\0\0\0\0\0\0\x18\
-\0\0\0\x04\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\x08\0\0\0\0\0\0\0\0\0\0\x02\x0d\0\
-\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x01\0\0\0\x20\0\0\0\0\0\0\x01\x04\
-\0\0\0\x20\0\0\x01\x24\0\0\0\x01\0\0\x0c\x05\0\0\0\xaf\0\0\0\x03\0\0\x04\x18\0\
-\0\0\xbd\0\0\0\x09\0\0\0\0\0\0\0\xc1\0\0\0\x0b\0\0\0\x40\0\0\0\xcc\0\0\0\x0b\0\
-\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x0a\0\0\0\xd4\0\0\0\0\0\0\x07\0\0\0\0\xdd\0\0\
-\0\0\0\0\x08\x0c\0\0\0\xe3\0\0\0\0\0\0\x01\x08\0\0\0\x40\0\0\0\xa4\x01\0\0\x03\
-\0\0\x04\x18\0\0\0\xac\x01\0\0\x0e\0\0\0\0\0\0\0\xaf\x01\0\0\x11\0\0\0\x20\0\0\
-\0\xb4\x01\0\0\x0e\0\0\0\xa0\0\0\0\xc0\x01\0\0\0\0\0\x08\x0f\0\0\0\xc6\x01\0\0\
-\0\0\0\x01\x04\0\0\0\x20\0\0\0\xd3\x01\0\0\0\0\0\x01\x01\0\0\0\x08\0\0\x01\0\0\
-\0\0\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x10\0\0\0\xd8\x01\0\0\0\0\0\x01\x04\
-\0\0\0\x20\0\0\0\0\0\0\0\0\0\0\x02\x14\0\0\0\x3c\x02\0\0\x02\0\0\x04\x10\0\0\0\
-\x13\0\0\0\x03\0\0\0\0\0\0\0\x4f\x02\0\0\x15\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\x02\
-\x18\0\0\0\0\0\0\0\x01\0\0\x0d\x06\0\0\0\x1c\0\0\0\x13\0\0\0\x54\x02\0\0\x01\0\
-\0\x0c\x16\0\0\0\xa0\x02\0\0\x01\0\0\x04\x08\0\0\0\xa9\x02\0\0\x19\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\x02\x1a\0\0\0\xfa\x02\0\0\x06\0\0\x04\x38\0\0\0\xac\x01\0\0\
-\x0e\0\0\0\0\0\0\0\xaf\x01\0\0\x11\0\0\0\x20\0\0\0\x07\x03\0\0\x1b\0\0\0\xc0\0\
-\0\0\x18\x03\0\0\x15\0\0\0\0\x01\0\0\x21\x03\0\0\x1d\0\0\0\x40\x01\0\0\x2b\x03\
-\0\0\x1e\0\0\0\x80\x01\0\0\0\0\0\0\0\0\0\x02\x1c\0\0\0\0\0\0\0\0\0\0\x0a\x10\0\
-\0\0\0\0\0\0\0\0\0\x02\x1f\0\0\0\0\0\0\0\0\0\0\x02\x20\0\0\0\x75\x03\0\0\x02\0\
-\0\x04\x08\0\0\0\x83\x03\0\0\x0e\0\0\0\0\0\0\0\x8c\x03\0\0\x0e\0\0\0\x20\0\0\0\
-\x2b\x03\0\0\x03\0\0\x04\x18\0\0\0\x96\x03\0\0\x1b\0\0\0\0\0\0\0\x9e\x03\0\0\
-\x21\0\0\0\x40\0\0\0\xa4\x03\0\0\x23\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\x02\x22\0\0\
-\0\0\0\0\0\0\0\0\x02\x24\0\0\0\xa8\x03\0\0\x01\0\0\x04\x04\0\0\0\xb3\x03\0\0\
-\x0e\0\0\0\0\0\0\0\x1c\x04\0\0\x01\0\0\x04\x04\0\0\0\x25\x04\0\0\x0e\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x23\0\0\0\x9b\x04\0\0\0\0\0\
-\x0e\x25\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\x12\0\0\0\x0e\0\0\0\
-\xaf\x04\0\0\0\0\0\x0e\x27\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x1c\0\0\0\
-\x12\0\0\0\x20\0\0\0\xc5\x04\0\0\0\0\0\x0e\x29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\
-\0\0\0\0\x1c\0\0\0\x12\0\0\0\x11\0\0\0\xda\x04\0\0\0\0\0\x0e\x2b\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\x03\0\0\0\0\x10\0\0\0\x12\0\0\0\x04\0\0\0\xf1\x04\0\0\0\0\0\x0e\
-\x2d\0\0\0\x01\0\0\0\xf9\x04\0\0\x04\0\0\x0f\0\0\0\0\x26\0\0\0\0\0\0\0\x23\0\0\
-\0\x28\0\0\0\x23\0\0\0\x0e\0\0\0\x2a\0\0\0\x31\0\0\0\x20\0\0\0\x2c\0\0\0\x51\0\
-\0\0\x11\0\0\0\x01\x05\0\0\x01\0\0\x0f\0\0\0\0\x2e\0\0\0\0\0\0\0\x04\0\0\0\0\
-\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x6d\x65\
-\x74\x61\0\x6d\x61\x70\0\x63\x74\x78\0\x69\x6e\x74\0\x64\x75\x6d\x70\x5f\x62\
-\x70\x66\x5f\x6d\x61\x70\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\
-\x30\x3a\x30\0\x2f\x68\x6f\x6d\x65\x2f\x61\x6c\x72\x75\x61\x2f\x62\x75\x69\x6c\
-\x64\x2f\x6c\x69\x6e\x75\x78\x2f\x6b\x65\x72\x6e\x65\x6c\x2f\x62\x70\x66\x2f\
-\x70\x72\x65\x6c\x6f\x61\x64\x2f\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2f\x69\
-\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\x63\0\x09\x73\x74\x72\x75\
-\x63\x74\x20\x73\x65\x71\x5f\x66\x69\x6c\x65\x20\x2a\x73\x65\x71\x20\x3d\x20\
-\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x3b\0\x62\x70\x66\x5f\
-\x69\x74\x65\x72\x5f\x6d\x65\x74\x61\0\x73\x65\x71\0\x73\x65\x73\x73\x69\x6f\
-\x6e\x5f\x69\x64\0\x73\x65\x71\x5f\x6e\x75\x6d\0\x73\x65\x71\x5f\x66\x69\x6c\
-\x65\0\x5f\x5f\x75\x36\x34\0\x6c\x6f\x6e\x67\x20\x6c\x6f\x6e\x67\x20\x75\x6e\
-\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x30\x3a\x31\0\x09\x73\x74\x72\x75\
-\x63\x74\x20\x62\x70\x66\x5f\x6d\x61\x70\x20\x2a\x6d\x61\x70\x20\x3d\x20\x63\
-\x74\x78\x2d\x3e\x6d\x61\x70\x3b\0\x09\x69\x66\x20\x28\x21\x6d\x61\x70\x29\0\
-\x30\x3a\x32\0\x09\x5f\x5f\x75\x36\x34\x20\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\
-\x20\x63\x74\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x5f\x6e\x75\x6d\
-\x3b\0\x09\x69\x66\x20\x28\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x3d\x20\x30\x29\
-\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\
-\x71\x2c\x20\x22\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\
-\x20\x20\x20\x20\x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x5c\
-\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x6d\x61\x70\0\x69\x64\0\x6e\x61\x6d\x65\0\
-\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\0\x5f\x5f\x75\x33\x32\0\x75\x6e\
-\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\0\x63\x68\x61\x72\0\x5f\x5f\x41\x52\
-\x52\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\x5f\0\x09\x42\x50\x46\
-\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\
-\x34\x75\x20\x25\x2d\x31\x36\x73\x25\x36\x64\x5c\x6e\x22\x2c\x20\x6d\x61\x70\
-\x2d\x3e\x69\x64\x2c\x20\x6d\x61\x70\x2d\x3e\x6e\x61\x6d\x65\x2c\x20\x6d\x61\
-\x70\x2d\x3e\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x29\x3b\0\x7d\0\x62\
-\x70\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x70\x72\
-\x6f\x67\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x69\x74\x65\
-\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x09\x73\x74\x72\x75\x63\x74\x20\x62\
-\x70\x66\x5f\x70\x72\x6f\x67\x20\x2a\x70\x72\x6f\x67\x20\x3d\x20\x63\x74\x78\
-\x2d\x3e\x70\x72\x6f\x67\x3b\0\x09\x69\x66\x20\x28\x21\x70\x72\x6f\x67\x29\0\
-\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x61\x75\x78\0\x09\x61\x75\x78\x20\x3d\x20\
-\x70\x72\x6f\x67\x2d\x3e\x61\x75\x78\x3b\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\
-\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\
-\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\
-\x74\x61\x63\x68\x65\x64\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\x5f\x70\x72\x6f\x67\
-\x5f\x61\x75\x78\0\x61\x74\x74\x61\x63\x68\x5f\x66\x75\x6e\x63\x5f\x6e\x61\x6d\
-\x65\0\x64\x73\x74\x5f\x70\x72\x6f\x67\0\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\
-\x62\x74\x66\0\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\
-\x73\x65\x71\x2c\x20\x22\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\
-\x25\x73\x5c\x6e\x22\x2c\x20\x61\x75\x78\x2d\x3e\x69\x64\x2c\0\x30\x3a\x34\0\
-\x30\x3a\x35\0\x09\x69\x66\x20\x28\x21\x62\x74\x66\x29\0\x62\x70\x66\x5f\x66\
-\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x69\x6e\x73\x6e\x5f\x6f\x66\x66\0\x74\x79\
-\x70\x65\x5f\x69\x64\0\x30\0\x73\x74\x72\x69\x6e\x67\x73\0\x74\x79\x70\x65\x73\
-\0\x68\x64\x72\0\x62\x74\x66\x5f\x68\x65\x61\x64\x65\x72\0\x73\x74\x72\x5f\x6c\
-\x65\x6e\0\x09\x74\x79\x70\x65\x73\x20\x3d\x20\x62\x74\x66\x2d\x3e\x74\x79\x70\
-\x65\x73\x3b\0\x09\x62\x70\x66\x5f\x70\x72\x6f\x62\x65\x5f\x72\x65\x61\x64\x5f\
-\x6b\x65\x72\x6e\x65\x6c\x28\x26\x74\x2c\x20\x73\x69\x7a\x65\x6f\x66\x28\x74\
-\x29\x2c\x20\x74\x79\x70\x65\x73\x20\x2b\x20\x62\x74\x66\x5f\x69\x64\x29\x3b\0\
-\x09\x73\x74\x72\x20\x3d\x20\x62\x74\x66\x2d\x3e\x73\x74\x72\x69\x6e\x67\x73\
-\x3b\0\x62\x74\x66\x5f\x74\x79\x70\x65\0\x6e\x61\x6d\x65\x5f\x6f\x66\x66\0\x09\
-\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3d\x20\x42\x50\x46\x5f\x43\x4f\x52\x45\
-\x5f\x52\x45\x41\x44\x28\x74\x2c\x20\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x29\x3b\0\
-\x30\x3a\x32\x3a\x30\0\x09\x69\x66\x20\x28\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\
-\x3e\x3d\x20\x62\x74\x66\x2d\x3e\x68\x64\x72\x2e\x73\x74\x72\x5f\x6c\x65\x6e\
-\x29\0\x09\x72\x65\x74\x75\x72\x6e\x20\x73\x74\x72\x20\x2b\x20\x6e\x61\x6d\x65\
-\x5f\x6f\x66\x66\x3b\0\x30\x3a\x33\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
-\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\
-\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\x64\x75\x6d\x70\x5f\x62\x70\x66\
-\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\
-\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x4c\x49\x43\x45\
-\x4e\x53\x45\0\x2e\x72\x6f\x64\x61\x74\x61\0\x6c\x69\x63\x65\x6e\x73\x65\0\x9f\
-\xeb\x01\0\x20\0\0\0\0\0\0\0\x24\0\0\0\x24\0\0\0\x44\x02\0\0\x68\x02\0\0\xa4\
-\x01\0\0\x08\0\0\0\x31\0\0\0\x01\0\0\0\0\0\0\0\x07\0\0\0\x62\x02\0\0\x01\0\0\0\
-\0\0\0\0\x17\0\0\0\x10\0\0\0\x31\0\0\0\x09\0\0\0\0\0\0\0\x42\0\0\0\x87\0\0\0\
-\x1e\x40\x01\0\x08\0\0\0\x42\0\0\0\x87\0\0\0\x24\x40\x01\0\x10\0\0\0\x42\0\0\0\
-\xfe\0\0\0\x1d\x48\x01\0\x18\0\0\0\x42\0\0\0\x1f\x01\0\0\x06\x50\x01\0\x20\0\0\
-\0\x42\0\0\0\x2e\x01\0\0\x1d\x44\x01\0\x28\0\0\0\x42\0\0\0\x53\x01\0\0\x06\x5c\
-\x01\0\x38\0\0\0\x42\0\0\0\x66\x01\0\0\x03\x60\x01\0\x70\0\0\0\x42\0\0\0\xec\
-\x01\0\0\x02\x68\x01\0\xf0\0\0\0\x42\0\0\0\x3a\x02\0\0\x01\x70\x01\0\x62\x02\0\
-\0\x1a\0\0\0\0\0\0\0\x42\0\0\0\x87\0\0\0\x1e\x84\x01\0\x08\0\0\0\x42\0\0\0\x87\
-\0\0\0\x24\x84\x01\0\x10\0\0\0\x42\0\0\0\x70\x02\0\0\x1f\x8c\x01\0\x18\0\0\0\
-\x42\0\0\0\x94\x02\0\0\x06\x98\x01\0\x20\0\0\0\x42\0\0\0\xad\x02\0\0\x0e\xa4\
-\x01\0\x28\0\0\0\x42\0\0\0\x2e\x01\0\0\x1d\x88\x01\0\x30\0\0\0\x42\0\0\0\x53\
-\x01\0\0\x06\xa8\x01\0\x40\0\0\0\x42\0\0\0\xbf\x02\0\0\x03\xac\x01\0\x80\0\0\0\
-\x42\0\0\0\x2f\x03\0\0\x02\xb4\x01\0\xb8\0\0\0\x42\0\0\0\x6a\x03\0\0\x06\x08\
-\x01\0\xd0\0\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\xd8\0\0\0\x42\0\0\0\xbb\x03\0\0\x0f\
-\x14\x01\0\xe0\0\0\0\x42\0\0\0\xd0\x03\0\0\x2d\x18\x01\0\xf0\0\0\0\x42\0\0\0\
-\x07\x04\0\0\x0d\x10\x01\0\0\x01\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x08\x01\0\0\x42\
-\0\0\0\xd0\x03\0\0\x02\x18\x01\0\x20\x01\0\0\x42\0\0\0\x2e\x04\0\0\x0d\x1c\x01\
-\0\x38\x01\0\0\x42\0\0\0\0\0\0\0\0\0\0\0\x40\x01\0\0\x42\0\0\0\x2e\x04\0\0\x0d\
-\x1c\x01\0\x58\x01\0\0\x42\0\0\0\x2e\x04\0\0\x0d\x1c\x01\0\x60\x01\0\0\x42\0\0\
-\0\x5c\x04\0\0\x1b\x20\x01\0\x68\x01\0\0\x42\0\0\0\x5c\x04\0\0\x06\x20\x01\0\
-\x70\x01\0\0\x42\0\0\0\x7f\x04\0\0\x0d\x28\x01\0\x78\x01\0\0\x42\0\0\0\0\0\0\0\
-\0\0\0\0\x80\x01\0\0\x42\0\0\0\x2f\x03\0\0\x02\xb4\x01\0\xf8\x01\0\0\x42\0\0\0\
-\x3a\x02\0\0\x01\xc4\x01\0\x10\0\0\0\x31\0\0\0\x07\0\0\0\0\0\0\0\x02\0\0\0\x3e\
-\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\0\0\0\x10\0\0\0\x02\0\0\0\xfa\0\
-\0\0\0\0\0\0\x20\0\0\0\x08\0\0\0\x2a\x01\0\0\0\0\0\0\x70\0\0\0\x0d\0\0\0\x3e\0\
-\0\0\0\0\0\0\x80\0\0\0\x0d\0\0\0\xfa\0\0\0\0\0\0\0\xa0\0\0\0\x0d\0\0\0\x2a\x01\
-\0\0\0\0\0\0\x62\x02\0\0\x12\0\0\0\0\0\0\0\x14\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\
-\0\x08\0\0\0\x3e\0\0\0\0\0\0\0\x10\0\0\0\x14\0\0\0\xfa\0\0\0\0\0\0\0\x20\0\0\0\
-\x18\0\0\0\x3e\0\0\0\0\0\0\0\x28\0\0\0\x08\0\0\0\x2a\x01\0\0\0\0\0\0\x80\0\0\0\
-\x1a\0\0\0\x3e\0\0\0\0\0\0\0\x90\0\0\0\x1a\0\0\0\xfa\0\0\0\0\0\0\0\xa8\0\0\0\
-\x1a\0\0\0\x62\x03\0\0\0\0\0\0\xb0\0\0\0\x1a\0\0\0\x66\x03\0\0\0\0\0\0\xc0\0\0\
-\0\x1f\0\0\0\x94\x03\0\0\0\0\0\0\xd8\0\0\0\x20\0\0\0\xfa\0\0\0\0\0\0\0\xf0\0\0\
-\0\x20\0\0\0\x3e\0\0\0\0\0\0\0\x18\x01\0\0\x24\0\0\0\x3e\0\0\0\0\0\0\0\x50\x01\
-\0\0\x1a\0\0\0\xfa\0\0\0\0\0\0\0\x60\x01\0\0\x20\0\0\0\x56\x04\0\0\0\0\0\0\x88\
-\x01\0\0\x1a\0\0\0\x2a\x01\0\0\0\0\0\0\x98\x01\0\0\x1a\0\0\0\x97\x04\0\0\0\0\0\
-\0\xa0\x01\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\x91\0\0\0\x04\0\xf1\xff\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xe6\0\0\
-\0\0\0\x02\0\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd8\0\0\0\0\0\x02\0\xf0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\xdf\0\0\0\0\0\x03\0\x78\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\xd1\0\0\0\0\0\x03\0\x80\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xca\0\0\0\0\0\x03\0\
-\xf8\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x14\0\0\0\x01\0\x04\0\0\0\0\0\0\0\0\0\x23\
-\0\0\0\0\0\0\0\x04\x01\0\0\x01\0\x04\0\x23\0\0\0\0\0\0\0\x0e\0\0\0\0\0\0\0\x28\
-\0\0\0\x01\0\x04\0\x31\0\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\xed\0\0\0\x01\0\x04\0\
-\x51\0\0\0\0\0\0\0\x11\0\0\0\0\0\0\0\0\0\0\0\x03\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\x03\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\0\
-\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xc2\0\0\0\x11\0\x05\0\0\0\0\0\0\0\0\0\
-\x04\0\0\0\0\0\0\0\x3d\0\0\0\x12\0\x02\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\x5b\
-\0\0\0\x12\0\x03\0\0\0\0\0\0\0\0\0\x08\x02\0\0\0\0\0\0\x48\0\0\0\0\0\0\0\x01\0\
-\0\0\x0d\0\0\0\xc8\0\0\0\0\0\0\0\x01\0\0\0\x0d\0\0\0\x50\0\0\0\0\0\0\0\x01\0\0\
-\0\x0d\0\0\0\xd0\x01\0\0\0\0\0\0\x01\0\0\0\x0d\0\0\0\xf0\x03\0\0\0\0\0\0\x0a\0\
-\0\0\x0d\0\0\0\xfc\x03\0\0\0\0\0\0\x0a\0\0\0\x0d\0\0\0\x08\x04\0\0\0\0\0\0\x0a\
-\0\0\0\x0d\0\0\0\x14\x04\0\0\0\0\0\0\x0a\0\0\0\x0d\0\0\0\x2c\x04\0\0\0\0\0\0\0\
-\0\0\0\x0e\0\0\0\x2c\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x3c\0\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x50\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x60\0\0\0\0\0\0\0\0\0\0\0\x0b\0\
-\0\0\x70\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\
-\x90\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xa0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xb0\0\
-\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xc0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xd0\0\0\0\0\
-\0\0\0\0\0\0\0\x0b\0\0\0\xe8\0\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xf8\0\0\0\0\0\0\0\
-\0\0\0\0\x0c\0\0\0\x08\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x18\x01\0\0\0\0\0\0\0\
-\0\0\0\x0c\0\0\0\x28\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x38\x01\0\0\0\0\0\0\0\0\
-\0\0\x0c\0\0\0\x48\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x58\x01\0\0\0\0\0\0\0\0\0\
-\0\x0c\0\0\0\x68\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x78\x01\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x88\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x98\x01\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\xa8\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xb8\x01\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\xc8\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xd8\x01\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\xe8\x01\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xf8\x01\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x08\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x18\x02\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x28\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x38\x02\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x48\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x58\x02\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x68\x02\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x78\x02\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x94\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xa4\x02\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\xb4\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xc4\x02\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\xd4\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\xe4\x02\0\0\0\0\0\0\0\0\0\0\
-\x0b\0\0\0\xf4\x02\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x0c\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x1c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x2c\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x3c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x4c\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x5c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x6c\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x7c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x8c\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x9c\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xac\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\xbc\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xcc\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\xdc\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\xec\x03\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\xfc\x03\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x0c\x04\0\0\0\0\0\0\0\0\0\0\
-\x0c\0\0\0\x1c\x04\0\0\0\0\0\0\0\0\0\0\x0c\0\0\0\x4d\x4e\x40\x41\x42\x43\x4c\0\
-\x2e\x74\x65\x78\x74\0\x2e\x72\x65\x6c\x2e\x42\x54\x46\x2e\x65\x78\x74\0\x64\
-\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\0\x64\
-\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\0\
-\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x2e\x72\x65\x6c\x69\x74\x65\
-\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\
-\x72\x6f\x67\0\x2e\x72\x65\x6c\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x70\x72\x6f\
-\x67\0\x2e\x6c\x6c\x76\x6d\x5f\x61\x64\x64\x72\x73\x69\x67\0\x6c\x69\x63\x65\
-\x6e\x73\x65\0\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\x63\0\
-\x2e\x73\x74\x72\x74\x61\x62\0\x2e\x73\x79\x6d\x74\x61\x62\0\x2e\x72\x6f\x64\
-\x61\x74\x61\0\x2e\x72\x65\x6c\x2e\x42\x54\x46\0\x4c\x49\x43\x45\x4e\x53\x45\0\
-\x4c\x42\x42\x31\x5f\x37\0\x4c\x42\x42\x31\x5f\x36\0\x4c\x42\x42\x30\x5f\x34\0\
-\x4c\x42\x42\x31\x5f\x33\0\x4c\x42\x42\x30\x5f\x33\0\x64\x75\x6d\x70\x5f\x62\
-\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x32\0\x64\x75\x6d\
-\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\x74\x2e\x31\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x01\0\0\
-\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x4e\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\x6d\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\x40\x01\0\0\0\0\0\0\x08\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\xb1\0\0\0\x01\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x48\x03\0\
-\0\0\0\0\0\x62\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\x89\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xaa\x03\0\0\0\0\0\0\x04\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xbd\0\0\0\x01\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xae\x03\0\0\0\0\0\0\x3d\x09\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0b\0\0\0\x01\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\0\xeb\x0c\0\0\0\0\0\0\x2c\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa9\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\x18\x11\0\0\0\0\0\0\x98\x01\0\0\0\0\0\0\x0e\0\0\0\x0e\0\0\0\x08\0\0\
-\0\0\0\0\0\x18\0\0\0\0\0\0\0\x4a\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
-\0\xb0\x12\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\x08\0\0\0\x02\0\0\0\x08\0\0\0\0\0\0\0\
-\x10\0\0\0\0\0\0\0\x69\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd0\x12\
-\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\x08\0\0\0\x03\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\
-\0\0\0\0\xb9\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xf0\x12\0\0\0\0\0\
-\0\x50\0\0\0\0\0\0\0\x08\0\0\0\x06\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\
-\x07\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\x13\0\0\0\0\0\0\xe0\
-\x03\0\0\0\0\0\0\x08\0\0\0\x07\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x7b\0\
-\0\0\x03\x4c\xff\x6f\0\0\0\x80\0\0\0\0\0\0\0\0\0\0\0\0\x20\x17\0\0\0\0\0\0\x07\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa1\0\0\0\x03\
-\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x27\x17\0\0\0\0\0\0\x1a\x01\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
-
- return 0;
-err:
- bpf_object__destroy_skeleton(s);
- return -1;
-}
-
-#endif /* __ITERATORS_BPF_SKEL_H__ */
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index f9c734aaa990..a1c0794ae49d 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -8,6 +8,7 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/capability.h>
+#include <linux/btf_ids.h>
#include "percpu_freelist.h"
#define QUEUE_STACK_CREATE_FLAG_MASK \
@@ -247,7 +248,7 @@ static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
return -EINVAL;
}
-static int queue_map_btf_id;
+BTF_ID_LIST_SINGLE(queue_map_btf_ids, struct, bpf_queue_stack)
const struct bpf_map_ops queue_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = queue_stack_map_alloc_check,
@@ -260,11 +261,9 @@ const struct bpf_map_ops queue_map_ops = {
.map_pop_elem = queue_map_pop_elem,
.map_peek_elem = queue_map_peek_elem,
.map_get_next_key = queue_stack_map_get_next_key,
- .map_btf_name = "bpf_queue_stack",
- .map_btf_id = &queue_map_btf_id,
+ .map_btf_id = &queue_map_btf_ids[0],
};
-static int stack_map_btf_id;
const struct bpf_map_ops stack_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = queue_stack_map_alloc_check,
@@ -277,6 +276,5 @@ const struct bpf_map_ops stack_map_ops = {
.map_pop_elem = stack_map_pop_elem,
.map_peek_elem = stack_map_peek_elem,
.map_get_next_key = queue_stack_map_get_next_key,
- .map_btf_name = "bpf_queue_stack",
- .map_btf_id = &stack_map_btf_id,
+ .map_btf_id = &queue_map_btf_ids[0],
};
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 556a769b5b80..e2618fb5870e 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -6,6 +6,7 @@
#include <linux/err.h>
#include <linux/sock_diag.h>
#include <net/sock_reuseport.h>
+#include <linux/btf_ids.h>
struct reuseport_array {
struct bpf_map map;
@@ -143,7 +144,7 @@ static void reuseport_array_free(struct bpf_map *map)
/*
* Once reaching here, all sk->sk_user_data is not
- * referenceing this "array". "array" can be freed now.
+ * referencing this "array". "array" can be freed now.
*/
bpf_map_area_free(array);
}
@@ -337,7 +338,7 @@ static int reuseport_array_get_next_key(struct bpf_map *map, void *key,
return 0;
}
-static int reuseport_array_map_btf_id;
+BTF_ID_LIST_SINGLE(reuseport_array_map_btf_ids, struct, reuseport_array)
const struct bpf_map_ops reuseport_array_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = reuseport_array_alloc_check,
@@ -346,6 +347,5 @@ const struct bpf_map_ops reuseport_array_ops = {
.map_lookup_elem = reuseport_array_lookup_elem,
.map_get_next_key = reuseport_array_get_next_key,
.map_delete_elem = reuseport_array_delete_elem,
- .map_btf_name = "reuseport_array",
- .map_btf_id = &reuseport_array_map_btf_id,
+ .map_btf_id = &reuseport_array_map_btf_ids[0],
};
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 710ba9de12ce..ded4faeca192 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -10,6 +10,7 @@
#include <linux/poll.h>
#include <linux/kmemleak.h>
#include <uapi/linux/btf.h>
+#include <linux/btf_ids.h>
#define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE)
@@ -263,7 +264,7 @@ static __poll_t ringbuf_map_poll(struct bpf_map *map, struct file *filp,
return 0;
}
-static int ringbuf_map_btf_id;
+BTF_ID_LIST_SINGLE(ringbuf_map_btf_ids, struct, bpf_ringbuf_map)
const struct bpf_map_ops ringbuf_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc = ringbuf_map_alloc,
@@ -274,8 +275,7 @@ const struct bpf_map_ops ringbuf_map_ops = {
.map_update_elem = ringbuf_map_update_elem,
.map_delete_elem = ringbuf_map_delete_elem,
.map_get_next_key = ringbuf_map_get_next_key,
- .map_btf_name = "bpf_ringbuf_map",
- .map_btf_id = &ringbuf_map_btf_id,
+ .map_btf_id = &ringbuf_map_btf_ids[0],
};
/* Given pointer to ring buffer record metadata and struct bpf_ringbuf itself,
@@ -404,7 +404,7 @@ BPF_CALL_2(bpf_ringbuf_submit, void *, sample, u64, flags)
const struct bpf_func_proto bpf_ringbuf_submit_proto = {
.func = bpf_ringbuf_submit,
.ret_type = RET_VOID,
- .arg1_type = ARG_PTR_TO_ALLOC_MEM,
+ .arg1_type = ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE,
.arg2_type = ARG_ANYTHING,
};
@@ -417,7 +417,7 @@ BPF_CALL_2(bpf_ringbuf_discard, void *, sample, u64, flags)
const struct bpf_func_proto bpf_ringbuf_discard_proto = {
.func = bpf_ringbuf_discard,
.ret_type = RET_VOID,
- .arg1_type = ARG_PTR_TO_ALLOC_MEM,
+ .arg1_type = ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE,
.arg2_type = ARG_ANYTHING,
};
@@ -475,3 +475,81 @@ const struct bpf_func_proto bpf_ringbuf_query_proto = {
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_ANYTHING,
};
+
+BPF_CALL_4(bpf_ringbuf_reserve_dynptr, struct bpf_map *, map, u32, size, u64, flags,
+ struct bpf_dynptr_kern *, ptr)
+{
+ struct bpf_ringbuf_map *rb_map;
+ void *sample;
+ int err;
+
+ if (unlikely(flags)) {
+ bpf_dynptr_set_null(ptr);
+ return -EINVAL;
+ }
+
+ err = bpf_dynptr_check_size(size);
+ if (err) {
+ bpf_dynptr_set_null(ptr);
+ return err;
+ }
+
+ rb_map = container_of(map, struct bpf_ringbuf_map, map);
+
+ sample = __bpf_ringbuf_reserve(rb_map->rb, size);
+ if (!sample) {
+ bpf_dynptr_set_null(ptr);
+ return -EINVAL;
+ }
+
+ bpf_dynptr_init(ptr, sample, BPF_DYNPTR_TYPE_RINGBUF, 0, size);
+
+ return 0;
+}
+
+const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto = {
+ .func = bpf_ringbuf_reserve_dynptr,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_UNINIT,
+};
+
+BPF_CALL_2(bpf_ringbuf_submit_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags)
+{
+ if (!ptr->data)
+ return 0;
+
+ bpf_ringbuf_commit(ptr->data, flags, false /* discard */);
+
+ bpf_dynptr_set_null(ptr);
+
+ return 0;
+}
+
+const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto = {
+ .func = bpf_ringbuf_submit_dynptr,
+ .ret_type = RET_VOID,
+ .arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_ringbuf_discard_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags)
+{
+ if (!ptr->data)
+ return 0;
+
+ bpf_ringbuf_commit(ptr->data, flags, true /* discard */);
+
+ bpf_dynptr_set_null(ptr);
+
+ return 0;
+}
+
+const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto = {
+ .func = bpf_ringbuf_discard_dynptr,
+ .ret_type = RET_VOID,
+ .arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE,
+ .arg2_type = ARG_ANYTHING,
+};
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 22c8ae94e4c1..1adbe67cdb95 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -100,13 +100,11 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
return ERR_PTR(-E2BIG);
cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
- cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
if (!smap)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&smap->map, attr);
- smap->map.value_size = value_size;
smap->n_buckets = n_buckets;
err = get_callchain_buffers(sysctl_perf_event_max_stack);
@@ -132,7 +130,8 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
int i;
struct mmap_unlock_irq_work *work = NULL;
bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *prev_vma = NULL;
+ const char *prev_build_id;
/* If the irq_work is in use, fall back to report ips. Same
* fallback is used for kernel stack (!user) on a stackmap with
@@ -150,6 +149,12 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
}
for (i = 0; i < trace_nr; i++) {
+ if (range_in_vma(prev_vma, ips[i], ips[i])) {
+ vma = prev_vma;
+ memcpy(id_offs[i].build_id, prev_build_id,
+ BUILD_ID_SIZE_MAX);
+ goto build_id_valid;
+ }
vma = find_vma(current->mm, ips[i]);
if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
/* per entry fall back to ips */
@@ -158,15 +163,18 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
continue;
}
+build_id_valid:
id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
- vma->vm_start;
id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
+ prev_vma = vma;
+ prev_build_id = id_offs[i].build_id;
}
bpf_mmap_unlock_mm(work, current->mm);
}
static struct perf_callchain_entry *
-get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
+get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
{
#ifdef CONFIG_STACKTRACE
struct perf_callchain_entry *entry;
@@ -177,9 +185,8 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
if (!entry)
return NULL;
- entry->nr = init_nr +
- stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr),
- sysctl_perf_event_max_stack - init_nr, 0);
+ entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip,
+ max_depth, 0);
/* stack_trace_save_tsk() works on unsigned long array, while
* perf_callchain_entry uses u64 array. For 32-bit systems, it is
@@ -191,7 +198,7 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
int i;
/* copy data from the end to avoid using extra buffer */
- for (i = entry->nr - 1; i >= (int)init_nr; i--)
+ for (i = entry->nr - 1; i >= 0; i--)
to[i] = (u64)(from[i]);
}
@@ -208,27 +215,19 @@ static long __bpf_get_stackid(struct bpf_map *map,
{
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
- u32 max_depth = map->value_size / stack_map_data_size(map);
- /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
- u32 init_nr = sysctl_perf_event_max_stack - max_depth;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
u32 hash, id, trace_nr, trace_len;
bool user = flags & BPF_F_USER_STACK;
u64 *ips;
bool hash_matches;
- /* get_perf_callchain() guarantees that trace->nr >= init_nr
- * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
- */
- trace_nr = trace->nr - init_nr;
-
- if (trace_nr <= skip)
+ if (trace->nr <= skip)
/* skipping more than usable stack trace */
return -EFAULT;
- trace_nr -= skip;
+ trace_nr = trace->nr - skip;
trace_len = trace_nr * sizeof(u64);
- ips = trace->ip + skip + init_nr;
+ ips = trace->ip + skip;
hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0);
id = hash & (smap->n_buckets - 1);
bucket = READ_ONCE(smap->buckets[id]);
@@ -285,8 +284,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
u64, flags)
{
u32 max_depth = map->value_size / stack_map_data_size(map);
- /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
- u32 init_nr = sysctl_perf_event_max_stack - max_depth;
+ u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
bool user = flags & BPF_F_USER_STACK;
struct perf_callchain_entry *trace;
bool kernel = !user;
@@ -295,8 +293,12 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
return -EINVAL;
- trace = get_perf_callchain(regs, init_nr, kernel, user,
- sysctl_perf_event_max_stack, false, false);
+ max_depth += skip;
+ if (max_depth > sysctl_perf_event_max_stack)
+ max_depth = sysctl_perf_event_max_stack;
+
+ trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
+ false, false);
if (unlikely(!trace))
/* couldn't fetch the stack trace */
@@ -387,7 +389,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
struct perf_callchain_entry *trace_in,
void *buf, u32 size, u64 flags)
{
- u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
+ u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
bool user_build_id = flags & BPF_F_USER_BUILD_ID;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
bool user = flags & BPF_F_USER_STACK;
@@ -412,30 +414,28 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
goto err_fault;
num_elem = size / elem_size;
- if (sysctl_perf_event_max_stack < num_elem)
- init_nr = 0;
- else
- init_nr = sysctl_perf_event_max_stack - num_elem;
+ max_depth = num_elem + skip;
+ if (sysctl_perf_event_max_stack < max_depth)
+ max_depth = sysctl_perf_event_max_stack;
if (trace_in)
trace = trace_in;
else if (kernel && task)
- trace = get_callchain_entry_for_task(task, init_nr);
+ trace = get_callchain_entry_for_task(task, max_depth);
else
- trace = get_perf_callchain(regs, init_nr, kernel, user,
- sysctl_perf_event_max_stack,
+ trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
false, false);
if (unlikely(!trace))
goto err_fault;
- trace_nr = trace->nr - init_nr;
- if (trace_nr < skip)
+ if (trace->nr < skip)
goto err_fault;
- trace_nr -= skip;
+ trace_nr = trace->nr - skip;
trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
copy_len = trace_nr * elem_size;
- ips = trace->ip + skip + init_nr;
+
+ ips = trace->ip + skip;
if (user && user_build_id)
stack_map_get_build_id_offset(buf, ips, trace_nr, user);
else
@@ -654,7 +654,7 @@ static void stack_map_free(struct bpf_map *map)
put_callchain_buffers();
}
-static int stack_trace_map_btf_id;
+BTF_ID_LIST_SINGLE(stack_trace_map_btf_ids, struct, bpf_stack_map)
const struct bpf_map_ops stack_trace_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc = stack_map_alloc,
@@ -664,6 +664,5 @@ const struct bpf_map_ops stack_trace_map_ops = {
.map_update_elem = stack_map_update_elem,
.map_delete_elem = stack_map_delete_elem,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "bpf_stack_map",
- .map_btf_id = &stack_trace_map_btf_id,
+ .map_btf_id = &stack_trace_map_btf_ids[0],
};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ca70fe6fba38..2b69306d3c6e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6,6 +6,7 @@
#include <linux/bpf_trace.h>
#include <linux/bpf_lirc.h>
#include <linux/bpf_verifier.h>
+#include <linux/bsearch.h>
#include <linux/btf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
@@ -29,9 +30,11 @@
#include <linux/pgtable.h>
#include <linux/bpf_lsm.h>
#include <linux/poll.h>
+#include <linux/sort.h>
#include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
+#include <linux/trace_events.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -472,14 +475,128 @@ static void bpf_map_release_memcg(struct bpf_map *map)
}
#endif
+static int bpf_map_kptr_off_cmp(const void *a, const void *b)
+{
+ const struct bpf_map_value_off_desc *off_desc1 = a, *off_desc2 = b;
+
+ if (off_desc1->offset < off_desc2->offset)
+ return -1;
+ else if (off_desc1->offset > off_desc2->offset)
+ return 1;
+ return 0;
+}
+
+struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset)
+{
+ /* Since members are iterated in btf_find_field in increasing order,
+ * offsets appended to kptr_off_tab are in increasing order, so we can
+ * do bsearch to find exact match.
+ */
+ struct bpf_map_value_off *tab;
+
+ if (!map_value_has_kptrs(map))
+ return NULL;
+ tab = map->kptr_off_tab;
+ return bsearch(&offset, tab->off, tab->nr_off, sizeof(tab->off[0]), bpf_map_kptr_off_cmp);
+}
+
+void bpf_map_free_kptr_off_tab(struct bpf_map *map)
+{
+ struct bpf_map_value_off *tab = map->kptr_off_tab;
+ int i;
+
+ if (!map_value_has_kptrs(map))
+ return;
+ for (i = 0; i < tab->nr_off; i++) {
+ if (tab->off[i].kptr.module)
+ module_put(tab->off[i].kptr.module);
+ btf_put(tab->off[i].kptr.btf);
+ }
+ kfree(tab);
+ map->kptr_off_tab = NULL;
+}
+
+struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map)
+{
+ struct bpf_map_value_off *tab = map->kptr_off_tab, *new_tab;
+ int size, i;
+
+ if (!map_value_has_kptrs(map))
+ return ERR_PTR(-ENOENT);
+ size = offsetof(struct bpf_map_value_off, off[tab->nr_off]);
+ new_tab = kmemdup(tab, size, GFP_KERNEL | __GFP_NOWARN);
+ if (!new_tab)
+ return ERR_PTR(-ENOMEM);
+ /* Do a deep copy of the kptr_off_tab */
+ for (i = 0; i < tab->nr_off; i++) {
+ btf_get(tab->off[i].kptr.btf);
+ if (tab->off[i].kptr.module && !try_module_get(tab->off[i].kptr.module)) {
+ while (i--) {
+ if (tab->off[i].kptr.module)
+ module_put(tab->off[i].kptr.module);
+ btf_put(tab->off[i].kptr.btf);
+ }
+ kfree(new_tab);
+ return ERR_PTR(-ENXIO);
+ }
+ }
+ return new_tab;
+}
+
+bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b)
+{
+ struct bpf_map_value_off *tab_a = map_a->kptr_off_tab, *tab_b = map_b->kptr_off_tab;
+ bool a_has_kptr = map_value_has_kptrs(map_a), b_has_kptr = map_value_has_kptrs(map_b);
+ int size;
+
+ if (!a_has_kptr && !b_has_kptr)
+ return true;
+ if (a_has_kptr != b_has_kptr)
+ return false;
+ if (tab_a->nr_off != tab_b->nr_off)
+ return false;
+ size = offsetof(struct bpf_map_value_off, off[tab_a->nr_off]);
+ return !memcmp(tab_a, tab_b, size);
+}
+
+/* Caller must ensure map_value_has_kptrs is true. Note that this function can
+ * be called on a map value while the map_value is visible to BPF programs, as
+ * it ensures the correct synchronization, and we already enforce the same using
+ * the bpf_kptr_xchg helper on the BPF program side for referenced kptrs.
+ */
+void bpf_map_free_kptrs(struct bpf_map *map, void *map_value)
+{
+ struct bpf_map_value_off *tab = map->kptr_off_tab;
+ unsigned long *btf_id_ptr;
+ int i;
+
+ for (i = 0; i < tab->nr_off; i++) {
+ struct bpf_map_value_off_desc *off_desc = &tab->off[i];
+ unsigned long old_ptr;
+
+ btf_id_ptr = map_value + off_desc->offset;
+ if (off_desc->type == BPF_KPTR_UNREF) {
+ u64 *p = (u64 *)btf_id_ptr;
+
+ WRITE_ONCE(p, 0);
+ continue;
+ }
+ old_ptr = xchg(btf_id_ptr, 0);
+ off_desc->kptr.dtor((void *)old_ptr);
+ }
+}
+
/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_map, work);
security_bpf_map_free(map);
+ kfree(map->off_arr);
bpf_map_release_memcg(map);
- /* implementation dependent freeing */
+ /* implementation dependent freeing, map_free callback also does
+ * bpf_map_free_kptr_off_tab, if needed.
+ */
map->ops->map_free(map);
}
@@ -556,16 +673,14 @@ static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
- const struct bpf_map *map = filp->private_data;
- const struct bpf_array *array;
+ struct bpf_map *map = filp->private_data;
u32 type = 0, jited = 0;
- if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
- array = container_of(map, struct bpf_array, map);
- spin_lock(&array->aux->owner.lock);
- type = array->aux->owner.type;
- jited = array->aux->owner.jited;
- spin_unlock(&array->aux->owner.lock);
+ if (map_type_contains_progs(map)) {
+ spin_lock(&map->owner.lock);
+ type = map->owner.type;
+ jited = map->owner.jited;
+ spin_unlock(&map->owner.lock);
}
seq_printf(m,
@@ -641,7 +756,7 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
int err;
if (!map->ops->map_mmap || map_value_has_spin_lock(map) ||
- map_value_has_timer(map))
+ map_value_has_timer(map) || map_value_has_kptrs(map))
return -ENOTSUPP;
if (!(vma->vm_flags & VM_SHARED))
@@ -768,6 +883,84 @@ int map_check_no_btf(const struct bpf_map *map,
return -ENOTSUPP;
}
+static int map_off_arr_cmp(const void *_a, const void *_b, const void *priv)
+{
+ const u32 a = *(const u32 *)_a;
+ const u32 b = *(const u32 *)_b;
+
+ if (a < b)
+ return -1;
+ else if (a > b)
+ return 1;
+ return 0;
+}
+
+static void map_off_arr_swap(void *_a, void *_b, int size, const void *priv)
+{
+ struct bpf_map *map = (struct bpf_map *)priv;
+ u32 *off_base = map->off_arr->field_off;
+ u32 *a = _a, *b = _b;
+ u8 *sz_a, *sz_b;
+
+ sz_a = map->off_arr->field_sz + (a - off_base);
+ sz_b = map->off_arr->field_sz + (b - off_base);
+
+ swap(*a, *b);
+ swap(*sz_a, *sz_b);
+}
+
+static int bpf_map_alloc_off_arr(struct bpf_map *map)
+{
+ bool has_spin_lock = map_value_has_spin_lock(map);
+ bool has_timer = map_value_has_timer(map);
+ bool has_kptrs = map_value_has_kptrs(map);
+ struct bpf_map_off_arr *off_arr;
+ u32 i;
+
+ if (!has_spin_lock && !has_timer && !has_kptrs) {
+ map->off_arr = NULL;
+ return 0;
+ }
+
+ off_arr = kmalloc(sizeof(*map->off_arr), GFP_KERNEL | __GFP_NOWARN);
+ if (!off_arr)
+ return -ENOMEM;
+ map->off_arr = off_arr;
+
+ off_arr->cnt = 0;
+ if (has_spin_lock) {
+ i = off_arr->cnt;
+
+ off_arr->field_off[i] = map->spin_lock_off;
+ off_arr->field_sz[i] = sizeof(struct bpf_spin_lock);
+ off_arr->cnt++;
+ }
+ if (has_timer) {
+ i = off_arr->cnt;
+
+ off_arr->field_off[i] = map->timer_off;
+ off_arr->field_sz[i] = sizeof(struct bpf_timer);
+ off_arr->cnt++;
+ }
+ if (has_kptrs) {
+ struct bpf_map_value_off *tab = map->kptr_off_tab;
+ u32 *off = &off_arr->field_off[off_arr->cnt];
+ u8 *sz = &off_arr->field_sz[off_arr->cnt];
+
+ for (i = 0; i < tab->nr_off; i++) {
+ *off++ = tab->off[i].offset;
+ *sz++ = sizeof(u64);
+ }
+ off_arr->cnt += tab->nr_off;
+ }
+
+ if (off_arr->cnt == 1)
+ return 0;
+ sort_r(off_arr->field_off, off_arr->cnt, sizeof(off_arr->field_off[0]),
+ map_off_arr_cmp, map_off_arr_swap, map);
+ return 0;
+}
+
static int map_check_btf(struct bpf_map *map, const struct btf *btf,
u32 btf_key_id, u32 btf_value_id)
{
@@ -821,10 +1014,34 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
return -EOPNOTSUPP;
}
- if (map->ops->map_check_btf)
+ map->kptr_off_tab = btf_parse_kptrs(btf, value_type);
+ if (map_value_has_kptrs(map)) {
+ if (!bpf_capable()) {
+ ret = -EPERM;
+ goto free_map_tab;
+ }
+ if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) {
+ ret = -EACCES;
+ goto free_map_tab;
+ }
+ if (map->map_type != BPF_MAP_TYPE_HASH &&
+ map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+ map->map_type != BPF_MAP_TYPE_ARRAY) {
+ ret = -EOPNOTSUPP;
+ goto free_map_tab;
+ }
+ }
+
+ if (map->ops->map_check_btf) {
ret = map->ops->map_check_btf(map, btf, key_type, value_type);
+ if (ret < 0)
+ goto free_map_tab;
+ }
return ret;
+free_map_tab:
+ bpf_map_free_kptr_off_tab(map);
+ return ret;
}
#define BPF_MAP_CREATE_LAST_FIELD map_extra
@@ -874,6 +1091,7 @@ static int map_create(union bpf_attr *attr)
atomic64_set(&map->refcnt, 1);
atomic64_set(&map->usercnt, 1);
mutex_init(&map->freeze_mutex);
+ spin_lock_init(&map->owner.lock);
map->spin_lock_off = -EINVAL;
map->timer_off = -EINVAL;
@@ -912,10 +1130,14 @@ static int map_create(union bpf_attr *attr)
attr->btf_vmlinux_value_type_id;
}
- err = security_bpf_map_alloc(map);
+ err = bpf_map_alloc_off_arr(map);
if (err)
goto free_map;
+ err = security_bpf_map_alloc(map);
+ if (err)
+ goto free_map_off_arr;
+
err = bpf_map_alloc_id(map);
if (err)
goto free_map_sec;
@@ -938,6 +1160,8 @@ static int map_create(union bpf_attr *attr)
free_map_sec:
security_bpf_map_free(map);
+free_map_off_arr:
+ kfree(map->off_arr);
free_map:
btf_put(map->btf);
map->ops->map_free(map);
@@ -986,6 +1210,7 @@ struct bpf_map *bpf_map_get(u32 ufd)
return map;
}
+EXPORT_SYMBOL(bpf_map_get);
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
@@ -1352,7 +1577,6 @@ int generic_map_delete_batch(struct bpf_map *map,
err = map->ops->map_delete_elem(map, key);
rcu_read_unlock();
bpf_enable_instrumentation();
- maybe_wait_bpf_programs(map);
if (err)
break;
cond_resched();
@@ -1361,6 +1585,8 @@ int generic_map_delete_batch(struct bpf_map *map,
err = -EFAULT;
kvfree(key);
+
+ maybe_wait_bpf_programs(map);
return err;
}
@@ -1637,7 +1863,7 @@ static int map_freeze(const union bpf_attr *attr)
return PTR_ERR(map);
if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
- map_value_has_timer(map)) {
+ map_value_has_timer(map) || map_value_has_kptrs(map)) {
fdput(f);
return -ENOTSUPP;
}
@@ -2220,7 +2446,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
BPF_F_ANY_ALIGNMENT |
BPF_F_TEST_STATE_FREQ |
BPF_F_SLEEPABLE |
- BPF_F_TEST_RND_HI32))
+ BPF_F_TEST_RND_HI32 |
+ BPF_F_XDP_HAS_FRAGS))
return -EINVAL;
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
@@ -2306,6 +2533,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
prog->aux->dst_prog = dst_prog;
prog->aux->offload_requested = !!attr->prog_ifindex;
prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
+ prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
err = security_bpf_prog_alloc(prog->aux);
if (err)
@@ -2491,6 +2719,7 @@ void bpf_link_put(struct bpf_link *link)
bpf_link_free(link);
}
}
+EXPORT_SYMBOL(bpf_link_put);
static int bpf_link_release(struct inode *inode, struct file *filp)
{
@@ -2562,7 +2791,7 @@ static int bpf_link_alloc_id(struct bpf_link *link)
* pre-allocated resources are to be freed with bpf_cleanup() call. All the
* transient state is passed around in struct bpf_link_primer.
* This is preferred way to create and initialize bpf_link, especially when
- * there are complicated and expensive operations inbetween creating bpf_link
+ * there are complicated and expensive operations in between creating bpf_link
* itself and attaching it to BPF hook. By using bpf_link_prime() and
* bpf_link_settle() kernel code using bpf_link doesn't have to perform
* expensive (and potentially failing) roll back operations in a rare case
@@ -2633,20 +2862,14 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd)
return link;
}
-
-struct bpf_tracing_link {
- struct bpf_link link;
- enum bpf_attach_type attach_type;
- struct bpf_trampoline *trampoline;
- struct bpf_prog *tgt_prog;
-};
+EXPORT_SYMBOL(bpf_link_get_from_fd);
static void bpf_tracing_link_release(struct bpf_link *link)
{
struct bpf_tracing_link *tr_link =
- container_of(link, struct bpf_tracing_link, link);
+ container_of(link, struct bpf_tracing_link, link.link);
- WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog,
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
tr_link->trampoline));
bpf_trampoline_put(tr_link->trampoline);
@@ -2659,7 +2882,7 @@ static void bpf_tracing_link_release(struct bpf_link *link)
static void bpf_tracing_link_dealloc(struct bpf_link *link)
{
struct bpf_tracing_link *tr_link =
- container_of(link, struct bpf_tracing_link, link);
+ container_of(link, struct bpf_tracing_link, link.link);
kfree(tr_link);
}
@@ -2668,7 +2891,7 @@ static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link,
struct seq_file *seq)
{
struct bpf_tracing_link *tr_link =
- container_of(link, struct bpf_tracing_link, link);
+ container_of(link, struct bpf_tracing_link, link.link);
seq_printf(seq,
"attach_type:\t%d\n",
@@ -2679,7 +2902,7 @@ static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
struct bpf_link_info *info)
{
struct bpf_tracing_link *tr_link =
- container_of(link, struct bpf_tracing_link, link);
+ container_of(link, struct bpf_tracing_link, link.link);
info->tracing.attach_type = tr_link->attach_type;
bpf_trampoline_unpack_key(tr_link->trampoline->key,
@@ -2698,7 +2921,8 @@ static const struct bpf_link_ops bpf_tracing_link_lops = {
static int bpf_tracing_prog_attach(struct bpf_prog *prog,
int tgt_prog_fd,
- u32 btf_id)
+ u32 btf_id,
+ u64 bpf_cookie)
{
struct bpf_link_primer link_primer;
struct bpf_prog *tgt_prog = NULL;
@@ -2760,9 +2984,10 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
err = -ENOMEM;
goto out_put_prog;
}
- bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING,
+ bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING,
&bpf_tracing_link_lops, prog);
link->attach_type = prog->expected_attach_type;
+ link->link.cookie = bpf_cookie;
mutex_lock(&prog->aux->dst_mutex);
@@ -2830,11 +3055,11 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
tgt_prog = prog->aux->dst_prog;
}
- err = bpf_link_prime(&link->link, &link_primer);
+ err = bpf_link_prime(&link->link.link, &link_primer);
if (err)
goto out_unlock;
- err = bpf_trampoline_link_prog(prog, tr);
+ err = bpf_trampoline_link_prog(&link->link, tr);
if (err) {
bpf_link_cleanup(&link_primer);
link = NULL;
@@ -3017,68 +3242,52 @@ out_put_file:
fput(perf_file);
return err;
}
+#else
+static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_PERF_EVENTS */
-#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
-
-static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
+static int bpf_raw_tp_link_attach(struct bpf_prog *prog,
+ const char __user *user_tp_name)
{
struct bpf_link_primer link_primer;
struct bpf_raw_tp_link *link;
struct bpf_raw_event_map *btp;
- struct bpf_prog *prog;
const char *tp_name;
char buf[128];
int err;
- if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
- return -EINVAL;
-
- prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
-
switch (prog->type) {
case BPF_PROG_TYPE_TRACING:
case BPF_PROG_TYPE_EXT:
case BPF_PROG_TYPE_LSM:
- if (attr->raw_tracepoint.name) {
+ if (user_tp_name)
/* The attach point for this category of programs
* should be specified via btf_id during program load.
*/
- err = -EINVAL;
- goto out_put_prog;
- }
+ return -EINVAL;
if (prog->type == BPF_PROG_TYPE_TRACING &&
prog->expected_attach_type == BPF_TRACE_RAW_TP) {
tp_name = prog->aux->attach_func_name;
break;
}
- err = bpf_tracing_prog_attach(prog, 0, 0);
- if (err >= 0)
- return err;
- goto out_put_prog;
+ return bpf_tracing_prog_attach(prog, 0, 0, 0);
case BPF_PROG_TYPE_RAW_TRACEPOINT:
case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
- if (strncpy_from_user(buf,
- u64_to_user_ptr(attr->raw_tracepoint.name),
- sizeof(buf) - 1) < 0) {
- err = -EFAULT;
- goto out_put_prog;
- }
+ if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0)
+ return -EFAULT;
buf[sizeof(buf) - 1] = 0;
tp_name = buf;
break;
default:
- err = -EINVAL;
- goto out_put_prog;
+ return -EINVAL;
}
btp = bpf_get_raw_tracepoint(tp_name);
- if (!btp) {
- err = -ENOENT;
- goto out_put_prog;
- }
+ if (!btp)
+ return -ENOENT;
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
@@ -3105,11 +3314,29 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
out_put_btp:
bpf_put_raw_tracepoint(btp);
-out_put_prog:
- bpf_prog_put(prog);
return err;
}
+#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
+
+static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
+{
+ struct bpf_prog *prog;
+ int fd;
+
+ if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
+ return -EINVAL;
+
+ prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ fd = bpf_raw_tp_link_attach(prog, u64_to_user_ptr(attr->raw_tracepoint.name));
+ if (fd < 0)
+ bpf_prog_put(prog);
+ return fd;
+}
+
static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
enum bpf_attach_type attach_type)
{
@@ -3178,7 +3405,13 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
case BPF_CGROUP_SETSOCKOPT:
return BPF_PROG_TYPE_CGROUP_SOCKOPT;
case BPF_TRACE_ITER:
+ case BPF_TRACE_RAW_TP:
+ case BPF_TRACE_FENTRY:
+ case BPF_TRACE_FEXIT:
+ case BPF_MODIFY_RETURN:
return BPF_PROG_TYPE_TRACING;
+ case BPF_LSM_MAC:
+ return BPF_PROG_TYPE_LSM;
case BPF_SK_LOOKUP:
return BPF_PROG_TYPE_SK_LOOKUP;
case BPF_XDP:
@@ -3321,12 +3554,17 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_FLOW_DISSECTOR:
case BPF_SK_LOOKUP:
return netns_bpf_prog_query(attr, uattr);
+ case BPF_SK_SKB_STREAM_PARSER:
+ case BPF_SK_SKB_STREAM_VERDICT:
+ case BPF_SK_MSG_VERDICT:
+ case BPF_SK_SKB_VERDICT:
+ return sock_map_bpf_prog_query(attr, uattr);
default:
return -EINVAL;
}
}
-#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu
+#define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size
static int bpf_prog_test_run(const union bpf_attr *attr,
union bpf_attr __user *uattr)
@@ -4230,22 +4468,7 @@ err_put:
return err;
}
-static int tracing_bpf_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
- struct bpf_prog *prog)
-{
- if (attr->link_create.attach_type != prog->expected_attach_type)
- return -EINVAL;
-
- if (prog->expected_attach_type == BPF_TRACE_ITER)
- return bpf_iter_link_attach(attr, uattr, prog);
- else if (prog->type == BPF_PROG_TYPE_EXT)
- return bpf_tracing_prog_attach(prog,
- attr->link_create.target_fd,
- attr->link_create.target_btf_id);
- return -EINVAL;
-}
-
-#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
+#define BPF_LINK_CREATE_LAST_FIELD link_create.kprobe_multi.cookies
static int link_create(union bpf_attr *attr, bpfptr_t uattr)
{
enum bpf_prog_type ptype;
@@ -4266,16 +4489,20 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
switch (prog->type) {
case BPF_PROG_TYPE_EXT:
- ret = tracing_bpf_link_attach(attr, uattr, prog);
- goto out;
+ break;
case BPF_PROG_TYPE_PERF_EVENT:
- case BPF_PROG_TYPE_KPROBE:
case BPF_PROG_TYPE_TRACEPOINT:
if (attr->link_create.attach_type != BPF_PERF_EVENT) {
ret = -EINVAL;
goto out;
}
- ptype = prog->type;
+ break;
+ case BPF_PROG_TYPE_KPROBE:
+ if (attr->link_create.attach_type != BPF_PERF_EVENT &&
+ attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI) {
+ ret = -EINVAL;
+ goto out;
+ }
break;
default:
ptype = attach_type_to_prog_type(attr->link_create.attach_type);
@@ -4286,7 +4513,7 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
break;
}
- switch (ptype) {
+ switch (prog->type) {
case BPF_PROG_TYPE_CGROUP_SKB:
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
@@ -4296,8 +4523,27 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
ret = cgroup_bpf_link_attach(attr, prog);
break;
+ case BPF_PROG_TYPE_EXT:
+ ret = bpf_tracing_prog_attach(prog,
+ attr->link_create.target_fd,
+ attr->link_create.target_btf_id,
+ attr->link_create.tracing.cookie);
+ break;
+ case BPF_PROG_TYPE_LSM:
case BPF_PROG_TYPE_TRACING:
- ret = tracing_bpf_link_attach(attr, uattr, prog);
+ if (attr->link_create.attach_type != prog->expected_attach_type) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (prog->expected_attach_type == BPF_TRACE_RAW_TP)
+ ret = bpf_raw_tp_link_attach(prog, NULL);
+ else if (prog->expected_attach_type == BPF_TRACE_ITER)
+ ret = bpf_iter_link_attach(attr, uattr, prog);
+ else
+ ret = bpf_tracing_prog_attach(prog,
+ attr->link_create.target_fd,
+ attr->link_create.target_btf_id,
+ attr->link_create.tracing.cookie);
break;
case BPF_PROG_TYPE_FLOW_DISSECTOR:
case BPF_PROG_TYPE_SK_LOOKUP:
@@ -4308,13 +4554,16 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
ret = bpf_xdp_link_attach(attr, prog);
break;
#endif
-#ifdef CONFIG_PERF_EVENTS
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_TRACEPOINT:
- case BPF_PROG_TYPE_KPROBE:
ret = bpf_perf_link_attach(attr, prog);
break;
-#endif
+ case BPF_PROG_TYPE_KPROBE:
+ if (attr->link_create.attach_type == BPF_PERF_EVENT)
+ ret = bpf_perf_link_attach(attr, prog);
+ else
+ ret = bpf_kprobe_multi_link_attach(attr, prog);
+ break;
default:
ret = -EINVAL;
}
@@ -4428,6 +4677,25 @@ struct bpf_link *bpf_link_by_id(u32 id)
return link;
}
+struct bpf_link *bpf_link_get_curr_or_next(u32 *id)
+{
+ struct bpf_link *link;
+
+ spin_lock_bh(&link_idr_lock);
+again:
+ link = idr_get_next(&link_idr, id);
+ if (link) {
+ link = bpf_link_inc_not_zero(link);
+ if (IS_ERR(link)) {
+ (*id)++;
+ goto again;
+ }
+ }
+ spin_unlock_bh(&link_idr_lock);
+
+ return link;
+}
+
#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id
static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
@@ -4595,9 +4863,21 @@ out_prog_put:
static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
+ bool capable;
int err;
- if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
+ capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled;
+
+ /* Intent here is for unprivileged_bpf_disabled to block key object
+ * creation commands for unprivileged users; other actions depend
+ * of fd availability and access to bpffs, so are dependent on
+ * object creation success. Capabilities are later verified for
+ * operations such as load and map create, so even with unprivileged
+ * BPF disabled, capability checks are still carried out for these
+ * and other operations.
+ */
+ if (!capable &&
+ (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD))
return -EPERM;
err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
@@ -4753,23 +5033,55 @@ static bool syscall_prog_is_valid_access(int off, int size,
return true;
}
-BPF_CALL_3(bpf_sys_bpf, int, cmd, void *, attr, u32, attr_size)
+BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
{
+ struct bpf_prog * __maybe_unused prog;
+ struct bpf_tramp_run_ctx __maybe_unused run_ctx;
+
switch (cmd) {
case BPF_MAP_CREATE:
case BPF_MAP_UPDATE_ELEM:
case BPF_MAP_FREEZE:
case BPF_PROG_LOAD:
case BPF_BTF_LOAD:
+ case BPF_LINK_CREATE:
+ case BPF_RAW_TRACEPOINT_OPEN:
break;
- /* case BPF_PROG_TEST_RUN:
- * is not part of this list to prevent recursive test_run
- */
+#ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */
+ case BPF_PROG_TEST_RUN:
+ if (attr->test.data_in || attr->test.data_out ||
+ attr->test.ctx_out || attr->test.duration ||
+ attr->test.repeat || attr->test.flags)
+ return -EINVAL;
+
+ prog = bpf_prog_get_type(attr->test.prog_fd, BPF_PROG_TYPE_SYSCALL);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ if (attr->test.ctx_size_in < prog->aux->max_ctx_offset ||
+ attr->test.ctx_size_in > U16_MAX) {
+ bpf_prog_put(prog);
+ return -EINVAL;
+ }
+
+ run_ctx.bpf_cookie = 0;
+ run_ctx.saved_run_ctx = NULL;
+ if (!__bpf_prog_enter_sleepable(prog, &run_ctx)) {
+ /* recursion detected */
+ bpf_prog_put(prog);
+ return -EBUSY;
+ }
+ attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
+ __bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, &run_ctx);
+ bpf_prog_put(prog);
+ return 0;
+#endif
default:
return -EINVAL;
}
return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
}
+EXPORT_SYMBOL(bpf_sys_bpf);
static const struct bpf_func_proto bpf_sys_bpf_proto = {
.func = bpf_sys_bpf,
@@ -4853,3 +5165,90 @@ const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
const struct bpf_prog_ops bpf_syscall_prog_ops = {
.test_run = bpf_prog_test_run_syscall,
};
+
+#ifdef CONFIG_SYSCTL
+static int bpf_stats_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct static_key *key = (struct static_key *)table->data;
+ static int saved_val;
+ int val, ret;
+ struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = sizeof(val),
+ .mode = table->mode,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ mutex_lock(&bpf_stats_enabled_mutex);
+ val = saved_val;
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret && val != saved_val) {
+ if (val)
+ static_key_slow_inc(key);
+ else
+ static_key_slow_dec(key);
+ saved_val = val;
+ }
+ mutex_unlock(&bpf_stats_enabled_mutex);
+ return ret;
+}
+
+void __weak unpriv_ebpf_notify(int new_state)
+{
+}
+
+static int bpf_unpriv_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret, unpriv_enable = *(int *)table->data;
+ bool locked_state = unpriv_enable == 1;
+ struct ctl_table tmp = *table;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ tmp.data = &unpriv_enable;
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret) {
+ if (locked_state && unpriv_enable != 1)
+ return -EPERM;
+ *(int *)table->data = unpriv_enable;
+ }
+
+ unpriv_ebpf_notify(unpriv_enable);
+
+ return ret;
+}
+
+static struct ctl_table bpf_syscall_table[] = {
+ {
+ .procname = "unprivileged_bpf_disabled",
+ .data = &sysctl_unprivileged_bpf_disabled,
+ .maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
+ .mode = 0644,
+ .proc_handler = bpf_unpriv_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ {
+ .procname = "bpf_stats_enabled",
+ .data = &bpf_stats_enabled_key.key,
+ .maxlen = sizeof(bpf_stats_enabled_key),
+ .mode = 0644,
+ .proc_handler = bpf_stats_handler,
+ },
+ { }
+};
+
+static int __init bpf_syscall_sysctl_init(void)
+{
+ register_sysctl_init("kernel", bpf_syscall_table);
+ return 0;
+}
+late_initcall(bpf_syscall_sysctl_init);
+#endif /* CONFIG_SYSCTL */
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index d94696198ef8..8c921799def4 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -99,7 +99,6 @@ static int __task_seq_show(struct seq_file *seq, struct task_struct *task,
if (!prog)
return 0;
- meta.seq = seq;
ctx.meta = &meta;
ctx.task = task;
return bpf_iter_run_prog(prog, &ctx);
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 5e7edf913060..93c7675f0c9e 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -30,9 +30,12 @@ static DEFINE_MUTEX(trampoline_mutex);
bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
{
enum bpf_attach_type eatype = prog->expected_attach_type;
+ enum bpf_prog_type ptype = prog->type;
- return eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
- eatype == BPF_MODIFY_RETURN;
+ return (ptype == BPF_PROG_TYPE_TRACING &&
+ (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
+ eatype == BPF_MODIFY_RETURN)) ||
+ (ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
}
void *bpf_jit_alloc_exec_page(void)
@@ -45,7 +48,7 @@ void *bpf_jit_alloc_exec_page(void)
set_vm_flush_reset_perms(image);
/* Keep image as writeable. The alternative is to keep flipping ro/rw
- * everytime new program is attached or detached.
+ * every time new program is attached or detached.
*/
set_memory_x((long)image, 1);
return image;
@@ -117,18 +120,6 @@ static void bpf_trampoline_module_put(struct bpf_trampoline *tr)
tr->mod = NULL;
}
-static int is_ftrace_location(void *ip)
-{
- long addr;
-
- addr = ftrace_location((long)ip);
- if (!addr)
- return 0;
- if (WARN_ON_ONCE(addr != (long)ip))
- return -EFAULT;
- return 1;
-}
-
static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
void *ip = tr->func.addr;
@@ -160,12 +151,12 @@ static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_ad
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
void *ip = tr->func.addr;
+ unsigned long faddr;
int ret;
- ret = is_ftrace_location(ip);
- if (ret < 0)
- return ret;
- tr->func.ftrace_managed = ret;
+ faddr = ftrace_location((unsigned long)ip);
+ if (faddr)
+ tr->func.ftrace_managed = true;
if (bpf_trampoline_module_get(tr))
return -ENOENT;
@@ -180,30 +171,30 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
return ret;
}
-static struct bpf_tramp_progs *
+static struct bpf_tramp_links *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
{
- const struct bpf_prog_aux *aux;
- struct bpf_tramp_progs *tprogs;
- struct bpf_prog **progs;
+ struct bpf_tramp_link *link;
+ struct bpf_tramp_links *tlinks;
+ struct bpf_tramp_link **links;
int kind;
*total = 0;
- tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
- if (!tprogs)
+ tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
+ if (!tlinks)
return ERR_PTR(-ENOMEM);
for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
- tprogs[kind].nr_progs = tr->progs_cnt[kind];
+ tlinks[kind].nr_links = tr->progs_cnt[kind];
*total += tr->progs_cnt[kind];
- progs = tprogs[kind].progs;
+ links = tlinks[kind].links;
- hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) {
- *ip_arg |= aux->prog->call_get_func_ip;
- *progs++ = aux->prog;
+ hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
+ *ip_arg |= link->link.prog->call_get_func_ip;
+ *links++ = link;
}
}
- return tprogs;
+ return tlinks;
}
static void __bpf_tramp_image_put_deferred(struct work_struct *work)
@@ -213,7 +204,7 @@ static void __bpf_tramp_image_put_deferred(struct work_struct *work)
im = container_of(work, struct bpf_tramp_image, work);
bpf_image_ksym_del(&im->ksym);
bpf_jit_free_exec(im->image);
- bpf_jit_uncharge_modmem(1);
+ bpf_jit_uncharge_modmem(PAGE_SIZE);
percpu_ref_exit(&im->pcref);
kfree_rcu(im, rcu);
}
@@ -310,7 +301,7 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx)
if (!im)
goto out;
- err = bpf_jit_charge_modmem(1);
+ err = bpf_jit_charge_modmem(PAGE_SIZE);
if (err)
goto out_free_im;
@@ -332,7 +323,7 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx)
out_free_image:
bpf_jit_free_exec(im->image);
out_uncharge:
- bpf_jit_uncharge_modmem(1);
+ bpf_jit_uncharge_modmem(PAGE_SIZE);
out_free_im:
kfree(im);
out:
@@ -342,14 +333,14 @@ out:
static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
struct bpf_tramp_image *im;
- struct bpf_tramp_progs *tprogs;
+ struct bpf_tramp_links *tlinks;
u32 flags = BPF_TRAMP_F_RESTORE_REGS;
bool ip_arg = false;
int err, total;
- tprogs = bpf_trampoline_get_progs(tr, &total, &ip_arg);
- if (IS_ERR(tprogs))
- return PTR_ERR(tprogs);
+ tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg);
+ if (IS_ERR(tlinks))
+ return PTR_ERR(tlinks);
if (total == 0) {
err = unregister_fentry(tr, tr->cur_image->image);
@@ -365,15 +356,15 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
goto out;
}
- if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
- tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
+ if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
+ tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links)
flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
if (ip_arg)
flags |= BPF_TRAMP_F_IP_ARG;
err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE,
- &tr->func.model, flags, tprogs,
+ &tr->func.model, flags, tlinks,
tr->func.addr);
if (err < 0)
goto out;
@@ -393,7 +384,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
tr->cur_image = im;
tr->selector++;
out:
- kfree(tprogs);
+ kfree(tlinks);
return err;
}
@@ -419,13 +410,14 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
}
}
-int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
+int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
enum bpf_tramp_prog_type kind;
+ struct bpf_tramp_link *link_exiting;
int err = 0;
- int cnt;
+ int cnt = 0, i;
- kind = bpf_attach_type_to_tramp(prog);
+ kind = bpf_attach_type_to_tramp(link->link.prog);
mutex_lock(&tr->mutex);
if (tr->extension_prog) {
/* cannot attach fentry/fexit if extension prog is attached.
@@ -434,32 +426,43 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
err = -EBUSY;
goto out;
}
- cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT];
+
+ for (i = 0; i < BPF_TRAMP_MAX; i++)
+ cnt += tr->progs_cnt[i];
+
if (kind == BPF_TRAMP_REPLACE) {
/* Cannot attach extension if fentry/fexit are in use. */
if (cnt) {
err = -EBUSY;
goto out;
}
- tr->extension_prog = prog;
+ tr->extension_prog = link->link.prog;
err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
- prog->bpf_func);
+ link->link.prog->bpf_func);
goto out;
}
- if (cnt >= BPF_MAX_TRAMP_PROGS) {
+ if (cnt >= BPF_MAX_TRAMP_LINKS) {
err = -E2BIG;
goto out;
}
- if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
+ if (!hlist_unhashed(&link->tramp_hlist)) {
/* prog already linked */
err = -EBUSY;
goto out;
}
- hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
+ hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
+ if (link_exiting->link.prog != link->link.prog)
+ continue;
+ /* prog already linked */
+ err = -EBUSY;
+ goto out;
+ }
+
+ hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
tr->progs_cnt[kind]++;
err = bpf_trampoline_update(tr);
if (err) {
- hlist_del_init(&prog->aux->tramp_hlist);
+ hlist_del_init(&link->tramp_hlist);
tr->progs_cnt[kind]--;
}
out:
@@ -468,12 +471,12 @@ out:
}
/* bpf_trampoline_unlink_prog() should never fail. */
-int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
+int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
{
enum bpf_tramp_prog_type kind;
int err;
- kind = bpf_attach_type_to_tramp(prog);
+ kind = bpf_attach_type_to_tramp(link->link.prog);
mutex_lock(&tr->mutex);
if (kind == BPF_TRAMP_REPLACE) {
WARN_ON_ONCE(!tr->extension_prog);
@@ -482,7 +485,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
tr->extension_prog = NULL;
goto out;
}
- hlist_del_init(&prog->aux->tramp_hlist);
+ hlist_del_init(&link->tramp_hlist);
tr->progs_cnt[kind]--;
err = bpf_trampoline_update(tr);
out:
@@ -512,16 +515,19 @@ out:
void bpf_trampoline_put(struct bpf_trampoline *tr)
{
+ int i;
+
if (!tr)
return;
mutex_lock(&trampoline_mutex);
if (!refcount_dec_and_test(&tr->refcnt))
goto out;
WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
- if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
- goto out;
- if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
- goto out;
+
+ for (i = 0; i < BPF_TRAMP_MAX; i++)
+ if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
+ goto out;
+
/* This code will be executed even when the last bpf_tramp_image
* is alive. All progs are detached from the trampoline and the
* trampoline image is patched with jmp into epilogue to skip
@@ -571,11 +577,14 @@ static void notrace inc_misses_counter(struct bpf_prog *prog)
* [2..MAX_U64] - execute bpf prog and record execution time.
* This is start time.
*/
-u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
__acquires(RCU)
{
rcu_read_lock();
migrate_disable();
+
+ run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
inc_misses_counter(prog);
return 0;
@@ -605,29 +614,38 @@ static void notrace update_prog_stats(struct bpf_prog *prog,
}
}
-void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx)
__releases(RCU)
{
+ bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
update_prog_stats(prog, start);
__this_cpu_dec(*(prog->active));
migrate_enable();
rcu_read_unlock();
}
-u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
{
rcu_read_lock_trace();
migrate_disable();
might_fault();
+
if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
inc_misses_counter(prog);
return 0;
}
+
+ run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
return bpf_prog_start_time();
}
-void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
+ struct bpf_tramp_run_ctx *run_ctx)
{
+ bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
update_prog_stats(prog, start);
__this_cpu_dec(*(prog->active));
migrate_enable();
@@ -647,7 +665,7 @@ void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
int __weak
arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_progs *tprogs,
+ struct bpf_tramp_links *tlinks,
void *orig_call)
{
return -ENOTSUPP;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a39eedecc93a..aedac2ac02b9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -187,6 +187,9 @@ struct bpf_verifier_stack_elem {
POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
+static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
+static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
+
static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
@@ -245,6 +248,7 @@ struct bpf_call_arg_meta {
struct bpf_map *map_ptr;
bool raw_mode;
bool pkt_access;
+ u8 release_regno;
int regno;
int access_size;
int mem_size;
@@ -257,6 +261,8 @@ struct bpf_call_arg_meta {
struct btf *ret_btf;
u32 ret_btf_id;
u32 subprogno;
+ struct bpf_map_value_off_desc *kptr_off_desc;
+ u8 uninit_dynptr_regno;
};
struct btf *btf_vmlinux;
@@ -452,7 +458,8 @@ static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
{
return base_type(type) == PTR_TO_SOCKET ||
base_type(type) == PTR_TO_TCP_SOCK ||
- base_type(type) == PTR_TO_MEM;
+ base_type(type) == PTR_TO_MEM ||
+ base_type(type) == PTR_TO_BTF_ID;
}
static bool type_is_rdonly_mem(u32 type)
@@ -470,17 +477,6 @@ static bool type_may_be_null(u32 type)
return type & PTR_MAYBE_NULL;
}
-/* Determine whether the function releases some resources allocated by another
- * function call. The first reference type argument will be assumed to be
- * released by release_reference().
- */
-static bool is_release_function(enum bpf_func_id func_id)
-{
- return func_id == BPF_FUNC_sk_release ||
- func_id == BPF_FUNC_ringbuf_submit ||
- func_id == BPF_FUNC_ringbuf_discard;
-}
-
static bool may_be_acquire_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_sk_lookup_tcp ||
@@ -498,7 +494,8 @@ static bool is_acquire_function(enum bpf_func_id func_id,
if (func_id == BPF_FUNC_sk_lookup_tcp ||
func_id == BPF_FUNC_sk_lookup_udp ||
func_id == BPF_FUNC_skc_lookup_tcp ||
- func_id == BPF_FUNC_ringbuf_reserve)
+ func_id == BPF_FUNC_ringbuf_reserve ||
+ func_id == BPF_FUNC_kptr_xchg)
return true;
if (func_id == BPF_FUNC_map_lookup_elem &&
@@ -516,6 +513,7 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id)
func_id == BPF_FUNC_skc_to_tcp_sock ||
func_id == BPF_FUNC_skc_to_tcp6_sock ||
func_id == BPF_FUNC_skc_to_udp6_sock ||
+ func_id == BPF_FUNC_skc_to_mptcp_sock ||
func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
func_id == BPF_FUNC_skc_to_tcp_request_sock;
}
@@ -535,10 +533,10 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn)
static const char *reg_type_str(struct bpf_verifier_env *env,
enum bpf_reg_type type)
{
- char postfix[16] = {0}, prefix[16] = {0};
+ char postfix[16] = {0}, prefix[32] = {0};
static const char * const str[] = {
[NOT_INIT] = "?",
- [SCALAR_VALUE] = "inv",
+ [SCALAR_VALUE] = "scalar",
[PTR_TO_CTX] = "ctx",
[CONST_PTR_TO_MAP] = "map_ptr",
[PTR_TO_MAP_VALUE] = "map_value",
@@ -553,7 +551,6 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
[PTR_TO_TP_BUFFER] = "tp_buffer",
[PTR_TO_XDP_SOCK] = "xdp_sock",
[PTR_TO_BTF_ID] = "ptr_",
- [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
[PTR_TO_MEM] = "mem",
[PTR_TO_BUF] = "buf",
[PTR_TO_FUNC] = "func",
@@ -561,17 +558,22 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
};
if (type & PTR_MAYBE_NULL) {
- if (base_type(type) == PTR_TO_BTF_ID ||
- base_type(type) == PTR_TO_PERCPU_BTF_ID)
+ if (base_type(type) == PTR_TO_BTF_ID)
strncpy(postfix, "or_null_", 16);
else
strncpy(postfix, "_or_null", 16);
}
if (type & MEM_RDONLY)
- strncpy(prefix, "rdonly_", 16);
+ strncpy(prefix, "rdonly_", 32);
if (type & MEM_ALLOC)
- strncpy(prefix, "alloc_", 16);
+ strncpy(prefix, "alloc_", 32);
+ if (type & MEM_USER)
+ strncpy(prefix, "user_", 32);
+ if (type & MEM_PERCPU)
+ strncpy(prefix, "percpu_", 32);
+ if (type & PTR_UNTRUSTED)
+ strncpy(prefix, "untrusted_", 32);
snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
prefix, str[base_type(type)], postfix);
@@ -583,6 +585,7 @@ static char slot_type_char[] = {
[STACK_SPILL] = 'r',
[STACK_MISC] = 'm',
[STACK_ZERO] = '0',
+ [STACK_DYNPTR] = 'd',
};
static void print_liveness(struct bpf_verifier_env *env,
@@ -598,6 +601,25 @@ static void print_liveness(struct bpf_verifier_env *env,
verbose(env, "D");
}
+static int get_spi(s32 off)
+{
+ return (-off - 1) / BPF_REG_SIZE;
+}
+
+static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
+{
+ int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
+
+ /* We need to check that slots between [spi - nr_slots + 1, spi] are
+ * within [0, allocated_stack).
+ *
+ * Please note that the spi grows downwards. For example, a dynptr
+ * takes the size of two stack slots; the first slot will be at
+ * spi and the second slot will be at spi - 1.
+ */
+ return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
+}
+
static struct bpf_func_state *func(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg)
{
@@ -649,6 +671,132 @@ static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
env->scratched_stack_slots = ~0ULL;
}
+static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
+{
+ switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
+ case DYNPTR_TYPE_LOCAL:
+ return BPF_DYNPTR_TYPE_LOCAL;
+ case DYNPTR_TYPE_RINGBUF:
+ return BPF_DYNPTR_TYPE_RINGBUF;
+ default:
+ return BPF_DYNPTR_TYPE_INVALID;
+ }
+}
+
+static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
+{
+ return type == BPF_DYNPTR_TYPE_RINGBUF;
+}
+
+static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ enum bpf_arg_type arg_type, int insn_idx)
+{
+ struct bpf_func_state *state = func(env, reg);
+ enum bpf_dynptr_type type;
+ int spi, i, id;
+
+ spi = get_spi(reg->off);
+
+ if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
+ return -EINVAL;
+
+ for (i = 0; i < BPF_REG_SIZE; i++) {
+ state->stack[spi].slot_type[i] = STACK_DYNPTR;
+ state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
+ }
+
+ type = arg_to_dynptr_type(arg_type);
+ if (type == BPF_DYNPTR_TYPE_INVALID)
+ return -EINVAL;
+
+ state->stack[spi].spilled_ptr.dynptr.first_slot = true;
+ state->stack[spi].spilled_ptr.dynptr.type = type;
+ state->stack[spi - 1].spilled_ptr.dynptr.type = type;
+
+ if (dynptr_type_refcounted(type)) {
+ /* The id is used to track proper releasing */
+ id = acquire_reference_state(env, insn_idx);
+ if (id < 0)
+ return id;
+
+ state->stack[spi].spilled_ptr.id = id;
+ state->stack[spi - 1].spilled_ptr.id = id;
+ }
+
+ return 0;
+}
+
+static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int spi, i;
+
+ spi = get_spi(reg->off);
+
+ if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
+ return -EINVAL;
+
+ for (i = 0; i < BPF_REG_SIZE; i++) {
+ state->stack[spi].slot_type[i] = STACK_INVALID;
+ state->stack[spi - 1].slot_type[i] = STACK_INVALID;
+ }
+
+ /* Invalidate any slices associated with this dynptr */
+ if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
+ release_reference(env, state->stack[spi].spilled_ptr.id);
+ state->stack[spi].spilled_ptr.id = 0;
+ state->stack[spi - 1].spilled_ptr.id = 0;
+ }
+
+ state->stack[spi].spilled_ptr.dynptr.first_slot = false;
+ state->stack[spi].spilled_ptr.dynptr.type = 0;
+ state->stack[spi - 1].spilled_ptr.dynptr.type = 0;
+
+ return 0;
+}
+
+static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int spi = get_spi(reg->off);
+ int i;
+
+ if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
+ return true;
+
+ for (i = 0; i < BPF_REG_SIZE; i++) {
+ if (state->stack[spi].slot_type[i] == STACK_DYNPTR ||
+ state->stack[spi - 1].slot_type[i] == STACK_DYNPTR)
+ return false;
+ }
+
+ return true;
+}
+
+static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ enum bpf_arg_type arg_type)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int spi = get_spi(reg->off);
+ int i;
+
+ if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
+ !state->stack[spi].spilled_ptr.dynptr.first_slot)
+ return false;
+
+ for (i = 0; i < BPF_REG_SIZE; i++) {
+ if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
+ state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
+ return false;
+ }
+
+ /* ARG_PTR_TO_DYNPTR takes any type of dynptr */
+ if (arg_type == ARG_PTR_TO_DYNPTR)
+ return true;
+
+ return state->stack[spi].spilled_ptr.dynptr.type == arg_to_dynptr_type(arg_type);
+}
+
/* The reg state of a pointer or a bounded scalar was saved when
* it was spilled to the stack.
*/
@@ -682,74 +830,79 @@ static void print_verifier_state(struct bpf_verifier_env *env,
continue;
verbose(env, " R%d", i);
print_liveness(env, reg->live);
- verbose(env, "=%s", reg_type_str(env, t));
+ verbose(env, "=");
if (t == SCALAR_VALUE && reg->precise)
verbose(env, "P");
if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
tnum_is_const(reg->var_off)) {
/* reg->off should be 0 for SCALAR_VALUE */
+ verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
verbose(env, "%lld", reg->var_off.value + reg->off);
} else {
- if (base_type(t) == PTR_TO_BTF_ID ||
- base_type(t) == PTR_TO_PERCPU_BTF_ID)
+ const char *sep = "";
+
+ verbose(env, "%s", reg_type_str(env, t));
+ if (base_type(t) == PTR_TO_BTF_ID)
verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
- verbose(env, "(id=%d", reg->id);
- if (reg_type_may_be_refcounted_or_null(t))
- verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
+ verbose(env, "(");
+/*
+ * _a stands for append, was shortened to avoid multiline statements below.
+ * This macro is used to output a comma separated list of attributes.
+ */
+#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
+
+ if (reg->id)
+ verbose_a("id=%d", reg->id);
+ if (reg_type_may_be_refcounted_or_null(t) && reg->ref_obj_id)
+ verbose_a("ref_obj_id=%d", reg->ref_obj_id);
if (t != SCALAR_VALUE)
- verbose(env, ",off=%d", reg->off);
+ verbose_a("off=%d", reg->off);
if (type_is_pkt_pointer(t))
- verbose(env, ",r=%d", reg->range);
+ verbose_a("r=%d", reg->range);
else if (base_type(t) == CONST_PTR_TO_MAP ||
base_type(t) == PTR_TO_MAP_KEY ||
base_type(t) == PTR_TO_MAP_VALUE)
- verbose(env, ",ks=%d,vs=%d",
- reg->map_ptr->key_size,
- reg->map_ptr->value_size);
+ verbose_a("ks=%d,vs=%d",
+ reg->map_ptr->key_size,
+ reg->map_ptr->value_size);
if (tnum_is_const(reg->var_off)) {
/* Typically an immediate SCALAR_VALUE, but
* could be a pointer whose offset is too big
* for reg->off
*/
- verbose(env, ",imm=%llx", reg->var_off.value);
+ verbose_a("imm=%llx", reg->var_off.value);
} else {
if (reg->smin_value != reg->umin_value &&
reg->smin_value != S64_MIN)
- verbose(env, ",smin_value=%lld",
- (long long)reg->smin_value);
+ verbose_a("smin=%lld", (long long)reg->smin_value);
if (reg->smax_value != reg->umax_value &&
reg->smax_value != S64_MAX)
- verbose(env, ",smax_value=%lld",
- (long long)reg->smax_value);
+ verbose_a("smax=%lld", (long long)reg->smax_value);
if (reg->umin_value != 0)
- verbose(env, ",umin_value=%llu",
- (unsigned long long)reg->umin_value);
+ verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
if (reg->umax_value != U64_MAX)
- verbose(env, ",umax_value=%llu",
- (unsigned long long)reg->umax_value);
+ verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
if (!tnum_is_unknown(reg->var_off)) {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, ",var_off=%s", tn_buf);
+ verbose_a("var_off=%s", tn_buf);
}
if (reg->s32_min_value != reg->smin_value &&
reg->s32_min_value != S32_MIN)
- verbose(env, ",s32_min_value=%d",
- (int)(reg->s32_min_value));
+ verbose_a("s32_min=%d", (int)(reg->s32_min_value));
if (reg->s32_max_value != reg->smax_value &&
reg->s32_max_value != S32_MAX)
- verbose(env, ",s32_max_value=%d",
- (int)(reg->s32_max_value));
+ verbose_a("s32_max=%d", (int)(reg->s32_max_value));
if (reg->u32_min_value != reg->umin_value &&
reg->u32_min_value != U32_MIN)
- verbose(env, ",u32_min_value=%d",
- (int)(reg->u32_min_value));
+ verbose_a("u32_min=%d", (int)(reg->u32_min_value));
if (reg->u32_max_value != reg->umax_value &&
reg->u32_max_value != U32_MAX)
- verbose(env, ",u32_max_value=%d",
- (int)(reg->u32_max_value));
+ verbose_a("u32_max=%d", (int)(reg->u32_max_value));
}
+#undef verbose_a
+
verbose(env, ")");
}
}
@@ -774,7 +927,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
if (is_spilled_reg(&state->stack[i])) {
reg = &state->stack[i].spilled_ptr;
t = reg->type;
- verbose(env, "=%s", reg_type_str(env, t));
+ verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
if (t == SCALAR_VALUE && reg->precise)
verbose(env, "P");
if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
@@ -1546,14 +1699,15 @@ static void mark_reg_not_init(struct bpf_verifier_env *env,
static void mark_btf_ld_reg(struct bpf_verifier_env *env,
struct bpf_reg_state *regs, u32 regno,
enum bpf_reg_type reg_type,
- struct btf *btf, u32 btf_id)
+ struct btf *btf, u32 btf_id,
+ enum bpf_type_flag flag)
{
if (reg_type == SCALAR_VALUE) {
mark_reg_unknown(env, regs, regno);
return;
}
mark_reg_known_zero(env, regs, regno);
- regs[regno].type = PTR_TO_BTF_ID;
+ regs[regno].type = PTR_TO_BTF_ID | flag;
regs[regno].btf = btf;
regs[regno].btf_id = btf_id;
}
@@ -1743,7 +1897,7 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
}
static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
- s16 offset, struct module **btf_modp)
+ s16 offset)
{
struct bpf_kfunc_btf kf_btf = { .offset = offset };
struct bpf_kfunc_btf_tab *tab;
@@ -1797,8 +1951,6 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
kfunc_btf_cmp_by_off, NULL);
}
- if (btf_modp)
- *btf_modp = b->module;
return b->btf;
}
@@ -1814,9 +1966,7 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
kfree(tab);
}
-static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
- u32 func_id, s16 offset,
- struct module **btf_modp)
+static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
{
if (offset) {
if (offset < 0) {
@@ -1827,7 +1977,7 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
return ERR_PTR(-EINVAL);
}
- return __find_kfunc_desc_btf(env, offset, btf_modp);
+ return __find_kfunc_desc_btf(env, offset);
}
return btf_vmlinux ?: ERR_PTR(-ENOENT);
}
@@ -1841,6 +1991,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
struct bpf_kfunc_desc *desc;
const char *func_name;
struct btf *desc_btf;
+ unsigned long call_imm;
unsigned long addr;
int err;
@@ -1890,7 +2041,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
prog_aux->kfunc_btf_tab = btf_tab;
}
- desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL);
+ desc_btf = find_kfunc_desc_btf(env, offset);
if (IS_ERR(desc_btf)) {
verbose(env, "failed to find BTF for kernel function\n");
return PTR_ERR(desc_btf);
@@ -1925,9 +2076,17 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
return -EINVAL;
}
+ call_imm = BPF_CALL_IMM(addr);
+ /* Check whether or not the relative offset overflows desc->imm */
+ if ((unsigned long)(s32)call_imm != call_imm) {
+ verbose(env, "address of kernel function %s is out of range\n",
+ func_name);
+ return -EINVAL;
+ }
+
desc = &tab->descs[tab->nr_descs++];
desc->func_id = func_id;
- desc->imm = BPF_CALL_IMM(addr);
+ desc->imm = call_imm;
desc->offset = offset;
err = btf_distill_func_proto(&env->log, desc_btf,
func_proto, func_name,
@@ -2351,7 +2510,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
return NULL;
- desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL);
+ desc_btf = find_kfunc_desc_btf(data, insn->off);
if (IS_ERR(desc_btf))
return "<error>";
@@ -2767,7 +2926,6 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_XDP_SOCK:
case PTR_TO_BTF_ID:
case PTR_TO_BUF:
- case PTR_TO_PERCPU_BTF_ID:
case PTR_TO_MEM:
case PTR_TO_FUNC:
case PTR_TO_MAP_KEY:
@@ -3197,7 +3355,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
return 0;
}
-enum stack_access_src {
+enum bpf_access_src {
ACCESS_DIRECT = 1, /* the access is performed by an instruction */
ACCESS_HELPER = 2, /* the access is performed by a helper */
};
@@ -3205,7 +3363,7 @@ enum stack_access_src {
static int check_stack_range_initialized(struct bpf_verifier_env *env,
int regno, int off, int access_size,
bool zero_size_allowed,
- enum stack_access_src type,
+ enum bpf_access_src type,
struct bpf_call_arg_meta *meta);
static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
@@ -3455,9 +3613,175 @@ static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
return 0;
}
+static int __check_ptr_off_reg(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, int regno,
+ bool fixed_off_ok)
+{
+ /* Access to this pointer-typed register or passing it to a helper
+ * is only allowed in its original, unmodified form.
+ */
+
+ if (reg->off < 0) {
+ verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
+ reg_type_str(env, reg->type), regno, reg->off);
+ return -EACCES;
+ }
+
+ if (!fixed_off_ok && reg->off) {
+ verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
+ reg_type_str(env, reg->type), regno, reg->off);
+ return -EACCES;
+ }
+
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env, "variable %s access var_off=%s disallowed\n",
+ reg_type_str(env, reg->type), tn_buf);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+int check_ptr_off_reg(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, int regno)
+{
+ return __check_ptr_off_reg(env, reg, regno, false);
+}
+
+static int map_kptr_match_type(struct bpf_verifier_env *env,
+ struct bpf_map_value_off_desc *off_desc,
+ struct bpf_reg_state *reg, u32 regno)
+{
+ const char *targ_name = kernel_type_name(off_desc->kptr.btf, off_desc->kptr.btf_id);
+ int perm_flags = PTR_MAYBE_NULL;
+ const char *reg_name = "";
+
+ /* Only unreferenced case accepts untrusted pointers */
+ if (off_desc->type == BPF_KPTR_UNREF)
+ perm_flags |= PTR_UNTRUSTED;
+
+ if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
+ goto bad_type;
+
+ if (!btf_is_kernel(reg->btf)) {
+ verbose(env, "R%d must point to kernel BTF\n", regno);
+ return -EINVAL;
+ }
+ /* We need to verify reg->type and reg->btf, before accessing reg->btf */
+ reg_name = kernel_type_name(reg->btf, reg->btf_id);
+
+ /* For ref_ptr case, release function check should ensure we get one
+ * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
+ * normal store of unreferenced kptr, we must ensure var_off is zero.
+ * Since ref_ptr cannot be accessed directly by BPF insns, checks for
+ * reg->off and reg->ref_obj_id are not needed here.
+ */
+ if (__check_ptr_off_reg(env, reg, regno, true))
+ return -EACCES;
+
+ /* A full type match is needed, as BTF can be vmlinux or module BTF, and
+ * we also need to take into account the reg->off.
+ *
+ * We want to support cases like:
+ *
+ * struct foo {
+ * struct bar br;
+ * struct baz bz;
+ * };
+ *
+ * struct foo *v;
+ * v = func(); // PTR_TO_BTF_ID
+ * val->foo = v; // reg->off is zero, btf and btf_id match type
+ * val->bar = &v->br; // reg->off is still zero, but we need to retry with
+ * // first member type of struct after comparison fails
+ * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
+ * // to match type
+ *
+ * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
+ * is zero. We must also ensure that btf_struct_ids_match does not walk
+ * the struct to match type against first member of struct, i.e. reject
+ * second case from above. Hence, when type is BPF_KPTR_REF, we set
+ * strict mode to true for type match.
+ */
+ if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
+ off_desc->kptr.btf, off_desc->kptr.btf_id,
+ off_desc->type == BPF_KPTR_REF))
+ goto bad_type;
+ return 0;
+bad_type:
+ verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
+ reg_type_str(env, reg->type), reg_name);
+ verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
+ if (off_desc->type == BPF_KPTR_UNREF)
+ verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
+ targ_name);
+ else
+ verbose(env, "\n");
+ return -EINVAL;
+}
+
+static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
+ int value_regno, int insn_idx,
+ struct bpf_map_value_off_desc *off_desc)
+{
+ struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
+ int class = BPF_CLASS(insn->code);
+ struct bpf_reg_state *val_reg;
+
+ /* Things we already checked for in check_map_access and caller:
+ * - Reject cases where variable offset may touch kptr
+ * - size of access (must be BPF_DW)
+ * - tnum_is_const(reg->var_off)
+ * - off_desc->offset == off + reg->var_off.value
+ */
+ /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
+ if (BPF_MODE(insn->code) != BPF_MEM) {
+ verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
+ return -EACCES;
+ }
+
+ /* We only allow loading referenced kptr, since it will be marked as
+ * untrusted, similar to unreferenced kptr.
+ */
+ if (class != BPF_LDX && off_desc->type == BPF_KPTR_REF) {
+ verbose(env, "store to referenced kptr disallowed\n");
+ return -EACCES;
+ }
+
+ if (class == BPF_LDX) {
+ val_reg = reg_state(env, value_regno);
+ /* We can simply mark the value_regno receiving the pointer
+ * value from map as PTR_TO_BTF_ID, with the correct type.
+ */
+ mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, off_desc->kptr.btf,
+ off_desc->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED);
+ /* For mark_ptr_or_null_reg */
+ val_reg->id = ++env->id_gen;
+ } else if (class == BPF_STX) {
+ val_reg = reg_state(env, value_regno);
+ if (!register_is_null(val_reg) &&
+ map_kptr_match_type(env, off_desc, val_reg, value_regno))
+ return -EACCES;
+ } else if (class == BPF_ST) {
+ if (insn->imm) {
+ verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
+ off_desc->offset);
+ return -EACCES;
+ }
+ } else {
+ verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
+ return -EACCES;
+ }
+ return 0;
+}
+
/* check read/write into a map element with possible variable offset */
static int check_map_access(struct bpf_verifier_env *env, u32 regno,
- int off, int size, bool zero_size_allowed)
+ int off, int size, bool zero_size_allowed,
+ enum bpf_access_src src)
{
struct bpf_verifier_state *vstate = env->cur_state;
struct bpf_func_state *state = vstate->frame[vstate->curframe];
@@ -3493,16 +3817,41 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
return -EACCES;
}
}
+ if (map_value_has_kptrs(map)) {
+ struct bpf_map_value_off *tab = map->kptr_off_tab;
+ int i;
+
+ for (i = 0; i < tab->nr_off; i++) {
+ u32 p = tab->off[i].offset;
+
+ if (reg->smin_value + off < p + sizeof(u64) &&
+ p < reg->umax_value + off + size) {
+ if (src != ACCESS_DIRECT) {
+ verbose(env, "kptr cannot be accessed indirectly by helper\n");
+ return -EACCES;
+ }
+ if (!tnum_is_const(reg->var_off)) {
+ verbose(env, "kptr access cannot have variable offset\n");
+ return -EACCES;
+ }
+ if (p != off + reg->var_off.value) {
+ verbose(env, "kptr access misaligned expected=%u off=%llu\n",
+ p, off + reg->var_off.value);
+ return -EACCES;
+ }
+ if (size != bpf_size_to_bytes(BPF_DW)) {
+ verbose(env, "kptr access size must be BPF_DW\n");
+ return -EACCES;
+ }
+ break;
+ }
+ }
+ }
return err;
}
#define MAX_PACKET_OFF 0xffff
-static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
-{
- return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
-}
-
static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
const struct bpf_call_arg_meta *meta,
enum bpf_access_type t)
@@ -3971,38 +4320,6 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
}
#endif
-static int __check_ptr_off_reg(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg, int regno,
- bool fixed_off_ok)
-{
- /* Access to this pointer-typed register or passing it to a helper
- * is only allowed in its original, unmodified form.
- */
-
- if (!fixed_off_ok && reg->off) {
- verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
- reg_type_str(env, reg->type), regno, reg->off);
- return -EACCES;
- }
-
- if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
- char tn_buf[48];
-
- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "variable %s access var_off=%s disallowed\n",
- reg_type_str(env, reg->type), tn_buf);
- return -EACCES;
- }
-
- return 0;
-}
-
-int check_ptr_off_reg(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg, int regno)
-{
- return __check_ptr_off_reg(env, reg, regno, false);
-}
-
static int __check_buffer_access(struct bpf_verifier_env *env,
const char *buf_info,
const struct bpf_reg_state *reg,
@@ -4047,9 +4364,9 @@ static int check_buffer_access(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
int regno, int off, int size,
bool zero_size_allowed,
- const char *buf_info,
u32 *max_access)
{
+ const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
int err;
err = __check_buffer_access(env, buf_info, reg, regno, off, size);
@@ -4159,6 +4476,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
struct bpf_reg_state *reg = regs + regno;
const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
const char *tname = btf_name_by_offset(reg->btf, t->name_off);
+ enum bpf_type_flag flag = 0;
u32 btf_id;
int ret;
@@ -4178,9 +4496,23 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
return -EACCES;
}
+ if (reg->type & MEM_USER) {
+ verbose(env,
+ "R%d is ptr_%s access user memory: off=%d\n",
+ regno, tname, off);
+ return -EACCES;
+ }
+
+ if (reg->type & MEM_PERCPU) {
+ verbose(env,
+ "R%d is ptr_%s access percpu memory: off=%d\n",
+ regno, tname, off);
+ return -EACCES;
+ }
+
if (env->ops->btf_struct_access) {
ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
- off, size, atype, &btf_id);
+ off, size, atype, &btf_id, &flag);
} else {
if (atype != BPF_READ) {
verbose(env, "only read is supported\n");
@@ -4188,14 +4520,20 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
}
ret = btf_struct_access(&env->log, reg->btf, t, off, size,
- atype, &btf_id);
+ atype, &btf_id, &flag);
}
if (ret < 0)
return ret;
+ /* If this is an untrusted pointer, all pointers formed by walking it
+ * also inherit the untrusted flag.
+ */
+ if (type_flag(reg->type) & PTR_UNTRUSTED)
+ flag |= PTR_UNTRUSTED;
+
if (atype == BPF_READ && value_regno >= 0)
- mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id);
+ mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
return 0;
}
@@ -4208,6 +4546,7 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
{
struct bpf_reg_state *reg = regs + regno;
struct bpf_map *map = reg->map_ptr;
+ enum bpf_type_flag flag = 0;
const struct btf_type *t;
const char *tname;
u32 btf_id;
@@ -4245,12 +4584,12 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
return -EACCES;
}
- ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id);
+ ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &flag);
if (ret < 0)
return ret;
if (value_regno >= 0)
- mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id);
+ mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
return 0;
}
@@ -4285,7 +4624,7 @@ static int check_stack_slot_within_bounds(int off,
static int check_stack_access_within_bounds(
struct bpf_verifier_env *env,
int regno, int off, int access_size,
- enum stack_access_src src, enum bpf_access_type type)
+ enum bpf_access_src src, enum bpf_access_type type)
{
struct bpf_reg_state *regs = cur_regs(env);
struct bpf_reg_state *reg = regs + regno;
@@ -4381,6 +4720,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_MAP_VALUE) {
+ struct bpf_map_value_off_desc *kptr_off_desc = NULL;
+
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
verbose(env, "R%d leaks addr into map\n", value_regno);
@@ -4389,8 +4730,16 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_map_access_type(env, regno, off, size, t);
if (err)
return err;
- err = check_map_access(env, regno, off, size, false);
- if (!err && t == BPF_READ && value_regno >= 0) {
+ err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
+ if (err)
+ return err;
+ if (tnum_is_const(reg->var_off))
+ kptr_off_desc = bpf_map_kptr_off_contains(reg->map_ptr,
+ off + reg->var_off.value);
+ if (kptr_off_desc) {
+ err = check_map_kptr_access(env, regno, value_regno, insn_idx,
+ kptr_off_desc);
+ } else if (t == BPF_READ && value_regno >= 0) {
struct bpf_map *map = reg->map_ptr;
/* if map is read-only, track its contents as scalars */
@@ -4451,7 +4800,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err < 0)
return err;
- err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf, &btf_id);
+ err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
+ &btf_id);
if (err)
verbose_linfo(env, insn_idx, "; ");
if (!err && t == BPF_READ && value_regno >= 0) {
@@ -4535,7 +4885,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_tp_buffer_access(env, reg, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
- } else if (reg->type == PTR_TO_BTF_ID) {
+ } else if (base_type(reg->type) == PTR_TO_BTF_ID &&
+ !type_may_be_null(reg->type)) {
err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
value_regno);
} else if (reg->type == CONST_PTR_TO_MAP) {
@@ -4543,7 +4894,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
value_regno);
} else if (base_type(reg->type) == PTR_TO_BUF) {
bool rdonly_mem = type_is_rdonly_mem(reg->type);
- const char *buf_info;
u32 *max_access;
if (rdonly_mem) {
@@ -4552,15 +4902,13 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
regno, reg_type_str(env, reg->type));
return -EACCES;
}
- buf_info = "rdonly";
max_access = &env->prog->aux->max_rdonly_access;
} else {
- buf_info = "rdwr";
max_access = &env->prog->aux->max_rdwr_access;
}
err = check_buffer_access(env, reg, regno, off, size, false,
- buf_info, max_access);
+ max_access);
if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
mark_reg_unknown(env, regs, value_regno);
@@ -4694,7 +5042,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
static int check_stack_range_initialized(
struct bpf_verifier_env *env, int regno, int off,
int access_size, bool zero_size_allowed,
- enum stack_access_src type, struct bpf_call_arg_meta *meta)
+ enum bpf_access_src type, struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *reg = reg_state(env, regno);
struct bpf_func_state *state = func(env, reg);
@@ -4781,7 +5129,7 @@ static int check_stack_range_initialized(
}
if (is_spilled_reg(&state->stack[spi]) &&
- state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
+ base_type(state->stack[spi].spilled_ptr.type) == PTR_TO_BTF_ID)
goto mark;
if (is_spilled_reg(&state->stack[spi]) &&
@@ -4823,7 +5171,6 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
- const char *buf_info;
u32 *max_access;
switch (base_type(reg->type)) {
@@ -4832,6 +5179,11 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
return check_packet_access(env, regno, reg->off, access_size,
zero_size_allowed);
case PTR_TO_MAP_KEY:
+ if (meta && meta->raw_mode) {
+ verbose(env, "R%d cannot write into %s\n", regno,
+ reg_type_str(env, reg->type));
+ return -EACCES;
+ }
return check_mem_region_access(env, regno, reg->off, access_size,
reg->map_ptr->key_size, false);
case PTR_TO_MAP_VALUE:
@@ -4840,25 +5192,33 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
BPF_READ))
return -EACCES;
return check_map_access(env, regno, reg->off, access_size,
- zero_size_allowed);
+ zero_size_allowed, ACCESS_HELPER);
case PTR_TO_MEM:
+ if (type_is_rdonly_mem(reg->type)) {
+ if (meta && meta->raw_mode) {
+ verbose(env, "R%d cannot write into %s\n", regno,
+ reg_type_str(env, reg->type));
+ return -EACCES;
+ }
+ }
return check_mem_region_access(env, regno, reg->off,
access_size, reg->mem_size,
zero_size_allowed);
case PTR_TO_BUF:
if (type_is_rdonly_mem(reg->type)) {
- if (meta && meta->raw_mode)
+ if (meta && meta->raw_mode) {
+ verbose(env, "R%d cannot write into %s\n", regno,
+ reg_type_str(env, reg->type));
return -EACCES;
+ }
- buf_info = "rdonly";
max_access = &env->prog->aux->max_rdonly_access;
} else {
- buf_info = "rdwr";
max_access = &env->prog->aux->max_rdwr_access;
}
return check_buffer_access(env, reg, regno, reg->off,
access_size, zero_size_allowed,
- buf_info, max_access);
+ max_access);
case PTR_TO_STACK:
return check_stack_range_initialized(
env,
@@ -4877,27 +5237,119 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
}
}
+static int check_mem_size_reg(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ bool zero_size_allowed,
+ struct bpf_call_arg_meta *meta)
+{
+ int err;
+
+ /* This is used to refine r0 return value bounds for helpers
+ * that enforce this value as an upper bound on return values.
+ * See do_refine_retval_range() for helpers that can refine
+ * the return value. C type of helper is u32 so we pull register
+ * bound from umax_value however, if negative verifier errors
+ * out. Only upper bounds can be learned because retval is an
+ * int type and negative retvals are allowed.
+ */
+ meta->msize_max_value = reg->umax_value;
+
+ /* The register is SCALAR_VALUE; the access check
+ * happens using its boundaries.
+ */
+ if (!tnum_is_const(reg->var_off))
+ /* For unprivileged variable accesses, disable raw
+ * mode so that the program is required to
+ * initialize all the memory that the helper could
+ * just partially fill up.
+ */
+ meta = NULL;
+
+ if (reg->smin_value < 0) {
+ verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
+ regno);
+ return -EACCES;
+ }
+
+ if (reg->umin_value == 0) {
+ err = check_helper_mem_access(env, regno - 1, 0,
+ zero_size_allowed,
+ meta);
+ if (err)
+ return err;
+ }
+
+ if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
+ verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
+ regno);
+ return -EACCES;
+ }
+ err = check_helper_mem_access(env, regno - 1,
+ reg->umax_value,
+ zero_size_allowed, meta);
+ if (!err)
+ err = mark_chain_precision(env, regno);
+ return err;
+}
+
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size)
{
+ bool may_be_null = type_may_be_null(reg->type);
+ struct bpf_reg_state saved_reg;
+ struct bpf_call_arg_meta meta;
+ int err;
+
if (register_is_null(reg))
return 0;
- if (type_may_be_null(reg->type)) {
- /* Assuming that the register contains a value check if the memory
- * access is safe. Temporarily save and restore the register's state as
- * the conversion shouldn't be visible to a caller.
- */
- const struct bpf_reg_state saved_reg = *reg;
- int rv;
-
+ memset(&meta, 0, sizeof(meta));
+ /* Assuming that the register contains a value check if the memory
+ * access is safe. Temporarily save and restore the register's state as
+ * the conversion shouldn't be visible to a caller.
+ */
+ if (may_be_null) {
+ saved_reg = *reg;
mark_ptr_not_null_reg(reg);
- rv = check_helper_mem_access(env, regno, mem_size, true, NULL);
+ }
+
+ err = check_helper_mem_access(env, regno, mem_size, true, &meta);
+ /* Check access for BPF_WRITE */
+ meta.raw_mode = true;
+ err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
+
+ if (may_be_null)
*reg = saved_reg;
- return rv;
+
+ return err;
+}
+
+int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ u32 regno)
+{
+ struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
+ bool may_be_null = type_may_be_null(mem_reg->type);
+ struct bpf_reg_state saved_reg;
+ struct bpf_call_arg_meta meta;
+ int err;
+
+ WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
+
+ memset(&meta, 0, sizeof(meta));
+
+ if (may_be_null) {
+ saved_reg = *mem_reg;
+ mark_ptr_not_null_reg(mem_reg);
}
- return check_helper_mem_access(env, regno, mem_size, true, NULL);
+ err = check_mem_size_reg(env, reg, regno, true, &meta);
+ /* Check access for BPF_WRITE */
+ meta.raw_mode = true;
+ err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
+
+ if (may_be_null)
+ *mem_reg = saved_reg;
+ return err;
}
/* Implementation details:
@@ -5029,10 +5481,51 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno,
return 0;
}
-static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
+static int process_kptr_func(struct bpf_verifier_env *env, int regno,
+ struct bpf_call_arg_meta *meta)
{
- return base_type(type) == ARG_PTR_TO_MEM ||
- base_type(type) == ARG_PTR_TO_UNINIT_MEM;
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ struct bpf_map_value_off_desc *off_desc;
+ struct bpf_map *map_ptr = reg->map_ptr;
+ u32 kptr_off;
+ int ret;
+
+ if (!tnum_is_const(reg->var_off)) {
+ verbose(env,
+ "R%d doesn't have constant offset. kptr has to be at the constant offset\n",
+ regno);
+ return -EINVAL;
+ }
+ if (!map_ptr->btf) {
+ verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
+ map_ptr->name);
+ return -EINVAL;
+ }
+ if (!map_value_has_kptrs(map_ptr)) {
+ ret = PTR_ERR_OR_ZERO(map_ptr->kptr_off_tab);
+ if (ret == -E2BIG)
+ verbose(env, "map '%s' has more than %d kptr\n", map_ptr->name,
+ BPF_MAP_VALUE_OFF_MAX);
+ else if (ret == -EEXIST)
+ verbose(env, "map '%s' has repeating kptr BTF tags\n", map_ptr->name);
+ else
+ verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
+ return -EINVAL;
+ }
+
+ meta->map_ptr = map_ptr;
+ kptr_off = reg->off + reg->var_off.value;
+ off_desc = bpf_map_kptr_off_contains(map_ptr, kptr_off);
+ if (!off_desc) {
+ verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
+ return -EACCES;
+ }
+ if (off_desc->type != BPF_KPTR_REF) {
+ verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
+ return -EACCES;
+ }
+ meta->kptr_off_desc = off_desc;
+ return 0;
}
static bool arg_type_is_mem_size(enum bpf_arg_type type)
@@ -5052,6 +5545,16 @@ static bool arg_type_is_int_ptr(enum bpf_arg_type type)
type == ARG_PTR_TO_LONG;
}
+static bool arg_type_is_release(enum bpf_arg_type type)
+{
+ return type & OBJ_RELEASE;
+}
+
+static bool arg_type_is_dynptr(enum bpf_arg_type type)
+{
+ return base_type(type) == ARG_PTR_TO_DYNPTR;
+}
+
static int int_ptr_type_to_size(enum bpf_arg_type type)
{
if (type == ARG_PTR_TO_INT)
@@ -5159,16 +5662,16 @@ static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | ME
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
-static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
+static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } };
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
+static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
[ARG_PTR_TO_MAP_VALUE] = &map_key_value_types,
- [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types,
[ARG_CONST_SIZE] = &scalar_types,
[ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
[ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
@@ -5182,7 +5685,6 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
[ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
[ARG_PTR_TO_MEM] = &mem_types,
- [ARG_PTR_TO_UNINIT_MEM] = &mem_types,
[ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
[ARG_PTR_TO_INT] = &int_ptr_types,
[ARG_PTR_TO_LONG] = &int_ptr_types,
@@ -5191,11 +5693,14 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_STACK] = &stack_ptr_types,
[ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
[ARG_PTR_TO_TIMER] = &timer_types,
+ [ARG_PTR_TO_KPTR] = &kptr_types,
+ [ARG_PTR_TO_DYNPTR] = &stack_ptr_types,
};
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
enum bpf_arg_type arg_type,
- const u32 *arg_btf_id)
+ const u32 *arg_btf_id,
+ struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
enum bpf_reg_type expected, type = reg->type;
@@ -5240,6 +5745,13 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
found:
if (reg->type == PTR_TO_BTF_ID) {
+ /* For bpf_sk_release, it needs to match against first member
+ * 'struct sock_common', hence make an exception for it. This
+ * allows bpf_sk_release to work for multiple socket types.
+ */
+ bool strict_type_match = arg_type_is_release(arg_type) &&
+ meta->func_id != BPF_FUNC_sk_release;
+
if (!arg_btf_id) {
if (!compatible->btf_id) {
verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
@@ -5248,8 +5760,12 @@ found:
arg_btf_id = compatible->btf_id;
}
- if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
- btf_vmlinux, *arg_btf_id)) {
+ if (meta->func_id == BPF_FUNC_kptr_xchg) {
+ if (map_kptr_match_type(env, meta->kptr_off_desc, reg, regno))
+ return -EACCES;
+ } else if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
+ btf_vmlinux, *arg_btf_id,
+ strict_type_match)) {
verbose(env, "R%d is of type %s but %s is expected\n",
regno, kernel_type_name(reg->btf, reg->btf_id),
kernel_type_name(btf_vmlinux, *arg_btf_id));
@@ -5260,6 +5776,70 @@ found:
return 0;
}
+int check_func_arg_reg_off(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, int regno,
+ enum bpf_arg_type arg_type)
+{
+ enum bpf_reg_type type = reg->type;
+ bool fixed_off_ok = false;
+
+ switch ((u32)type) {
+ /* Pointer types where reg offset is explicitly allowed: */
+ case PTR_TO_STACK:
+ if (arg_type_is_dynptr(arg_type) && reg->off % BPF_REG_SIZE) {
+ verbose(env, "cannot pass in dynptr at an offset\n");
+ return -EINVAL;
+ }
+ fallthrough;
+ case PTR_TO_PACKET:
+ case PTR_TO_PACKET_META:
+ case PTR_TO_MAP_KEY:
+ case PTR_TO_MAP_VALUE:
+ case PTR_TO_MEM:
+ case PTR_TO_MEM | MEM_RDONLY:
+ case PTR_TO_MEM | MEM_ALLOC:
+ case PTR_TO_BUF:
+ case PTR_TO_BUF | MEM_RDONLY:
+ case SCALAR_VALUE:
+ /* Some of the argument types nevertheless require a
+ * zero register offset.
+ */
+ if (base_type(arg_type) != ARG_PTR_TO_ALLOC_MEM)
+ return 0;
+ break;
+ /* All the rest must be rejected, except PTR_TO_BTF_ID which allows
+ * fixed offset.
+ */
+ case PTR_TO_BTF_ID:
+ /* When referenced PTR_TO_BTF_ID is passed to release function,
+ * it's fixed offset must be 0. In the other cases, fixed offset
+ * can be non-zero.
+ */
+ if (arg_type_is_release(arg_type) && reg->off) {
+ verbose(env, "R%d must have zero offset when passed to release func\n",
+ regno);
+ return -EINVAL;
+ }
+ /* For arg is release pointer, fixed_off_ok must be false, but
+ * we already checked and rejected reg->off != 0 above, so set
+ * to true to allow fixed offset for all other cases.
+ */
+ fixed_off_ok = true;
+ break;
+ default:
+ break;
+ }
+ return __check_ptr_off_reg(env, reg, regno, fixed_off_ok);
+}
+
+static u32 stack_slot_get_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int spi = get_spi(reg->off);
+
+ return state->stack[spi].spilled_ptr.id;
+}
+
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
const struct bpf_func_proto *fn)
@@ -5292,8 +5872,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EACCES;
}
- if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE ||
- base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
+ if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
err = resolve_map_arg_type(env, meta, &arg_type);
if (err)
return err;
@@ -5305,40 +5884,37 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
*/
goto skip_type_check;
- err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
+ err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg], meta);
if (err)
return err;
- switch ((u32)type) {
- case SCALAR_VALUE:
- /* Pointer types where reg offset is explicitly allowed: */
- case PTR_TO_PACKET:
- case PTR_TO_PACKET_META:
- case PTR_TO_MAP_KEY:
- case PTR_TO_MAP_VALUE:
- case PTR_TO_MEM:
- case PTR_TO_MEM | MEM_RDONLY:
- case PTR_TO_MEM | MEM_ALLOC:
- case PTR_TO_BUF:
- case PTR_TO_BUF | MEM_RDONLY:
- case PTR_TO_STACK:
- /* Some of the argument types nevertheless require a
- * zero register offset.
- */
- if (arg_type == ARG_PTR_TO_ALLOC_MEM)
- goto force_off_check;
- break;
- /* All the rest must be rejected: */
- default:
-force_off_check:
- err = __check_ptr_off_reg(env, reg, regno,
- type == PTR_TO_BTF_ID);
- if (err < 0)
- return err;
- break;
- }
+ err = check_func_arg_reg_off(env, reg, regno, arg_type);
+ if (err)
+ return err;
skip_type_check:
+ if (arg_type_is_release(arg_type)) {
+ if (arg_type_is_dynptr(arg_type)) {
+ struct bpf_func_state *state = func(env, reg);
+ int spi = get_spi(reg->off);
+
+ if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
+ !state->stack[spi].spilled_ptr.id) {
+ verbose(env, "arg %d is an unacquired reference\n", regno);
+ return -EINVAL;
+ }
+ } else if (!reg->ref_obj_id && !register_is_null(reg)) {
+ verbose(env, "R%d must be referenced when passed to release function\n",
+ regno);
+ return -EINVAL;
+ }
+ if (meta->release_regno) {
+ verbose(env, "verifier internal error: more than one release argument\n");
+ return -EFAULT;
+ }
+ meta->release_regno = regno;
+ }
+
if (reg->ref_obj_id) {
if (meta->ref_obj_id) {
verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
@@ -5391,8 +5967,7 @@ skip_type_check:
err = check_helper_mem_access(env, regno,
meta->map_ptr->key_size, false,
NULL);
- } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE ||
- base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
+ } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
if (type_may_be_null(arg_type) && register_is_null(reg))
return 0;
@@ -5404,7 +5979,7 @@ skip_type_check:
verbose(env, "invalid map_ptr to access map->value\n");
return -EACCES;
}
- meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
+ meta->raw_mode = arg_type & MEM_UNINIT;
err = check_helper_mem_access(env, regno,
meta->map_ptr->value_size, false,
meta);
@@ -5431,59 +6006,49 @@ skip_type_check:
return -EACCES;
} else if (arg_type == ARG_PTR_TO_FUNC) {
meta->subprogno = reg->subprogno;
- } else if (arg_type_is_mem_ptr(arg_type)) {
+ } else if (base_type(arg_type) == ARG_PTR_TO_MEM) {
/* The access to this pointer is only checked when we hit the
* next is_mem_size argument below.
*/
- meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
+ meta->raw_mode = arg_type & MEM_UNINIT;
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
- /* This is used to refine r0 return value bounds for helpers
- * that enforce this value as an upper bound on return values.
- * See do_refine_retval_range() for helpers that can refine
- * the return value. C type of helper is u32 so we pull register
- * bound from umax_value however, if negative verifier errors
- * out. Only upper bounds can be learned because retval is an
- * int type and negative retvals are allowed.
- */
- meta->msize_max_value = reg->umax_value;
+ err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
+ } else if (arg_type_is_dynptr(arg_type)) {
+ if (arg_type & MEM_UNINIT) {
+ if (!is_dynptr_reg_valid_uninit(env, reg)) {
+ verbose(env, "Dynptr has to be an uninitialized dynptr\n");
+ return -EINVAL;
+ }
- /* The register is SCALAR_VALUE; the access check
- * happens using its boundaries.
- */
- if (!tnum_is_const(reg->var_off))
- /* For unprivileged variable accesses, disable raw
- * mode so that the program is required to
- * initialize all the memory that the helper could
- * just partially fill up.
+ /* We only support one dynptr being uninitialized at the moment,
+ * which is sufficient for the helper functions we have right now.
*/
- meta = NULL;
+ if (meta->uninit_dynptr_regno) {
+ verbose(env, "verifier internal error: multiple uninitialized dynptr args\n");
+ return -EFAULT;
+ }
- if (reg->smin_value < 0) {
- verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
- regno);
- return -EACCES;
- }
+ meta->uninit_dynptr_regno = regno;
+ } else if (!is_dynptr_reg_valid_init(env, reg, arg_type)) {
+ const char *err_extra = "";
- if (reg->umin_value == 0) {
- err = check_helper_mem_access(env, regno - 1, 0,
- zero_size_allowed,
- meta);
- if (err)
- return err;
- }
+ switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
+ case DYNPTR_TYPE_LOCAL:
+ err_extra = "local ";
+ break;
+ case DYNPTR_TYPE_RINGBUF:
+ err_extra = "ringbuf ";
+ break;
+ default:
+ break;
+ }
- if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
- verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
- regno);
- return -EACCES;
+ verbose(env, "Expected an initialized %sdynptr as arg #%d\n",
+ err_extra, arg + 1);
+ return -EINVAL;
}
- err = check_helper_mem_access(env, regno - 1,
- reg->umax_value,
- zero_size_allowed, meta);
- if (!err)
- err = mark_chain_precision(env, regno);
} else if (arg_type_is_alloc_size(arg_type)) {
if (!tnum_is_const(reg->var_off)) {
verbose(env, "R%d is not a known constant'\n",
@@ -5520,7 +6085,8 @@ skip_type_check:
}
err = check_map_access(env, regno, reg->off,
- map->value_size - reg->off, false);
+ map->value_size - reg->off, false,
+ ACCESS_HELPER);
if (err)
return err;
@@ -5536,6 +6102,9 @@ skip_type_check:
verbose(env, "string is not zero-terminated\n");
return -EINVAL;
}
+ } else if (arg_type == ARG_PTR_TO_KPTR) {
+ if (process_kptr_func(env, regno, meta))
+ return -EACCES;
}
return err;
@@ -5601,7 +6170,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_RINGBUF:
if (func_id != BPF_FUNC_ringbuf_output &&
func_id != BPF_FUNC_ringbuf_reserve &&
- func_id != BPF_FUNC_ringbuf_query)
+ func_id != BPF_FUNC_ringbuf_query &&
+ func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
+ func_id != BPF_FUNC_ringbuf_submit_dynptr &&
+ func_id != BPF_FUNC_ringbuf_discard_dynptr)
goto error;
break;
case BPF_MAP_TYPE_STACK_TRACE:
@@ -5717,6 +6289,9 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_ringbuf_output:
case BPF_FUNC_ringbuf_reserve:
case BPF_FUNC_ringbuf_query:
+ case BPF_FUNC_ringbuf_reserve_dynptr:
+ case BPF_FUNC_ringbuf_submit_dynptr:
+ case BPF_FUNC_ringbuf_discard_dynptr:
if (map->map_type != BPF_MAP_TYPE_RINGBUF)
goto error;
break;
@@ -5771,6 +6346,12 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
goto error;
break;
+ case BPF_FUNC_map_lookup_percpu_elem:
+ if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
+ map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
+ map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
+ goto error;
+ break;
case BPF_FUNC_sk_storage_get:
case BPF_FUNC_sk_storage_delete:
if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
@@ -5822,10 +6403,8 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
enum bpf_arg_type arg_next)
{
- return (arg_type_is_mem_ptr(arg_curr) &&
- !arg_type_is_mem_size(arg_next)) ||
- (!arg_type_is_mem_ptr(arg_curr) &&
- arg_type_is_mem_size(arg_next));
+ return (base_type(arg_curr) == ARG_PTR_TO_MEM) !=
+ arg_type_is_mem_size(arg_next);
}
static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
@@ -5836,7 +6415,7 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
* helper function specification.
*/
if (arg_type_is_mem_size(fn->arg1_type) ||
- arg_type_is_mem_ptr(fn->arg5_type) ||
+ base_type(fn->arg5_type) == ARG_PTR_TO_MEM ||
check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
@@ -5878,17 +6457,18 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn)
int i;
for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
- if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
+ if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
return false;
- if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
+ if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
return false;
}
return true;
}
-static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
+static int check_func_proto(const struct bpf_func_proto *fn, int func_id,
+ struct bpf_call_arg_meta *meta)
{
return check_raw_mode_ok(fn) &&
check_arg_pair_ok(fn) &&
@@ -6383,7 +6963,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
func_id != BPF_FUNC_map_pop_elem &&
func_id != BPF_FUNC_map_peek_elem &&
func_id != BPF_FUNC_for_each_map_elem &&
- func_id != BPF_FUNC_redirect_map)
+ func_id != BPF_FUNC_redirect_map &&
+ func_id != BPF_FUNC_map_lookup_percpu_elem)
return 0;
if (map == NULL) {
@@ -6572,7 +7153,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
memset(&meta, 0, sizeof(meta));
meta.pkt_access = fn->pkt_access;
- err = check_func_proto(fn, func_id);
+ err = check_func_proto(fn, func_id, &meta);
if (err) {
verbose(env, "kernel subsystem misconfigured func %s#%d\n",
func_id_name(func_id), func_id);
@@ -6605,8 +7186,35 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return err;
}
- if (is_release_function(func_id)) {
- err = release_reference(env, meta.ref_obj_id);
+ regs = cur_regs(env);
+
+ if (meta.uninit_dynptr_regno) {
+ /* we write BPF_DW bits (8 bytes) at a time */
+ for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
+ err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno,
+ i, BPF_DW, BPF_WRITE, -1, false);
+ if (err)
+ return err;
+ }
+
+ err = mark_stack_slots_dynptr(env, &regs[meta.uninit_dynptr_regno],
+ fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1],
+ insn_idx);
+ if (err)
+ return err;
+ }
+
+ if (meta.release_regno) {
+ err = -EINVAL;
+ if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1]))
+ err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
+ else if (meta.ref_obj_id)
+ err = release_reference(env, meta.ref_obj_id);
+ /* meta.ref_obj_id can only be 0 if register that is meant to be
+ * released is NULL, which must be > R0.
+ */
+ else if (register_is_null(&regs[meta.release_regno]))
+ err = 0;
if (err) {
verbose(env, "func %s#%d reference has not been acquired before\n",
func_id_name(func_id), func_id);
@@ -6614,8 +7222,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
}
}
- regs = cur_regs(env);
-
switch (func_id) {
case BPF_FUNC_tail_call:
err = check_reference_leak(env);
@@ -6652,6 +7258,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_loop_callback_state);
break;
+ case BPF_FUNC_dynptr_from_mem:
+ if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
+ verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
+ reg_type_str(env, regs[BPF_REG_1].type));
+ return -EACCES;
+ }
}
if (err)
@@ -6739,21 +7351,25 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs[BPF_REG_0].btf_id = meta.ret_btf_id;
}
} else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
+ struct btf *ret_btf;
int ret_btf_id;
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
- ret_btf_id = *fn->ret_btf_id;
+ if (func_id == BPF_FUNC_kptr_xchg) {
+ ret_btf = meta.kptr_off_desc->kptr.btf;
+ ret_btf_id = meta.kptr_off_desc->kptr.btf_id;
+ } else {
+ ret_btf = btf_vmlinux;
+ ret_btf_id = *fn->ret_btf_id;
+ }
if (ret_btf_id == 0) {
verbose(env, "invalid return type %u of func %s#%d\n",
base_type(ret_type), func_id_name(func_id),
func_id);
return -EINVAL;
}
- /* current BPF helper definitions are only coming from
- * built-in code with type IDs from vmlinux BTF
- */
- regs[BPF_REG_0].btf = btf_vmlinux;
+ regs[BPF_REG_0].btf = ret_btf;
regs[BPF_REG_0].btf_id = ret_btf_id;
} else {
verbose(env, "unknown return type %u of func %s#%d\n",
@@ -6776,6 +7392,21 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs[BPF_REG_0].id = id;
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = id;
+ } else if (func_id == BPF_FUNC_dynptr_data) {
+ int dynptr_id = 0, i;
+
+ /* Find the id of the dynptr we're acquiring a reference to */
+ for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
+ if (arg_type_is_dynptr(fn->arg_type[i])) {
+ if (dynptr_id) {
+ verbose(env, "verifier internal error: multiple dynptr args in func\n");
+ return -EFAULT;
+ }
+ dynptr_id = stack_slot_get_id(env, &regs[BPF_REG_1 + i]);
+ }
+ }
+ /* For release_reference() */
+ regs[BPF_REG_0].ref_obj_id = dynptr_id;
}
do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
@@ -6842,22 +7473,23 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
}
}
-static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
+static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ int *insn_idx_p)
{
const struct btf_type *t, *func, *func_proto, *ptr_type;
struct bpf_reg_state *regs = cur_regs(env);
const char *func_name, *ptr_type_name;
u32 i, nargs, func_id, ptr_type_id;
- struct module *btf_mod = NULL;
+ int err, insn_idx = *insn_idx_p;
const struct btf_param *args;
struct btf *desc_btf;
- int err;
+ bool acq;
/* skip for now, but return error when we find this in fixup_kfunc_call */
if (!insn->imm)
return 0;
- desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod);
+ desc_btf = find_kfunc_desc_btf(env, insn->off);
if (IS_ERR(desc_btf))
return PTR_ERR(desc_btf);
@@ -6866,23 +7498,43 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
func_name = btf_name_by_offset(desc_btf, func->name_off);
func_proto = btf_type_by_id(desc_btf, func->type);
- if (!env->ops->check_kfunc_call ||
- !env->ops->check_kfunc_call(func_id, btf_mod)) {
+ if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_CHECK, func_id)) {
verbose(env, "calling kernel function %s is not allowed\n",
func_name);
return -EACCES;
}
+ acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_ACQUIRE, func_id);
+
/* Check the arguments */
err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
- if (err)
+ if (err < 0)
return err;
+ /* In case of release function, we get register number of refcounted
+ * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now
+ */
+ if (err) {
+ err = release_reference(env, regs[err].ref_obj_id);
+ if (err) {
+ verbose(env, "kfunc %s#%d reference has not been acquired before\n",
+ func_name, func_id);
+ return err;
+ }
+ }
for (i = 0; i < CALLER_SAVED_REGS; i++)
mark_reg_not_init(env, regs, caller_saved[i]);
/* Check return type */
t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
+
+ if (acq && !btf_type_is_ptr(t)) {
+ verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
+ return -EINVAL;
+ }
+
if (btf_type_is_scalar(t)) {
mark_reg_unknown(env, regs, BPF_REG_0);
mark_btf_func_reg_size(env, BPF_REG_0, t->size);
@@ -6901,7 +7553,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
regs[BPF_REG_0].btf = desc_btf;
regs[BPF_REG_0].type = PTR_TO_BTF_ID;
regs[BPF_REG_0].btf_id = ptr_type_id;
+ if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_RET_NULL, func_id)) {
+ regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
+ /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
+ regs[BPF_REG_0].id = ++env->id_gen;
+ }
mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
+ if (acq) {
+ int id = acquire_reference_state(env, insn_idx);
+
+ if (id < 0)
+ return id;
+ regs[BPF_REG_0].id = id;
+ regs[BPF_REG_0].ref_obj_id = id;
+ }
} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
nargs = btf_type_vlen(func_proto);
@@ -7305,7 +7971,7 @@ static int sanitize_check_bounds(struct bpf_verifier_env *env,
return -EACCES;
break;
case PTR_TO_MAP_VALUE:
- if (check_map_access(env, dst, dst_reg->off, 1, false)) {
+ if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
verbose(env, "R%d pointer arithmetic of map value goes out of range, "
"prohibited for !root\n", dst);
return -EACCES;
@@ -9548,7 +10214,6 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
dst_reg->mem_size = aux->btf_var.mem_size;
break;
case PTR_TO_BTF_ID:
- case PTR_TO_PERCPU_BTF_ID:
dst_reg->btf = aux->btf_var.btf;
dst_reg->btf_id = aux->btf_var.btf_id;
break;
@@ -10273,8 +10938,7 @@ static void adjust_btf_func(struct bpf_verifier_env *env)
aux->func_info[i].insn_off = env->subprog_info[i].start;
}
-#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \
- sizeof(((struct bpf_line_info *)(0))->line_col))
+#define MIN_BPF_LINEINFO_SIZE offsetofend(struct bpf_line_info, line_col)
#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
static int check_btf_line(struct bpf_verifier_env *env,
@@ -11549,7 +12213,7 @@ static int do_check(struct bpf_verifier_env *env)
if (insn->src_reg == BPF_PSEUDO_CALL)
err = check_func_call(env, insn, &env->insn_idx);
else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
- err = check_kfunc_call(env, insn);
+ err = check_kfunc_call(env, insn, &env->insn_idx);
else
err = check_helper_call(env, insn, &env->insn_idx);
if (err)
@@ -11748,7 +12412,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
type = t->type;
t = btf_type_skip_modifiers(btf, type, NULL);
if (percpu) {
- aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
+ aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
aux->btf_var.btf = btf;
aux->btf_var.btf_id = type;
} else if (!btf_type_is_struct(t)) {
@@ -12696,7 +13360,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
if (!ctx_access)
continue;
- switch (env->insn_aux_data[i + delta].ptr_type) {
+ switch ((int)env->insn_aux_data[i + delta].ptr_type) {
case PTR_TO_CTX:
if (!ops->convert_ctx_access)
continue;
@@ -12713,6 +13377,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
break;
case PTR_TO_BTF_ID:
+ case PTR_TO_BTF_ID | PTR_UNTRUSTED:
if (type == BPF_READ) {
insn->code = BPF_LDX | BPF_PROBE_MEM |
BPF_SIZE((insn)->code);
@@ -12897,6 +13562,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->name[0] = 'F';
func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
func[i]->jit_requested = 1;
+ func[i]->blinding_requested = prog->blinding_requested;
func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
func[i]->aux->linfo = prog->aux->linfo;
@@ -12992,6 +13658,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
prog->jited = 1;
prog->bpf_func = func[0]->bpf_func;
+ prog->jited_len = func[0]->jited_len;
prog->aux->func = func;
prog->aux->func_cnt = env->subprog_cnt;
bpf_prog_jit_attempt_done(prog);
@@ -13019,6 +13686,7 @@ out_free:
out_undo_insn:
/* cleanup main prog to be interpreted */
prog->jit_requested = 0;
+ prog->blinding_requested = 0;
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
if (!bpf_pseudo_call(insn))
continue;
@@ -13112,7 +13780,6 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
enum bpf_attach_type eatype = prog->expected_attach_type;
- bool expect_blinding = bpf_jit_blinding_enabled(prog);
enum bpf_prog_type prog_type = resolve_prog_type(prog);
struct bpf_insn *insn = prog->insnsi;
const struct bpf_func_proto *fn;
@@ -13276,7 +13943,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
insn->code = BPF_JMP | BPF_TAIL_CALL;
aux = &env->insn_aux_data[i + delta];
- if (env->bpf_capable && !expect_blinding &&
+ if (env->bpf_capable && !prog->blinding_requested &&
prog->jit_requested &&
!bpf_map_key_poisoned(aux) &&
!bpf_map_ptr_poisoned(aux) &&
@@ -13364,6 +14031,26 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
goto patch_call_imm;
}
+ if (insn->imm == BPF_FUNC_task_storage_get ||
+ insn->imm == BPF_FUNC_sk_storage_get ||
+ insn->imm == BPF_FUNC_inode_storage_get) {
+ if (env->prog->aux->sleepable)
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
+ else
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
+ insn_buf[1] = *insn;
+ cnt = 2;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto patch_call_imm;
+ }
+
/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
* and other inlining handlers are currently limited to 64 bit
* only.
@@ -13376,7 +14063,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
insn->imm == BPF_FUNC_map_pop_elem ||
insn->imm == BPF_FUNC_map_peek_elem ||
insn->imm == BPF_FUNC_redirect_map ||
- insn->imm == BPF_FUNC_for_each_map_elem)) {
+ insn->imm == BPF_FUNC_for_each_map_elem ||
+ insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
aux = &env->insn_aux_data[i + delta];
if (bpf_map_ptr_poisoned(aux))
goto patch_call_imm;
@@ -13425,6 +14113,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
bpf_callback_t callback_fn,
void *callback_ctx,
u64 flags))NULL));
+ BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
+ (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
patch_map_ops_generic:
switch (insn->imm) {
@@ -13452,6 +14142,9 @@ patch_map_ops_generic:
case BPF_FUNC_for_each_map_elem:
insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
continue;
+ case BPF_FUNC_map_lookup_percpu_elem:
+ insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
+ continue;
}
goto patch_call_imm;