From 62d468e5e10012e8b67d066ba7bac0a8afdc3cee Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Tue, 2 Aug 2022 15:56:51 +0200
Subject: bpf: Cleanup ftrace hash in bpf_trampoline_put

We need to release the possible ftrace hash in the trampoline's fops
object before freeing it; otherwise we leak it.
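For context, a sketch of the underlying ftrace API rule (not part of
this patch; "ip" stands for any traced address): a dynamically
allocated ftrace_ops whose filter was populated, e.g. via
ftrace_set_filter_ip(), owns hash tables that kfree() alone does not
reclaim:

	struct ftrace_ops *ops = kzalloc(sizeof(*ops), GFP_KERNEL);

	ftrace_set_filter_ip(ops, ip, 0, 0);	/* allocates the filter hash */
	/* ... use the ops ... */
	ftrace_free_filter(ops);		/* frees the hash; must precede kfree() */
	kfree(ops);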
Fixes: 00963a2e75a8 ("bpf: Support bpf_trampoline on functions with IPMODIFY (e.g. livepatch)")
Signed-off-by: Jiri Olsa
Signed-off-by: Andrii Nakryiko
Acked-by: Song Liu
Link: https://lore.kernel.org/bpf/20220802135651.1794015-1-jolsa@kernel.org
---
 kernel/bpf/trampoline.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 0f532e6a717f..ff87e38af8a7 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -841,7 +841,10 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
 	 * multiple rcu callbacks.
 	 */
 	hlist_del(&tr->hlist);
-	kfree(tr->fops);
+	if (tr->fops) {
+		ftrace_free_filter(tr->fops);
+		kfree(tr->fops);
+	}
 	kfree(tr);
 out:
 	mutex_unlock(&trampoline_mutex);
--
cgit v1.2.3

From 6644aabbd8973a9f8008cabfd054a36b69a3a3f5 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev
Date: Thu, 4 Aug 2022 13:11:39 -0700
Subject: bpf: Use proper target btf when exporting attach_btf_obj_id

When attaching to a program, the target program itself might not be
attached to anything (and, hence, might not have attach_btf), so we
can't unconditionally use 'prog->aux->dst_prog->aux->attach_btf'.

Instead, use bpf_prog_get_target_btf to pick the proper target BTF:

 * when attached to dst_prog, use dst_prog->aux->btf
 * when attached to kernel btf, use prog->aux->attach_btf
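For reference, the selection logic described above amounts to the
following sketch (consistent with the helper's in-tree definition):

	struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
	{
		struct bpf_prog *tgt_prog = prog->aux->dst_prog;

		if (tgt_prog)
			return tgt_prog->aux->btf;
		return prog->aux->attach_btf;
	}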
Fixes: b79c9fc9551b ("bpf: implement BPF_PROG_QUERY for BPF_LSM_CGROUP")
Signed-off-by: Stanislav Fomichev
Signed-off-by: Daniel Borkmann
Acked-by: Hao Luo
Acked-by: Martin KaFai Lau
Link: https://lore.kernel.org/bpf/20220804201140.1340684-1-sdf@google.com
---
 kernel/bpf/syscall.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 83c7136c5788..7dc3f8003631 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3886,6 +3886,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
 				   union bpf_attr __user *uattr)
 {
 	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
+	struct btf *attach_btf = bpf_prog_get_target_btf(prog);
 	struct bpf_prog_info info;
 	u32 info_len = attr->info.info_len;
 	struct bpf_prog_kstats stats;
@@ -4088,10 +4089,8 @@ static int bpf_prog_get_info_by_fd(struct file *file,
 	if (prog->aux->btf)
 		info.btf_id = btf_obj_id(prog->aux->btf);
 	info.attach_btf_id = prog->aux->attach_btf_id;
-	if (prog->aux->attach_btf)
-		info.attach_btf_obj_id = btf_obj_id(prog->aux->attach_btf);
-	else if (prog->aux->dst_prog)
-		info.attach_btf_obj_id = btf_obj_id(prog->aux->dst_prog->aux->attach_btf);
+	if (attach_btf)
+		info.attach_btf_obj_id = btf_obj_id(attach_btf);

 	ulen = info.nr_func_info;
 	info.nr_func_info = prog->aux->func_info_cnt;
--
cgit v1.2.3

From 275c30bcee66a27d1aa97a215d607ad6d49804cb Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi
Date: Tue, 9 Aug 2022 23:30:32 +0200
Subject: bpf: Don't reinit map value in prealloc_lru_pop

A preallocated LRU map may have its elements reused while another
program holds a pointer to them from bpf_map_lookup_elem. Hence, only
check_and_free_fields is appropriate when the element is being deleted,
as it ensures proper synchronization against concurrent access of the
map value. After that, we cannot call check_and_init_map_value again,
as it may rewrite bpf_spin_lock, bpf_timer, and kptr fields while they
can be concurrently accessed from a BPF program.

This is safe because when the map entry is deleted, concurrent access
is protected against by check_and_free_fields: an existing timer is
freed, and any existing kptr is released by it. The program can create
further timers and kptrs after check_and_free_fields, but they will
eventually be released once the preallocated items are freed on map
destruction, even if the item is never reused again. Hence, the deleted
item sitting in the free list can still have resources attached to it,
and they will never leak.

With spin_lock, we never touch the field at all on delete or update, as
we may end up modifying the state of the lock. Since the verifier
ensures that a bpf_spin_lock call is always paired with a
bpf_spin_unlock call, the program will eventually release the lock, so
that on reuse the new user of the value can take the lock.

Essentially, for the preallocated case, we must assume that the map
value may always be in use by the program, even when it is sitting in
the freelist, and handle things accordingly: use proper synchronization
inside check_and_free_fields, and never reinitialize the special fields
when the element is reused on update.
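For orientation, the delete-path helper referenced above looks roughly
like the sketch below at this point in the tree (an approximation; see
hashtab.c for the authoritative version):

	static void check_and_free_fields(struct bpf_htab *htab,
					  struct htab_elem *elem)
	{
		void *map_value = elem->key + round_up(htab->map.key_size, 8);

		if (map_value_has_timer(&htab->map))
			bpf_timer_cancel_and_free(map_value + htab->map.timer_off);
		if (map_value_has_kptrs(&htab->map))
			bpf_map_free_kptrs(&htab->map, map_value);
	}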
Fixes: 68134668c17f ("bpf: Add map side support for bpf timers.")
Acked-by: Yonghong Song
Signed-off-by: Kumar Kartikeya Dwivedi
Acked-by: Martin KaFai Lau
Link: https://lore.kernel.org/r/20220809213033.24147-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/hashtab.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index da7578426a46..4d793a92301b 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -311,12 +311,8 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
 	struct htab_elem *l;

 	if (node) {
-		u32 key_size = htab->map.key_size;
-
 		l = container_of(node, struct htab_elem, lru_node);
-		memcpy(l->key, key, key_size);
-		check_and_init_map_value(&htab->map,
-					 l->key + round_up(key_size, 8));
+		memcpy(l->key, key, htab->map.key_size);
 		return l;
 	}
--
cgit v1.2.3

From 86f44fcec22ce2979507742bc53db8400e454f46 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Mon, 8 Aug 2022 20:58:09 -0700
Subject: bpf: Disallow bpf programs from calling the prog_run command

The verifier cannot sufficiently validate the bpf_attr->test.ctx_in
pointer, therefore bpf programs should not be allowed to call the
BPF_PROG_RUN command from within a program. To fix this issue, split
the bpf_sys_bpf() helper into a normal kern_sys_bpf() kernel function
that can only be used by the kernel light skeleton directly.
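The light skeleton's syscall shim then dispatches to kern_sys_bpf()
when built into the kernel; roughly (a sketch of the skel_internal.h
wrapper, details elided):

	static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
				       unsigned int size)
	{
	#ifdef __KERNEL__
		return kern_sys_bpf(cmd, attr, size);
	#else
		return syscall(__NR_bpf, cmd, attr, size);
	#endif
	}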
Reported-by: YiFei Zhu
Fixes: b1d18a7574d0 ("bpf: Extend sys_bpf commands for bpf_syscall programs.")
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/syscall.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7dc3f8003631..a1cb0bdc5ad6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -5071,9 +5071,6 @@ static bool syscall_prog_is_valid_access(int off, int size,

 BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
 {
-	struct bpf_prog * __maybe_unused prog;
-	struct bpf_tramp_run_ctx __maybe_unused run_ctx;
-
 	switch (cmd) {
 	case BPF_MAP_CREATE:
 	case BPF_MAP_UPDATE_ELEM:
@@ -5083,6 +5080,18 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
 	case BPF_LINK_CREATE:
 	case BPF_RAW_TRACEPOINT_OPEN:
 		break;
+	default:
+		return -EINVAL;
+	}
+	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
+}
+
+int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
+{
+	struct bpf_prog * __maybe_unused prog;
+	struct bpf_tramp_run_ctx __maybe_unused run_ctx;
+
+	switch (cmd) {
 #ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */
 	case BPF_PROG_TEST_RUN:
 		if (attr->test.data_in || attr->test.data_out ||
@@ -5113,11 +5122,10 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
 		return 0;
 #endif
 	default:
-		return -EINVAL;
+		return ____bpf_sys_bpf(cmd, attr, size);
 	}
-	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
 }
-EXPORT_SYMBOL(bpf_sys_bpf);
+EXPORT_SYMBOL(kern_sys_bpf);

 static const struct bpf_func_proto bpf_sys_bpf_proto = {
 	.func		= bpf_sys_bpf,
--
cgit v1.2.3

From f76fa6b338055054f80c72b29c97fb95c1becadc Mon Sep 17 00:00:00 2001
From: Hou Tao
Date: Wed, 10 Aug 2022 16:05:30 +0800
Subject: bpf: Acquire map uref in .init_seq_private for array map iterator

bpf_iter_attach_map() acquires a map uref, but that uref may be
released before or in the middle of iterating map elements. For
example, the uref could be released in bpf_iter_detach_map() as part of
bpf_link_release(), or in bpf_map_put_with_uref() as part of
bpf_map_release().

An alternative fix is acquiring an extra bpf_link reference, just like
a pinned map iterator does, but that introduces an unnecessary
dependency on bpf_link instead of bpf_map. So choose the other fix:
acquire an extra map uref in .init_seq_private for the array map
iterator.

Fixes: d3cc2ab546ad ("bpf: Implement bpf iterator for array maps")
Signed-off-by: Hou Tao
Acked-by: Yonghong Song
Link: https://lore.kernel.org/r/20220810080538.1845898-2-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/arraymap.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index d3e734bf8056..624527401d4d 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -649,6 +649,11 @@ static int bpf_iter_init_array_map(void *priv_data,
 		seq_info->percpu_value_buf = value_buf;
 	}

+	/* bpf_iter_attach_map() acquires a map uref, and the uref may be
+	 * released before or in the middle of iterating map elements, so
+	 * acquire an extra map uref for iterator.
+	 */
+	bpf_map_inc_with_uref(map);
 	seq_info->map = map;
 	return 0;
 }
@@ -657,6 +662,7 @@ static void bpf_iter_fini_array_map(void *priv_data)
 {
 	struct bpf_iter_seq_array_map_info *seq_info = priv_data;

+	bpf_map_put_with_uref(seq_info->map);
 	kfree(seq_info->percpu_value_buf);
 }
--
cgit v1.2.3

From ef1e93d2eeb58a1f08c37b22a2314b94bc045f15 Mon Sep 17 00:00:00 2001
From: Hou Tao
Date: Wed, 10 Aug 2022 16:05:31 +0800
Subject: bpf: Acquire map uref in .init_seq_private for hash map iterator

bpf_iter_attach_map() acquires a map uref, but that uref may be
released before or in the middle of iterating map elements. For
example, the uref could be released in bpf_iter_detach_map() as part of
bpf_link_release(), or in bpf_map_put_with_uref() as part of
bpf_map_release().

So acquire an extra map uref in bpf_iter_init_hash_map() and release it
in bpf_iter_fini_hash_map().

Fixes: d6c4503cc296 ("bpf: Implement bpf iterator for hash maps")
Signed-off-by: Hou Tao
Acked-by: Yonghong Song
Link: https://lore.kernel.org/r/20220810080538.1845898-3-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/hashtab.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4d793a92301b..6c530a5e560a 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -2060,6 +2060,7 @@ static int bpf_iter_init_hash_map(void *priv_data,
 		seq_info->percpu_value_buf = value_buf;
 	}

+	bpf_map_inc_with_uref(map);
 	seq_info->map = map;
 	seq_info->htab = container_of(map, struct bpf_htab, map);
 	return 0;
@@ -2069,6 +2070,7 @@ static void bpf_iter_fini_hash_map(void *priv_data)
 {
 	struct bpf_iter_seq_hash_map_info *seq_info = priv_data;

+	bpf_map_put_with_uref(seq_info->map);
 	kfree(seq_info->percpu_value_buf);
 }
--
cgit v1.2.3

From d247049f4fd088e4e40294819a932a6057b3632c Mon Sep 17 00:00:00 2001
From: Hou Tao
Date: Wed, 10 Aug 2022 16:05:35 +0800
Subject: bpf: Only allow sleepable program for resched-able iterator

When a sleepable program is attached to a hash map iterator,
might_fault() will report "BUG: sleeping function called from invalid
context..." if CONFIG_DEBUG_ATOMIC_SLEEP is enabled. The reason is that
rcu_read_lock() is held in bpf_hash_map_seq_next() and won't be
released until all elements are traversed or bpf_hash_map_seq_stop() is
called.

Fix it by reusing BPF_ITER_RESCHED: only a non-sleepable program may
attach to an iterator target without BPF_ITER_RESCHED. We can revise
bpf_iter_link_attach() later if there are other conditions which may
cause rcu_read_lock() or spin_lock() issues.

Signed-off-by: Hou Tao
Acked-by: Yonghong Song
Link: https://lore.kernel.org/r/20220810080538.1845898-7-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/bpf_iter.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 2726a5950cfa..24b755eca0b3 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -68,13 +68,18 @@ static void bpf_iter_done_stop(struct seq_file *seq)
 	iter_priv->done_stop = true;
 }

+static inline bool bpf_iter_target_support_resched(const struct bpf_iter_target_info *tinfo)
+{
+	return tinfo->reg_info->feature & BPF_ITER_RESCHED;
+}
+
 static bool bpf_iter_support_resched(struct seq_file *seq)
 {
 	struct bpf_iter_priv_data *iter_priv;

 	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
 				 target_private);
-	return iter_priv->tinfo->reg_info->feature & BPF_ITER_RESCHED;
+	return bpf_iter_target_support_resched(iter_priv->tinfo);
 }

 /* maximum visited objects before bailing out */
@@ -537,6 +542,10 @@ int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
 	if (!tinfo)
 		return -ENOENT;

+	/* Only allow sleepable program for resched-able iterator */
+	if (prog->aux->sleepable && !bpf_iter_target_support_resched(tinfo))
+		return -EINVAL;
+
 	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
 	if (!link)
 		return -ENOMEM;
--
cgit v1.2.3
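From user space, the check added in bpf_iter_link_attach() by the last
patch surfaces at link creation time. A minimal libbpf sketch, assuming
a sleepable map-element iterator program (e.g. SEC("iter.s/bpf_map_elem"))
and an existing map fd; the function name is illustrative:

	#include <bpf/libbpf.h>

	static int attach_sleepable_map_iter(struct bpf_program *prog, int map_fd)
	{
		DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
		union bpf_iter_link_info linfo = {};
		struct bpf_link *link;
		long err;

		linfo.map.map_fd = map_fd;
		opts.link_info = &linfo;
		opts.link_info_len = sizeof(linfo);

		link = bpf_program__attach_iter(prog, &opts);
		err = libbpf_get_error(link);
		if (err)	/* -EINVAL if the target lacks BPF_ITER_RESCHED */
			return (int)err;

		bpf_link__destroy(link);
		return 0;
	}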