From df0734702a7cbba49d6765bd5ba069340bf9c5db Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 2 Nov 2018 10:16:15 -0700
Subject: bpf: show real jited prog address in /proc/kallsyms

Currently, /proc/kallsyms shows page address of jited bpf program. The
main reason here is to not expose randomized start address. However,
This is not ideal for detailed profiling (find hot instructions from
stack traces). This patch replaces the page address with real prog start
address.

This change is OK because these addresses are still protected by sysctl
kptr_restrict (see kallsyms_show_value()), and only programs loaded by
root are added to kallsyms (see bpf_prog_kallsyms_add()).

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/core.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 6377225b2082..1a796e0799ec 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -553,7 +553,6 @@ bool is_bpf_text_address(unsigned long addr)
 int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 		    char *sym)
 {
-	unsigned long symbol_start, symbol_end;
 	struct bpf_prog_aux *aux;
 	unsigned int it = 0;
 	int ret = -ERANGE;
@@ -566,10 +565,9 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 		if (it++ != symnum)
 			continue;
 
-		bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
 		bpf_get_prog_name(aux->prog, sym);
 
-		*value = symbol_start;
+		*value = (unsigned long)aux->prog->bpf_func;
 		*type  = BPF_SYM_ELF_TYPE;
 
 		ret = 0;
-- 
cgit v1.2.3


From de57e99ceb65d0d7775cc14a8ba5931d7de1d708 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 2 Nov 2018 10:16:16 -0700
Subject: bpf: show real jited address in bpf_prog_info->jited_ksyms

Currently, jited_ksyms in bpf_prog_info shows page addresses of jited
bpf program. The main reason here is to not expose randomized start
address. However, this is not ideal for detailed profiling (find hot
instructions from stack traces). This patch replaces the page address
with real prog start address.

This change is OK because bpf_prog_get_info_by_fd() is only available
to root.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/syscall.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ccb93277aae2..34a9eef5992c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2172,7 +2172,6 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 			user_ksyms = u64_to_user_ptr(info.jited_ksyms);
 			for (i = 0; i < ulen; i++) {
 				ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
-				ksym_addr &= PAGE_MASK;
 				if (put_user((u64) ksym_addr, &user_ksyms[i]))
 					return -EFAULT;
 			}
-- 
cgit v1.2.3


From ff1889fc531f582f902175c0acc80321af540b24 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 2 Nov 2018 10:16:17 -0700
Subject: bpf: show main program address and length in bpf_prog_info

Currently, when there is no subprog (prog->aux->func_cnt == 0),
bpf_prog_info does not return any jited_ksyms or jited_func_lens. This
patch adds main program address (prog->bpf_func) and main program
length (prog->jited_len) to bpf_prog_info.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/syscall.c | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 34a9eef5992c..9418174c276c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2158,11 +2158,11 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	}
 
 	ulen = info.nr_jited_ksyms;
-	info.nr_jited_ksyms = prog->aux->func_cnt;
+	info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
 	if (info.nr_jited_ksyms && ulen) {
 		if (bpf_dump_raw_ok()) {
+			unsigned long ksym_addr;
 			u64 __user *user_ksyms;
-			ulong ksym_addr;
 			u32 i;
 
 			/* copy the address of the kernel symbol
@@ -2170,9 +2170,17 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 			 */
 			ulen = min_t(u32, info.nr_jited_ksyms, ulen);
 			user_ksyms = u64_to_user_ptr(info.jited_ksyms);
-			for (i = 0; i < ulen; i++) {
-				ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
-				if (put_user((u64) ksym_addr, &user_ksyms[i]))
+			if (prog->aux->func_cnt) {
+				for (i = 0; i < ulen; i++) {
+					ksym_addr = (unsigned long)
+						prog->aux->func[i]->bpf_func;
+					if (put_user((u64) ksym_addr,
+						     &user_ksyms[i]))
+						return -EFAULT;
+				}
+			} else {
+				ksym_addr = (unsigned long) prog->bpf_func;
+				if (put_user((u64) ksym_addr, &user_ksyms[0]))
 					return -EFAULT;
 			}
 		} else {
@@ -2181,7 +2189,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	}
 
 	ulen = info.nr_jited_func_lens;
-	info.nr_jited_func_lens = prog->aux->func_cnt;
+	info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
 	if (info.nr_jited_func_lens && ulen) {
 		if (bpf_dump_raw_ok()) {
 			u32 __user *user_lens;
@@ -2190,9 +2198,16 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 			/* copy the JITed image lengths for each function */
 			ulen = min_t(u32, info.nr_jited_func_lens, ulen);
 			user_lens = u64_to_user_ptr(info.jited_func_lens);
-			for (i = 0; i < ulen; i++) {
-				func_len = prog->aux->func[i]->jited_len;
-				if (put_user(func_len, &user_lens[i]))
+			if (prog->aux->func_cnt) {
+				for (i = 0; i < ulen; i++) {
+					func_len =
+						prog->aux->func[i]->jited_len;
+					if (put_user(func_len, &user_lens[i]))
+						return -EFAULT;
+				}
+			} else {
+				func_len = prog->jited_len;
+				if (put_user(func_len, &user_lens[0]))
 					return -EFAULT;
 			}
 		} else {
-- 
cgit v1.2.3


From 28c2fae726bf5003cd209b0d5910a642af98316f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 2 Nov 2018 11:35:46 +0100
Subject: bpf: fix bpf_prog_get_info_by_fd to return 0 func_lens for unpriv

While dbecd7388476 ("bpf: get kernel symbol addresses via syscall")
zeroed info.nr_jited_ksyms in bpf_prog_get_info_by_fd() for queries
from unprivileged users, commit 815581c11cc2 ("bpf: get JITed image
lengths of functions via syscall") forgot about doing so and therefore
returns the #elems of the user set up buffer which is incorrect. It
also needs to indicate a info.nr_jited_func_lens of zero.

Fixes: 815581c11cc2 ("bpf: get JITed image lengths of functions via syscall")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Sandipan Das <sandipan@linux.vnet.ibm.com>
Cc: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/syscall.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 9418174c276c..cf5040fd5434 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2078,6 +2078,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 		info.jited_prog_len = 0;
 		info.xlated_prog_len = 0;
 		info.nr_jited_ksyms = 0;
+		info.nr_jited_func_lens = 0;
 		goto done;
 	}
 
-- 
cgit v1.2.3


From afd594240806acc138cf696c09f2f4829d55d02f Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Fri, 16 Nov 2018 12:00:07 +0000
Subject: bpf: fix off-by-one error in adjust_subprog_starts

When patching in a new sequence for the first insn of a subprog, the start
 of that subprog does not change (it's the first insn of the sequence), so
 adjust_subprog_starts should check start <= off (rather than < off).
Also added a test to test_verifier.c (it's essentially the syz reproducer).

Fixes: cc8b0b92a169 ("bpf: introduce function calls (function boundaries)")
Reported-by: syzbot+4fc427c7af994b0948be@syzkaller.appspotmail.com
Signed-off-by: Edward Cree <ecree@solarflare.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1971ca325fb4..6dd419550aba 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5650,7 +5650,7 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len
 		return;
 	/* NOTE: fake 'exit' subprog should be updated as well. */
 	for (i = 0; i <= env->subprog_cnt; i++) {
-		if (env->subprog_info[i].start < off)
+		if (env->subprog_info[i].start <= off)
 			continue;
 		env->subprog_info[i].start += len - 1;
 	}
-- 
cgit v1.2.3


From 569a933b03f3c48b392fe67c0086b3a6b9306b5a Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guroan@gmail.com>
Date: Wed, 14 Nov 2018 10:00:34 -0800
Subject: bpf: allocate local storage buffers using GFP_ATOMIC

Naresh reported an issue with the non-atomic memory allocation of
cgroup local storage buffers:

[   73.047526] BUG: sleeping function called from invalid context at
/srv/oe/build/tmp-rpb-glibc/work-shared/intel-corei7-64/kernel-source/mm/slab.h:421
[   73.060915] in_atomic(): 1, irqs_disabled(): 0, pid: 3157, name: test_cgroup_sto
[   73.068342] INFO: lockdep is turned off.
[   73.072293] CPU: 2 PID: 3157 Comm: test_cgroup_sto Not tainted
4.20.0-rc2-next-20181113 #1
[   73.080548] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS
2.0b 07/27/2017
[   73.088018] Call Trace:
[   73.090463]  dump_stack+0x70/0xa5
[   73.093783]  ___might_sleep+0x152/0x240
[   73.097619]  __might_sleep+0x4a/0x80
[   73.101191]  __kmalloc_node+0x1cf/0x2f0
[   73.105031]  ? cgroup_storage_update_elem+0x46/0x90
[   73.109909]  cgroup_storage_update_elem+0x46/0x90

cgroup_storage_update_elem() (as well as other update map update
callbacks) is called with disabled preemption, so GFP_ATOMIC
allocation should be used: e.g. alloc_htab_elem() in hashtab.c.

Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/local_storage.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index c97a8f968638..bed9d48a7ae9 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -139,7 +139,8 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
 		return -ENOENT;
 
 	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
-			   map->value_size, __GFP_ZERO | GFP_USER,
+			   map->value_size,
+			   __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
 			   map->numa_node);
 	if (!new)
 		return -ENOMEM;
-- 
cgit v1.2.3


From 813961de3ee6474dd5703e883471fd941d6c8f69 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 22 Nov 2018 10:49:56 -0800
Subject: bpf: fix integer overflow in queue_stack_map

Fix the following issues:

- allow queue_stack_map for root only
- fix u32 max_entries overflow
- disallow value_size == 0

Fixes: f1a2e44a3aec ("bpf: add queue and stack maps")
Reported-by: Wei Wu <ww9210@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Mauricio Vasquez B <mauricio.vasquez@polito.it>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/queue_stack_maps.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index 8bbd72d3a121..b384ea9f3254 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -7,6 +7,7 @@
 #include <linux/bpf.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <linux/capability.h>
 #include "percpu_freelist.h"
 
 #define QUEUE_STACK_CREATE_FLAG_MASK \
@@ -45,8 +46,12 @@ static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
 /* Called from syscall */
 static int queue_stack_map_alloc_check(union bpf_attr *attr)
 {
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 0 ||
+	    attr->value_size == 0 ||
 	    attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
 		return -EINVAL;
 
@@ -63,15 +68,10 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
 {
 	int ret, numa_node = bpf_map_attr_numa_node(attr);
 	struct bpf_queue_stack *qs;
-	u32 size, value_size;
-	u64 queue_size, cost;
-
-	size = attr->max_entries + 1;
-	value_size = attr->value_size;
-
-	queue_size = sizeof(*qs) + (u64) value_size * size;
+	u64 size, queue_size, cost;
 
-	cost = queue_size;
+	size = (u64) attr->max_entries + 1;
+	cost = queue_size = sizeof(*qs) + size * attr->value_size;
 	if (cost >= U32_MAX - PAGE_SIZE)
 		return ERR_PTR(-E2BIG);
 
-- 
cgit v1.2.3


From e2c95a61656d29ceaac97b6a975c8a1f26e26f15 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 26 Nov 2018 14:05:38 +0100
Subject: bpf, ppc64: generalize fetching subprog into bpf_jit_get_func_addr

Make fetching of the BPF call address from ppc64 JIT generic. ppc64
was using a slightly different variant rather than through the insns'
imm field encoding as the target address would not fit into that space.
Therefore, the target subprog number was encoded into the insns' offset
and fetched through fp->aux->func[off]->bpf_func instead. Given there
are other JITs with this issue and the mechanism of fetching the address
is JIT-generic, move it into the core as a helper instead. On the JIT
side, we get information on whether the retrieved address is a fixed
one, that is, not changing through JIT passes, or a dynamic one. For
the former, JITs can optimize their imm emission because this doesn't
change jump offsets throughout JIT process.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Sandipan Das <sandipan@linux.ibm.com>
Tested-by: Sandipan Das <sandipan@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/core.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'kernel/bpf')

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1a796e0799ec..b1a3545d0ec8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -672,6 +672,40 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
 	bpf_prog_unlock_free(fp);
 }
 
+int bpf_jit_get_func_addr(const struct bpf_prog *prog,
+			  const struct bpf_insn *insn, bool extra_pass,
+			  u64 *func_addr, bool *func_addr_fixed)
+{
+	s16 off = insn->off;
+	s32 imm = insn->imm;
+	u8 *addr;
+
+	*func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
+	if (!*func_addr_fixed) {
+		/* Place-holder address till the last pass has collected
+		 * all addresses for JITed subprograms in which case we
+		 * can pick them up from prog->aux.
+		 */
+		if (!extra_pass)
+			addr = NULL;
+		else if (prog->aux->func &&
+			 off >= 0 && off < prog->aux->func_cnt)
+			addr = (u8 *)prog->aux->func[off]->bpf_func;
+		else
+			return -EINVAL;
+	} else {
+		/* Address of a BPF helper call. Since part of the core
+		 * kernel, it's always at a fixed location. __bpf_call_base
+		 * and the helper with imm relative to it are both in core
+		 * kernel.
+		 */
+		addr = (u8 *)__bpf_call_base + imm;
+	}
+
+	*func_addr = (unsigned long)addr;
+	return 0;
+}
+
 static int bpf_jit_blind_insn(const struct bpf_insn *from,
 			      const struct bpf_insn *aux,
 			      struct bpf_insn *to_buff)
-- 
cgit v1.2.3