summaryrefslogtreecommitdiff
path: root/kernel/bpf/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/core.c')
-rw-r--r--kernel/bpf/core.c103
1 files changed, 83 insertions, 20 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3f5bf1af0826..bad9985b8a08 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -52,6 +52,7 @@
#define DST regs[insn->dst_reg]
#define SRC regs[insn->src_reg]
#define FP regs[BPF_REG_FP]
+#define AX regs[BPF_REG_AX]
#define ARG1 regs[BPF_REG_ARG1]
#define CTX regs[BPF_REG_CTX]
#define IMM insn->imm
@@ -365,10 +366,13 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
}
#ifdef CONFIG_BPF_JIT
+# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000)
+
/* All BPF JIT sysctl knobs here. */
int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
int bpf_jit_harden __read_mostly;
int bpf_jit_kallsyms __read_mostly;
+int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT;
static __always_inline void
bpf_get_prog_addr_region(const struct bpf_prog *prog,
@@ -577,27 +581,64 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
return ret;
}
+static atomic_long_t bpf_jit_current;
+
+#if defined(MODULES_VADDR)
+static int __init bpf_jit_charge_init(void)
+{
+ /* Only used as heuristic here to derive limit. */
+ bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2,
+ PAGE_SIZE), INT_MAX);
+ return 0;
+}
+pure_initcall(bpf_jit_charge_init);
+#endif
+
+static int bpf_jit_charge_modmem(u32 pages)
+{
+ if (atomic_long_add_return(pages, &bpf_jit_current) >
+ (bpf_jit_limit >> PAGE_SHIFT)) {
+ if (!capable(CAP_SYS_ADMIN)) {
+ atomic_long_sub(pages, &bpf_jit_current);
+ return -EPERM;
+ }
+ }
+
+ return 0;
+}
+
+static void bpf_jit_uncharge_modmem(u32 pages)
+{
+ atomic_long_sub(pages, &bpf_jit_current);
+}
+
struct bpf_binary_header *
bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
unsigned int alignment,
bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
struct bpf_binary_header *hdr;
- unsigned int size, hole, start;
+ u32 size, hole, start, pages;
/* Most of BPF filters are really small, but if some of them
* fill a page, allow at least 128 extra bytes to insert a
* random section of illegal instructions.
*/
size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
+ pages = size / PAGE_SIZE;
+
+ if (bpf_jit_charge_modmem(pages))
+ return NULL;
hdr = module_alloc(size);
- if (hdr == NULL)
+ if (!hdr) {
+ bpf_jit_uncharge_modmem(pages);
return NULL;
+ }
/* Fill space with illegal/arch-dep instructions. */
bpf_fill_ill_insns(hdr, size);
- hdr->pages = size / PAGE_SIZE;
+ hdr->pages = pages;
hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
PAGE_SIZE - sizeof(*hdr));
start = (get_random_int() % hole) & ~(alignment - 1);
@@ -610,7 +651,10 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
void bpf_jit_binary_free(struct bpf_binary_header *hdr)
{
+ u32 pages = hdr->pages;
+
module_memfree(hdr);
+ bpf_jit_uncharge_modmem(pages);
}
/* This symbol is only overridden by archs that have different
@@ -642,6 +686,26 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG);
BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);
+ /* Constraints on AX register:
+ *
+ * AX register is inaccessible from user space. It is mapped in
+ * all JITs, and used here for constant blinding rewrites. It is
+ * typically "stateless" meaning its contents are only valid within
+ * the executed instruction, but not across several instructions.
+ * There are a few exceptions however which are further detailed
+ * below.
+ *
+ * Constant blinding is only used by JITs, not in the interpreter.
+ * The interpreter uses AX in some occasions as a local temporary
+ * register e.g. in DIV or MOD instructions.
+ *
+ * In restricted circumstances, the verifier can also use the AX
+ * register for rewrites as long as they do not interfere with
+ * the above cases!
+ */
+ if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX)
+ goto out;
+
if (from->imm == 0 &&
(from->code == (BPF_ALU | BPF_MOV | BPF_K) ||
from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
@@ -971,7 +1035,6 @@ bool bpf_opcode_in_insntable(u8 code)
*/
static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
{
- u64 tmp;
#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
static const void *jumptable[256] = {
@@ -1045,36 +1108,36 @@ select_insn:
(*(s64 *) &DST) >>= IMM;
CONT;
ALU64_MOD_X:
- div64_u64_rem(DST, SRC, &tmp);
- DST = tmp;
+ div64_u64_rem(DST, SRC, &AX);
+ DST = AX;
CONT;
ALU_MOD_X:
- tmp = (u32) DST;
- DST = do_div(tmp, (u32) SRC);
+ AX = (u32) DST;
+ DST = do_div(AX, (u32) SRC);
CONT;
ALU64_MOD_K:
- div64_u64_rem(DST, IMM, &tmp);
- DST = tmp;
+ div64_u64_rem(DST, IMM, &AX);
+ DST = AX;
CONT;
ALU_MOD_K:
- tmp = (u32) DST;
- DST = do_div(tmp, (u32) IMM);
+ AX = (u32) DST;
+ DST = do_div(AX, (u32) IMM);
CONT;
ALU64_DIV_X:
DST = div64_u64(DST, SRC);
CONT;
ALU_DIV_X:
- tmp = (u32) DST;
- do_div(tmp, (u32) SRC);
- DST = (u32) tmp;
+ AX = (u32) DST;
+ do_div(AX, (u32) SRC);
+ DST = (u32) AX;
CONT;
ALU64_DIV_K:
DST = div64_u64(DST, IMM);
CONT;
ALU_DIV_K:
- tmp = (u32) DST;
- do_div(tmp, (u32) IMM);
- DST = (u32) tmp;
+ AX = (u32) DST;
+ do_div(AX, (u32) IMM);
+ DST = (u32) AX;
CONT;
ALU_END_TO_BE:
switch (IMM) {
@@ -1330,7 +1393,7 @@ STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */
static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \
{ \
u64 stack[stack_size / sizeof(u64)]; \
- u64 regs[MAX_BPF_REG]; \
+ u64 regs[MAX_BPF_EXT_REG]; \
\
FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
ARG1 = (u64) (unsigned long) ctx; \
@@ -1343,7 +1406,7 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
const struct bpf_insn *insn) \
{ \
u64 stack[stack_size / sizeof(u64)]; \
- u64 regs[MAX_BPF_REG]; \
+ u64 regs[MAX_BPF_EXT_REG]; \
\
FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
BPF_R1 = r1; \