diff options
Diffstat (limited to 'kernel/bpf/core.c')
-rw-r--r-- | kernel/bpf/core.c | 103 |
1 files changed, 83 insertions, 20 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 3f5bf1af0826..bad9985b8a08 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -52,6 +52,7 @@ #define DST regs[insn->dst_reg] #define SRC regs[insn->src_reg] #define FP regs[BPF_REG_FP] +#define AX regs[BPF_REG_AX] #define ARG1 regs[BPF_REG_ARG1] #define CTX regs[BPF_REG_CTX] #define IMM insn->imm @@ -365,10 +366,13 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) } #ifdef CONFIG_BPF_JIT +# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000) + /* All BPF JIT sysctl knobs here. */ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); int bpf_jit_harden __read_mostly; int bpf_jit_kallsyms __read_mostly; +int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT; static __always_inline void bpf_get_prog_addr_region(const struct bpf_prog *prog, @@ -577,27 +581,64 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, return ret; } +static atomic_long_t bpf_jit_current; + +#if defined(MODULES_VADDR) +static int __init bpf_jit_charge_init(void) +{ + /* Only used as heuristic here to derive limit. */ + bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2, + PAGE_SIZE), INT_MAX); + return 0; +} +pure_initcall(bpf_jit_charge_init); +#endif + +static int bpf_jit_charge_modmem(u32 pages) +{ + if (atomic_long_add_return(pages, &bpf_jit_current) > + (bpf_jit_limit >> PAGE_SHIFT)) { + if (!capable(CAP_SYS_ADMIN)) { + atomic_long_sub(pages, &bpf_jit_current); + return -EPERM; + } + } + + return 0; +} + +static void bpf_jit_uncharge_modmem(u32 pages) +{ + atomic_long_sub(pages, &bpf_jit_current); +} + struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, bpf_jit_fill_hole_t bpf_fill_ill_insns) { struct bpf_binary_header *hdr; - unsigned int size, hole, start; + u32 size, hole, start, pages; /* Most of BPF filters are really small, but if some of them * fill a page, allow at least 128 extra bytes to insert a * random section of illegal instructions. */ size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE); + pages = size / PAGE_SIZE; + + if (bpf_jit_charge_modmem(pages)) + return NULL; hdr = module_alloc(size); - if (hdr == NULL) + if (!hdr) { + bpf_jit_uncharge_modmem(pages); return NULL; + } /* Fill space with illegal/arch-dep instructions. */ bpf_fill_ill_insns(hdr, size); - hdr->pages = size / PAGE_SIZE; + hdr->pages = pages; hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); start = (get_random_int() % hole) & ~(alignment - 1); @@ -610,7 +651,10 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, void bpf_jit_binary_free(struct bpf_binary_header *hdr) { + u32 pages = hdr->pages; + module_memfree(hdr); + bpf_jit_uncharge_modmem(pages); } /* This symbol is only overridden by archs that have different @@ -642,6 +686,26 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG); + /* Constraints on AX register: + * + * AX register is inaccessible from user space. It is mapped in + * all JITs, and used here for constant blinding rewrites. It is + * typically "stateless" meaning its contents are only valid within + * the executed instruction, but not across several instructions. + * There are a few exceptions however which are further detailed + * below. + * + * Constant blinding is only used by JITs, not in the interpreter. + * The interpreter uses AX in some occasions as a local temporary + * register e.g. in DIV or MOD instructions. + * + * In restricted circumstances, the verifier can also use the AX + * register for rewrites as long as they do not interfere with + * the above cases! + */ + if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX) + goto out; + if (from->imm == 0 && (from->code == (BPF_ALU | BPF_MOV | BPF_K) || from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) { @@ -971,7 +1035,6 @@ bool bpf_opcode_in_insntable(u8 code) */ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) { - u64 tmp; #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z static const void *jumptable[256] = { @@ -1045,36 +1108,36 @@ select_insn: (*(s64 *) &DST) >>= IMM; CONT; ALU64_MOD_X: - div64_u64_rem(DST, SRC, &tmp); - DST = tmp; + div64_u64_rem(DST, SRC, &AX); + DST = AX; CONT; ALU_MOD_X: - tmp = (u32) DST; - DST = do_div(tmp, (u32) SRC); + AX = (u32) DST; + DST = do_div(AX, (u32) SRC); CONT; ALU64_MOD_K: - div64_u64_rem(DST, IMM, &tmp); - DST = tmp; + div64_u64_rem(DST, IMM, &AX); + DST = AX; CONT; ALU_MOD_K: - tmp = (u32) DST; - DST = do_div(tmp, (u32) IMM); + AX = (u32) DST; + DST = do_div(AX, (u32) IMM); CONT; ALU64_DIV_X: DST = div64_u64(DST, SRC); CONT; ALU_DIV_X: - tmp = (u32) DST; - do_div(tmp, (u32) SRC); - DST = (u32) tmp; + AX = (u32) DST; + do_div(AX, (u32) SRC); + DST = (u32) AX; CONT; ALU64_DIV_K: DST = div64_u64(DST, IMM); CONT; ALU_DIV_K: - tmp = (u32) DST; - do_div(tmp, (u32) IMM); - DST = (u32) tmp; + AX = (u32) DST; + do_div(AX, (u32) IMM); + DST = (u32) AX; CONT; ALU_END_TO_BE: switch (IMM) { @@ -1330,7 +1393,7 @@ STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */ static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \ { \ u64 stack[stack_size / sizeof(u64)]; \ - u64 regs[MAX_BPF_REG]; \ + u64 regs[MAX_BPF_EXT_REG]; \ \ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ ARG1 = (u64) (unsigned long) ctx; \ @@ -1343,7 +1406,7 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \ const struct bpf_insn *insn) \ { \ u64 stack[stack_size / sizeof(u64)]; \ - u64 regs[MAX_BPF_REG]; \ + u64 regs[MAX_BPF_EXT_REG]; \ \ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ BPF_R1 = r1; \ |