From 2cbd95a5c4fb855a4177c0343a880cc2091f500d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:18 -0800 Subject: bpf: change parameters of call/branch offset adjustment In preparation for code removal change parameters to branch and call adjustment functions to be more universal. The current parameters assume we are patching a single instruction with a longer set. A diagram may help reading the change, this is for the patch single case, patching instruction 1 with a replacement of 4: ____ 0 |____| 1 |____| <-- pos ^ 2 | | <-- end old ^ | 3 | | | delta | len 4 |____| | | (patch region) 5 | | <-- end new v v 6 |____| end_old = pos + 1 end_new = pos + delta + 1 If we are before the patch region - curr variable and the target are fully in old coordinates (hence comparing against end_old). If we are after the region curr is in new coordinates (hence the comparison to end_new) but target is in mixed coordinates, so we just check if it falls before end_new, and if so it needs the adjustment. Note that we will not fix up branches which land in removed region in case of removal, which should be okay, as we are only going to remove dead code. Signed-off-by: Jakub Kicinski Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f908b9356025..ad08ba341197 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -307,15 +307,16 @@ int bpf_prog_calc_tag(struct bpf_prog *fp) return 0; } -static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta, - u32 curr, const bool probe_pass) +static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old, + s32 end_new, u32 curr, const bool probe_pass) { const s64 imm_min = S32_MIN, imm_max = S32_MAX; + s32 delta = end_new - end_old; s64 imm = insn->imm; - if (curr < pos && curr + imm + 1 > pos) + if (curr < pos && curr + imm + 1 >= end_old) imm += delta; - else if (curr > pos + delta && curr + imm + 1 <= pos + delta) + else if (curr >= end_new && curr + imm + 1 < end_new) imm -= delta; if (imm < imm_min || imm > imm_max) return -ERANGE; @@ -324,15 +325,16 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta, return 0; } -static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta, - u32 curr, const bool probe_pass) +static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old, + s32 end_new, u32 curr, const bool probe_pass) { const s32 off_min = S16_MIN, off_max = S16_MAX; + s32 delta = end_new - end_old; s32 off = insn->off; - if (curr < pos && curr + off + 1 > pos) + if (curr < pos && curr + off + 1 >= end_old) off += delta; - else if (curr > pos + delta && curr + off + 1 <= pos + delta) + else if (curr >= end_new && curr + off + 1 < end_new) off -= delta; if (off < off_min || off > off_max) return -ERANGE; @@ -341,10 +343,10 @@ static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta, return 0; } -static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta, - const bool probe_pass) +static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old, + s32 end_new, const bool probe_pass) { - u32 i, insn_cnt = prog->len + (probe_pass ? delta : 0); + u32 i, insn_cnt = prog->len + (probe_pass ? end_new - end_old : 0); struct bpf_insn *insn = prog->insnsi; int ret = 0; @@ -356,8 +358,8 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta, * do any other adjustments. Therefore skip the patchlet. */ if (probe_pass && i == pos) { - i += delta + 1; - insn++; + i = end_new; + insn = prog->insnsi + end_old; } code = insn->code; if (BPF_CLASS(code) != BPF_JMP || @@ -367,11 +369,11 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta, if (BPF_OP(code) == BPF_CALL) { if (insn->src_reg != BPF_PSEUDO_CALL) continue; - ret = bpf_adj_delta_to_imm(insn, pos, delta, i, - probe_pass); + ret = bpf_adj_delta_to_imm(insn, pos, end_old, + end_new, i, probe_pass); } else { - ret = bpf_adj_delta_to_off(insn, pos, delta, i, - probe_pass); + ret = bpf_adj_delta_to_off(insn, pos, end_old, + end_new, i, probe_pass); } if (ret) break; @@ -421,7 +423,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, * we afterwards may not fail anymore. */ if (insn_adj_cnt > cnt_max && - bpf_adj_branches(prog, off, insn_delta, true)) + bpf_adj_branches(prog, off, off + 1, off + len, true)) return NULL; /* Several new instructions need to be inserted. Make room @@ -453,7 +455,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, * the ship has sailed to reverse to the original state. An * overflow cannot happen at this point. */ - BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false)); + BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false)); bpf_adj_linfo(prog_adj, off, insn_delta); -- cgit v1.2.3 From e2ae4ca266a1c9a0163738129506dbc63d5cca80 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:19 -0800 Subject: bpf: verifier: hard wire branches to dead code Loading programs with dead code becomes more and more common, as people begin to patch constants at load time. Turn conditional jumps to unconditional ones, to avoid potential branch misprediction penalty. This optimization is enabled for privileged users only. For branches which just fall through we could just mark them as not seen and have dead code removal take care of them, but that seems less clean. v0.2: - don't call capable(CAP_SYS_ADMIN) twice (Jiong). v3: - fix GCC warning; Signed-off-by: Jakub Kicinski Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ce87198ecd01..bf1f98e8beb6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6458,6 +6458,40 @@ static void sanitize_dead_code(struct bpf_verifier_env *env) } } +static bool insn_is_cond_jump(u8 code) +{ + u8 op; + + if (BPF_CLASS(code) != BPF_JMP) + return false; + + op = BPF_OP(code); + return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL; +} + +static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) +{ + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); + struct bpf_insn *insn = env->prog->insnsi; + const int insn_cnt = env->prog->len; + int i; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (!insn_is_cond_jump(insn->code)) + continue; + + if (!aux_data[i + 1].seen) + ja.off = insn->off; + else if (!aux_data[i + 1 + insn->off].seen) + ja.off = 0; + else + continue; + + memcpy(insn, &ja, sizeof(ja)); + } +} + /* convert load instructions that access fields of a context type into a * sequence of instructions that access fields of the underlying structure: * struct __sk_buff -> struct sk_buff @@ -7149,6 +7183,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, struct bpf_verifier_env *env; struct bpf_verifier_log *log; int ret = -EINVAL; + bool is_priv; /* no program is valid */ if (ARRAY_SIZE(bpf_verifier_ops) == 0) @@ -7195,6 +7230,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) env->strict_alignment = false; + is_priv = capable(CAP_SYS_ADMIN); + env->allow_ptr_leaks = is_priv; + ret = replace_map_fd_with_map_ptr(env); if (ret < 0) goto skip_full_check; @@ -7212,8 +7250,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, if (!env->explored_states) goto skip_full_check; - env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); - ret = check_subprogs(env); if (ret < 0) goto skip_full_check; @@ -7243,6 +7279,11 @@ skip_full_check: ret = check_max_stack_depth(env); /* instruction rewrites happen after this point */ + if (is_priv) { + if (ret == 0) + opt_hard_wire_dead_code_branches(env); + } + if (ret == 0) sanitize_dead_code(env); -- cgit v1.2.3 From 52875a04f4b26e7ef30a288ea096f7cfec0e93cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:20 -0800 Subject: bpf: verifier: remove dead code Instead of overwriting dead code with jmp -1 instructions remove it completely for root. Adjust verifier state and line info appropriately. v2: - adjust func_info (Alexei); - make sure first instruction retains line info (Alexei). v4: (Yonghong) - remove unnecessary if (!insn to remove) checks; - always keep last line info if first live instruction lacks one. v5: (Martin Lau) - improve and clarify comments. Signed-off-by: Jakub Kicinski Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 12 ++++ kernel/bpf/verifier.c | 176 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 185 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ad08ba341197..2a81b8af3748 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -462,6 +462,18 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, return prog_adj; } +int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt) +{ + /* Branch offsets can't overflow when program is shrinking, no need + * to call bpf_adj_branches(..., true) here + */ + memmove(prog->insnsi + off, prog->insnsi + off + cnt, + sizeof(struct bpf_insn) * (prog->len - off - cnt)); + prog->len -= cnt; + + return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false)); +} + void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) { int i; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bf1f98e8beb6..099b2541f87f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6432,6 +6432,150 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of return new_prog; } +static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, + u32 off, u32 cnt) +{ + int i, j; + + /* find first prog starting at or after off (first to remove) */ + for (i = 0; i < env->subprog_cnt; i++) + if (env->subprog_info[i].start >= off) + break; + /* find first prog starting at or after off + cnt (first to stay) */ + for (j = i; j < env->subprog_cnt; j++) + if (env->subprog_info[j].start >= off + cnt) + break; + /* if j doesn't start exactly at off + cnt, we are just removing + * the front of previous prog + */ + if (env->subprog_info[j].start != off + cnt) + j--; + + if (j > i) { + struct bpf_prog_aux *aux = env->prog->aux; + int move; + + /* move fake 'exit' subprog as well */ + move = env->subprog_cnt + 1 - j; + + memmove(env->subprog_info + i, + env->subprog_info + j, + sizeof(*env->subprog_info) * move); + env->subprog_cnt -= j - i; + + /* remove func_info */ + if (aux->func_info) { + move = aux->func_info_cnt - j; + + memmove(aux->func_info + i, + aux->func_info + j, + sizeof(*aux->func_info) * move); + aux->func_info_cnt -= j - i; + /* func_info->insn_off is set after all code rewrites, + * in adjust_btf_func() - no need to adjust + */ + } + } else { + /* convert i from "first prog to remove" to "first to adjust" */ + if (env->subprog_info[i].start == off) + i++; + } + + /* update fake 'exit' subprog as well */ + for (; i <= env->subprog_cnt; i++) + env->subprog_info[i].start -= cnt; + + return 0; +} + +static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off, + u32 cnt) +{ + struct bpf_prog *prog = env->prog; + u32 i, l_off, l_cnt, nr_linfo; + struct bpf_line_info *linfo; + + nr_linfo = prog->aux->nr_linfo; + if (!nr_linfo) + return 0; + + linfo = prog->aux->linfo; + + /* find first line info to remove, count lines to be removed */ + for (i = 0; i < nr_linfo; i++) + if (linfo[i].insn_off >= off) + break; + + l_off = i; + l_cnt = 0; + for (; i < nr_linfo; i++) + if (linfo[i].insn_off < off + cnt) + l_cnt++; + else + break; + + /* First live insn doesn't match first live linfo, it needs to "inherit" + * last removed linfo. prog is already modified, so prog->len == off + * means no live instructions after (tail of the program was removed). + */ + if (prog->len != off && l_cnt && + (i == nr_linfo || linfo[i].insn_off != off + cnt)) { + l_cnt--; + linfo[--i].insn_off = off + cnt; + } + + /* remove the line info which refer to the removed instructions */ + if (l_cnt) { + memmove(linfo + l_off, linfo + i, + sizeof(*linfo) * (nr_linfo - i)); + + prog->aux->nr_linfo -= l_cnt; + nr_linfo = prog->aux->nr_linfo; + } + + /* pull all linfo[i].insn_off >= off + cnt in by cnt */ + for (i = l_off; i < nr_linfo; i++) + linfo[i].insn_off -= cnt; + + /* fix up all subprogs (incl. 'exit') which start >= off */ + for (i = 0; i <= env->subprog_cnt; i++) + if (env->subprog_info[i].linfo_idx > l_off) { + /* program may have started in the removed region but + * may not be fully removed + */ + if (env->subprog_info[i].linfo_idx >= l_off + l_cnt) + env->subprog_info[i].linfo_idx -= l_cnt; + else + env->subprog_info[i].linfo_idx = l_off; + } + + return 0; +} + +static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) +{ + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + unsigned int orig_prog_len = env->prog->len; + int err; + + err = bpf_remove_insns(env->prog, off, cnt); + if (err) + return err; + + err = adjust_subprog_starts_after_remove(env, off, cnt); + if (err) + return err; + + err = bpf_adj_linfo_after_remove(env, off, cnt); + if (err) + return err; + + memmove(aux_data + off, aux_data + off + cnt, + sizeof(*aux_data) * (orig_prog_len - off - cnt)); + + return 0; +} + /* The verifier does more data flow analysis than llvm and will not * explore branches that are dead at run time. Malicious programs can * have dead code too. Therefore replace all dead at-run-time code @@ -6492,6 +6636,30 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) } } +static int opt_remove_dead_code(struct bpf_verifier_env *env) +{ + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + int insn_cnt = env->prog->len; + int i, err; + + for (i = 0; i < insn_cnt; i++) { + int j; + + j = 0; + while (i + j < insn_cnt && !aux_data[i + j].seen) + j++; + if (!j) + continue; + + err = verifier_remove_insns(env, i, j); + if (err) + return err; + insn_cnt = env->prog->len; + } + + return 0; +} + /* convert load instructions that access fields of a context type into a * sequence of instructions that access fields of the underlying structure: * struct __sk_buff -> struct sk_buff @@ -7282,11 +7450,13 @@ skip_full_check: if (is_priv) { if (ret == 0) opt_hard_wire_dead_code_branches(env); + if (ret == 0) + ret = opt_remove_dead_code(env); + } else { + if (ret == 0) + sanitize_dead_code(env); } - if (ret == 0) - sanitize_dead_code(env); - if (ret == 0) /* program is valid, convert *(u32*)(ctx + off) accesses */ ret = convert_ctx_accesses(env); -- cgit v1.2.3 From a1b14abc009d9c13be355dbd4a4c4d47816ad3db Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:21 -0800 Subject: bpf: verifier: remove unconditional branches by 0 Unconditional branches by 0 instructions are basically noops but they can result from earlier optimizations, e.g. a conditional jumps which would never be taken or a conditional jump around dead code. Remove those branches. v0.2: - s/opt_remove_dead_branches/opt_remove_nops/ (Jiong). Signed-off-by: Jakub Kicinski Reviewed-by: Jiong Wang Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 099b2541f87f..f39bca188a5c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6660,6 +6660,27 @@ static int opt_remove_dead_code(struct bpf_verifier_env *env) return 0; } +static int opt_remove_nops(struct bpf_verifier_env *env) +{ + const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + int i, err; + + for (i = 0; i < insn_cnt; i++) { + if (memcmp(&insn[i], &ja, sizeof(ja))) + continue; + + err = verifier_remove_insns(env, i, 1); + if (err) + return err; + insn_cnt--; + i--; + } + + return 0; +} + /* convert load instructions that access fields of a context type into a * sequence of instructions that access fields of the underlying structure: * struct __sk_buff -> struct sk_buff @@ -7452,6 +7473,8 @@ skip_full_check: opt_hard_wire_dead_code_branches(env); if (ret == 0) ret = opt_remove_dead_code(env); + if (ret == 0) + ret = opt_remove_nops(env); } else { if (ret == 0) sanitize_dead_code(env); -- cgit v1.2.3 From 9e4c24e7ee7dfd3898269519103e823892b730d8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:23 -0800 Subject: bpf: verifier: record original instruction index The communication between the verifier and advanced JITs is based on instruction indexes. We have to keep them stable throughout the optimizations otherwise referring to a particular instruction gets messy quickly. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f39bca188a5c..f2c49b4235df 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7371,7 +7371,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, { struct bpf_verifier_env *env; struct bpf_verifier_log *log; - int ret = -EINVAL; + int i, len, ret = -EINVAL; bool is_priv; /* no program is valid */ @@ -7386,12 +7386,14 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, return -ENOMEM; log = &env->log; + len = (*prog)->len; env->insn_aux_data = - vzalloc(array_size(sizeof(struct bpf_insn_aux_data), - (*prog)->len)); + vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len)); ret = -ENOMEM; if (!env->insn_aux_data) goto err_free_env; + for (i = 0; i < len; i++) + env->insn_aux_data[i].orig_idx = i; env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; -- cgit v1.2.3 From 08ca90afba255d05dc3253caa44056e7aecbe8c5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 22 Jan 2019 22:45:24 -0800 Subject: bpf: notify offload JITs about optimizations Let offload JITs know when instructions are replaced and optimized out, so they can update their state appropriately. The optimizations are best effort, if JIT returns an error from any callback verifier will stop notifying it as state may now be out of sync, but the verifier continues making progress. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- kernel/bpf/offload.c | 35 +++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 6 ++++++ 2 files changed, 41 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 54cf2b9c44a4..39dba8c90331 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -173,6 +173,41 @@ int bpf_prog_offload_finalize(struct bpf_verifier_env *env) return ret; } +void +bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn) +{ + const struct bpf_prog_offload_ops *ops; + struct bpf_prog_offload *offload; + int ret = -EOPNOTSUPP; + + down_read(&bpf_devs_lock); + offload = env->prog->aux->offload; + if (offload) { + ops = offload->offdev->ops; + if (!offload->opt_failed && ops->replace_insn) + ret = ops->replace_insn(env, off, insn); + offload->opt_failed |= ret; + } + up_read(&bpf_devs_lock); +} + +void +bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) +{ + struct bpf_prog_offload *offload; + int ret = -EOPNOTSUPP; + + down_read(&bpf_devs_lock); + offload = env->prog->aux->offload; + if (offload) { + if (!offload->opt_failed && offload->offdev->ops->remove_insns) + ret = offload->offdev->ops->remove_insns(env, off, cnt); + offload->opt_failed |= ret; + } + up_read(&bpf_devs_lock); +} + static void __bpf_prog_offload_destroy(struct bpf_prog *prog) { struct bpf_prog_offload *offload = prog->aux->offload; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f2c49b4235df..8cfe39ef770f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6558,6 +6558,9 @@ static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) unsigned int orig_prog_len = env->prog->len; int err; + if (bpf_prog_is_dev_bound(env->prog->aux)) + bpf_prog_offload_remove_insns(env, off, cnt); + err = bpf_remove_insns(env->prog, off, cnt); if (err) return err; @@ -6632,6 +6635,9 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) else continue; + if (bpf_prog_is_dev_bound(env->prog->aux)) + bpf_prog_offload_replace_insn(env, i, &ja); + memcpy(insn, &ja, sizeof(ja)); } } -- cgit v1.2.3