diff options
| author | Daniel Borkmann <daniel@iogearbox.net> | 2017-12-31 16:12:24 +0100 |
|---|---|---|
| committer | Daniel Borkmann <daniel@iogearbox.net> | 2017-12-31 16:12:24 +0100 |
| commit | 5620e1a8e2e6f96bdb72abfd68a56ce8cb94dd4d (patch) | |
| tree | 1028ca0b34c579067d321d7ac10ce13606b66346 /kernel | |
| parent | fb982666e380c1632a74495b68b3c33a66e76430 (diff) | |
| parent | 752d7b4501c250bead233ab041738db84436b1af (diff) | |
Merge branch 'bpf-offload-report-dev'
Jakub Kicinski says:
====================
This series is a redo of reporting offload device information to
user space after the first attempt did not take into account name
spaces. As requested by Kirill offloads are now protected by an
r/w sem. This allows us to remove the workqueue and free the
offload state fully when device is removed (suggested by Alexei).
Net namespace is reported with a device/inode pair.
The accompanying bpftool support is placed in common code because
maps will have very similar info. Note that the UAPI information
can't be nicely encapsulated into a struct, because in case we
need to grow the device information the new fields will have to
be added at the end of struct bpf_prog_info, we can't grow
structures in the middle of bpf_prog_info.
v3:
- use dev_get_by_index();
- redo ns code (new patch 6).
v2:
- rework the locking in patch 1 (use RCU instead of locking
dependencies);
- grab RTNL for a short time in patch 6;
- minor update to the test in patch 8.
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/offload.c | 147 | ||||
| -rw-r--r-- | kernel/bpf/syscall.c | 19 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 20 |
3 files changed, 133 insertions, 53 deletions
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 8455b89d1bbf..040d4e0edf3f 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -16,17 +16,22 @@ #include <linux/bpf.h> #include <linux/bpf_verifier.h> #include <linux/bug.h> +#include <linux/kdev_t.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/printk.h> +#include <linux/proc_ns.h> #include <linux/rtnetlink.h> +#include <linux/rwsem.h> -/* protected by RTNL */ +/* Protects bpf_prog_offload_devs and offload members of all progs. + * RTNL lock cannot be taken when holding this lock. + */ +static DECLARE_RWSEM(bpf_devs_lock); static LIST_HEAD(bpf_prog_offload_devs); int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) { - struct net *net = current->nsproxy->net_ns; struct bpf_dev_offload *offload; if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS && @@ -41,32 +46,40 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) return -ENOMEM; offload->prog = prog; - init_waitqueue_head(&offload->verifier_done); - rtnl_lock(); - offload->netdev = __dev_get_by_index(net, attr->prog_ifindex); - if (!offload->netdev) { - rtnl_unlock(); - kfree(offload); - return -EINVAL; - } + offload->netdev = dev_get_by_index(current->nsproxy->net_ns, + attr->prog_ifindex); + if (!offload->netdev) + goto err_free; + down_write(&bpf_devs_lock); + if (offload->netdev->reg_state != NETREG_REGISTERED) + goto err_unlock; prog->aux->offload = offload; list_add_tail(&offload->offloads, &bpf_prog_offload_devs); - rtnl_unlock(); + dev_put(offload->netdev); + up_write(&bpf_devs_lock); return 0; +err_unlock: + up_write(&bpf_devs_lock); + dev_put(offload->netdev); +err_free: + kfree(offload); + return -EINVAL; } static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd, struct netdev_bpf *data) { - struct net_device *netdev = prog->aux->offload->netdev; + struct bpf_dev_offload *offload = prog->aux->offload; + struct net_device *netdev; ASSERT_RTNL(); - if (!netdev) + if (!offload) return -ENODEV; + netdev = offload->netdev; if (!netdev->netdev_ops->ndo_bpf) return -EOPNOTSUPP; @@ -87,62 +100,63 @@ int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) if (err) goto exit_unlock; - env->dev_ops = data.verifier.ops; - + env->prog->aux->offload->dev_ops = data.verifier.ops; env->prog->aux->offload->dev_state = true; - env->prog->aux->offload->verifier_running = true; exit_unlock: rtnl_unlock(); return err; } +int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx) +{ + struct bpf_dev_offload *offload; + int ret = -ENODEV; + + down_read(&bpf_devs_lock); + offload = env->prog->aux->offload; + if (offload) + ret = offload->dev_ops->insn_hook(env, insn_idx, prev_insn_idx); + up_read(&bpf_devs_lock); + + return ret; +} + static void __bpf_prog_offload_destroy(struct bpf_prog *prog) { struct bpf_dev_offload *offload = prog->aux->offload; struct netdev_bpf data = {}; - /* Caution - if netdev is destroyed before the program, this function - * will be called twice. - */ - data.offload.prog = prog; - if (offload->verifier_running) - wait_event(offload->verifier_done, !offload->verifier_running); - if (offload->dev_state) WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data)); - offload->dev_state = false; + /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */ + bpf_prog_free_id(prog, true); + list_del_init(&offload->offloads); - offload->netdev = NULL; + kfree(offload); + prog->aux->offload = NULL; } void bpf_prog_offload_destroy(struct bpf_prog *prog) { - struct bpf_dev_offload *offload = prog->aux->offload; - - offload->verifier_running = false; - wake_up(&offload->verifier_done); - rtnl_lock(); - __bpf_prog_offload_destroy(prog); + down_write(&bpf_devs_lock); + if (prog->aux->offload) + __bpf_prog_offload_destroy(prog); + up_write(&bpf_devs_lock); rtnl_unlock(); - - kfree(offload); } static int bpf_prog_offload_translate(struct bpf_prog *prog) { - struct bpf_dev_offload *offload = prog->aux->offload; struct netdev_bpf data = {}; int ret; data.offload.prog = prog; - offload->verifier_running = false; - wake_up(&offload->verifier_done); - rtnl_lock(); ret = __bpf_offload_ndo(prog, BPF_OFFLOAD_TRANSLATE, &data); rtnl_unlock(); @@ -164,6 +178,63 @@ int bpf_prog_offload_compile(struct bpf_prog *prog) return bpf_prog_offload_translate(prog); } +struct ns_get_path_bpf_prog_args { + struct bpf_prog *prog; + struct bpf_prog_info *info; +}; + +static struct ns_common *bpf_prog_offload_info_fill_ns(void *private_data) +{ + struct ns_get_path_bpf_prog_args *args = private_data; + struct bpf_prog_aux *aux = args->prog->aux; + struct ns_common *ns; + struct net *net; + + rtnl_lock(); + down_read(&bpf_devs_lock); + + if (aux->offload) { + args->info->ifindex = aux->offload->netdev->ifindex; + net = dev_net(aux->offload->netdev); + get_net(net); + ns = &net->ns; + } else { + args->info->ifindex = 0; + ns = NULL; + } + + up_read(&bpf_devs_lock); + rtnl_unlock(); + + return ns; +} + +int bpf_prog_offload_info_fill(struct bpf_prog_info *info, + struct bpf_prog *prog) +{ + struct ns_get_path_bpf_prog_args args = { + .prog = prog, + .info = info, + }; + struct inode *ns_inode; + struct path ns_path; + void *res; + + res = ns_get_path_cb(&ns_path, bpf_prog_offload_info_fill_ns, &args); + if (IS_ERR(res)) { + if (!info->ifindex) + return -ENODEV; + return PTR_ERR(res); + } + + ns_inode = ns_path.dentry->d_inode; + info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev); + info->netns_ino = ns_inode->i_ino; + path_put(&ns_path); + + return 0; +} + const struct bpf_prog_ops bpf_offload_prog_ops = { }; @@ -181,11 +252,13 @@ static int bpf_offload_notification(struct notifier_block *notifier, if (netdev->reg_state != NETREG_UNREGISTERING) break; + down_write(&bpf_devs_lock); list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, offloads) { if (offload->netdev == netdev) __bpf_prog_offload_destroy(offload->prog); } + up_write(&bpf_devs_lock); break; default: break; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 007802c5ca7d..ebf0fb23e237 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -905,9 +905,13 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog) return id > 0 ? 0 : id; } -static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) +void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) { - /* cBPF to eBPF migrations are currently not in the idr store. */ + /* cBPF to eBPF migrations are currently not in the idr store. + * Offloaded programs are removed from the store when their device + * disappears - even if someone grabs an fd to them they are unusable, + * simply waiting for refcnt to drop to be freed. + */ if (!prog->aux->id) return; @@ -917,6 +921,7 @@ static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) __acquire(&prog_idr_lock); idr_remove(&prog_idr, prog->aux->id); + prog->aux->id = 0; if (do_idr_lock) spin_unlock_bh(&prog_idr_lock); @@ -1157,6 +1162,8 @@ static int bpf_prog_load(union bpf_attr *attr) if (!prog) return -ENOMEM; + prog->aux->offload_requested = !!attr->prog_ifindex; + err = security_bpf_prog_alloc(prog->aux); if (err) goto free_prog_nouncharge; @@ -1178,7 +1185,7 @@ static int bpf_prog_load(union bpf_attr *attr) atomic_set(&prog->aux->refcnt, 1); prog->gpl_compatible = is_gpl ? 1 : 0; - if (attr->prog_ifindex) { + if (bpf_prog_is_dev_bound(prog->aux)) { err = bpf_prog_offload_init(prog, attr); if (err) goto free_prog; @@ -1700,6 +1707,12 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, return -EFAULT; } + if (bpf_prog_is_dev_bound(prog->aux)) { + err = bpf_prog_offload_info_fill(&info, prog); + if (err) + return err; + } + done: if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 98d8637cf70d..a2b211262c25 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4438,15 +4438,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return 0; } -static int ext_analyzer_insn_hook(struct bpf_verifier_env *env, - int insn_idx, int prev_insn_idx) -{ - if (env->dev_ops && env->dev_ops->insn_hook) - return env->dev_ops->insn_hook(env, insn_idx, prev_insn_idx); - - return 0; -} - static int do_check(struct bpf_verifier_env *env) { struct bpf_verifier_state *state; @@ -4531,9 +4522,12 @@ static int do_check(struct bpf_verifier_env *env) print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks); } - err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); - if (err) - return err; + if (bpf_prog_is_dev_bound(env->prog->aux)) { + err = bpf_prog_offload_verify_insn(env, insn_idx, + prev_insn_idx); + if (err) + return err; + } regs = cur_regs(env); env->insn_aux_data[insn_idx].seen = true; @@ -5463,7 +5457,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) env->strict_alignment = true; - if (env->prog->aux->offload) { + if (bpf_prog_is_dev_bound(env->prog->aux)) { ret = bpf_prog_offload_verifier_prep(env); if (ret) goto err_unlock; |
