From af58e7ee6a8d83726ad8a2696e98d86400a7639c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sun, 8 Sep 2019 09:20:16 +0100 Subject: xdp: Fix race in dev_map_hash_update_elem() when replacing element MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit syzbot found a crash in dev_map_hash_update_elem(), when replacing an element with a new one. Jesper correctly identified the cause of the crash as a race condition between the initial lookup in the map (which is done before taking the lock), and the removal of the old element. Rather than just add a second lookup into the hashmap after taking the lock, fix this by reworking the function logic to take the lock before the initial lookup. Fixes: 6f9d451ab1a3 ("xdp: Add devmap_hash map type for looking up devices by hashed index") Reported-and-tested-by: syzbot+4e7a85b1432052e8d6f8@syzkaller.appspotmail.com Signed-off-by: Toke Høiland-Jørgensen Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- kernel/bpf/devmap.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 9af048a932b5..d27f3b60ff6d 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -650,19 +650,22 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map, u32 ifindex = *(u32 *)value; u32 idx = *(u32 *)key; unsigned long flags; + int err = -EEXIST; if (unlikely(map_flags > BPF_EXIST || !ifindex)) return -EINVAL; + spin_lock_irqsave(&dtab->index_lock, flags); + old_dev = __dev_map_hash_lookup_elem(map, idx); if (old_dev && (map_flags & BPF_NOEXIST)) - return -EEXIST; + goto out_err; dev = __dev_map_alloc_node(net, dtab, ifindex, idx); - if (IS_ERR(dev)) - return PTR_ERR(dev); - - spin_lock_irqsave(&dtab->index_lock, flags); + if (IS_ERR(dev)) { + err = PTR_ERR(dev); + goto out_err; + } if (old_dev) { hlist_del_rcu(&old_dev->index_hlist); @@ -683,6 +686,10 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map, call_rcu(&old_dev->rcu, __dev_map_entry_free); return 0; + +out_err: + spin_unlock_irqrestore(&dtab->index_lock, flags); + return err; } static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value, -- cgit v1.2.3 From d895a0f16fadb26d22ab531c49768f7642ae5c3e Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 16 Aug 2019 12:53:00 +0200 Subject: bpf: fix accessing bpf_sysctl.file_pos on s390 "ctx:file_pos sysctl:read write ok" fails on s390 with "Read value != nux". This is because verifier rewrites a complete 32-bit bpf_sysctl.file_pos update to a partial update of the first 32 bits of 64-bit *bpf_sysctl_kern.ppos, which is not correct on big-endian systems. Fix by using an offset on big-endian systems. Ditto for bpf_sysctl.file_pos reads. Currently the test does not detect a problem there, since it expects to see 0, which it gets with high probability in error cases, so change it to seek to offset 3 and expect 3 in bpf_sysctl.file_pos. Fixes: e1550bfe0de4 ("bpf: Add file_pos field to bpf_sysctl ctx") Signed-off-by: Ilya Leoshkevich Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20190816105300.49035-1-iii@linux.ibm.com/ --- kernel/bpf/cgroup.c | 10 ++++++++-- kernel/bpf/verifier.c | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 6a6a154cfa7b..ddd8addcdb5c 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1334,6 +1334,7 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; + u32 read_size; switch (si->off) { case offsetof(struct bpf_sysctl, write): @@ -1365,7 +1366,9 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type, treg, si->dst_reg, offsetof(struct bpf_sysctl_kern, ppos)); *insn++ = BPF_STX_MEM( - BPF_SIZEOF(u32), treg, si->src_reg, 0); + BPF_SIZEOF(u32), treg, si->src_reg, + bpf_ctx_narrow_access_offset( + 0, sizeof(u32), sizeof(loff_t))); *insn++ = BPF_LDX_MEM( BPF_DW, treg, si->dst_reg, offsetof(struct bpf_sysctl_kern, tmp_reg)); @@ -1374,8 +1377,11 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type, BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), si->dst_reg, si->src_reg, offsetof(struct bpf_sysctl_kern, ppos)); + read_size = bpf_size_to_bytes(BPF_SIZE(si->code)); *insn++ = BPF_LDX_MEM( - BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 0); + BPF_SIZE(si->code), si->dst_reg, si->dst_reg, + bpf_ctx_narrow_access_offset( + 0, read_size, sizeof(loff_t))); } *target_size = sizeof(u32); break; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3fb50757e812..92a4332b041d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8619,8 +8619,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) } if (is_narrower_load && size < target_size) { - u8 shift = bpf_ctx_narrow_load_shift(off, size, - size_default); + u8 shift = bpf_ctx_narrow_access_offset( + off, size, size_default) * 8; if (ctx_field_size <= 4) { if (shift) insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH, -- cgit v1.2.3