From 63530aba7826a0f8e129874df9c4d264f9db3f9e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Jan 2019 10:40:59 -0800 Subject: ax25: fix possible use-after-free syzbot found that ax25 routes where not properly protected against concurrent use [1]. In this particular report the bug happened while copying ax25->digipeat. Fix this problem by making sure we call ax25_get_route() while ax25_route_lock is held, so that no modification could happen while using the route. The current two ax25_get_route() callers do not sleep, so this change should be fine. Once we do that, ax25_get_route() no longer needs to grab a reference on the found route. [1] ax25_connect(): syz-executor0 uses autobind, please contact jreuter@yaina.de BUG: KASAN: use-after-free in memcpy include/linux/string.h:352 [inline] BUG: KASAN: use-after-free in kmemdup+0x42/0x60 mm/util.c:113 Read of size 66 at addr ffff888066641a80 by task syz-executor2/531 ax25_connect(): syz-executor0 uses autobind, please contact jreuter@yaina.de CPU: 1 PID: 531 Comm: syz-executor2 Not tainted 5.0.0-rc2+ #10 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1db/0x2d0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 check_memory_region_inline mm/kasan/generic.c:185 [inline] check_memory_region+0x123/0x190 mm/kasan/generic.c:191 memcpy+0x24/0x50 mm/kasan/common.c:130 memcpy include/linux/string.h:352 [inline] kmemdup+0x42/0x60 mm/util.c:113 kmemdup include/linux/string.h:425 [inline] ax25_rt_autobind+0x25d/0x750 net/ax25/ax25_route.c:424 ax25_connect.cold+0x30/0xa4 net/ax25/af_ax25.c:1224 __sys_connect+0x357/0x490 net/socket.c:1664 __do_sys_connect net/socket.c:1675 [inline] __se_sys_connect net/socket.c:1672 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1672 do_syscall_64+0x1a3/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458099 Code: 6d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 3b b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f870ee22c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000458099 RDX: 0000000000000048 RSI: 0000000020000080 RDI: 0000000000000005 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 ax25_connect(): syz-executor4 uses autobind, please contact jreuter@yaina.de R10: 0000000000000000 R11: 0000000000000246 R12: 00007f870ee236d4 R13: 00000000004be48e R14: 00000000004ce9a8 R15: 00000000ffffffff Allocated by task 526: save_stack+0x45/0xd0 mm/kasan/common.c:73 set_track mm/kasan/common.c:85 [inline] __kasan_kmalloc mm/kasan/common.c:496 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:469 kasan_kmalloc+0x9/0x10 mm/kasan/common.c:504 ax25_connect(): syz-executor5 uses autobind, please contact jreuter@yaina.de kmem_cache_alloc_trace+0x151/0x760 mm/slab.c:3609 kmalloc include/linux/slab.h:545 [inline] ax25_rt_add net/ax25/ax25_route.c:95 [inline] ax25_rt_ioctl+0x3b9/0x1270 net/ax25/ax25_route.c:233 ax25_ioctl+0x322/0x10b0 net/ax25/af_ax25.c:1763 sock_do_ioctl+0xe2/0x400 net/socket.c:950 sock_ioctl+0x32f/0x6c0 net/socket.c:1074 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:509 [inline] do_vfs_ioctl+0x107b/0x17d0 fs/ioctl.c:696 ksys_ioctl+0xab/0xd0 fs/ioctl.c:713 __do_sys_ioctl fs/ioctl.c:720 [inline] __se_sys_ioctl fs/ioctl.c:718 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718 do_syscall_64+0x1a3/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe ax25_connect(): syz-executor5 uses autobind, please contact jreuter@yaina.de Freed by task 550: save_stack+0x45/0xd0 mm/kasan/common.c:73 set_track mm/kasan/common.c:85 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:458 kasan_slab_free+0xe/0x10 mm/kasan/common.c:466 __cache_free mm/slab.c:3487 [inline] kfree+0xcf/0x230 mm/slab.c:3806 ax25_rt_add net/ax25/ax25_route.c:92 [inline] ax25_rt_ioctl+0x304/0x1270 net/ax25/ax25_route.c:233 ax25_ioctl+0x322/0x10b0 net/ax25/af_ax25.c:1763 sock_do_ioctl+0xe2/0x400 net/socket.c:950 sock_ioctl+0x32f/0x6c0 net/socket.c:1074 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:509 [inline] do_vfs_ioctl+0x107b/0x17d0 fs/ioctl.c:696 ksys_ioctl+0xab/0xd0 fs/ioctl.c:713 __do_sys_ioctl fs/ioctl.c:720 [inline] __se_sys_ioctl fs/ioctl.c:718 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718 do_syscall_64+0x1a3/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff888066641a80 which belongs to the cache kmalloc-96 of size 96 The buggy address is located 0 bytes inside of 96-byte region [ffff888066641a80, ffff888066641ae0) The buggy address belongs to the page: page:ffffea0001999040 count:1 mapcount:0 mapping:ffff88812c3f04c0 index:0x0 flags: 0x1fffc0000000200(slab) ax25_connect(): syz-executor4 uses autobind, please contact jreuter@yaina.de raw: 01fffc0000000200 ffffea0001817948 ffffea0002341dc8 ffff88812c3f04c0 raw: 0000000000000000 ffff888066641000 0000000100000020 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888066641980: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ffff888066641a00: 00 00 00 00 00 00 00 00 02 fc fc fc fc fc fc fc >ffff888066641a80: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ^ ffff888066641b00: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ffff888066641b80: 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc fc Signed-off-by: Eric Dumazet Cc: Ralf Baechle Reported-by: syzbot Signed-off-by: David S. Miller --- include/net/ax25.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/ax25.h b/include/net/ax25.h index 3f9aea8087e3..8b7eb46ad72d 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -201,6 +201,18 @@ static inline void ax25_hold_route(ax25_route *ax25_rt) void __ax25_put_route(ax25_route *ax25_rt); +extern rwlock_t ax25_route_lock; + +static inline void ax25_route_lock_use(void) +{ + read_lock(&ax25_route_lock); +} + +static inline void ax25_route_lock_unuse(void) +{ + read_unlock(&ax25_route_lock); +} + static inline void ax25_put_route(ax25_route *ax25_rt) { if (refcount_dec_and_test(&ax25_rt->refcount)) -- cgit v1.2.3 From 32eb67b93c9e3cd62cb423e30b090cdd4aa8d275 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Sun, 27 Jan 2019 00:57:38 +0000 Subject: net: tls: Save iv in tls_rec for async crypto requests aead_request_set_crypt takes an iv pointer, and we change the iv soon after setting it. Some async crypto algorithms don't save the iv, so we need to save it in the tls_rec for async requests. Found by hardcoding x64 aesni to use async crypto manager (to test the async codepath), however I don't think this combination can happen in the wild. Presumably other hardware offloads will need this fix, but there have been no user reports. Fixes: a42055e8d2c30 ("Add support for async encryption of records...") Signed-off-by: Dave Watson Signed-off-by: David S. Miller --- include/net/tls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/tls.h b/include/net/tls.h index 2a6ac8d642af..1486b60c4de8 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -120,6 +120,8 @@ struct tls_rec { struct scatterlist sg_aead_out[2]; char aad_space[TLS_AAD_SPACE_SIZE]; + u8 iv_data[TLS_CIPHER_AES_GCM_128_IV_SIZE + + TLS_CIPHER_AES_GCM_128_SALT_SIZE]; struct aead_request aead_req; u8 aead_req_ctx[]; }; -- cgit v1.2.3 From d5256083f62e2720f75bb3c5a928a0afe47d6bc3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Jan 2019 12:49:48 +0100 Subject: ipvlan, l3mdev: fix broken l3s mode wrt local routes While implementing ipvlan l3 and l3s mode for kubernetes CNI plugin, I ran into the issue that while l3 mode is working fine, l3s mode does not have any connectivity to kube-apiserver and hence all pods end up in Error state as well. The ipvlan master device sits on top of a bond device and hostns traffic to kube-apiserver (also running in hostns) is DNATed from 10.152.183.1:443 to 139.178.29.207:37573 where the latter is the address of the bond0. While in l3 mode, a curl to https://10.152.183.1:443 or to https://139.178.29.207:37573 works fine from hostns, neither of them do in case of l3s. In the latter only a curl to https://127.0.0.1:37573 appeared to work where for local addresses of bond0 I saw kernel suddenly starting to emit ARP requests to query HW address of bond0 which remained unanswered and neighbor entries in INCOMPLETE state. These ARP requests only happen while in l3s. Debugging this further, I found the issue is that l3s mode is piggy- backing on l3 master device, and in this case local routes are using l3mdev_master_dev_rcu(dev) instead of net->loopback_dev as per commit f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant") and 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be a loopback"). I found that reverting them back into using the net->loopback_dev fixed ipvlan l3s connectivity and got everything working for the CNI. Now judging from 4fbae7d83c98 ("ipvlan: Introduce l3s mode") and the l3mdev paper in [0] the only sole reason why ipvlan l3s is relying on l3 master device is to get the l3mdev_ip_rcv() receive hook for setting the dst entry of the input route without adding its own ipvlan specific hacks into the receive path, however, any l3 domain semantics beyond just that are breaking l3s operation. Note that ipvlan also has the ability to dynamically switch its internal operation from l3 to l3s for all ports via ipvlan_set_port_mode() at runtime. In any case, l3 vs l3s soley distinguishes itself by 'de-confusing' netfilter through switching skb->dev to ipvlan slave device late in NF_INET_LOCAL_IN before handing the skb to L4. Minimal fix taken here is to add a IFF_L3MDEV_RX_HANDLER flag which, if set from ipvlan setup, gets us only the wanted l3mdev_l3_rcv() hook without any additional l3mdev semantics on top. This should also have minimal impact since dev->priv_flags is already hot in cache. With this set, l3s mode is working fine and I also get things like masquerading pod traffic on the ipvlan master properly working. [0] https://netdevconf.org/1.2/papers/ahern-what-is-l3mdev-paper.pdf Fixes: f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant") Fixes: 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be a loopback") Fixes: 4fbae7d83c98 ("ipvlan: Introduce l3s mode") Signed-off-by: Daniel Borkmann Cc: Mahesh Bandewar Cc: David Ahern Cc: Florian Westphal Cc: Martynas Pumputis Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 78fa0ac4613c..5175fd63cd82 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -153,7 +153,8 @@ struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) if (netif_is_l3_slave(skb->dev)) master = netdev_master_upper_dev_get_rcu(skb->dev); - else if (netif_is_l3_master(skb->dev)) + else if (netif_is_l3_master(skb->dev) || + netif_has_l3_rx_handler(skb->dev)) master = skb->dev; if (master && master->l3mdev_ops->l3mdev_l3_rcv) -- cgit v1.2.3 From f6ac8585897684374a19863fff21186a05805286 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 10:49:13 +0100 Subject: netfilter: nf_tables: unbind set in rule from commit path Anonymous sets that are bound to rules from the same transaction trigger a kernel splat from the abort path due to double set list removal and double free. This patch updates the logic to search for the transaction that is responsible for creating the set and disable the set list removal and release, given the rule is now responsible for this. Lookup is reverse since the transaction that adds the set is likely to be at the tail of the list. Moreover, this patch adds the unbind step to deliver the event from the commit path. This should not be done from the worker thread, since we have no guarantees of in-order delivery to the listener. This patch removes the assumption that both activate and deactivate callbacks need to be provided. Fixes: cd5125d8f518 ("netfilter: nf_tables: split set destruction in deactivate and destroy phase") Reported-by: Mikhail Morfikov Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 841835a387e1..b4984bbbe157 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -469,9 +469,7 @@ struct nft_set_binding { int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding); -void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding); + struct nft_set_binding *binding, bool commit); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); /** @@ -721,6 +719,13 @@ struct nft_expr_type { #define NFT_EXPR_STATEFUL 0x1 #define NFT_EXPR_GC 0x2 +enum nft_trans_phase { + NFT_TRANS_PREPARE, + NFT_TRANS_ABORT, + NFT_TRANS_COMMIT, + NFT_TRANS_RELEASE +}; + /** * struct nft_expr_ops - nf_tables expression operations * @@ -750,7 +755,8 @@ struct nft_expr_ops { void (*activate)(const struct nft_ctx *ctx, const struct nft_expr *expr); void (*deactivate)(const struct nft_ctx *ctx, - const struct nft_expr *expr); + const struct nft_expr *expr, + enum nft_trans_phase phase); void (*destroy)(const struct nft_ctx *ctx, const struct nft_expr *expr); void (*destroy_clone)(const struct nft_ctx *ctx, @@ -1323,12 +1329,15 @@ struct nft_trans_rule { struct nft_trans_set { struct nft_set *set; u32 set_id; + bool bound; }; #define nft_trans_set(trans) \ (((struct nft_trans_set *)trans->data)->set) #define nft_trans_set_id(trans) \ (((struct nft_trans_set *)trans->data)->set_id) +#define nft_trans_set_bound(trans) \ + (((struct nft_trans_set *)trans->data)->bound) struct nft_trans_chain { bool update; -- cgit v1.2.3