From 482ad2a4ace2740ca0ff1cbc8f3c7f862f3ab507 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:09 +0000 Subject: net: add dev_net_rcu() helper dev->nd_net can change, readers should either use rcu_read_lock() or RTNL. We currently use a generic helper, dev_net() with no debugging support. We probably have many hidden bugs. Add dev_net_rcu() helper for callers using rcu_read_lock() protection. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250205155120.1676781-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 6 ++++++ include/net/net_namespace.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 03bb584c62cf..c0a86afb85da 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2663,6 +2663,12 @@ struct net *dev_net(const struct net_device *dev) return read_pnet(&dev->nd_net); } +static inline +struct net *dev_net_rcu(const struct net_device *dev) +{ + return read_pnet_rcu(&dev->nd_net); +} + static inline void dev_net_set(struct net_device *dev, struct net *net) { diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 0f5eb9db0c62..7ba1402ca779 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -398,7 +398,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #endif } -static inline struct net *read_pnet_rcu(possible_net_t *pnet) +static inline struct net *read_pnet_rcu(const possible_net_t *pnet) { #ifdef CONFIG_NET_NS return rcu_dereference(pnet->net); -- cgit v1.2.3 From 469308552ca4560176cfc100e7ca84add1bebd7c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:10 +0000 Subject: ipv4: add RCU protection to ip4_dst_hoplimit() ip4_dst_hoplimit() must use RCU protection to make sure the net structure it reads does not disappear. Fixes: fa50d974d104 ("ipv4: Namespaceify ip_default_ttl sysctl knob") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250205155120.1676781-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index f86775be3e29..c605fd5ec0c0 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -382,10 +382,15 @@ static inline int inet_iif(const struct sk_buff *skb) static inline int ip4_dst_hoplimit(const struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); - struct net *net = dev_net(dst->dev); - if (hoplimit == 0) + if (hoplimit == 0) { + const struct net *net; + + rcu_read_lock(); + net = dev_net_rcu(dst->dev); hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); + rcu_read_unlock(); + } return hoplimit; } -- cgit v1.2.3 From 071d8012869b6af352acca346ade13e7be90a49f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:11 +0000 Subject: ipv4: use RCU protection in ip_dst_mtu_maybe_forward() ip_dst_mtu_maybe_forward() must use RCU protection to make sure the net structure it reads does not disappear. Fixes: f87c10a8aa1e8 ("ipv4: introduce ip_dst_mtu_maybe_forward and protect forwarding path against pmtu spoofing") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250205155120.1676781-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 9f5e33e371fc..ba7b43447775 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -471,9 +471,12 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { const struct rtable *rt = dst_rtable(dst); - struct net *net = dev_net(dst->dev); - unsigned int mtu; + unsigned int mtu, res; + struct net *net; + + rcu_read_lock(); + net = dev_net_rcu(dst->dev); if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { @@ -497,7 +500,11 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, out: mtu = min_t(unsigned int, mtu, IP_MAX_MTU); - return mtu - lwtunnel_headroom(dst->lwtstate, mtu); + res = mtu - lwtunnel_headroom(dst->lwtstate, mtu); + + rcu_read_unlock(); + + return res; } static inline unsigned int ip_skb_dst_mtu(struct sock *sk, -- cgit v1.2.3 From 6a774228e890ee04a0ee13f4e6e731ec8554b9c2 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Wed, 5 Feb 2025 12:03:01 +0100 Subject: net: ethtool: tsconfig: Fix netlink type of hwtstamp flags Fix the netlink type for hardware timestamp flags, which are represented as a bitset of flags. Although only one flag is supported currently, the correct netlink bitset type should be used instead of u32 to keep consistency with other fields. Address this by adding a new named string set description for the hwtstamp flag structure. The code has been introduced in the current release so the uAPI change is still okay. Signed-off-by: Kory Maincent Fixes: 6e9e2eed4f39 ("net: ethtool: Add support for tsconfig command to get/set hwtstamp config") Link: https://patch.msgid.link/20250205110304.375086-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index d1089b88efc7..9b18c4cfe56f 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -682,6 +682,7 @@ enum ethtool_link_ext_substate_module { * @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics * @ETH_SS_STATS_RMON: names of RMON statistics * @ETH_SS_STATS_PHY: names of PHY(dev) statistics + * @ETH_SS_TS_FLAGS: hardware timestamping flags * * @ETH_SS_COUNT: number of defined string sets */ @@ -708,6 +709,7 @@ enum ethtool_stringset { ETH_SS_STATS_ETH_CTRL, ETH_SS_STATS_RMON, ETH_SS_STATS_PHY, + ETH_SS_TS_FLAGS, /* add new constants above here */ ETH_SS_COUNT -- cgit v1.2.3 From 011b0335903832facca86cd8ed05d7d8d94c9c76 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 6 Feb 2025 22:28:48 +0100 Subject: Revert "net: skb: introduce and use a single page frag cache" This reverts commit dbae2b062824 ("net: skb: introduce and use a single page frag cache"). The intended goal of such change was to counter a performance regression introduced by commit 3226b158e67c ("net: avoid 32 x truesize under-estimation for tiny skbs"). Unfortunately, the blamed commit introduces another regression for the virtio_net driver. Such a driver calls napi_alloc_skb() with a tiny size, so that the whole head frag could fit a 512-byte block. The single page frag cache uses a 1K fragment for such allocation, and the additional overhead, under small UDP packets flood, makes the page allocator a bottleneck. Thanks to commit bf9f1baa279f ("net: add dedicated kmem_cache for typical/small skb->head"), this revert does not re-introduce the original regression. Actually, in the relevant test on top of this revert, I measure a small but noticeable positive delta, just above noise level. The revert itself required some additional mangling due to the introduction of the SKB_HEAD_ALIGN() helper and local lock infra in the affected code. Suggested-by: Eric Dumazet Fixes: dbae2b062824 ("net: skb: introduce and use a single page frag cache") Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/e649212fde9f0fdee23909ca0d14158d32bb7425.1738877290.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c0a86afb85da..365f0e2098d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4115,7 +4115,6 @@ void netif_receive_skb_list(struct list_head *head); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); -void napi_get_frags_check(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); static inline void napi_free_frags(struct napi_struct *napi) -- cgit v1.2.3 From 6d0ce46a93135d96b7fa075a94a88fe0da8e8773 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Feb 2025 13:58:38 +0000 Subject: vrf: use RCU protection in l3mdev_l3_out() l3mdev_l3_out() can be called without RCU being held: raw_sendmsg() ip_push_pending_frames() ip_send_skb() ip_local_out() __ip_local_out() l3mdev_ip_out() Add rcu_read_lock() / rcu_read_unlock() pair to avoid a potential UAF. Fixes: a8e3e1a9f020 ("net: l3mdev: Add hook to output path") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-7-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/l3mdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 2d6141f28b53..f7fe796e8429 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -198,10 +198,12 @@ struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto) if (netif_is_l3_slave(dev)) { struct net_device *master; + rcu_read_lock(); master = netdev_master_upper_dev_get_rcu(dev); if (master && master->l3mdev_ops->l3mdev_l3_out) skb = master->l3mdev_ops->l3mdev_l3_out(master, sk, skb, proto); + rcu_read_unlock(); } return skb; -- cgit v1.2.3 From ab4eedb790cae44313759b50fe47da285e2519d5 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 6 Feb 2025 15:54:45 -0500 Subject: Bluetooth: L2CAP: Fix corrupted list in hci_chan_del This fixes the following trace by reworking the locking of l2cap_conn so instead of only locking when changing the chan_l list this promotes chan_lock to a general lock of l2cap_conn so whenever it is being held it would prevents the likes of l2cap_conn_del to run: list_del corruption, ffff888021297e00->prev is LIST_POISON2 (dead000000000122) ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:61! Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 1 UID: 0 PID: 5896 Comm: syz-executor213 Not tainted 6.14.0-rc1-next-20250204-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 12/27/2024 RIP: 0010:__list_del_entry_valid_or_report+0x12c/0x190 lib/list_debug.c:59 Code: 8c 4c 89 fe 48 89 da e8 32 8c 37 fc 90 0f 0b 48 89 df e8 27 9f 14 fd 48 c7 c7 a0 c0 60 8c 4c 89 fe 48 89 da e8 15 8c 37 fc 90 <0f> 0b 4c 89 e7 e8 0a 9f 14 fd 42 80 3c 2b 00 74 08 4c 89 e7 e8 cb RSP: 0018:ffffc90003f6f998 EFLAGS: 00010246 RAX: 000000000000004e RBX: dead000000000122 RCX: 01454d423f7fbf00 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: dffffc0000000000 R08: ffffffff819f077c R09: 1ffff920007eded0 R10: dffffc0000000000 R11: fffff520007eded1 R12: dead000000000122 R13: dffffc0000000000 R14: ffff8880352248d8 R15: ffff888021297e00 FS: 00007f7ace6686c0(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7aceeeb1d0 CR3: 000000003527c000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __list_del_entry_valid include/linux/list.h:124 [inline] __list_del_entry include/linux/list.h:215 [inline] list_del_rcu include/linux/rculist.h:168 [inline] hci_chan_del+0x70/0x1b0 net/bluetooth/hci_conn.c:2858 l2cap_conn_free net/bluetooth/l2cap_core.c:1816 [inline] kref_put include/linux/kref.h:65 [inline] l2cap_conn_put+0x70/0xe0 net/bluetooth/l2cap_core.c:1830 l2cap_sock_shutdown+0xa8a/0x1020 net/bluetooth/l2cap_sock.c:1377 l2cap_sock_release+0x79/0x1d0 net/bluetooth/l2cap_sock.c:1416 __sock_release net/socket.c:642 [inline] sock_close+0xbc/0x240 net/socket.c:1393 __fput+0x3e9/0x9f0 fs/file_table.c:448 task_work_run+0x24f/0x310 kernel/task_work.c:227 ptrace_notify+0x2d2/0x380 kernel/signal.c:2522 ptrace_report_syscall include/linux/ptrace.h:415 [inline] ptrace_report_syscall_exit include/linux/ptrace.h:477 [inline] syscall_exit_work+0xc7/0x1d0 kernel/entry/common.c:173 syscall_exit_to_user_mode_prepare kernel/entry/common.c:200 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:205 [inline] syscall_exit_to_user_mode+0x24a/0x340 kernel/entry/common.c:218 do_syscall_64+0x100/0x230 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f7aceeaf449 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 41 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f7ace668218 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: fffffffffffffffc RBX: 00007f7acef39328 RCX: 00007f7aceeaf449 RDX: 000000000000000e RSI: 0000000020000100 RDI: 0000000000000004 RBP: 00007f7acef39320 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000003 R13: 0000000000000004 R14: 00007f7ace668670 R15: 000000000000000b Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:__list_del_entry_valid_or_report+0x12c/0x190 lib/list_debug.c:59 Code: 8c 4c 89 fe 48 89 da e8 32 8c 37 fc 90 0f 0b 48 89 df e8 27 9f 14 fd 48 c7 c7 a0 c0 60 8c 4c 89 fe 48 89 da e8 15 8c 37 fc 90 <0f> 0b 4c 89 e7 e8 0a 9f 14 fd 42 80 3c 2b 00 74 08 4c 89 e7 e8 cb RSP: 0018:ffffc90003f6f998 EFLAGS: 00010246 RAX: 000000000000004e RBX: dead000000000122 RCX: 01454d423f7fbf00 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: dffffc0000000000 R08: ffffffff819f077c R09: 1ffff920007eded0 R10: dffffc0000000000 R11: fffff520007eded1 R12: dead000000000122 R13: dffffc0000000000 R14: ffff8880352248d8 R15: ffff888021297e00 FS: 00007f7ace6686c0(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7acef05b08 CR3: 000000003527c000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Reported-by: syzbot+10bd8fe6741eedd2be2e@syzkaller.appspotmail.com Tested-by: syzbot+10bd8fe6741eedd2be2e@syzkaller.appspotmail.com Fixes: b4f82f9ed43a ("Bluetooth: L2CAP: Fix slab-use-after-free Read in l2cap_send_cmd") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Dan Carpenter --- include/net/bluetooth/l2cap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index d9c767cf773d..9189354c568f 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -668,7 +668,7 @@ struct l2cap_conn { struct l2cap_chan *smp; struct list_head chan_l; - struct mutex chan_lock; + struct mutex lock; struct kref ref; struct list_head users; }; @@ -970,6 +970,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err); void l2cap_send_conn_req(struct l2cap_chan *chan); struct l2cap_conn *l2cap_conn_get(struct l2cap_conn *conn); +struct l2cap_conn *l2cap_conn_hold_unless_zero(struct l2cap_conn *conn); void l2cap_conn_put(struct l2cap_conn *conn); int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user); -- cgit v1.2.3 From 0892b840318daa6ae739b7cdec5ecdfca4006689 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 13 Feb 2025 08:49:44 -0800 Subject: Reapply "net: skb: introduce and use a single page frag cache" This reverts commit 011b0335903832facca86cd8ed05d7d8d94c9c76. Sabrina reports that the revert may trigger warnings due to intervening changes, especially the ability to rise MAX_SKB_FRAGS. Let's drop it and revisit once that part is also ironed out. Fixes: 011b03359038 ("Revert "net: skb: introduce and use a single page frag cache"") Reported-by: Sabrina Dubroca Link: https://lore.kernel.org/6bf54579233038bc0e76056c5ea459872ce362ab.1739375933.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 365f0e2098d1..c0a86afb85da 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4115,6 +4115,7 @@ void netif_receive_skb_list(struct list_head *head); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); +void napi_get_frags_check(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); static inline void napi_free_frags(struct napi_struct *napi) -- cgit v1.2.3