From d0d3e9c2867b32c9c70e39e74b9425871cf0042a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Oct 2025 06:32:21 +0000 Subject: net: gro: clear skb_shinfo(skb)->hwtstamps in napi_reuse_skb() Some network drivers assume this field is zero after napi_get_frags(). We must clear it in napi_reuse_skb() otherwise the following can happen: 1) A packet is received, and skb_shinfo(skb)->hwtstamps is populated because a bit in the receive descriptor announced hwtstamp availability for this packet. 2) Packet is given to gro layer via napi_gro_frags(). 3) Packet is merged to a prior one held in GRO queues. 4) skb is saved after some cleanup in napi->skb via a call to napi_reuse_skb(). 5) Next packet is received 10 seconds later, gets the recycled skb from napi_get_frags(). 6) The receive descriptor does not announce hwtstamp availability. Driver does not clear shinfo->hwtstamps. 7) We have in shinfo->hwtstamps an old timestamp. Fixes: ac45f602ee3d ("net: infrastructure for hardware time stamping") Signed-off-by: Eric Dumazet Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20251015063221.4171986-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/gro.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/gro.c b/net/core/gro.c index 5ba4504cfd28..76f9c3712422 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -639,6 +639,8 @@ EXPORT_SYMBOL(gro_receive_skb); static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { + struct skb_shared_info *shinfo; + if (unlikely(skb->pfmemalloc)) { consume_skb(skb); return; @@ -655,8 +657,12 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->encapsulation = 0; skb->ip_summed = CHECKSUM_NONE; - skb_shinfo(skb)->gso_type = 0; - skb_shinfo(skb)->gso_size = 0; + + shinfo = skb_shinfo(skb); + shinfo->gso_type = 0; + shinfo->gso_size = 0; + shinfo->hwtstamps.hwtstamp = 0; + if (unlikely(skb->slow_gro)) { 
skb_orphan(skb); skb_ext_reset(skb); -- cgit v1.2.3 From bf29555f5bdc017bac22ca66fcb6c9f46ec8788f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Wiesb=C3=B6ck?= Date: Wed, 15 Oct 2025 22:15:43 +0200 Subject: rtnetlink: Allow deleting FDB entries in user namespace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Creating FDB entries is possible from a non-initial user namespace when having CAP_NET_ADMIN, yet, when deleting FDB entries, processes receive an EPERM because the capability is always checked against the initial user namespace. This restricts the FDB management from unprivileged containers. Drop the netlink_capable check in rtnl_fdb_del as it was originally dropped in c5c351088ae7 and reintroduced in 1690be63a27b without intention. This patch was tested using a container on GyroidOS, where it was possible to delete FDB entries from an unprivileged user namespace and private network namespace. Fixes: 1690be63a27b ("bridge: Add vlan support to static neighbors") Reviewed-by: Michael Weiß Tested-by: Harshal Gohel Signed-off-by: Johannes Wiesböck Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20251015201548.319871-1-johannes.wiesboeck@aisec.fraunhofer.de Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8040ff7c356e..576d5ec3bb36 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4715,9 +4715,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, int err; u16 vid; - if (!netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; - if (!del_bulk) { err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); -- cgit v1.2.3 From c5394b8b7a92c5013d2917591e28e938fe7ff2a2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Oct 2025 16:11:14 +0000 Subject: net: gro_cells: fix lock imbalance in 
gro_cells_receive() syzbot found that the local_unlock_nested_bh() call was missing in some cases. WARNING: possible recursive locking detected syzkaller #0 Not tainted -------------------------------------------- syz.2.329/7421 is trying to acquire lock: ffffe8ffffd48888 ((&cell->bh_lock)){+...}-{3:3}, at: spin_lock include/linux/spinlock_rt.h:44 [inline] ffffe8ffffd48888 ((&cell->bh_lock)){+...}-{3:3}, at: gro_cells_receive+0x404/0x790 net/core/gro_cells.c:30 but task is already holding lock: ffffe8ffffd48888 ((&cell->bh_lock)){+...}-{3:3}, at: spin_lock include/linux/spinlock_rt.h:44 [inline] ffffe8ffffd48888 ((&cell->bh_lock)){+...}-{3:3}, at: gro_cells_receive+0x404/0x790 net/core/gro_cells.c:30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock((&cell->bh_lock)); lock((&cell->bh_lock)); *** DEADLOCK *** Given the introduction of the @have_bh_lock variable, it seems the author's intent was to have the local_unlock_nested_bh() after the @unlock label. 
Fixes: 25718fdcbdd2 ("net: gro_cells: Use nested-BH locking for gro_cell") Reported-by: syzbot+f9651b9a8212e1c8906f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68f65eb9.a70a0220.205af.0034.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Sebastian Andrzej Siewior Reviewed-by: David Ahern Link: https://patch.msgid.link/20251020161114.1891141-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/gro_cells.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index b43911562f4d..fd57b845de33 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -43,12 +43,11 @@ drop: if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); - if (have_bh_lock) - local_unlock_nested_bh(&gcells->cells->bh_lock); - res = NET_RX_SUCCESS; unlock: + if (have_bh_lock) + local_unlock_nested_bh(&gcells->cells->bh_lock); rcu_read_unlock(); return res; } -- cgit v1.2.3 From f6ceec6434b5efff62cecbaa2ff74fc29b96c0c6 Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Tue, 21 Oct 2025 12:09:40 +0200 Subject: net: datagram: introduce datagram_poll_queue for custom receive queues Some protocols using TCP encapsulation (e.g., espintcp, openvpn) deliver userspace-bound packets through a custom skb queue rather than the standard sk_receive_queue. Introduce datagram_poll_queue that accepts an explicit receive queue, and convert datagram_poll into a wrapper around datagram_poll_queue. This allows protocols with custom skb queues to reuse the core polling logic without relying on sk_receive_queue. 
Cc: Sabrina Dubroca Cc: Antonio Quartulli Signed-off-by: Ralf Lici Reviewed-by: Sabrina Dubroca Reviewed-by: Antonio Quartulli Link: https://patch.msgid.link/20251021100942.195010-2-ralf@mandelbit.com Signed-off-by: Paolo Abeni --- net/core/datagram.c | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) (limited to 'net/core') diff --git a/net/core/datagram.c b/net/core/datagram.c index cb4b9ef2e4e3..c285c6465923 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -920,21 +920,22 @@ fault: EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg); /** - * datagram_poll - generic datagram poll + * datagram_poll_queue - same as datagram_poll, but on a specific receive + * queue * @file: file struct * @sock: socket * @wait: poll table + * @rcv_queue: receive queue to poll * - * Datagram poll: Again totally generic. This also handles - * sequenced packet sockets providing the socket receive queue - * is only ever holding data ready to receive. + * Performs polling on the given receive queue, handling shutdown, error, + * and connection state. This is useful for protocols that deliver + * userspace-bound packets through a custom queue instead of + * sk->sk_receive_queue. * - * Note: when you *don't* use this routine for this protocol, - * and you use a different write policy from sock_writeable() - * then please supply your own write_space callback. + * Return: poll bitmask indicating the socket's current state */ -__poll_t datagram_poll(struct file *file, struct socket *sock, - poll_table *wait) +__poll_t datagram_poll_queue(struct file *file, struct socket *sock, + poll_table *wait, struct sk_buff_head *rcv_queue) { struct sock *sk = sock->sk; __poll_t mask; @@ -956,7 +957,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, mask |= EPOLLHUP; /* readable? 
*/ - if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(rcv_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ @@ -978,4 +979,27 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, return mask; } +EXPORT_SYMBOL(datagram_poll_queue); + +/** + * datagram_poll - generic datagram poll + * @file: file struct + * @sock: socket + * @wait: poll table + * + * Datagram poll: Again totally generic. This also handles + * sequenced packet sockets providing the socket receive queue + * is only ever holding data ready to receive. + * + * Note: when you *don't* use this routine for this protocol, + * and you use a different write policy from sock_writeable() + * then please supply your own write_space callback. + * + * Return: poll bitmask indicating the socket's current state + */ +__poll_t datagram_poll(struct file *file, struct socket *sock, poll_table *wait) +{ + return datagram_poll_queue(file, sock, wait, + &sock->sk->sk_receive_queue); +} EXPORT_SYMBOL(datagram_poll); -- cgit v1.2.3