From fdae0ab67d57d480dc61e9fb45678bbdc3786711 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Sep 2025 12:19:42 +0000 Subject: net: use NUMA drop counters for softnet_data.dropped Hosts under DOS attack can suffer from false sharing in enqueue_to_backlog() : atomic_inc(&sd->dropped). This is because sd->dropped can be touched from many cpus, possibly residing on different NUMA nodes. Generalize the sk_drop_counters infrastucture added in commit c51613fa276f ("net: add sk->sk_drop_counters") and use it to replace softnet_data.dropped with NUMA friendly softnet_data.drop_counters. This adds 64 bytes per cpu, maybe more in the future if we increase the number of counters (currently 2) per 'struct numa_drop_counters'. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250909121942.1202585-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f3a3b761abfb..f5a840c07cf1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3459,6 +3459,32 @@ static inline bool dev_has_header(const struct net_device *dev) return dev->header_ops && dev->header_ops->create; } +struct numa_drop_counters { + atomic_t drops0 ____cacheline_aligned_in_smp; + atomic_t drops1 ____cacheline_aligned_in_smp; +}; + +static inline int numa_drop_read(const struct numa_drop_counters *ndc) +{ + return atomic_read(&ndc->drops0) + atomic_read(&ndc->drops1); +} + +static inline void numa_drop_add(struct numa_drop_counters *ndc, int val) +{ + int n = numa_node_id() % 2; + + if (n) + atomic_add(val, &ndc->drops1); + else + atomic_add(val, &ndc->drops0); +} + +static inline void numa_drop_reset(struct numa_drop_counters *ndc) +{ + atomic_set(&ndc->drops0, 0); + atomic_set(&ndc->drops1, 0); +} + /* * Incoming packets are placed on per-CPU queues */ @@ -3504,7 +3530,7 @@ struct softnet_data { struct sk_buff_head input_pkt_queue; struct napi_struct backlog; - atomic_t dropped ____cacheline_aligned_in_smp; + struct numa_drop_counters drop_counters; /* Another possibly contended cache line */ spinlock_t defer_lock ____cacheline_aligned_in_smp; -- cgit v1.2.3 From 00c94ca2b99e6610e483f92e531b319eeaed94aa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 16 Sep 2025 17:09:29 -0700 Subject: psp: base PSP device support Add a netlink family for PSP and allow drivers to register support. The "PSP device" is its own object. This allows us to perform more flexible reference counting / lifetime control than if PSP information was part of net_device. In the future we should also be able to "delegate" PSP access to software devices, such as *vlan, veth or netkit more easily. Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Zahka Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250917000954.859376-3-daniel.zahka@gmail.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f5a840c07cf1..1c54d44805fa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1906,6 +1906,7 @@ enum netdev_reg_state { * device struct * @mpls_ptr: mpls_dev struct pointer * @mctp_ptr: MCTP specific data + * @psp_dev: PSP crypto device registered for this netdev * * @dev_addr: Hw address (before bcast, * because most packets are unicast) @@ -2310,6 +2311,9 @@ struct net_device { #if IS_ENABLED(CONFIG_MCTP) struct mctp_dev __rcu *mctp_ptr; #endif +#if IS_ENABLED(CONFIG_INET_PSP) + struct psp_dev __rcu *psp_dev; +#endif /* * Cache lines mostly used on receive path (including eth_type_trans()) -- cgit v1.2.3 From 3271f19bf7b9df665549666d789b9f126b4420c7 Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Tue, 23 Sep 2025 10:59:06 +0200 Subject: net: gso: restore ids of outer ip headers correctly Currently, NETIF_F_TSO_MANGLEID indicates that the inner-most ID can be mangled. Outer IDs can always be mangled. Make GSO preserve outer IDs by default, with NETIF_F_TSO_MANGLEID allowing both inner and outer IDs to be mangled. This commit also modifies a few drivers that use SKB_GSO_FIXEDID directly. Signed-off-by: Richard Gobert Reviewed-by: Edward Cree # for sfc Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250923085908.4687-4-richardbgobert@gmail.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1c54d44805fa..1b85454116f6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5320,13 +5320,18 @@ void skb_warn_bad_offload(const struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) { - netdev_features_t feature = (netdev_features_t)gso_type << NETIF_F_GSO_SHIFT; + netdev_features_t feature; + + if (gso_type & (SKB_GSO_TCP_FIXEDID | SKB_GSO_TCP_FIXEDID_INNER)) + gso_type |= __SKB_GSO_TCP_FIXEDID; + + feature = ((netdev_features_t)gso_type << NETIF_F_GSO_SHIFT) & NETIF_F_GSO_MASK; /* check flags correspondence */ BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); - BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(__SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_GRE != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT)); -- cgit v1.2.3 From 9c94ae6bb0b2895024b6e29fcc1cbec968b4776a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Sep 2025 08:49:32 +0000 Subject: net: make softnet_data.defer_count an atomic This is preparation work to remove the softnet_data.defer_lock, as it is contended on hosts with large number of cores. Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250928084934.3266948-2-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1b85454116f6..27e3fa69253f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3538,7 +3538,7 @@ struct softnet_data { /* Another possibly contended cache line */ spinlock_t defer_lock ____cacheline_aligned_in_smp; - int defer_count; + atomic_t defer_count; int defer_ipi_scheduled; struct sk_buff *defer_list; call_single_data_t defer_csd; -- cgit v1.2.3 From 844c9db7f7f5fe1b0b53ed9f1c2bc7313b3021c8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Sep 2025 08:49:33 +0000 Subject: net: use llist for sd->defer_list Get rid of sd->defer_lock and adopt llist operations. We optimize skb_attempt_defer_free() for the common case, where the packet is queued. Otherwise sd->defer_count is increasing, until skb_defer_free_flush() clears it. Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250928084934.3266948-3-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 27e3fa69253f..5c9aa16933d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3537,10 +3537,10 @@ struct softnet_data { struct numa_drop_counters drop_counters; /* Another possibly contended cache line */ - spinlock_t defer_lock ____cacheline_aligned_in_smp; - atomic_t defer_count; - int defer_ipi_scheduled; - struct sk_buff *defer_list; + struct llist_head defer_list ____cacheline_aligned_in_smp; + atomic_long_t defer_count; + + int defer_ipi_scheduled ____cacheline_aligned_in_smp; call_single_data_t defer_csd; }; -- cgit v1.2.3 From 5628f3fe3b16114e8424bbfcf0594caef8958a06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Sep 2025 08:49:34 +0000 Subject: net: add NUMA awareness to skb_attempt_defer_free() Instead of sharing sd->defer_list & sd->defer_count with many cpus, add one pair for each NUMA node. Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250928084934.3266948-4-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5c9aa16933d1..d1a687444b27 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3536,10 +3536,6 @@ struct softnet_data { struct numa_drop_counters drop_counters; - /* Another possibly contended cache line */ - struct llist_head defer_list ____cacheline_aligned_in_smp; - atomic_long_t defer_count; - int defer_ipi_scheduled ____cacheline_aligned_in_smp; call_single_data_t defer_csd; }; -- cgit v1.2.3