author     Jakub Kicinski <kuba@kernel.org>    2025-11-07 19:02:42 -0800
committer  Jakub Kicinski <kuba@kernel.org>    2025-11-07 19:02:43 -0800
commit     86b721bb0b4c44499a29253b9fe26fa9ec5d2d82 (patch)
tree       3262c2483a8503e1ee04820311157fb6db8726d7
parent     fd9557c3606bb683c01a6c7627e915b539b9a8df (diff)
parent     b61785852ed0a0e7dc16b606157e4a0228cd76cf (diff)
Merge branch 'net-use-skb_attempt_defer_free-in-napi_consume_skb'
Eric Dumazet says:

====================
net: use skb_attempt_defer_free() in napi_consume_skb()

There is a lack of NUMA awareness, and more generally a lack of slab-cache affinity, on the TX completion path.

Modern drivers use napi_consume_skb(), hoping to cache sk_buff in per-cpu caches so that they can be recycled in the RX path.

Only use this cache if the skb was allocated on the same cpu; otherwise use skb_attempt_defer_free() so that the skb is freed on the cpu that allocated it. This removes contention on SLUB spinlocks and data structures, and it makes sure that recycled sk_buff have correct NUMA locality.

After this series, I get ~50% improvement for a UDP TX workload on an AMD EPYC 9B45 (IDPF 200Gbit NIC with 32 TX queues).

I will later refactor skb_attempt_defer_free() so it no longer has to care about skb_shared() and skb_release_head_state().
====================

Link: https://patch.msgid.link/20251106202935.1776179-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
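As a rough illustration of the mechanism this series leans on, here is a minimal user-space model of "defer the free back to the allocating cpu": per-cpu lists protected by a mutex stand in for the kernel's per-cpu spinlock and softirq kick. Every name in it (struct buf, defer_free, drain_defer_list, DEFER_MAX) is invented for the sketch; this is not the kernel's skb_attempt_defer_free() implementation.

#include <pthread.h>
#include <stdlib.h>

#define NR_CPUS   8
#define DEFER_MAX 128	/* mirrors the new skb_defer_max default */

struct buf {
	struct buf *next;
	int alloc_cpu;		/* cpu that allocated this buffer */
	char data[2048];
};

struct defer_list {
	pthread_mutex_t lock;
	struct buf *head;
	int count;
};

static struct defer_list defer_lists[NR_CPUS] = {
	[0 ... NR_CPUS - 1] = { .lock = PTHREAD_MUTEX_INITIALIZER },
};

/* TX completion side: hand the buffer back to the cpu that allocated it. */
static void defer_free(struct buf *b, int this_cpu)
{
	struct defer_list *dl = &defer_lists[b->alloc_cpu];

	if (b->alloc_cpu == this_cpu) {
		free(b);		/* already local: free directly */
		return;
	}
	pthread_mutex_lock(&dl->lock);
	if (dl->count >= DEFER_MAX) {
		pthread_mutex_unlock(&dl->lock);
		free(b);		/* list full: fall back to a direct free */
		return;
	}
	b->next = dl->head;
	dl->head = b;
	dl->count++;
	pthread_mutex_unlock(&dl->lock);
	/* the kernel additionally kicks the remote cpu (NET_RX softirq) to drain */
}

/* Allocating-cpu side: drain the buffers other cpus queued back to us. */
static void drain_defer_list(int this_cpu)
{
	struct defer_list *dl = &defer_lists[this_cpu];
	struct buf *b, *next;

	pthread_mutex_lock(&dl->lock);
	b = dl->head;
	dl->head = NULL;
	dl->count = 0;
	pthread_mutex_unlock(&dl->lock);

	for (; b; b = next) {
		next = b->next;
		free(b);	/* freed by its allocator: slab and NUMA caches stay local */
	}
}

The only point of the model is the ownership rule: a buffer is always handed back to, and ultimately freed by, the cpu that allocated it, so allocator caches and NUMA placement stay consistent.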
-rw-r--r--   Documentation/admin-guide/sysctl/net.rst    4
-rw-r--r--   net/core/hotdata.c                          2
-rw-r--r--   net/core/skbuff.c                           12
3 files changed, 11 insertions, 7 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 991773dcb9cf..369a738a6819 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -355,9 +355,9 @@ skb_defer_max
-------------
Max size (in skbs) of the per-cpu list of skbs being freed
-by the cpu which allocated them. Used by TCP stack so far.
+by the cpu which allocated them.
-Default: 64
+Default: 128
optmem_max
----------
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index 95d0a4df1006..dddd5c287cf0 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -20,7 +20,7 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
.dev_tx_weight = 64,
.dev_rx_weight = 64,
.sysctl_max_skb_frags = MAX_SKB_FRAGS,
- .sysctl_skb_defer_max = 64,
+ .sysctl_skb_defer_max = 128,
.sysctl_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE
};
EXPORT_SYMBOL(net_hotdata);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5b4bc8b1c7d5..7ac5f8aa1235 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1149,11 +1149,10 @@ void skb_release_head_state(struct sk_buff *skb)
skb);
#endif
+ skb->destructor = NULL;
}
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- nf_conntrack_put(skb_nfct(skb));
-#endif
- skb_ext_put(skb);
+ nf_reset_ct(skb);
+ skb_ext_reset(skb);
}
/* Free everything but the sk_buff shell. */
@@ -1477,6 +1476,11 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
DEBUG_NET_WARN_ON_ONCE(!in_softirq());
+ if (skb->alloc_cpu != smp_processor_id() && !skb_shared(skb)) {
+ skb_release_head_state(skb);
+ return skb_attempt_defer_free(skb);
+ }
+
if (!skb_unref(skb))
return;
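One detail the diff shows but the cover letter does not spell out: napi_consume_skb() now calls skb_release_head_state() before handing the skb to skb_attempt_defer_free(), and the eventual free on the allocating cpu will release the skb again, so skb_release_head_state() appears to be made safe to run twice. That is what the added skb->destructor = NULL and the switch from the put helpers to nf_reset_ct()/skb_ext_reset() (which also clear the corresponding pointers) accomplish in the first hunk. The user-space sketch below (all names invented, not kernel code) shows why "drop and clear" makes a second release pass harmless:

#include <stdatomic.h>
#include <stdlib.h>

struct ext {
	atomic_int refcnt;
};

static void ext_put(struct ext *e)
{
	/* drop one reference, free on the last put */
	if (e && atomic_fetch_sub(&e->refcnt, 1) == 1)
		free(e);
}

struct obj {
	void (*destructor)(struct obj *);
	struct ext *ext;
};

static void release_state(struct obj *o)
{
	if (o->destructor) {
		o->destructor(o);
		o->destructor = NULL;	/* like the new skb->destructor = NULL */
	}
	ext_put(o->ext);
	o->ext = NULL;			/* "reset", not just "put" */
}

int main(void)
{
	struct ext *e = malloc(sizeof(*e));
	struct obj o = { .ext = e };

	atomic_init(&e->refcnt, 1);
	release_state(&o);	/* early release on the completing cpu */
	release_state(&o);	/* second pass at final free time: harmless */
	return 0;
}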