author		Jakub Kicinski <kuba@kernel.org>	2025-11-07 19:02:42 -0800
committer	Jakub Kicinski <kuba@kernel.org>	2025-11-07 19:02:43 -0800
commit		86b721bb0b4c44499a29253b9fe26fa9ec5d2d82 (patch)
tree		3262c2483a8503e1ee04820311157fb6db8726d7
parent		fd9557c3606bb683c01a6c7627e915b539b9a8df (diff)
parent		b61785852ed0a0e7dc16b606157e4a0228cd76cf (diff)
Merge branch 'net-use-skb_attempt_defer_free-in-napi_consume_skb'
Eric Dumazet says:
====================
net: use skb_attempt_defer_free() in napi_consume_skb()
There is a lack of NUMA awareness, and more generally a lack of
slab cache affinity, on the TX completion path.
Modern drivers use napi_consume_skb(), hoping to stash sk_buffs in
per-cpu caches so that they can be recycled in the RX path.
Only use this cache if the skb was allocated on the current cpu;
otherwise use skb_attempt_defer_free() so that the skb is freed
on the cpu that allocated it.
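
In rough terms, the new dispatch at the top of napi_consume_skb()
looks like this (a simplified sketch mirroring the patch below, not
the verbatim kernel code; the existing recycle/free tail is elided):

void napi_consume_skb(struct sk_buff *skb, int budget)
{
	/* zero budget means a non-NAPI caller, e.g. netpoll */
	if (unlikely(!budget)) {
		dev_consume_skb_any(skb);
		return;
	}

	/* new: a non-shared skb allocated on another cpu skips the
	 * local napi cache and is handed back to its allocating cpu
	 */
	if (skb->alloc_cpu != smp_processor_id() && !skb_shared(skb)) {
		skb_release_head_state(skb);
		return skb_attempt_defer_free(skb);
	}

	/* same-cpu skbs keep using the per-cpu napi skb cache */
	if (!skb_unref(skb))
		return;
	/* ... existing recycle/free path ... */
}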
This removes contention on SLUB spinlocks and data structures,
and makes sure that recycled sk_buffs have correct NUMA locality.
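
For context, skb_attempt_defer_free() parks the skb on a per-cpu
list owned by skb->alloc_cpu and pokes that cpu so it frees the
backlog NUMA-locally from its net_rx softirq. A loose sketch of
that idea follows; defer_list_add() and kick_remote_cpu() are
stand-ins for the real per-cpu list and IPI machinery, with the
cap enforced by net.core.skb_defer_max only hinted at:

static void defer_free_sketch(struct sk_buff *skb)
{
	int cpu = skb->alloc_cpu;

	/* already on the allocating cpu, or that cpu went away:
	 * nothing to gain from deferring, free immediately
	 */
	if (cpu == raw_smp_processor_id() || !cpu_online(cpu)) {
		__kfree_skb(skb);
		return;
	}

	/* queue on the remote cpu's list, bounded by skb_defer_max */
	if (!defer_list_add(cpu, skb)) {
		__kfree_skb(skb);	/* list full, free here after all */
		return;
	}

	kick_remote_cpu(cpu);	/* IPI: have the owner flush its list */
}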
After this series, I get a ~50% improvement for a UDP TX workload
on an AMD EPYC 9B45 (IDPF 200Gbit NIC with 32 TX queues).
I will later refactor skb_attempt_defer_free() so that it no longer
has to care about skb_shared() and skb_release_head_state().
====================
Link: https://patch.msgid.link/20251106202935.1776179-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--	Documentation/admin-guide/sysctl/net.rst	4
-rw-r--r--	net/core/hotdata.c	2
-rw-r--r--	net/core/skbuff.c	12

3 files changed, 11 insertions, 7 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 991773dcb9cf..369a738a6819 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -355,9 +355,9 @@ skb_defer_max
 -------------
 
 Max size (in skbs) of the per-cpu list of skbs being freed
-by the cpu which allocated them. Used by TCP stack so far.
+by the cpu which allocated them.
 
-Default: 64
+Default: 128
 
 optmem_max
 ----------
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index 95d0a4df1006..dddd5c287cf0 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -20,7 +20,7 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
 	.dev_tx_weight = 64,
 	.dev_rx_weight = 64,
 	.sysctl_max_skb_frags = MAX_SKB_FRAGS,
-	.sysctl_skb_defer_max = 64,
+	.sysctl_skb_defer_max = 128,
 	.sysctl_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE
 };
 EXPORT_SYMBOL(net_hotdata);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5b4bc8b1c7d5..7ac5f8aa1235 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1149,11 +1149,10 @@ void skb_release_head_state(struct sk_buff *skb)
 
 			       skb);
 #endif
+		skb->destructor = NULL;
 	}
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-	nf_conntrack_put(skb_nfct(skb));
-#endif
-	skb_ext_put(skb);
+	nf_reset_ct(skb);
+	skb_ext_reset(skb);
 }
 
 /* Free everything but the sk_buff shell. */
@@ -1477,6 +1476,11 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
 
 	DEBUG_NET_WARN_ON_ONCE(!in_softirq());
 
+	if (skb->alloc_cpu != smp_processor_id() && !skb_shared(skb)) {
+		skb_release_head_state(skb);
+		return skb_attempt_defer_free(skb);
+	}
+
 	if (!skb_unref(skb))
 		return;
 
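
The sysctl touched above is visible through the usual procfs mapping,
so the new default is easy to verify at runtime. A minimal userspace
sketch (the /proc path is the standard mapping of
net.core.skb_defer_max):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/core/skb_defer_max", "r");
	int val;

	if (!f) {
		perror("skb_defer_max");
		return 1;
	}
	if (fscanf(f, "%d", &val) == 1)
		/* expect 128 on kernels with this series applied */
		printf("per-cpu deferred-free list cap: %d skbs\n", val);
	fclose(f);
	return 0;
}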
