diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2026-01-21 19:28:34 -0800 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-01-21 19:28:35 -0800 |
| commit | 9de76f55b9f856b317773e106253051fb33a9e92 (patch) | |
| tree | c22b5d9ac23eab395709dfcce832ac9f679a7e6f /include | |
| parent | a4674aa58be53921f2aba62e143cc338d6ab142c (diff) | |
| parent | b8d9b7daf0af367f3fff017de0873ab825a6dbbe (diff) | |
Merge branch 'gro-inline-tcp6_gro_-receive-complete'
Eric Dumazet says:
====================
gro: inline tcp6_gro_{receive,complete}
On some platforms, GRO stack is too deep and causes cpu stalls.
Decreasing call depths by one shows a 1.5 % gain on Zen2 cpus.
(32 RX queues, 100Gbit NIC, RFS enabled, tcp_rr with 128 threads and 10,000 flows)
We can go further by inlining ipv6_gro_{receive,complete}
and take care of IPv4 if there is interest.
Note: two temporary __always_inline will be replaced with
inline_for_performance when/if available.
Cumulative size increase for this series (of 3):
$ scripts/bloat-o-meter -t vmlinux.0 vmlinux.3
add/remove: 2/2 grow/shrink: 5/1 up/down: 1572/-471 (1101)
Function old new delta
ipv6_gro_receive 1069 1846 +777
ipv6_gro_complete 433 733 +300
tcp6_check_fraglist_gro - 272 +272
tcp6_gro_complete 227 306 +79
tcp4_gro_complete 325 397 +72
ipv6_offload_init 218 274 +56
__pfx_tcp6_check_fraglist_gro - 16 +16
__pfx___skb_incr_checksum_unnecessary 32 - -32
__skb_incr_checksum_unnecessary 186 - -186
tcp6_gro_receive 959 706 -253
Total: Before=22592724, After=22593825, chg +0.00%
====================
Link: https://patch.msgid.link/20260120164903.1912995-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/skbuff.h | 2 | ||||
| -rw-r--r-- | include/net/gro.h | 5 | ||||
| -rw-r--r-- | include/net/tcp.h | 2 |
3 files changed, 3 insertions, 6 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 86737076101d..e6bfe5d0c525 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4763,7 +4763,7 @@ static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb) } } -static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) +static __always_inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_UNNECESSARY) { if (skb->csum_level < SKB_MAX_CSUM_LEVEL) diff --git a/include/net/gro.h b/include/net/gro.h index b65f631c521d..2300b6da05b2 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -405,9 +405,8 @@ INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, - struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); +struct sk_buff *udp6_gro_receive(struct list_head *, struct sk_buff *); +int udp6_gro_complete(struct sk_buff *, int); #define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ ({ \ diff --git a/include/net/tcp.h b/include/net/tcp.h index 25143f156957..b38327606454 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2324,8 +2324,6 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb, struct tcphdr *th); INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)); -INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff)); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)); #ifdef CONFIG_INET void tcp_gro_complete(struct sk_buff *skb); #else |
