author		Jakub Kicinski <kuba@kernel.org>	2026-01-21 19:28:34 -0800
committer	Jakub Kicinski <kuba@kernel.org>	2026-01-21 19:28:35 -0800
commit		9de76f55b9f856b317773e106253051fb33a9e92 (patch)
tree		c22b5d9ac23eab395709dfcce832ac9f679a7e6f /include
parent		a4674aa58be53921f2aba62e143cc338d6ab142c (diff)
parent		b8d9b7daf0af367f3fff017de0873ab825a6dbbe (diff)
Merge branch 'gro-inline-tcp6_gro_-receive-complete'
Eric Dumazet says:

====================
gro: inline tcp6_gro_{receive,complete}

On some platforms, GRO stack is too deep and causes cpu stalls.

Decreasing call depths by one shows a 1.5 % gain on Zen2 cpus.
(32 RX queues, 100Gbit NIC, RFS enabled, tcp_rr with 128 threads
and 10,000 flows)

We can go further by inlining ipv6_gro_{receive,complete} and take
care of IPv4 if there is interest.

Note: two temporary __always_inline will be replaced with
inline_for_performance when/if available.

Cumulative size increase for this series (of 3):

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux.3
add/remove: 2/2 grow/shrink: 5/1 up/down: 1572/-471 (1101)
Function                                   old     new   delta
ipv6_gro_receive                          1069    1846    +777
ipv6_gro_complete                          433     733    +300
tcp6_check_fraglist_gro                      -     272    +272
tcp6_gro_complete                          227     306     +79
tcp4_gro_complete                          325     397     +72
ipv6_offload_init                          218     274     +56
__pfx_tcp6_check_fraglist_gro                -      16     +16
__pfx___skb_incr_checksum_unnecessary       32       -     -32
__skb_incr_checksum_unnecessary            186       -    -186
tcp6_gro_receive                           959     706    -253
Total: Before=22592724, After=22593825, chg +0.00%
====================

Link: https://patch.msgid.link/20260120164903.1912995-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
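As background for the numbers above: on retpoline-affected CPUs every indirect call through a per-protocol GRO callback is comparatively expensive, and even a direct call adds one stack frame to the per-packet path. The stand-alone C sketch below is only an illustration of that effect, not the kernel implementation; all demo_* names are invented. It contrasts dispatching the L4 completion handler through a function pointer with a handler the compiler is forced to expand into its IPv6-layer caller, which is the shape of the change this series makes for tcp6_gro_{receive,complete}.

/* Stand-alone sketch, not kernel code: demo_* names are invented. */
#include <stdio.h>

#define demo_always_inline inline __attribute__((__always_inline__))

struct demo_skb { int len; };

typedef int (*demo_gro_complete_t)(struct demo_skb *skb, int thoff);

/* "Before": generic code only holds a pointer, so every packet pays an
 * indirect call (plus retpoline cost on affected CPUs). */
static int demo_tcp6_complete(struct demo_skb *skb, int thoff)
{
	return skb->len - thoff;
}

static int demo_ipv6_complete_before(struct demo_skb *skb, int thoff,
				     demo_gro_complete_t cb)
{
	return cb(skb, thoff);		/* ipv6 frame -> tcp6 frame */
}

/* "After": the tcp6 handler is visible to the caller and forced inline,
 * so its body is emitted inside the ipv6 handler and the call vanishes. */
static demo_always_inline int demo_tcp6_complete_inline(struct demo_skb *skb,
							 int thoff)
{
	return skb->len - thoff;
}

static int demo_ipv6_complete_after(struct demo_skb *skb, int thoff)
{
	return demo_tcp6_complete_inline(skb, thoff);	/* no extra frame */
}

int main(void)
{
	struct demo_skb skb = { .len = 1500 };

	printf("%d %d\n",
	       demo_ipv6_complete_before(&skb, 40, demo_tcp6_complete),
	       demo_ipv6_complete_after(&skb, 40));
	return 0;
}

Compiled with gcc -O2, demo_ipv6_complete_after() should contain no call for the TCP step at all, which is the call-depth reduction the cover letter measures.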
Diffstat (limited to 'include')
-rw-r--r--	include/linux/skbuff.h	2
-rw-r--r--	include/net/gro.h	5
-rw-r--r--	include/net/tcp.h	2
3 files changed, 3 insertions, 6 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 86737076101d..e6bfe5d0c525 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4763,7 +4763,7 @@ static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb)
}
}
-static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
+static __always_inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
{
if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
diff --git a/include/net/gro.h b/include/net/gro.h
index b65f631c521d..2300b6da05b2 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -405,9 +405,8 @@ INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
- struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
+struct sk_buff *udp6_gro_receive(struct list_head *, struct sk_buff *);
+int udp6_gro_complete(struct sk_buff *, int);
#define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \
({ \
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 25143f156957..b38327606454 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2324,8 +2324,6 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
struct tcphdr *th);
INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
-INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb));
#ifdef CONFIG_INET
void tcp_gro_complete(struct sk_buff *skb);
#else
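A note on the header changes above: INDIRECT_CALLABLE_DECLARE() keeps an extern declaration on retpoline builds so that generic code can compare a stored GRO callback against known handlers and call them directly instead of indirectly. With tcp6_gro_{receive,complete} now reached from inside the IPv6 offload code itself, generic callers no longer need those declarations in tcp.h, and the gro.h hunk presumably makes the udp6 declarations unconditional for the same direct-call reason. The snippet below is a simplified, stand-alone illustration of that compare-and-call pattern; DEMO_* and demo_* names are invented, and the real helpers live in include/linux/indirect_call_wrapper.h.

/* Stand-alone sketch of the compare-against-a-known-target trick used by
 * the kernel's INDIRECT_CALL_*() helpers; DEMO_* and demo_* names are
 * invented for this illustration. */
#include <stdio.h>

typedef int (*demo_complete_t)(int thoff);

static int demo_tcp4_complete(int thoff) { return thoff + 4; }
static int demo_tcp6_complete(int thoff) { return thoff + 6; }

/* If the stored pointer matches a known built-in handler, call that handler
 * directly (a direct, potentially inlined call); only fall back to the
 * indirect call for unknown callbacks. */
#define DEMO_INDIRECT_CALL_2(f, f2, f1, ...)			\
	((f) == (f2) ? (f2)(__VA_ARGS__) :			\
	 (f) == (f1) ? (f1)(__VA_ARGS__) : (f)(__VA_ARGS__))

int main(void)
{
	demo_complete_t cb = demo_tcp6_complete;

	/* Resolves to a direct call to demo_tcp6_complete(). */
	printf("%d\n", DEMO_INDIRECT_CALL_2(cb, demo_tcp6_complete,
					    demo_tcp4_complete, 40));
	return 0;
}

The in-tree versions add branch-prediction hints and configuration checks around the same dispatch shape.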