diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2026-02-10 20:57:52 -0800 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-02-10 20:57:53 -0800 |
| commit | 70f1fbee85a4ee1c4d9278dff30b51f16e1d99db (patch) | |
| tree | 8e7f893b02e7c9d44c143eae2c304a7a84e65c7e | |
| parent | 792aaea994537daa78f31a86c948ccbefa8f4706 (diff) | |
| parent | 97d7ae6e14c80ec0c2558d24d818212590f2d64f (diff) | |
Merge branch 'ipv6-tcp-no-longer-rebuild-fl6-at-each-transmit'
Eric Dumazet says:
====================
ipv6: tcp: no longer rebuild fl6 at each transmit
TCP v6 spends a good amount of time rebuilding a fresh fl6 at each
transmit in inet6_csk_xmit()/inet6_csk_route_socket().
TCP v4 caches the information in inet->cork.fl.u.ip4 instead.
This series changes TCP v6 to do the same, caching the flow in
inet->cork.fl.u.ip6, which saves CPU cycles and reduces cache line
misses and stack use.
====================
Link: https://patch.msgid.link/20260206173426.1638518-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
| -rw-r--r-- | include/linux/ipv6.h | 4 | ||||
| -rw-r--r-- | include/net/inet6_connection_sock.h | 4 | ||||
| -rw-r--r-- | net/ipv6/af_inet6.c | 4 | ||||
| -rw-r--r-- | net/ipv6/datagram.c | 21 | ||||
| -rw-r--r-- | net/ipv6/inet6_connection_sock.c | 57 | ||||
| -rw-r--r-- | net/ipv6/tcp_ipv6.c | 64 |
6 files changed, 82 insertions, 72 deletions
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bdbd63f9a85e..443053a76dcf 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -230,6 +230,10 @@ struct ipv6_fl_socklist; struct ipv6_pinfo { /* Used in tx path (inet6_csk_route_socket(), ip6_xmit()) */ struct in6_addr saddr; + union { + struct in6_addr daddr; + struct in6_addr final; + }; __be32 flow_label; u32 dst_cookie; struct ipv6_txoptions __rcu *opt; diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 745891d2e113..ece8dabd209a 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -18,7 +18,9 @@ struct sk_buff; struct sock; struct sockaddr; -struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, +struct dst_entry *inet6_csk_route_req(const struct sock *sk, + struct dst_entry *dst, + struct flowi6 *fl6, const struct request_sock *req, u8 proto); int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 0476dbc8edb2..31ba677d0442 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -825,7 +825,7 @@ int inet6_sk_rebuild_header(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct in6_addr *final_p, final; + struct in6_addr *final_p; struct dst_entry *dst; struct flowi6 *fl6; @@ -847,7 +847,7 @@ int inet6_sk_rebuild_header(struct sock *sk) security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); rcu_read_lock(); - final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &np->final); rcu_read_unlock(); dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 83e03176819c..c564b68a0562 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -72,12 +72,12 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, 
int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr) { struct ip6_flowlabel *flowlabel = NULL; - struct in6_addr *final_p, final; - struct ipv6_txoptions *opt; - struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi6 fl6; + struct ipv6_txoptions *opt; + struct in6_addr *final_p; + struct dst_entry *dst; + struct flowi6 *fl6; int err = 0; if (inet6_test_bit(SNDFLOW, sk) && @@ -86,14 +86,15 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr) if (IS_ERR(flowlabel)) return -EINVAL; } - ip6_datagram_flow_key_init(&fl6, sk); + fl6 = &inet_sk(sk)->cork.fl.u.ip6; + ip6_datagram_flow_key_init(fl6, sk); rcu_read_lock(); opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt); - final_p = fl6_update_dst(&fl6, opt, &final); + final_p = fl6_update_dst(fl6, opt, &np->final); rcu_read_unlock(); - dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); + dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto out; @@ -101,17 +102,17 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr) if (fix_sk_saddr) { if (ipv6_addr_any(&np->saddr)) - np->saddr = fl6.saddr; + np->saddr = fl6->saddr; if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - sk->sk_v6_rcv_saddr = fl6.saddr; + sk->sk_v6_rcv_saddr = fl6->saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); } } - ip6_sk_dst_store_flow(sk, dst, &fl6); + ip6_sk_dst_store_flow(sk, dst, fl6); out: fl6_sock_release(flowlabel); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index e30172e634a6..11fc2f7de2fe 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -25,6 +25,7 @@ #include <net/sock_reuseport.h> struct dst_entry *inet6_csk_route_req(const struct sock *sk, + struct dst_entry *dst, struct flowi6 *fl6, const struct request_sock *req, u8 proto) @@ -32,7 +33,6 @@ struct dst_entry 
*inet6_csk_route_req(const struct sock *sk, const struct inet_request_sock *ireq = inet_rsk(req); const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; - struct dst_entry *dst; memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = proto; @@ -48,10 +48,11 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->flowi6_uid = sk_uid(sk); security_req_classify_flow(req, flowi6_to_flowi_common(fl6)); - dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); - if (IS_ERR(dst)) - return NULL; - + if (!dst) { + dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); + if (IS_ERR(dst)) + return NULL; + } return dst; } @@ -60,7 +61,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *final_p, final; + struct in6_addr *final_p; struct dst_entry *dst; memset(fl6, 0, sizeof(*fl6)); @@ -77,41 +78,41 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); rcu_read_lock(); - final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &np->final); rcu_read_unlock(); - dst = __sk_dst_check(sk, np->dst_cookie); - if (!dst) { - dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); + dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); + + if (!IS_ERR(dst)) + ip6_dst_store(sk, dst, false, false); - if (!IS_ERR(dst)) - ip6_dst_store(sk, dst, false, false); - } return dst; } int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused) { + struct flowi6 *fl6 = &inet_sk(sk)->cork.fl.u.ip6; struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi6 fl6; struct dst_entry *dst; int res; - dst = inet6_csk_route_socket(sk, &fl6); - if (IS_ERR(dst)) { - WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst)); - sk->sk_route_caps = 0; - kfree_skb(skb); - return PTR_ERR(dst); + dst = __sk_dst_check(sk, 
np->dst_cookie); + if (unlikely(!dst)) { + dst = inet6_csk_route_socket(sk, fl6); + if (IS_ERR(dst)) { + WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst)); + sk->sk_route_caps = 0; + kfree_skb(skb); + return PTR_ERR(dst); + } + /* Restore final destination back after routing done */ + fl6->daddr = sk->sk_v6_daddr; } rcu_read_lock(); skb_dst_set_noref(skb, dst); - /* Restore final destination back after routing done */ - fl6.daddr = sk->sk_v6_daddr; - - res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), + res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt), np->tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); return res; @@ -120,13 +121,15 @@ EXPORT_SYMBOL_GPL(inet6_csk_xmit); struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) { - struct flowi6 fl6; - struct dst_entry *dst = inet6_csk_route_socket(sk, &fl6); + struct flowi6 *fl6 = &inet_sk(sk)->cork.fl.u.ip6; + struct dst_entry *dst; + + dst = inet6_csk_route_socket(sk, fl6); if (IS_ERR(dst)) return NULL; dst->ops->update_pmtu(dst, sk, NULL, mtu, true); - dst = inet6_csk_route_socket(sk, &fl6); + dst = inet6_csk_route_socket(sk, fl6); return IS_ERR(dst) ? 
NULL : dst; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c65a5bfd322a..d10487b4e5bf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -138,15 +138,15 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_connection_sock *icsk = inet_csk(sk); - struct in6_addr *saddr = NULL, *final_p, final; struct inet_timewait_death_row *tcp_death_row; struct ipv6_pinfo *np = tcp_inet6_sk(sk); + struct in6_addr *saddr = NULL, *final_p; struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct ipv6_txoptions *opt; struct dst_entry *dst; - struct flowi6 fl6; + struct flowi6 *fl6; int addr_type; int err; @@ -156,14 +156,15 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - memset(&fl6, 0, sizeof(fl6)); + fl6 = &inet_sk(sk)->cork.fl.u.ip6; + memset(fl6, 0, sizeof(*fl6)); if (inet6_test_bit(SNDFLOW, sk)) { - fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; - IP6_ECN_flow_init(fl6.flowlabel); - if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { + fl6->flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; + IP6_ECN_flow_init(fl6->flowlabel); + if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; - flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); + flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; fl6_sock_release(flowlabel); @@ -212,7 +213,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, } sk->sk_v6_daddr = usin->sin6_addr; - np->flow_label = fl6.flowlabel; + np->flow_label = fl6->flowlabel; /* * TCP over IPv4 @@ -260,24 +261,24 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) saddr = &sk->sk_v6_rcv_saddr; - fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = sk->sk_v6_daddr; 
- fl6.saddr = saddr ? *saddr : np->saddr; - fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_dport = usin->sin6_port; - fl6.fl6_sport = inet->inet_sport; - if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6.fl6_sport) - fl6.flowi6_flags = FLOWI_FLAG_ANY_SPORT; - fl6.flowi6_uid = sk_uid(sk); + fl6->flowi6_proto = IPPROTO_TCP; + fl6->daddr = sk->sk_v6_daddr; + fl6->saddr = saddr ? *saddr : np->saddr; + fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); + fl6->flowi6_oif = sk->sk_bound_dev_if; + fl6->flowi6_mark = sk->sk_mark; + fl6->fl6_dport = usin->sin6_port; + fl6->fl6_sport = inet->inet_sport; + if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6->fl6_sport) + fl6->flowi6_flags = FLOWI_FLAG_ANY_SPORT; + fl6->flowi6_uid = sk_uid(sk); opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); - final_p = fl6_update_dst(&fl6, opt, &final); + final_p = fl6_update_dst(fl6, opt, &np->final); - security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); - dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p); + dst = ip6_dst_lookup_flow(net, sk, fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; @@ -287,7 +288,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (!saddr) { - saddr = &fl6.saddr; + saddr = &fl6->saddr; err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); if (err) @@ -538,7 +539,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, u8 tclass; /* First, grab a route. 
*/ - if (!dst && (dst = inet6_csk_route_req(sk, fl6, req, + if (!dst && (dst = inet6_csk_route_req(sk, NULL, fl6, req, IPPROTO_TCP)) == NULL) goto done; @@ -788,7 +789,7 @@ static struct dst_entry *tcp_v6_route_req(const struct sock *sk, if (security_inet_conn_request(sk, skb, req)) return NULL; - return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); + return inet6_csk_route_req(sk, NULL, &fl->u.ip6, req, IPPROTO_TCP); } struct request_sock_ops tcp6_request_sock_ops __read_mostly = { @@ -1317,12 +1318,12 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * struct request_sock *req_unhash, bool *own_req) { - struct inet_request_sock *ireq; - struct ipv6_pinfo *newnp; const struct ipv6_pinfo *np = tcp_inet6_sk(sk); + struct inet_request_sock *ireq; struct ipv6_txoptions *opt; struct inet_sock *newinet; bool found_dup_sk = false; + struct ipv6_pinfo *newnp; struct tcp_sock *newtp; struct sock *newsk; #ifdef CONFIG_TCP_MD5SIG @@ -1391,11 +1392,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (sk_acceptq_is_full(sk)) goto exit_overflow; - if (!dst) { - dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); - if (!dst) - goto exit; - } + dst = inet6_csk_route_req(sk, dst, &fl6, req, IPPROTO_TCP); + if (!dst) + goto exit; newsk = tcp_create_openreq_child(sk, req, skb); if (!newsk) @@ -1411,6 +1410,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * inet6_sk_rx_dst_set(newsk, skb); newinet = inet_sk(newsk); + newinet->cork.fl.u.ip6 = fl6; newinet->pinet6 = tcp_inet6_sk(newsk); newinet->ipv6_fl_list = NULL; newinet->inet_opt = NULL; |
