summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2026-02-10 20:57:52 -0800
committerJakub Kicinski <kuba@kernel.org>2026-02-10 20:57:53 -0800
commit70f1fbee85a4ee1c4d9278dff30b51f16e1d99db (patch)
tree8e7f893b02e7c9d44c143eae2c304a7a84e65c7e
parent792aaea994537daa78f31a86c948ccbefa8f4706 (diff)
parent97d7ae6e14c80ec0c2558d24d818212590f2d64f (diff)
Merge branch 'ipv6-tcp-no-longer-rebuild-fl6-at-each-transmit'
Eric Dumazet says: ==================== ipv6: tcp: no longer rebuild fl6 at each transmit TCP v6 spends a good amount of time rebuilding a fresh fl6 at each transmit in inet6_csk_xmit()/inet6_csk_route_socket(). TCP v4 caches the information in inet->cork.fl.u.ip4 instead. This series changes TCP v6 to behave the same, saving cpu cycles and reducing cache line misses and stack use. ==================== Link: https://patch.msgid.link/20260206173426.1638518-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--include/linux/ipv6.h4
-rw-r--r--include/net/inet6_connection_sock.h4
-rw-r--r--net/ipv6/af_inet6.c4
-rw-r--r--net/ipv6/datagram.c21
-rw-r--r--net/ipv6/inet6_connection_sock.c57
-rw-r--r--net/ipv6/tcp_ipv6.c64
6 files changed, 82 insertions, 72 deletions
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index bdbd63f9a85e..443053a76dcf 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -230,6 +230,10 @@ struct ipv6_fl_socklist;
struct ipv6_pinfo {
/* Used in tx path (inet6_csk_route_socket(), ip6_xmit()) */
struct in6_addr saddr;
+ union {
+ struct in6_addr daddr;
+ struct in6_addr final;
+ };
__be32 flow_label;
u32 dst_cookie;
struct ipv6_txoptions __rcu *opt;
diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index 745891d2e113..ece8dabd209a 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -18,7 +18,9 @@ struct sk_buff;
struct sock;
struct sockaddr;
-struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6,
+struct dst_entry *inet6_csk_route_req(const struct sock *sk,
+ struct dst_entry *dst,
+ struct flowi6 *fl6,
const struct request_sock *req, u8 proto);
int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 0476dbc8edb2..31ba677d0442 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -825,7 +825,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
- struct in6_addr *final_p, final;
+ struct in6_addr *final_p;
struct dst_entry *dst;
struct flowi6 *fl6;
@@ -847,7 +847,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
rcu_read_lock();
- final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &np->final);
rcu_read_unlock();
dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 83e03176819c..c564b68a0562 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -72,12 +72,12 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6,
int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr)
{
struct ip6_flowlabel *flowlabel = NULL;
- struct in6_addr *final_p, final;
- struct ipv6_txoptions *opt;
- struct dst_entry *dst;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct flowi6 fl6;
+ struct ipv6_txoptions *opt;
+ struct in6_addr *final_p;
+ struct dst_entry *dst;
+ struct flowi6 *fl6;
int err = 0;
if (inet6_test_bit(SNDFLOW, sk) &&
@@ -86,14 +86,15 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr)
if (IS_ERR(flowlabel))
return -EINVAL;
}
- ip6_datagram_flow_key_init(&fl6, sk);
+ fl6 = &inet_sk(sk)->cork.fl.u.ip6;
+ ip6_datagram_flow_key_init(fl6, sk);
rcu_read_lock();
opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
- final_p = fl6_update_dst(&fl6, opt, &final);
+ final_p = fl6_update_dst(fl6, opt, &np->final);
rcu_read_unlock();
- dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto out;
@@ -101,17 +102,17 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr)
if (fix_sk_saddr) {
if (ipv6_addr_any(&np->saddr))
- np->saddr = fl6.saddr;
+ np->saddr = fl6->saddr;
if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- sk->sk_v6_rcv_saddr = fl6.saddr;
+ sk->sk_v6_rcv_saddr = fl6->saddr;
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
}
}
- ip6_sk_dst_store_flow(sk, dst, &fl6);
+ ip6_sk_dst_store_flow(sk, dst, fl6);
out:
fl6_sock_release(flowlabel);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index e30172e634a6..11fc2f7de2fe 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -25,6 +25,7 @@
#include <net/sock_reuseport.h>
struct dst_entry *inet6_csk_route_req(const struct sock *sk,
+ struct dst_entry *dst,
struct flowi6 *fl6,
const struct request_sock *req,
u8 proto)
@@ -32,7 +33,6 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
const struct inet_request_sock *ireq = inet_rsk(req);
const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *final_p, final;
- struct dst_entry *dst;
memset(fl6, 0, sizeof(*fl6));
fl6->flowi6_proto = proto;
@@ -48,10 +48,11 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
fl6->flowi6_uid = sk_uid(sk);
security_req_classify_flow(req, flowi6_to_flowi_common(fl6));
- dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
- if (IS_ERR(dst))
- return NULL;
-
+ if (!dst) {
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
+ if (IS_ERR(dst))
+ return NULL;
+ }
return dst;
}
@@ -60,7 +61,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
{
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct in6_addr *final_p, final;
+ struct in6_addr *final_p;
struct dst_entry *dst;
memset(fl6, 0, sizeof(*fl6));
@@ -77,41 +78,41 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
rcu_read_lock();
- final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &np->final);
rcu_read_unlock();
- dst = __sk_dst_check(sk, np->dst_cookie);
- if (!dst) {
- dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
+ dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
+
+ if (!IS_ERR(dst))
+ ip6_dst_store(sk, dst, false, false);
- if (!IS_ERR(dst))
- ip6_dst_store(sk, dst, false, false);
- }
return dst;
}
int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused)
{
+ struct flowi6 *fl6 = &inet_sk(sk)->cork.fl.u.ip6;
struct ipv6_pinfo *np = inet6_sk(sk);
- struct flowi6 fl6;
struct dst_entry *dst;
int res;
- dst = inet6_csk_route_socket(sk, &fl6);
- if (IS_ERR(dst)) {
- WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst));
- sk->sk_route_caps = 0;
- kfree_skb(skb);
- return PTR_ERR(dst);
+ dst = __sk_dst_check(sk, np->dst_cookie);
+ if (unlikely(!dst)) {
+ dst = inet6_csk_route_socket(sk, fl6);
+ if (IS_ERR(dst)) {
+ WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst));
+ sk->sk_route_caps = 0;
+ kfree_skb(skb);
+ return PTR_ERR(dst);
+ }
+ /* Restore final destination back after routing done */
+ fl6->daddr = sk->sk_v6_daddr;
}
rcu_read_lock();
skb_dst_set_noref(skb, dst);
- /* Restore final destination back after routing done */
- fl6.daddr = sk->sk_v6_daddr;
-
- res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt),
+ res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt),
np->tclass, READ_ONCE(sk->sk_priority));
rcu_read_unlock();
return res;
@@ -120,13 +121,15 @@ EXPORT_SYMBOL_GPL(inet6_csk_xmit);
struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
{
- struct flowi6 fl6;
- struct dst_entry *dst = inet6_csk_route_socket(sk, &fl6);
+ struct flowi6 *fl6 = &inet_sk(sk)->cork.fl.u.ip6;
+ struct dst_entry *dst;
+
+ dst = inet6_csk_route_socket(sk, fl6);
if (IS_ERR(dst))
return NULL;
dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
- dst = inet6_csk_route_socket(sk, &fl6);
+ dst = inet6_csk_route_socket(sk, fl6);
return IS_ERR(dst) ? NULL : dst;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c65a5bfd322a..d10487b4e5bf 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -138,15 +138,15 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
{
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
struct inet_connection_sock *icsk = inet_csk(sk);
- struct in6_addr *saddr = NULL, *final_p, final;
struct inet_timewait_death_row *tcp_death_row;
struct ipv6_pinfo *np = tcp_inet6_sk(sk);
+ struct in6_addr *saddr = NULL, *final_p;
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
struct ipv6_txoptions *opt;
struct dst_entry *dst;
- struct flowi6 fl6;
+ struct flowi6 *fl6;
int addr_type;
int err;
@@ -156,14 +156,15 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
if (usin->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
- memset(&fl6, 0, sizeof(fl6));
+ fl6 = &inet_sk(sk)->cork.fl.u.ip6;
+ memset(fl6, 0, sizeof(*fl6));
if (inet6_test_bit(SNDFLOW, sk)) {
- fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
- IP6_ECN_flow_init(fl6.flowlabel);
- if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
+ fl6->flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
+ IP6_ECN_flow_init(fl6->flowlabel);
+ if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
struct ip6_flowlabel *flowlabel;
- flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
+ flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
if (IS_ERR(flowlabel))
return -EINVAL;
fl6_sock_release(flowlabel);
@@ -212,7 +213,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
}
sk->sk_v6_daddr = usin->sin6_addr;
- np->flow_label = fl6.flowlabel;
+ np->flow_label = fl6->flowlabel;
/*
* TCP over IPv4
@@ -260,24 +261,24 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
saddr = &sk->sk_v6_rcv_saddr;
- fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = sk->sk_v6_daddr;
- fl6.saddr = saddr ? *saddr : np->saddr;
- fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
- fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_dport = usin->sin6_port;
- fl6.fl6_sport = inet->inet_sport;
- if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6.fl6_sport)
- fl6.flowi6_flags = FLOWI_FLAG_ANY_SPORT;
- fl6.flowi6_uid = sk_uid(sk);
+ fl6->flowi6_proto = IPPROTO_TCP;
+ fl6->daddr = sk->sk_v6_daddr;
+ fl6->saddr = saddr ? *saddr : np->saddr;
+ fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
+ fl6->flowi6_oif = sk->sk_bound_dev_if;
+ fl6->flowi6_mark = sk->sk_mark;
+ fl6->fl6_dport = usin->sin6_port;
+ fl6->fl6_sport = inet->inet_sport;
+ if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6->fl6_sport)
+ fl6->flowi6_flags = FLOWI_FLAG_ANY_SPORT;
+ fl6->flowi6_uid = sk_uid(sk);
opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
- final_p = fl6_update_dst(&fl6, opt, &final);
+ final_p = fl6_update_dst(fl6, opt, &np->final);
- security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
- dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(net, sk, fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto failure;
@@ -287,7 +288,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
if (!saddr) {
- saddr = &fl6.saddr;
+ saddr = &fl6->saddr;
err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
if (err)
@@ -538,7 +539,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
u8 tclass;
/* First, grab a route. */
- if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
+ if (!dst && (dst = inet6_csk_route_req(sk, NULL, fl6, req,
IPPROTO_TCP)) == NULL)
goto done;
@@ -788,7 +789,7 @@ static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
if (security_inet_conn_request(sk, skb, req))
return NULL;
- return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
+ return inet6_csk_route_req(sk, NULL, &fl->u.ip6, req, IPPROTO_TCP);
}
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
@@ -1317,12 +1318,12 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
struct request_sock *req_unhash,
bool *own_req)
{
- struct inet_request_sock *ireq;
- struct ipv6_pinfo *newnp;
const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
+ struct inet_request_sock *ireq;
struct ipv6_txoptions *opt;
struct inet_sock *newinet;
bool found_dup_sk = false;
+ struct ipv6_pinfo *newnp;
struct tcp_sock *newtp;
struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
@@ -1391,11 +1392,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
if (sk_acceptq_is_full(sk))
goto exit_overflow;
- if (!dst) {
- dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
- if (!dst)
- goto exit;
- }
+ dst = inet6_csk_route_req(sk, dst, &fl6, req, IPPROTO_TCP);
+ if (!dst)
+ goto exit;
newsk = tcp_create_openreq_child(sk, req, skb);
if (!newsk)
@@ -1411,6 +1410,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
inet6_sk_rx_dst_set(newsk, skb);
newinet = inet_sk(newsk);
+ newinet->cork.fl.u.ip6 = fl6;
newinet->pinet6 = tcp_inet6_sk(newsk);
newinet->ipv6_fl_list = NULL;
newinet->inet_opt = NULL;