summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jakub Kicinski <kuba@kernel.org> 2026-02-02 17:49:31 -0800
committer: Jakub Kicinski <kuba@kernel.org> 2026-02-02 17:49:32 -0800
commit: 8755aae4aa7569f079135a0590dbd0f7adcfacf4 (patch)
tree: 892fa362d9916a99eeb66b3bac3d61728d5e6212
parent: e0221553436b1c49a14ae251d95ea2e19c5b5a14 (diff)
parent: b409a7f7176bb8fc0002b8592d14b11ebe481b1d (diff)
Merge branch 'ipv6-misc-changes-in-output-path'
Eric Dumazet says:

====================
ipv6: misc changes in output path

Small optimizations mostly in ip6_xmit() path.

TX performance increases by about 3 %.

Patches 5-7: add dst4_mtu() and dst6_mtu() to save space.

Last patch colocates inet6_cork in inet_cork_full.

This series reduces kernel size by 494 bytes on x86_64:

scripts/bloat-o-meter -t vmlinux.old vmlinux.new
add/remove: 4/2 grow/shrink: 9/23 up/down: 665/-1159 (-494)
Function                                       old     new   delta
ip6_finish_output_gso_slowpath_drop              -     197    +197
ip6_xmit                                      1452    1595    +143
do_ipv6_getsockopt                            2855    2950     +95
kzalloc_noprof                                   -      55     +55
ip4ip6_err                                     918     955     +37
__icmp_send                                   1499    1532     +33
do_ip_getsockopt                              2573    2605     +32
__ip6_append_data                             4109    4137     +28
__pfx_kzalloc_noprof                             -      16     +16
__pfx_ip6_finish_output_gso_slowpath_drop        -      16     +16
ipmr_prepare_xmit                             1232    1238      +6
ip6_forward                                   1905    1909      +4
ip6_cork_release                               108     111      +3
ipv6_push_nfrag_opts                           489     486      -3
ipv6_push_frag_opts                             90      87      -3
ip6_finish_output2                            1446    1437      -9
ip6_tnl_xmit                                  2639    2627     -12
ip6_default_advmss                             176     160     -16
__ip6_rt_update_pmtu                          1087    1071     -16
tcp_v6_syn_recv_sock                          1715    1696     -19
tcp_v4_syn_recv_sock                          1107    1088     -19
__ip_make_skb                                 1339    1320     -19
ip_setup_cork                                  406     385     -21
ip6_setup_cork                                 732     710     -22
rawv6_push_pending_frames                      581     556     -25
ip6_push_pending_frames                        184     157     -27
udpv6_splice_eof                               203     170     -33
ip6_flush_pending_frames                       220     183     -37
ip6_append_data                                349     312     -37
udp_v6_push_pending_frames                     155     115     -40
sit_tunnel_xmit                               1957    1914     -43
__pfx_dst_mtu                                   64       -     -64
tcp_v4_mtu_reduced                             289     220     -69
tcp_v6_mtu_reduced                             209     139     -70
ip6_make_skb                                   574     484     -90
ip6_finish_output                              827     697    -130
dst_mtu                                        160       -    -160
fib6_nh_mtu_change                             511     336    -175
Total: Before=22584400, After=22583906, chg -0.00%
====================

Link: https://patch.msgid.link/20260130210303.3888261-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--  include/linux/ipv6.h                    8
-rw-r--r--  include/net/dst.h                       6
-rw-r--r--  include/net/inet_sock.h                10
-rw-r--r--  include/net/ip6_route.h                 6
-rw-r--r--  include/net/ipv6.h                     16
-rw-r--r--  net/ipv4/icmp.c                         2
-rw-r--r--  net/ipv4/ip_output.c                    4
-rw-r--r--  net/ipv4/ip_sockglue.c                  2
-rw-r--r--  net/ipv4/ipmr.c                         2
-rw-r--r--  net/ipv4/netfilter/nf_reject_ipv4.c     2
-rw-r--r--  net/ipv4/tcp_ipv4.c                    13
-rw-r--r--  net/ipv6/exthdrs.c                     55
-rw-r--r--  net/ipv6/ip6_gre.c                      2
-rw-r--r--  net/ipv6/ip6_output.c                 108
-rw-r--r--  net/ipv6/ip6_tunnel.c                   6
-rw-r--r--  net/ipv6/ipv6_sockglue.c                4
-rw-r--r--  net/ipv6/raw.c                          2
-rw-r--r--  net/ipv6/route.c                       10
-rw-r--r--  net/ipv6/sit.c                          2
-rw-r--r--  net/ipv6/tcp_ipv6.c                     9
20 files changed, 144 insertions, 125 deletions
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 7294e4e89b79..20aae8357dd1 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -205,13 +205,6 @@ struct ipv6_mc_socklist;
struct ipv6_ac_socklist;
struct ipv6_fl_socklist;
-struct inet6_cork {
- struct ipv6_txoptions *opt;
- u8 hop_limit;
- u8 tclass;
- u8 dontfrag:1;
-};
-
/* struct ipv6_pinfo - ipv6 private area */
struct ipv6_pinfo {
/* Used in tx path (inet6_csk_route_socket(), ip6_xmit()) */
@@ -267,7 +260,6 @@ struct ipv6_pinfo {
struct sk_buff *pktoptions;
struct sk_buff *rxpmtu;
- struct inet6_cork cork;
struct ipv6_mc_socklist __rcu *ipv6_mc_list;
struct ipv6_ac_socklist *ipv6_ac_list;
diff --git a/include/net/dst.h b/include/net/dst.h
index f8aa1239b4db..307073eae7f8 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -219,6 +219,12 @@ static inline u32 dst_mtu(const struct dst_entry *dst)
return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst);
}
+/* Variant of dst_mtu() for IPv4 users. */
+static inline u32 dst4_mtu(const struct dst_entry *dst)
+{
+ return INDIRECT_CALL_1(dst->ops->mtu, ipv4_mtu, dst);
+}
+
/* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */
static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric)
{
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 903b2263ec80..7cdcbed3e5cb 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -159,6 +159,13 @@ static inline bool inet_sk_bound_dev_eq(const struct net *net,
#endif
}
+struct inet6_cork {
+ struct ipv6_txoptions *opt;
+ u8 hop_limit;
+ u8 tclass;
+ u8 dontfrag:1;
+};
+
struct inet_cork {
unsigned int flags;
__be32 addr;
@@ -179,6 +186,9 @@ struct inet_cork {
struct inet_cork_full {
struct inet_cork base;
struct flowi fl;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_cork base6;
+#endif
};
struct ip_mc_socklist;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 7c5512baa4b2..a55f9bf95fe3 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -266,6 +266,12 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst,
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *));
+/* Variant of dst_mtu() for IPv6 users */
+static inline u32 dst6_mtu(const struct dst_entry *dst)
+{
+ return INDIRECT_CALL_1(dst->ops->mtu, ip6_mtu, dst);
+}
+
static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
{
const struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index cf2203ff2bfd..c27b9d7aeb7c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1107,8 +1107,7 @@ void ip6_flush_pending_frames(struct sock *sk);
int ip6_send_skb(struct sk_buff *skb);
struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue,
- struct inet_cork_full *cork,
- struct inet6_cork *v6_cork);
+ struct inet_cork_full *cork);
struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
@@ -1119,8 +1118,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
{
- return __ip6_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork,
- &inet6_sk(sk)->cork);
+ return __ip6_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork);
}
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
@@ -1151,11 +1149,11 @@ int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
* Extension header (options) processing
*/
-void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
- u8 *proto, struct in6_addr **daddr_p,
- struct in6_addr *saddr);
-void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
- u8 *proto);
+u8 ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
+ u8 proto, struct in6_addr **daddr_p,
+ struct in6_addr *saddr);
+u8 ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
+ u8 proto);
int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp,
__be16 *frag_offp);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 19c9c838967f..1d362a17a1c4 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -945,7 +945,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
/* RFC says return as much as we can without exceeding 576 bytes. */
- room = dst_mtu(&rt->dst);
+ room = dst4_mtu(&rt->dst);
if (room > 576)
room = 576;
room -= sizeof(struct iphdr) + icmp_param->replyopts.opt.optlen;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 75fcb58795bb..e4790cc7b5c2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1300,7 +1300,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
return -EFAULT;
cork->fragsize = ip_sk_use_pmtu(sk) ?
- dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
+ dst4_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
if (!inetdev_valid_mtu(cork->fragsize))
return -ENETUNREACH;
@@ -1439,7 +1439,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
pmtudisc = READ_ONCE(inet->pmtudisc);
if (pmtudisc == IP_PMTUDISC_DO ||
pmtudisc == IP_PMTUDISC_PROBE ||
- (skb->len <= dst_mtu(&rt->dst) &&
+ (skb->len <= dst4_mtu(&rt->dst) &&
ip_dont_fragment(sk, &rt->dst)))
df = htons(IP_DF);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 6d9c5c20b1c4..c062d9519818 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1634,7 +1634,7 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
val = 0;
dst = sk_dst_get(sk);
if (dst) {
- val = dst_mtu(dst);
+ val = dst4_mtu(dst);
dst_release(dst);
}
if (!val)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index ca9eaee4c2ef..131382c388e9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1895,7 +1895,7 @@ static int ipmr_prepare_xmit(struct net *net, struct mr_table *mrt,
return -1;
}
- if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
+ if (skb->len+encap > dst4_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
/* Do not fragment multicasts. Alas, IPv4 does not
* allow to send ICMP, so that packets will disappear
* to blackhole.
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index fae4aa4a5f09..fecf6621f679 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -303,7 +303,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
goto free_nskb;
/* "Never happens" */
- if (nskb->len > dst_mtu(skb_dst(nskb)))
+ if (nskb->len > dst4_mtu(skb_dst(nskb)))
goto free_nskb;
nf_ct_attach(nskb, oldskb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0fc8a42921aa..01fd56347260 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -374,7 +374,7 @@ void tcp_v4_mtu_reduced(struct sock *sk)
{
struct inet_sock *inet = inet_sk(sk);
struct dst_entry *dst;
- u32 mtu;
+ u32 mtu, dmtu;
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
return;
@@ -386,15 +386,14 @@ void tcp_v4_mtu_reduced(struct sock *sk)
/* Something is about to be wrong... Remember soft error
* for the case, if this connection will not able to recover.
*/
- if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
+ dmtu = dst4_mtu(dst);
+ if (mtu < dmtu && ip_dont_fragment(sk, dst))
WRITE_ONCE(sk->sk_err_soft, EMSGSIZE);
- mtu = dst_mtu(dst);
-
if (inet->pmtudisc != IP_PMTUDISC_DONT &&
ip_sk_accept_pmtu(sk) &&
- inet_csk(sk)->icsk_pmtu_cookie > mtu) {
- tcp_sync_mss(sk, mtu);
+ inet_csk(sk)->icsk_pmtu_cookie > dmtu) {
+ tcp_sync_mss(sk, dmtu);
/* Resend the TCP packet because it's
* clear that the old packet has been
@@ -1760,7 +1759,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
tcp_ca_openreq_child(newsk, dst);
- tcp_sync_mss(newsk, dst_mtu(dst));
+ tcp_sync_mss(newsk, dst4_mtu(dst));
newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
tcp_initialize_rcv_mss(newsk);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index cd318ff0dcd8..209fdf1b1aa9 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -1074,9 +1074,9 @@ fail_and_free:
* for headers.
*/
-static void ipv6_push_rthdr0(struct sk_buff *skb, u8 *proto,
- struct ipv6_rt_hdr *opt,
- struct in6_addr **addr_p, struct in6_addr *saddr)
+static u8 ipv6_push_rthdr0(struct sk_buff *skb, u8 proto,
+ struct ipv6_rt_hdr *opt,
+ struct in6_addr **addr_p, struct in6_addr *saddr)
{
struct rt0_hdr *phdr, *ihdr;
int hops;
@@ -1095,13 +1095,13 @@ static void ipv6_push_rthdr0(struct sk_buff *skb, u8 *proto,
phdr->addr[hops - 1] = **addr_p;
*addr_p = ihdr->addr;
- phdr->rt_hdr.nexthdr = *proto;
- *proto = NEXTHDR_ROUTING;
+ phdr->rt_hdr.nexthdr = proto;
+ return NEXTHDR_ROUTING;
}
-static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
- struct ipv6_rt_hdr *opt,
- struct in6_addr **addr_p, struct in6_addr *saddr)
+static u8 ipv6_push_rthdr4(struct sk_buff *skb, u8 proto,
+ struct ipv6_rt_hdr *opt,
+ struct in6_addr **addr_p, struct in6_addr *saddr)
{
struct ipv6_sr_hdr *sr_phdr, *sr_ihdr;
int plen, hops;
@@ -1144,58 +1144,61 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
}
#endif
- sr_phdr->nexthdr = *proto;
- *proto = NEXTHDR_ROUTING;
+ sr_phdr->nexthdr = proto;
+ return NEXTHDR_ROUTING;
}
-static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
- struct ipv6_rt_hdr *opt,
- struct in6_addr **addr_p, struct in6_addr *saddr)
+static u8 ipv6_push_rthdr(struct sk_buff *skb, u8 proto,
+ struct ipv6_rt_hdr *opt,
+ struct in6_addr **addr_p, struct in6_addr *saddr)
{
switch (opt->type) {
case IPV6_SRCRT_TYPE_0:
case IPV6_SRCRT_STRICT:
case IPV6_SRCRT_TYPE_2:
- ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr);
+ proto = ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr);
break;
case IPV6_SRCRT_TYPE_4:
- ipv6_push_rthdr4(skb, proto, opt, addr_p, saddr);
+ proto = ipv6_push_rthdr4(skb, proto, opt, addr_p, saddr);
break;
default:
break;
}
+ return proto;
}
-static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt)
+static u8 ipv6_push_exthdr(struct sk_buff *skb, u8 proto, u8 type, struct ipv6_opt_hdr *opt)
{
struct ipv6_opt_hdr *h = skb_push(skb, ipv6_optlen(opt));
memcpy(h, opt, ipv6_optlen(opt));
- h->nexthdr = *proto;
- *proto = type;
+ h->nexthdr = proto;
+ return type;
}
-void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
- u8 *proto,
- struct in6_addr **daddr, struct in6_addr *saddr)
+u8 ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
+ u8 proto,
+ struct in6_addr **daddr, struct in6_addr *saddr)
{
if (opt->srcrt) {
- ipv6_push_rthdr(skb, proto, opt->srcrt, daddr, saddr);
+ proto = ipv6_push_rthdr(skb, proto, opt->srcrt, daddr, saddr);
/*
* IPV6_RTHDRDSTOPTS is ignored
* unless IPV6_RTHDR is set (RFC3542).
*/
if (opt->dst0opt)
- ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt);
+ proto = ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt);
}
if (opt->hopopt)
- ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
+ proto = ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
+ return proto;
}
-void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
+u8 ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 proto)
{
if (opt->dst1opt)
- ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt);
+ proto = ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt);
+ return proto;
}
EXPORT_SYMBOL(ipv6_push_frag_opts);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index d19d86ed4376..dafcc0dcd77a 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1057,7 +1057,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst) {
mtu = READ_ONCE(dst_dev(dst)->mtu);
- if (dst_mtu(dst) > mtu)
+ if (dst6_mtu(dst) > mtu)
dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
}
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f904739e99b9..e622a9e086cc 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -80,7 +80,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
hdr = ipv6_hdr(skb);
daddr = &hdr->daddr;
- if (ipv6_addr_is_multicast(daddr)) {
+ if (unlikely(ipv6_addr_is_multicast(daddr))) {
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
@@ -179,8 +179,8 @@ ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
static int ip6_finish_output_gso(struct net *net, struct sock *sk,
struct sk_buff *skb, unsigned int mtu)
{
- if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
- !skb_gso_validate_network_len(skb, mtu))
+ if (unlikely(!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
+ !skb_gso_validate_network_len(skb, mtu)))
return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
return ip6_finish_output2(net, sk, skb);
@@ -202,8 +202,8 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff
if (skb_is_gso(skb))
return ip6_finish_output_gso(net, sk, skb, mtu);
- if (skb->len > mtu ||
- (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
+ if (unlikely(skb->len > mtu ||
+ (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)))
return ip6_fragment(net, sk, skb, ip6_finish_output2);
return ip6_finish_output2(net, sk, skb);
@@ -301,19 +301,20 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
}
}
- if (opt) {
+ if (unlikely(opt)) {
seg_len += opt->opt_nflen + opt->opt_flen;
if (opt->opt_flen)
- ipv6_push_frag_opts(skb, opt, &proto);
+ proto = ipv6_push_frag_opts(skb, opt, proto);
if (opt->opt_nflen)
- ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
- &fl6->saddr);
+ proto = ipv6_push_nfrag_opts(skb, opt, proto,
+ &first_hop,
+ &fl6->saddr);
}
if (unlikely(seg_len > IPV6_MAXPLEN)) {
- hop_jumbo = skb_push(skb, hoplen);
+ hop_jumbo = __skb_push(skb, hoplen);
hop_jumbo->nexthdr = proto;
hop_jumbo->hdrlen = 0;
@@ -326,7 +327,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO;
}
- skb_push(skb, sizeof(struct ipv6hdr));
+ __skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
@@ -352,8 +353,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
skb->priority = priority;
skb->mark = mark;
- mtu = dst_mtu(dst);
- if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
+ mtu = dst6_mtu(dst);
+ if (likely((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb))) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
/* if egress device is enslaved to an L3 master device pass the
@@ -382,7 +383,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
unlock:
rcu_read_unlock();
return ret;
@@ -653,7 +654,7 @@ int ip6_forward(struct sk_buff *skb)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if (ip6_pkt_too_big(skb, mtu)) {
+ if (unlikely(ip6_pkt_too_big(skb, mtu))) {
/* Again, force OUTPUT device used as source address */
skb->dev = dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -1352,12 +1353,13 @@ static void ip6_append_data_mtu(unsigned int *mtu,
}
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
- struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
+ struct ipcm6_cookie *ipc6,
struct rt6_info *rt)
{
+ struct ipv6_txoptions *nopt, *opt = ipc6->opt;
+ struct inet6_cork *v6_cork = &cork->base6;
struct ipv6_pinfo *np = inet6_sk(sk);
unsigned int mtu, frag_size;
- struct ipv6_txoptions *nopt, *opt = ipc6->opt;
/* callers pass dst together with a reference, set it first so
* ip6_cork_release() can put it down even in case of an error.
@@ -1367,7 +1369,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
/*
* setup for corking
*/
- if (opt) {
+ if (unlikely(opt)) {
if (WARN_ON(v6_cork->opt))
return -EINVAL;
@@ -1402,10 +1404,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
v6_cork->dontfrag = ipc6->dontfrag;
if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
- READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
+ READ_ONCE(rt->dst.dev->mtu) : dst6_mtu(&rt->dst);
else
mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
- READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
+ READ_ONCE(rt->dst.dev->mtu) : dst6_mtu(xfrm_dst_path(&rt->dst));
frag_size = READ_ONCE(np->frag_size);
if (frag_size && frag_size < mtu)
@@ -1430,17 +1432,17 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
static int __ip6_append_data(struct sock *sk,
struct sk_buff_head *queue,
struct inet_cork_full *cork_full,
- struct inet6_cork *v6_cork,
struct page_frag *pfrag,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, size_t length, int transhdrlen,
unsigned int flags)
{
- struct sk_buff *skb, *skb_prev = NULL;
+ unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
+ struct inet6_cork *v6_cork = &cork_full->base6;
struct inet_cork *cork = &cork_full->base;
struct flowi6 *fl6 = &cork_full->fl.u.ip6;
- unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
+ struct sk_buff *skb, *skb_prev = NULL;
struct ubuf_info *uarg = NULL;
int exthdrlen = 0;
int dst_exthdrlen = 0;
@@ -1843,7 +1845,6 @@ int ip6_append_data(struct sock *sk,
struct rt6_info *rt, unsigned int flags)
{
struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
int exthdrlen;
int err;
@@ -1854,7 +1855,7 @@ int ip6_append_data(struct sock *sk,
* setup for corking
*/
dst_hold(&rt->dst);
- err = ip6_setup_cork(sk, &inet->cork, &np->cork,
+ err = ip6_setup_cork(sk, &inet->cork,
ipc6, rt);
if (err)
return err;
@@ -1868,7 +1869,7 @@ int ip6_append_data(struct sock *sk,
}
return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
- &np->cork, sk_page_frag(sk), getfrag,
+ sk_page_frag(sk), getfrag,
from, length, transhdrlen, flags);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
@@ -1881,10 +1882,11 @@ static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
skb_dst_set(skb, dst);
}
-static void ip6_cork_release(struct inet_cork_full *cork,
- struct inet6_cork *v6_cork)
+static void ip6_cork_release(struct inet_cork_full *cork)
{
- if (v6_cork->opt) {
+ struct inet6_cork *v6_cork = &cork->base6;
+
+ if (unlikely(v6_cork->opt)) {
struct ipv6_txoptions *opt = v6_cork->opt;
kfree(opt->dst0opt);
@@ -1903,15 +1905,14 @@ static void ip6_cork_release(struct inet_cork_full *cork,
struct sk_buff *__ip6_make_skb(struct sock *sk,
struct sk_buff_head *queue,
- struct inet_cork_full *cork,
- struct inet6_cork *v6_cork)
+ struct inet_cork_full *cork)
{
struct sk_buff *skb, *tmp_skb;
struct sk_buff **tail_skb;
struct in6_addr *final_dst;
struct net *net = sock_net(sk);
struct ipv6hdr *hdr;
- struct ipv6_txoptions *opt = v6_cork->opt;
+ struct ipv6_txoptions *opt;
struct rt6_info *rt = dst_rt6_info(cork->base.dst);
struct flowi6 *fl6 = &cork->fl.u.ip6;
unsigned char proto = fl6->flowi6_proto;
@@ -1940,19 +1941,22 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
__skb_pull(skb, skb_network_header_len(skb));
final_dst = &fl6->daddr;
- if (opt && opt->opt_flen)
- ipv6_push_frag_opts(skb, opt, &proto);
- if (opt && opt->opt_nflen)
- ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
-
+ opt = cork->base6.opt;
+ if (unlikely(opt)) {
+ if (opt->opt_flen)
+ proto = ipv6_push_frag_opts(skb, opt, proto);
+ if (opt->opt_nflen)
+ proto = ipv6_push_nfrag_opts(skb, opt, proto,
+ &final_dst, &fl6->saddr);
+ }
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
- ip6_flow_hdr(hdr, v6_cork->tclass,
+ ip6_flow_hdr(hdr, cork->base6.tclass,
ip6_make_flowlabel(net, skb, fl6->flowlabel,
ip6_autoflowlabel(net, sk), fl6));
- hdr->hop_limit = v6_cork->hop_limit;
+ hdr->hop_limit = cork->base6.hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
hdr->daddr = *final_dst;
@@ -1966,7 +1970,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
ip6_cork_steal_dst(skb, cork);
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
- if (proto == IPPROTO_ICMPV6) {
+ if (unlikely(proto == IPPROTO_ICMPV6)) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
u8 icmp6_type;
@@ -1979,7 +1983,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
}
- ip6_cork_release(cork, v6_cork);
+ ip6_cork_release(cork);
out:
return skb;
}
@@ -2018,8 +2022,7 @@ EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
static void __ip6_flush_pending_frames(struct sock *sk,
struct sk_buff_head *queue,
- struct inet_cork_full *cork,
- struct inet6_cork *v6_cork)
+ struct inet_cork_full *cork)
{
struct sk_buff *skb;
@@ -2030,13 +2033,13 @@ static void __ip6_flush_pending_frames(struct sock *sk,
kfree_skb(skb);
}
- ip6_cork_release(cork, v6_cork);
+ ip6_cork_release(cork);
}
void ip6_flush_pending_frames(struct sock *sk)
{
__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
- &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
+ &inet_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
@@ -2047,9 +2050,8 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
struct ipcm6_cookie *ipc6, struct rt6_info *rt,
unsigned int flags, struct inet_cork_full *cork)
{
- struct inet6_cork v6_cork;
- struct sk_buff_head queue;
int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
+ struct sk_buff_head queue;
int err;
if (flags & MSG_PROBE) {
@@ -2062,21 +2064,21 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
cork->base.flags = 0;
cork->base.addr = 0;
cork->base.opt = NULL;
- v6_cork.opt = NULL;
- err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
+ cork->base6.opt = NULL;
+ err = ip6_setup_cork(sk, cork, ipc6, rt);
if (err) {
- ip6_cork_release(cork, &v6_cork);
+ ip6_cork_release(cork);
return ERR_PTR(err);
}
- err = __ip6_append_data(sk, &queue, cork, &v6_cork,
+ err = __ip6_append_data(sk, &queue, cork,
&current->task_frag, getfrag, from,
length + exthdrlen, transhdrlen + exthdrlen,
flags);
if (err) {
- __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
+ __ip6_flush_pending_frames(sk, &queue, cork);
return ERR_PTR(err);
}
- return __ip6_make_skb(sk, &queue, cork, &v6_cork);
+ return __ip6_make_skb(sk, &queue, cork);
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index f68f6f110a3e..4c29aa94e86e 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -638,7 +638,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* change mtu on this route */
if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
- if (rel_info > dst_mtu(skb_dst(skb2)))
+ if (rel_info > dst6_mtu(skb_dst(skb2)))
goto out;
skb_dst_update_pmtu_no_confirm(skb2, rel_info);
@@ -1187,7 +1187,7 @@ route_lookup:
t->parms.name);
goto tx_err_dst_release;
}
- mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen;
+ mtu = dst6_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen;
if (encap_limit >= 0) {
max_headroom += 8;
mtu -= 8;
@@ -1265,7 +1265,7 @@ route_lookup:
if (encap_limit >= 0) {
init_tel_txopt(&opt, encap_limit);
- ipv6_push_frag_opts(skb, &opt.ops, &proto);
+ proto = ipv6_push_frag_opts(skb, &opt.ops, proto);
}
skb_push(skb, sizeof(struct ipv6hdr));
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a61e742794f9..d784a8644ff2 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1184,7 +1184,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
rcu_read_lock();
dst = __sk_dst_get(sk);
if (dst)
- val = dst_mtu(dst);
+ val = dst6_mtu(dst);
rcu_read_unlock();
if (!val)
return -ENOTCONN;
@@ -1283,7 +1283,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
rcu_read_lock();
dst = __sk_dst_get(sk);
if (dst)
- mtuinfo.ip6m_mtu = dst_mtu(dst);
+ mtuinfo.ip6m_mtu = dst6_mtu(dst);
rcu_read_unlock();
if (!mtuinfo.ip6m_mtu)
return -ENOTCONN;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index b4cd05dba9b6..ee6beba03e9b 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -529,7 +529,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
offset = rp->offset;
total_len = inet_sk(sk)->cork.base.length;
- opt = inet6_sk(sk)->cork.opt;
+ opt = inet_sk(sk)->cork.base6.opt;
total_len -= opt ? opt->opt_flen : 0;
if (offset >= total_len - 1) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3cfa7ae1294b..c0350d97307e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2049,6 +2049,8 @@ unlock:
static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
struct rt6_info *rt, int mtu)
{
+ u32 dmtu = dst6_mtu(&rt->dst);
+
/* If the new MTU is lower than the route PMTU, this new MTU will be the
* lowest MTU in the path: always allow updating the route PMTU to
* reflect PMTU decreases.
@@ -2059,10 +2061,10 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
* handle this.
*/
- if (dst_mtu(&rt->dst) >= mtu)
+ if (dmtu >= mtu)
return true;
- if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
+ if (dmtu == idev->cnf.mtu6)
return true;
return false;
@@ -2932,7 +2934,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (mtu < IPV6_MIN_MTU)
return;
- if (mtu >= dst_mtu(dst))
+ if (mtu >= dst6_mtu(dst))
return;
if (!rt6_cache_allowed_for_pmtu(rt6)) {
@@ -3248,7 +3250,7 @@ EXPORT_SYMBOL_GPL(ip6_sk_redirect);
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
- unsigned int mtu = dst_mtu(dst);
+ unsigned int mtu = dst6_mtu(dst);
struct net *net;
mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index cf37ad9686e6..439c8a1c6625 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -962,7 +962,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
}
if (df) {
- mtu = dst_mtu(&rt->dst) - t_hlen;
+ mtu = dst4_mtu(&rt->dst) - t_hlen;
if (mtu < IPV4_MIN_MTU) {
DEV_STATS_INC(dev, collisions);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8bf29186c15f..c65a5bfd322a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -351,7 +351,7 @@ failure:
static void tcp_v6_mtu_reduced(struct sock *sk)
{
struct dst_entry *dst;
- u32 mtu;
+ u32 mtu, dmtu;
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
return;
@@ -368,8 +368,9 @@ static void tcp_v6_mtu_reduced(struct sock *sk)
if (!dst)
return;
- if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
- tcp_sync_mss(sk, dst_mtu(dst));
+ dmtu = dst6_mtu(dst);
+ if (inet_csk(sk)->icsk_pmtu_cookie > dmtu) {
+ tcp_sync_mss(sk, dmtu);
tcp_simple_retransmit(sk);
}
}
@@ -1467,7 +1468,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
tcp_ca_openreq_child(newsk, dst);
- tcp_sync_mss(newsk, dst_mtu(dst));
+ tcp_sync_mss(newsk, dst6_mtu(dst));
newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
tcp_initialize_rcv_mss(newsk);