author    Linus Torvalds <torvalds@linux-foundation.org>  2026-04-23 16:50:42 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2026-04-23 16:50:42 -0700
commit    e728258debd553c95d2e70f9cd97c9fde27c7130 (patch)
tree      18ef97c80f9923717f5cf6bdab44d77607ca0f4b /net/ipv4
parent    e8df5a0c0d041588e7f02781822d637d226cdbe8 (diff)
parent    5e6391da4539c35422c0df1d1d2d9a9bb97cd736 (diff)
Merge tag 'net-7.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski:
 "Including fixes from Netfilter.

  Steady stream of fixes. Last two weeks feel comparable to the two
  weeks before the merge window. Lots of AI-aided bug discovery. A
  newer big source is Sashiko/Gemini (Roman Gushchin's system), which
  points out issues in existing code during patch review (maybe 25% of
  fixes here likely originating from Sashiko). Nice thing is these are
  often fixed by the respective maintainers, not drive-bys.

  Current release - new code bugs:

   - kconfig: MDIO_PIC64HPSC should depend on ARCH_MICROCHIP

  Previous releases - regressions:

   - add async ndo_set_rx_mode and switch drivers which we promised to
     be called under the per-netdev mutex to it

   - dsa: remove duplicate netdev_lock_ops() for conduit ethtool ops

   - hv_sock: report EOF instead of -EIO for FIN

   - vsock/virtio: fix MSG_PEEK calculation on bytes to copy

  Previous releases - always broken:

   - ipv6: fix possible UAF in icmpv6_rcv()

   - icmp: validate reply type before using icmp_pointers

   - af_unix: drop all SCM attributes for SOCKMAP

   - netfilter: fix a number of bugs in the osf (OS fingerprinting)

   - eth: intel: fix timestamp interrupt configuration for E825C

  Misc:

   - bunch of data-race annotations"

* tag 'net-7.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (148 commits)
  rxrpc: Fix error handling in rxgk_extract_token()
  rxrpc: Fix re-decryption of RESPONSE packets
  rxrpc: Fix rxrpc_input_call_event() to only unshare DATA packets
  rxrpc: Fix missing validation of ticket length in non-XDR key preparsing
  rxgk: Fix potential integer overflow in length check
  rxrpc: Fix conn-level packet handling to unshare RESPONSE packets
  rxrpc: Fix potential UAF after skb_unshare() failure
  rxrpc: Fix rxkad crypto unalignment handling
  rxrpc: Fix memory leaks in rxkad_verify_response()
  net: rds: fix MR cleanup on copy error
  m68k: mvme147: Make me the maintainer
  net: txgbe: fix firmware version check
  selftests/bpf: check epoll readiness during reuseport migration
  tcp: call sk_data_ready() after listener migration
  vhost_net: fix sleeping with preempt-disabled in vhost_net_busy_poll()
  ipv6: Cap TLV scan in ip6_tnl_parse_tlv_enc_lim
  tipc: fix double-free in tipc_buf_append()
  llc: Return -EINPROGRESS from llc_ui_connect()
  ipv4: icmp: validate reply type before using icmp_pointers
  selftests/net: packetdrill: cover RFC 5961 5.2 challenge ACK on both edges
  ...
Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/icmp.c                  |  5
-rw-r--r--  net/ipv4/inet_connection_sock.c  |  3
-rw-r--r--  net/ipv4/netfilter/iptable_nat.c |  4
-rw-r--r--  net/ipv4/nexthop.c               |  4
-rw-r--r--  net/ipv4/tcp.c                   | 64
-rw-r--r--  net/ipv4/tcp_bbr.c               |  6
-rw-r--r--  net/ipv4/tcp_bic.c               |  2
-rw-r--r--  net/ipv4/tcp_cdg.c               |  4
-rw-r--r--  net/ipv4/tcp_cubic.c             |  6
-rw-r--r--  net/ipv4/tcp_dctcp.c             |  2
-rw-r--r--  net/ipv4/tcp_input.c             | 52
-rw-r--r--  net/ipv4/tcp_metrics.c           |  6
-rw-r--r--  net/ipv4/tcp_nv.c                |  4
-rw-r--r--  net/ipv4/tcp_output.c            | 19
-rw-r--r--  net/ipv4/tcp_plb.c               |  2
-rw-r--r--  net/ipv4/tcp_timer.c             |  2
-rw-r--r--  net/ipv4/tcp_vegas.c             |  9
-rw-r--r--  net/ipv4/tcp_westwood.c          |  4
-rw-r--r--  net/ipv4/tcp_yeah.c              |  3
19 files changed, 115 insertions, 86 deletions
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 2f4fac22d1ab..7eeff658b467 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -64,6 +64,7 @@
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
+#include <linux/nospec.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
@@ -371,7 +372,9 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
to, len);
skb->csum = csum_block_add(skb->csum, csum, odd);
- if (icmp_pointers[icmp_param->data.icmph.type].error)
+ if (icmp_param->data.icmph.type <= NR_ICMP_TYPES &&
+ icmp_pointers[array_index_nospec(icmp_param->data.icmph.type,
+ NR_ICMP_TYPES + 1)].error)
nf_ct_attach(skb, icmp_param->skb);
return 0;
}
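
The hunk above applies the standard Spectre-v1 hardening pattern: range-check the attacker-influenced ICMP type, then clamp the index used under speculation with array_index_nospec(). A minimal standalone sketch of that pattern (the helper name is illustrative, not from the patch):

#include <linux/nospec.h>

/* Sketch of the check-then-clamp pattern used above: the bounds check
 * alone does not stop the CPU from speculatively indexing past the end
 * of icmp_pointers[]; array_index_nospec() forces the index to a safe
 * value on the speculative path as well.
 */
static bool icmp_type_is_error(u8 type)
{
	if (type > NR_ICMP_TYPES)	/* architectural bounds check */
		return false;
	/* clamp to [0, NR_ICMP_TYPES] even under speculation */
	type = array_index_nospec(type, NR_ICMP_TYPES + 1);
	return icmp_pointers[type].error;
}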
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4ac3ae1bc1af..928654c34156 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1479,16 +1479,19 @@ void inet_csk_listen_stop(struct sock *sk)
if (nreq) {
refcount_set(&nreq->rsk_refcnt, 1);
+ rcu_read_lock();
if (inet_csk_reqsk_queue_add(nsk, nreq, child)) {
__NET_INC_STATS(sock_net(nsk),
LINUX_MIB_TCPMIGRATEREQSUCCESS);
reqsk_migrate_reset(req);
+ READ_ONCE(nsk->sk_data_ready)(nsk);
} else {
__NET_INC_STATS(sock_net(nsk),
LINUX_MIB_TCPMIGRATEREQFAILURE);
reqsk_migrate_reset(nreq);
__reqsk_free(nreq);
}
+ rcu_read_unlock();
/* inet_csk_reqsk_queue_add() has already
* called inet_child_forget() on failure case.
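
The added rcu_read_lock()/rcu_read_unlock() pair and the READ_ONCE() load of the callback follow the usual pattern for invoking a socket callback that can be rewritten concurrently (e.g. by sockmap). A condensed, illustrative sketch under that assumption (helper name is hypothetical):

static void wake_migrated_listener(struct sock *nsk)
{
	void (*ready)(struct sock *sk);

	rcu_read_lock();
	/* sk_data_ready can be replaced concurrently; load it once */
	ready = READ_ONCE(nsk->sk_data_ready);
	ready(nsk);	/* lets poll/epoll see the migrated child */
	rcu_read_unlock();
}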
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index a5db7c67d61b..625a1ca13b1b 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -79,7 +79,7 @@ static int ipt_nat_register_lookups(struct net *net)
while (i)
nf_nat_ipv4_unregister_fn(net, &ops[--i]);
- kfree(ops);
+ kfree_rcu(ops, rcu);
return ret;
}
}
@@ -100,7 +100,7 @@ static void ipt_nat_unregister_lookups(struct net *net)
for (i = 0; i < ARRAY_SIZE(nf_nat_ipv4_ops); i++)
nf_nat_ipv4_unregister_fn(net, &ops[i]);
- kfree(ops);
+ kfree_rcu(ops, rcu);
}
static int iptable_nat_table_init(struct net *net)
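
The kfree() -> kfree_rcu() conversion assumes the freed object embeds a struct rcu_head, so reclamation is deferred past a grace period while concurrent RCU readers may still hold a pointer into the ops array. A hypothetical minimal illustration of that shape (struct and field names are illustrative, not the patch's):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct nat_ops_block {
	struct nf_hook_ops ops[2];
	struct rcu_head rcu;	/* consumed by kfree_rcu() below */
};

static void nat_ops_release(struct nat_ops_block *blk)
{
	/* defers kfree(blk) until all pre-existing RCU readers finish */
	kfree_rcu(blk, rcu);
}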
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 904a060a7330..f92fcc39fc4c 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -2469,10 +2469,10 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old,
goto err_notify;
}
- /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially
+ /* When replacing a nexthop with one of a different family, potentially
* update IPv4 indication in all the groups using the nexthop.
*/
- if (oldi->family == AF_INET && newi->family == AF_INET6) {
+ if (oldi->family != newi->family) {
list_for_each_entry(nhge, &old->grp_list, nh_list) {
struct nexthop *nhp = nhge->nh_parent;
struct nh_group *nhg;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2014a6408e93..432fa28e47d4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3424,7 +3424,7 @@ int tcp_disconnect(struct sock *sk, int flags)
icsk->icsk_rto = TCP_TIMEOUT_INIT;
WRITE_ONCE(icsk->icsk_rto_min, TCP_RTO_MIN);
WRITE_ONCE(icsk->icsk_delack_max, TCP_DELACK_MAX);
- tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+ WRITE_ONCE(tp->snd_ssthresh, TCP_INFINITE_SSTHRESH);
tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
tp->snd_cwnd_cnt = 0;
tp->is_cwnd_limited = 0;
@@ -3622,7 +3622,8 @@ static void tcp_enable_tx_delay(struct sock *sk, int val)
if (delta && sk->sk_state == TCP_ESTABLISHED) {
s64 srtt = (s64)tp->srtt_us + delta;
- tp->srtt_us = clamp_t(s64, srtt, 1, ~0U);
+ WRITE_ONCE(tp->srtt_us,
+ clamp_t(s64, srtt, 1, ~0U));
/* Note: does not deal with non zero icsk_backoff */
tcp_set_rto(sk);
@@ -4190,12 +4191,18 @@ static void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
struct tcp_info *info)
{
u64 stats[__TCP_CHRONO_MAX], total = 0;
- enum tcp_chrono i;
+ enum tcp_chrono i, cur;
+ /* Following READ_ONCE()s pair with WRITE_ONCE()s in tcp_chrono_set().
+ * This is because socket lock might not be owned by us at this point.
+ * This is best effort, tcp_get_timestamping_opt_stats() can
+ * see wrong values. A real fix would be too costly for TCP fast path.
+ */
+ cur = READ_ONCE(tp->chrono_type);
for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) {
- stats[i] = tp->chrono_stat[i - 1];
- if (i == tp->chrono_type)
- stats[i] += tcp_jiffies32 - tp->chrono_start;
+ stats[i] = READ_ONCE(tp->chrono_stat[i - 1]);
+ if (i == cur)
+ stats[i] += tcp_jiffies32 - READ_ONCE(tp->chrono_start);
stats[i] *= USEC_PER_SEC / HZ;
total += stats[i];
}
@@ -4427,9 +4434,9 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED,
info.tcpi_sndbuf_limited, TCP_NLA_PAD);
nla_put_u64_64bit(stats, TCP_NLA_DATA_SEGS_OUT,
- tp->data_segs_out, TCP_NLA_PAD);
+ READ_ONCE(tp->data_segs_out), TCP_NLA_PAD);
nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS,
- tp->total_retrans, TCP_NLA_PAD);
+ READ_ONCE(tp->total_retrans), TCP_NLA_PAD);
rate = READ_ONCE(sk->sk_pacing_rate);
rate64 = (rate != ~0UL) ? rate : ~0ULL;
@@ -4438,37 +4445,42 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
rate64 = tcp_compute_delivery_rate(tp);
nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);
- nla_put_u32(stats, TCP_NLA_SND_CWND, tcp_snd_cwnd(tp));
- nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
- nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
+ nla_put_u32(stats, TCP_NLA_SND_CWND, READ_ONCE(tp->snd_cwnd));
+ nla_put_u32(stats, TCP_NLA_REORDERING, READ_ONCE(tp->reordering));
+ nla_put_u32(stats, TCP_NLA_MIN_RTT, data_race(tcp_min_rtt(tp)));
nla_put_u8(stats, TCP_NLA_RECUR_RETRANS,
READ_ONCE(inet_csk(sk)->icsk_retransmits));
- nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
- nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
- nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered);
- nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce);
-
- nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
+ nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, data_race(!!tp->rate_app_limited));
+ nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, READ_ONCE(tp->snd_ssthresh));
+ nla_put_u32(stats, TCP_NLA_DELIVERED, READ_ONCE(tp->delivered));
+ nla_put_u32(stats, TCP_NLA_DELIVERED_CE, READ_ONCE(tp->delivered_ce));
+
+ nla_put_u32(stats, TCP_NLA_SNDQ_SIZE,
+ max_t(int, 0,
+ READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_una)));
nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
- nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent,
- TCP_NLA_PAD);
- nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans,
+ nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, READ_ONCE(tp->bytes_sent),
TCP_NLA_PAD);
- nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
- nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen);
- nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3);
- nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash);
+ nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS,
+ READ_ONCE(tp->bytes_retrans), TCP_NLA_PAD);
+ nla_put_u32(stats, TCP_NLA_DSACK_DUPS, READ_ONCE(tp->dsack_dups));
+ nla_put_u32(stats, TCP_NLA_REORD_SEEN, READ_ONCE(tp->reord_seen));
+ nla_put_u32(stats, TCP_NLA_SRTT, READ_ONCE(tp->srtt_us) >> 3);
+ nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH,
+ READ_ONCE(tp->timeout_rehash));
nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT,
- max_t(int, 0, tp->write_seq - tp->snd_nxt));
+ max_t(int, 0,
+ READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt)));
nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns,
TCP_NLA_PAD);
if (ack_skb)
nla_put_u8(stats, TCP_NLA_TTL,
tcp_skb_ttl_or_hop_limit(ack_skb));
- nla_put_u32(stats, TCP_NLA_REHASH, tp->plb_rehash + tp->timeout_rehash);
+ nla_put_u32(stats, TCP_NLA_REHASH,
+ READ_ONCE(tp->plb_rehash) + READ_ONCE(tp->timeout_rehash));
return stats;
}
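
Most of the tcp.c changes above, and the WRITE_ONCE() conversions in the files that follow, are the same data-race annotation: the writer runs under the socket lock while tcp_get_timestamping_opt_stats() samples the fields locklessly. A hedged sketch of the pairing (function names are illustrative):

/* Writer side: runs under the socket lock. A plain increment could be
 * torn or become partially visible to a lockless reader.
 */
static void stats_update(struct tcp_sock *tp, u32 delivered)
{
	WRITE_ONCE(tp->delivered, tp->delivered + delivered);
}

/* Reader side: no socket lock held. READ_ONCE() guarantees a single,
 * untorn load and documents the intentional race for KCSAN.
 */
static u32 stats_sample(const struct tcp_sock *tp)
{
	return READ_ONCE(tp->delivered);
}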
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 1ddc20a399b0..aec7805b1d37 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -897,8 +897,8 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
bbr->mode = BBR_DRAIN; /* drain queue we created */
- tcp_sk(sk)->snd_ssthresh =
- bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
+ WRITE_ONCE(tcp_sk(sk)->snd_ssthresh,
+ bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT));
} /* fall through to check if in-flight is already small: */
if (bbr->mode == BBR_DRAIN &&
bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <=
@@ -1043,7 +1043,7 @@ __bpf_kfunc static void bbr_init(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
bbr->prior_cwnd = 0;
- tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+ WRITE_ONCE(tp->snd_ssthresh, TCP_INFINITE_SSTHRESH);
bbr->rtt_cnt = 0;
bbr->next_rtt_delivered = tp->delivered;
bbr->prev_ca_state = TCP_CA_Open;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 58358bf92e1b..65444ff14241 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -74,7 +74,7 @@ static void bictcp_init(struct sock *sk)
bictcp_reset(ca);
if (initial_ssthresh)
- tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
+ WRITE_ONCE(tcp_sk(sk)->snd_ssthresh, initial_ssthresh);
}
/*
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index ceabfd690a29..0812c390aee5 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -162,7 +162,7 @@ static void tcp_cdg_hystart_update(struct sock *sk)
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINCWND,
tcp_snd_cwnd(tp));
- tp->snd_ssthresh = tcp_snd_cwnd(tp);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp));
return;
}
}
@@ -181,7 +181,7 @@ static void tcp_cdg_hystart_update(struct sock *sk)
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTDELAYCWND,
tcp_snd_cwnd(tp));
- tp->snd_ssthresh = tcp_snd_cwnd(tp);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp));
}
}
}
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index ab78b5ae8d0e..119bf8cbb007 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -136,7 +136,7 @@ __bpf_kfunc static void cubictcp_init(struct sock *sk)
bictcp_hystart_reset(sk);
if (!hystart && initial_ssthresh)
- tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
+ WRITE_ONCE(tcp_sk(sk)->snd_ssthresh, initial_ssthresh);
}
__bpf_kfunc static void cubictcp_cwnd_event_tx_start(struct sock *sk)
@@ -420,7 +420,7 @@ static void hystart_update(struct sock *sk, u32 delay)
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINCWND,
tcp_snd_cwnd(tp));
- tp->snd_ssthresh = tcp_snd_cwnd(tp);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp));
}
}
}
@@ -440,7 +440,7 @@ static void hystart_update(struct sock *sk, u32 delay)
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTDELAYCWND,
tcp_snd_cwnd(tp));
- tp->snd_ssthresh = tcp_snd_cwnd(tp);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp));
}
}
}
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 96c99999e09d..274e628e7cf8 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -177,7 +177,7 @@ static void dctcp_react_to_loss(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
ca->loss_cwnd = tcp_snd_cwnd(tp);
- tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U);
+ WRITE_ONCE(tp->snd_ssthresh, max(tcp_snd_cwnd(tp) >> 1U, 2U));
}
__bpf_kfunc static void dctcp_state(struct sock *sk, u8 new_state)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 021f745747c5..d5c9e65d9760 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -476,14 +476,14 @@ static bool tcp_accecn_process_option(struct tcp_sock *tp,
static void tcp_count_delivered_ce(struct tcp_sock *tp, u32 ecn_count)
{
- tp->delivered_ce += ecn_count;
+ WRITE_ONCE(tp->delivered_ce, tp->delivered_ce + ecn_count);
}
/* Updates the delivered and delivered_ce counts */
static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
bool ece_ack)
{
- tp->delivered += delivered;
+ WRITE_ONCE(tp->delivered, tp->delivered + delivered);
if (tcp_ecn_mode_rfc3168(tp) && ece_ack)
tcp_count_delivered_ce(tp, delivered);
}
@@ -1132,7 +1132,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tcp_bpf_rtt(sk, mrtt_us, srtt);
}
- tp->srtt_us = max(1U, srtt);
+ WRITE_ONCE(tp->srtt_us, max(1U, srtt));
}
void tcp_update_pacing_rate(struct sock *sk)
@@ -1246,7 +1246,7 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
else if (tp->tlp_high_seq && tp->tlp_high_seq == end_seq)
state->flag |= FLAG_DSACK_TLP;
- tp->dsack_dups += dup_segs;
+ WRITE_ONCE(tp->dsack_dups, tp->dsack_dups + dup_segs);
/* Skip the DSACK if dup segs weren't retransmitted by sender */
if (tp->dsack_dups > tp->total_retrans)
return 0;
@@ -1293,12 +1293,13 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
tp->sacked_out,
tp->undo_marker ? tp->undo_retrans : 0);
#endif
- tp->reordering = min_t(u32, (metric + mss - 1) / mss,
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
+ WRITE_ONCE(tp->reordering,
+ min_t(u32, (metric + mss - 1) / mss,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)));
}
/* This exciting event is worth to be remembered. 8) */
- tp->reord_seen++;
+ WRITE_ONCE(tp->reord_seen, tp->reord_seen + 1);
NET_INC_STATS(sock_net(sk),
ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
}
@@ -2439,9 +2440,10 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
if (!tcp_limit_reno_sacked(tp))
return;
- tp->reordering = min_t(u32, tp->packets_out + addend,
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
- tp->reord_seen++;
+ WRITE_ONCE(tp->reordering,
+ min_t(u32, tp->packets_out + addend,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)));
+ WRITE_ONCE(tp->reord_seen, tp->reord_seen + 1);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}
@@ -2565,7 +2567,7 @@ void tcp_enter_loss(struct sock *sk)
(icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(sk);
tp->prior_cwnd = tcp_snd_cwnd(tp);
- tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+ WRITE_ONCE(tp->snd_ssthresh, icsk->icsk_ca_ops->ssthresh(sk));
tcp_ca_event(sk, CA_EVENT_LOSS);
tcp_init_undo(tp);
}
@@ -2579,8 +2581,8 @@ void tcp_enter_loss(struct sock *sk)
reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering);
if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
tp->sacked_out >= reordering)
- tp->reordering = min_t(unsigned int, tp->reordering,
- reordering);
+ WRITE_ONCE(tp->reordering,
+ min_t(unsigned int, tp->reordering, reordering));
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
@@ -2858,7 +2860,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk));
if (tp->prior_ssthresh > tp->snd_ssthresh) {
- tp->snd_ssthresh = tp->prior_ssthresh;
+ WRITE_ONCE(tp->snd_ssthresh, tp->prior_ssthresh);
tcp_ecn_withdraw_cwr(tp);
}
}
@@ -2976,7 +2978,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
tp->prior_cwnd = tcp_snd_cwnd(tp);
tp->prr_delivered = 0;
tp->prr_out = 0;
- tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+ WRITE_ONCE(tp->snd_ssthresh, inet_csk(sk)->icsk_ca_ops->ssthresh(sk));
tcp_ecn_queue_cwr(tp);
}
@@ -3118,7 +3120,7 @@ static void tcp_non_congestion_loss_retransmit(struct sock *sk)
if (icsk->icsk_ca_state != TCP_CA_Loss) {
tp->high_seq = tp->snd_nxt;
- tp->snd_ssthresh = tcp_current_ssthresh(sk);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk));
tp->prior_ssthresh = 0;
tp->undo_marker = 0;
tcp_set_ca_state(sk, TCP_CA_Loss);
@@ -3910,7 +3912,7 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
sock_owned_by_me((struct sock *)tp);
tp->bytes_acked += delta;
tcp_snd_sne_update(tp, ack);
- tp->snd_una = ack;
+ WRITE_ONCE(tp->snd_una, ack);
}
static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq)
@@ -4284,11 +4286,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
goto old_ack;
}
- /* If the ack includes data we haven't sent yet, discard
- * this segment (RFC793 Section 3.9).
+ /* If the ack includes data we haven't sent yet, drop the
+ * segment. RFC 793 Section 3.9 and RFC 5961 Section 5.2
+ * require us to send an ACK back in that case.
*/
- if (after(ack, tp->snd_nxt))
+ if (after(ack, tp->snd_nxt)) {
+ if (!(flag & FLAG_NO_CHALLENGE_ACK))
+ tcp_send_challenge_ack(sk, false);
return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA;
+ }
if (after(ack, prior_snd_una)) {
flag |= FLAG_SND_UNA_ADVANCED;
@@ -6777,7 +6783,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
/* SYN-data is counted as two separate packets in tcp_ack() */
if (tp->delivered > 1)
- --tp->delivered;
+ WRITE_ONCE(tp->delivered, tp->delivered - 1);
}
tcp_fastopen_add_skb(sk, synack);
@@ -7210,7 +7216,7 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
SKB_DR_SET(reason, NOT_SPECIFIED);
switch (sk->sk_state) {
case TCP_SYN_RECV:
- tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */
+ WRITE_ONCE(tp->delivered, tp->delivered + 1); /* SYN-ACK delivery isn't tracked in tcp_ack */
if (!tp->srtt_us)
tcp_synack_rtt_meas(sk, req);
@@ -7238,7 +7244,7 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (sk->sk_socket)
sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
- tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
+ WRITE_ONCE(tp->snd_una, TCP_SKB_CB(skb)->ack_seq);
tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
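
The tcp_ack() hunk above restores RFC 5961 Section 5.2 behavior: an ACK covering data that was never sent is still dropped, but is now answered with a challenge ACK so a legitimate peer can resynchronize. A condensed restatement of just that branch (simplified from the hunk, not the full tcp_ack() flow):

static int ack_validate(struct sock *sk, u32 ack, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (after(ack, tp->snd_nxt)) {	/* ACKs data we never sent */
		if (!(flag & FLAG_NO_CHALLENGE_ACK))
			tcp_send_challenge_ack(sk, false);
		return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA;
	}
	return 0;	/* acceptable ACK */
}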
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 06b1d5d3b6df..dc0c081fc1f3 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -490,13 +490,13 @@ void tcp_init_metrics(struct sock *sk)
val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val) {
- tp->snd_ssthresh = val;
+ WRITE_ONCE(tp->snd_ssthresh, val);
if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
- tp->snd_ssthresh = tp->snd_cwnd_clamp;
+ WRITE_ONCE(tp->snd_ssthresh, tp->snd_cwnd_clamp);
}
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
if (val && tp->reordering != val)
- tp->reordering = val;
+ WRITE_ONCE(tp->reordering, val);
crtt = tcp_metric_get(tm, TCP_METRIC_RTT);
rcu_read_unlock();
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index a60662f4bdf9..f345897a68df 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -396,8 +396,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
/* We have enough data to determine we are congested */
ca->nv_allow_cwnd_growth = 0;
- tp->snd_ssthresh =
- (nv_ssthresh_factor * max_win) >> 3;
+ WRITE_ONCE(tp->snd_ssthresh,
+ (nv_ssthresh_factor * max_win) >> 3);
if (tcp_snd_cwnd(tp) - max_win > 2) {
/* gap > 2, we do exponential cwnd decrease */
int dec;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8e99687526a6..f9d8755705f7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -171,7 +171,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta)
tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
- tp->snd_ssthresh = tcp_current_ssthresh(sk);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk));
restart_cwnd = min(restart_cwnd, cwnd);
while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
@@ -1688,8 +1688,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
- tp->data_segs_out += tcp_skb_pcount(skb);
- tp->bytes_sent += skb->len - tcp_header_size;
+ WRITE_ONCE(tp->data_segs_out,
+ tp->data_segs_out + tcp_skb_pcount(skb));
+ WRITE_ONCE(tp->bytes_sent,
+ tp->bytes_sent + skb->len - tcp_header_size);
}
if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
@@ -2142,7 +2144,7 @@ static void tcp_cwnd_application_limited(struct sock *sk)
u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
u32 win_used = max(tp->snd_cwnd_used, init_win);
if (win_used < tcp_snd_cwnd(tp)) {
- tp->snd_ssthresh = tcp_current_ssthresh(sk);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk));
tcp_snd_cwnd_set(tp, (tcp_snd_cwnd(tp) + win_used) >> 1);
}
tp->snd_cwnd_used = 0;
@@ -3642,8 +3644,8 @@ start:
TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs);
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
- tp->total_retrans += segs;
- tp->bytes_retrans += skb->len;
+ WRITE_ONCE(tp->total_retrans, tp->total_retrans + segs);
+ WRITE_ONCE(tp->bytes_retrans, tp->bytes_retrans + skb->len);
/* make sure skb->data is aligned on arches that require it
* and check if ack-trimming & collapsing extended the headroom
@@ -4152,7 +4154,7 @@ static void tcp_connect_init(struct sock *sk)
tp->snd_wnd = 0;
tcp_init_wl(tp, 0);
tcp_write_queue_purge(sk);
- tp->snd_una = tp->write_seq;
+ WRITE_ONCE(tp->snd_una, tp->write_seq);
tp->snd_sml = tp->write_seq;
tp->snd_up = tp->write_seq;
WRITE_ONCE(tp->snd_nxt, tp->write_seq);
@@ -4646,7 +4648,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
* However in this case, we are dealing with a passive fastopen
* socket thus we can change total_retrans value.
*/
- tcp_sk_rw(sk)->total_retrans++;
+ WRITE_ONCE(tcp_sk_rw(sk)->total_retrans,
+ tcp_sk_rw(sk)->total_retrans + 1);
}
trace_tcp_retransmit_synack(sk, req);
WRITE_ONCE(req->num_retrans, req->num_retrans + 1);
diff --git a/net/ipv4/tcp_plb.c b/net/ipv4/tcp_plb.c
index 68ccdb9a5412..c11a0cd3f8fe 100644
--- a/net/ipv4/tcp_plb.c
+++ b/net/ipv4/tcp_plb.c
@@ -80,7 +80,7 @@ void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb)
sk_rethink_txhash(sk);
plb->consec_cong_rounds = 0;
- tcp_sk(sk)->plb_rehash++;
+ WRITE_ONCE(tcp_sk(sk)->plb_rehash, tcp_sk(sk)->plb_rehash + 1);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPLBREHASH);
}
EXPORT_SYMBOL_GPL(tcp_plb_check_rehash);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index ea99988795e7..8d791a954cd6 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -297,7 +297,7 @@ static int tcp_write_timeout(struct sock *sk)
}
if (sk_rethink_txhash(sk)) {
- tp->timeout_rehash++;
+ WRITE_ONCE(tp->timeout_rehash, tp->timeout_rehash + 1);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH);
}
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 950a66966059..574453af6bc0 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -245,7 +245,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
*/
tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp),
(u32)target_cwnd + 1));
- tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
+ WRITE_ONCE(tp->snd_ssthresh,
+ tcp_vegas_ssthresh(tp));
} else if (tcp_in_slow_start(tp)) {
/* Slow start. */
@@ -261,8 +262,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* we slow down.
*/
tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1);
- tp->snd_ssthresh
- = tcp_vegas_ssthresh(tp);
+ WRITE_ONCE(tp->snd_ssthresh,
+ tcp_vegas_ssthresh(tp));
} else if (diff < alpha) {
/* We don't have enough extra packets
* in the network, so speed up.
@@ -280,7 +281,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp)
tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp);
- tp->snd_ssthresh = tcp_current_ssthresh(sk);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk));
}
/* Wipe the slate clean for the next RTT. */
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index c6e97141eef2..b5a42adfd6ca 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -244,11 +244,11 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
switch (event) {
case CA_EVENT_COMPLETE_CWR:
- tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_westwood_bw_rttmin(sk));
tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
break;
case CA_EVENT_LOSS:
- tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_westwood_bw_rttmin(sk));
/* Update RTT_min when next ack arrives */
w->reset_rtt_min = 1;
break;
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index b22b3dccd05e..9e581154f18f 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -147,7 +147,8 @@ do_vegas:
tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp),
yeah->reno_count));
- tp->snd_ssthresh = tcp_snd_cwnd(tp);
+ WRITE_ONCE(tp->snd_ssthresh,
+ tcp_snd_cwnd(tp));
}
if (yeah->reno_count <= 2)