diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
| -rw-r--r-- | net/ipv4/tcp_input.c | 201 |
1 files changed, 37 insertions, 164 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8ec92dec321a..19a1542883df 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1451,11 +1451,6 @@ static u8 tcp_sacktag_one(struct sock *sk, tp->sacked_out += pcount; /* Out-of-order packets delivered */ state->sack_delivered += pcount; - - /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ - if (tp->lost_skb_hint && - before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) - tp->lost_cnt_hint += pcount; } /* D-SACK. We can detect redundant retransmission in S|R and plain R @@ -1496,9 +1491,6 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, tcp_skb_timestamp_us(skb)); tcp_rate_skb_delivered(sk, skb, state->rate); - if (skb == tp->lost_skb_hint) - tp->lost_cnt_hint += pcount; - TCP_SKB_CB(prev)->end_seq += shifted; TCP_SKB_CB(skb)->seq += shifted; @@ -1531,10 +1523,6 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, if (skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = prev; - if (skb == tp->lost_skb_hint) { - tp->lost_skb_hint = prev; - tp->lost_cnt_hint -= tcp_skb_pcount(prev); - } TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor; @@ -2151,12 +2139,6 @@ static inline void tcp_init_undo(struct tcp_sock *tp) tp->undo_retrans = -1; } -static bool tcp_is_rack(const struct sock *sk) -{ - return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & - TCP_RACK_LOSS_DETECTION; -} - /* If we detect SACK reneging, forget all SACK information * and reset tags completely, otherwise preserve SACKs. If receiver * dropped its ofo queue, we will know this due to reneging detection. @@ -2182,8 +2164,7 @@ static void tcp_timeout_mark_lost(struct sock *sk) skb_rbtree_walk_from(skb) { if (is_reneg) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; - else if (tcp_is_rack(sk) && skb != head && - tcp_rack_skb_timeout(tp, skb, 0) > 0) + else if (skb != head && tcp_rack_skb_timeout(tp, skb, 0) > 0) continue; /* Don't mark recently sent ones lost yet */ tcp_mark_skb_lost(sk, skb); } @@ -2264,22 +2245,6 @@ static bool tcp_check_sack_reneging(struct sock *sk, int *ack_flag) return false; } -/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs - * counter when SACK is enabled (without SACK, sacked_out is used for - * that purpose). - * - * With reordering, holes may still be in flight, so RFC3517 recovery - * uses pure sacked_out (total number of SACKed segments) even though - * it violates the RFC that uses duplicate ACKs, often these are equal - * but when e.g. out-of-window ACKs or packet duplication occurs, - * they differ. Since neither occurs due to loss, TCP should really - * ignore them. - */ -static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) -{ - return tp->sacked_out + 1; -} - /* Linux NewReno/SACK/ECN state machine. * -------------------------------------- * @@ -2332,13 +2297,7 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) * * If the receiver supports SACK: * - * RFC6675/3517: It is the conventional algorithm. A packet is - * considered lost if the number of higher sequence packets - * SACKed is greater than or equal the DUPACK thoreshold - * (reordering). This is implemented in tcp_mark_head_lost and - * tcp_update_scoreboard. - * - * RACK (draft-ietf-tcpm-rack-01): it is a newer algorithm + * RACK (RFC8985): RACK is a newer loss detection algorithm * (2017-) that checks timing instead of counting DUPACKs. * Essentially a packet is considered lost if it's not S/ACKed * after RTT + reordering_window, where both metrics are @@ -2353,8 +2312,8 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) * is lost (NewReno). This heuristics are the same in NewReno * and SACK. * - * Really tricky (and requiring careful tuning) part of algorithm - * is hidden in functions tcp_time_to_recover() and tcp_xmit_retransmit_queue(). + * The really tricky (and requiring careful tuning) part of the algorithm + * is hidden in the RACK code in tcp_recovery.c and tcp_xmit_retransmit_queue(). * The first determines the moment _when_ we should reduce CWND and, * hence, slow down forward transmission. In fact, it determines the moment * when we decide that hole is caused by loss, rather than by a reorder. @@ -2377,83 +2336,10 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) * Main question: may we further continue forward transmission * with the same cwnd? */ -static bool tcp_time_to_recover(struct sock *sk, int flag) +static bool tcp_time_to_recover(const struct tcp_sock *tp) { - struct tcp_sock *tp = tcp_sk(sk); - - /* Trick#1: The loss is proven. */ - if (tp->lost_out) - return true; - - /* Not-A-Trick#2 : Classic rule... */ - if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering) - return true; - - return false; -} - -/* Detect loss in event "A" above by marking head of queue up as lost. - * For RFC3517 SACK, a segment is considered lost if it - * has at least tp->reordering SACKed seqments above it; "packets" refers to - * the maximum SACKed segments to pass before reaching this limit. - */ -static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - int cnt; - /* Use SACK to deduce losses of new sequences sent during recovery */ - const u32 loss_high = tp->snd_nxt; - - WARN_ON(packets > tp->packets_out); - skb = tp->lost_skb_hint; - if (skb) { - /* Head already handled? */ - if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una)) - return; - cnt = tp->lost_cnt_hint; - } else { - skb = tcp_rtx_queue_head(sk); - cnt = 0; - } - - skb_rbtree_walk_from(skb) { - /* TODO: do this better */ - /* this is not the most efficient way to do this... */ - tp->lost_skb_hint = skb; - tp->lost_cnt_hint = cnt; - - if (after(TCP_SKB_CB(skb)->end_seq, loss_high)) - break; - - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) - cnt += tcp_skb_pcount(skb); - - if (cnt > packets) - break; - - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) - tcp_mark_skb_lost(sk, skb); - - if (mark_head) - break; - } - tcp_verify_left_out(tp); -} - -/* Account newly detected lost packet(s) */ - -static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (tcp_is_sack(tp)) { - int sacked_upto = tp->sacked_out - tp->reordering; - if (sacked_upto >= 0) - tcp_mark_head_lost(sk, sacked_upto, 0); - else if (fast_rexmit) - tcp_mark_head_lost(sk, 1, 1); - } + /* Has loss detection marked at least one packet lost? */ + return tp->lost_out != 0; } static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when) @@ -2479,20 +2365,33 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp) { const struct sock *sk = (const struct sock *)tp; - if (tp->retrans_stamp && - tcp_tsopt_ecr_before(tp, tp->retrans_stamp)) - return true; /* got echoed TS before first retransmission */ + /* Received an echoed timestamp before the first retransmission? */ + if (tp->retrans_stamp) + return tcp_tsopt_ecr_before(tp, tp->retrans_stamp); + + /* We set tp->retrans_stamp upon the first retransmission of a loss + * recovery episode, so normally if tp->retrans_stamp is 0 then no + * retransmission has happened yet (likely due to TSQ, which can cause + * fast retransmits to be delayed). So if snd_una advanced while + * (tp->retrans_stamp is 0 then apparently a packet was merely delayed, + * not lost. But there are exceptions where we retransmit but then + * clear tp->retrans_stamp, so we check for those exceptions. + */ - /* Check if nothing was retransmitted (retrans_stamp==0), which may - * happen in fast recovery due to TSQ. But we ignore zero retrans_stamp - * in TCP_SYN_SENT, since when we set FLAG_SYN_ACKED we also clear - * retrans_stamp even if we had retransmitted the SYN. + /* (1) For non-SACK connections, tcp_is_non_sack_preventing_reopen() + * clears tp->retrans_stamp when snd_una == high_seq. */ - if (!tp->retrans_stamp && /* no record of a retransmit/SYN? */ - sk->sk_state != TCP_SYN_SENT) /* not the FLAG_SYN_ACKED case? */ - return true; /* nothing was retransmitted */ + if (!tcp_is_sack(tp) && !before(tp->snd_una, tp->high_seq)) + return false; - return false; + /* (2) In TCP_SYN_SENT tcp_clean_rtx_queue() clears tp->retrans_stamp + * when setting FLAG_SYN_ACKED is set, even if the SYN was + * retransmitted. + */ + if (sk->sk_state == TCP_SYN_SENT) + return false; + + return true; /* tp->retrans_stamp is zero; no retransmit yet */ } /* Undo procedures. */ @@ -2881,8 +2780,6 @@ void tcp_simple_retransmit(struct sock *sk) tcp_mark_skb_lost(sk, skb); } - tcp_clear_retrans_hints_partial(tp); - if (!tp->lost_out) return; @@ -2990,17 +2887,8 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack, *rexmit = REXMIT_LOST; } -static bool tcp_force_fast_retransmit(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - return after(tcp_highest_sack_seq(tp), - tp->snd_una + tp->reordering * tp->mss_cache); -} - /* Undo during fast recovery after partial ACK. */ -static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una, - bool *do_lost) +static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una) { struct tcp_sock *tp = tcp_sk(sk); @@ -3025,9 +2913,6 @@ static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una, tcp_undo_cwnd_reduction(sk, true); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); tcp_try_keep_open(sk); - } else { - /* Partial ACK arrived. Force fast retransmit. */ - *do_lost = tcp_force_fast_retransmit(sk); } return false; } @@ -3041,7 +2926,7 @@ static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag) if (unlikely(tcp_is_reno(tp))) { tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED); - } else if (tcp_is_rack(sk)) { + } else { u32 prior_retrans = tp->retrans_out; if (tcp_rack_mark_lost(sk)) @@ -3068,10 +2953,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int fast_rexmit = 0, flag = *ack_flag; + int flag = *ack_flag; bool ece_ack = flag & FLAG_ECE; - bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) && - tcp_force_fast_retransmit(sk)); if (!tp->packets_out && tp->sacked_out) tp->sacked_out = 0; @@ -3120,7 +3003,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, if (!(flag & FLAG_SND_UNA_ADVANCED)) { if (tcp_is_reno(tp)) tcp_add_reno_sack(sk, num_dupack, ece_ack); - } else if (tcp_try_undo_partial(sk, prior_snd_una, &do_lost)) + } else if (tcp_try_undo_partial(sk, prior_snd_una)) return; if (tcp_try_undo_dsack(sk)) @@ -3128,7 +3011,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, tcp_identify_packet_loss(sk, ack_flag); if (icsk->icsk_ca_state != TCP_CA_Recovery) { - if (!tcp_time_to_recover(sk, flag)) + if (!tcp_time_to_recover(tp)) return; /* Undo reverts the recovery state. If loss is evident, * starts a new recovery (e.g. reordering then loss); @@ -3157,7 +3040,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, tcp_try_undo_dsack(sk); tcp_identify_packet_loss(sk, ack_flag); - if (!tcp_time_to_recover(sk, flag)) { + if (!tcp_time_to_recover(tp)) { tcp_try_to_open(sk, flag); return; } @@ -3175,11 +3058,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, /* Otherwise enter Recovery state */ tcp_enter_recovery(sk, ece_ack); - fast_rexmit = 1; } - if (!tcp_is_rack(sk) && do_lost) - tcp_update_scoreboard(sk, fast_rexmit); *rexmit = REXMIT_LOST; } @@ -3435,8 +3315,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb, next = skb_rb_next(skb); if (unlikely(skb == tp->retransmit_skb_hint)) tp->retransmit_skb_hint = NULL; - if (unlikely(skb == tp->lost_skb_hint)) - tp->lost_skb_hint = NULL; tcp_highest_sack_replace(sk, skb, next); tcp_rtx_queue_unlink_and_free(skb, sk); } @@ -3494,14 +3372,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb, if (flag & FLAG_RETRANS_DATA_ACKED) flag &= ~FLAG_ORIG_SACK_ACKED; } else { - int delta; - /* Non-retransmitted hole got filled? That's reordering */ if (before(reord, prior_fack)) tcp_check_sack_reordering(sk, reord, 0); - - delta = prior_sacked - tp->sacked_out; - tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta); } } else if (skb && rtt_update && sack_rtt_us >= 0 && sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, |
