diff options
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/tcp.h | 4 | ||||
| -rw-r--r-- | include/net/netns/ipv4.h | 1 | ||||
| -rw-r--r-- | include/net/tcp.h | 3 | ||||
| -rw-r--r-- | include/net/tcp_ecn.h | 52 |
4 files changed, 59 insertions, 1 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 73557656cb2d..f637b659b35a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -275,6 +275,7 @@ struct tcp_sock { u32 mdev_us; /* medium deviation */ u32 rtt_seq; /* sequence number to update rttvar */ u64 tcp_wstamp_ns; /* departure time for next sent data packet */ + u64 accecn_opt_tstamp; /* Last AccECN option sent timestamp */ struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ struct sk_buff *highest_sack; /* skb just after the highest * skb with SACKed bit set @@ -296,7 +297,8 @@ struct tcp_sock { unused2:4; u8 accecn_minlen:2,/* Minimum length of AccECN option sent */ est_ecnfield:2,/* ECN field for AccECN delivered estimates */ - unused3:4; + accecn_opt_demand:2,/* Demand AccECN option for n next ACKs */ + prev_ecnfield:2; /* ECN bits from the previous segment */ __be32 pred_flags; u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ u64 tcp_mstamp; /* most recent packet received/sent */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index acbb7dd497e1..34eb3aecb3f2 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -149,6 +149,7 @@ struct netns_ipv4 { u8 sysctl_tcp_ecn; u8 sysctl_tcp_ecn_option; + u8 sysctl_tcp_ecn_option_beacon; u8 sysctl_tcp_ecn_fallback; u8 sysctl_ip_default_ttl; diff --git a/include/net/tcp.h b/include/net/tcp.h index 6be29129465e..78dd7b8a4145 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -100,6 +100,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* Maximal number of window scale according to RFC1323 */ #define TCP_MAX_WSCALE 14U +/* Default sending frequency of accurate ECN option per RTT */ +#define TCP_ACCECN_OPTION_BEACON 3 + /* urg_data states */ #define TCP_URG_VALID 0x0100 #define TCP_URG_NOTYET 0x0200 diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 08c7f4757e4e..133fb6b79500 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -176,6 +176,17 @@ static inline void tcp_accecn_third_ack(struct sock *sk, } } +/* Demand the minimum # to send AccECN optnio */ +static inline void tcp_accecn_opt_demand_min(struct sock *sk, + u8 opt_demand_min) +{ + struct tcp_sock *tp = tcp_sk(sk); + u8 opt_demand; + + opt_demand = max_t(u8, opt_demand_min, tp->accecn_opt_demand); + tp->accecn_opt_demand = opt_demand; +} + /* Maps IP ECN field ECT/CE code point to AccECN option field number, given * we are sending fields with Accurate ECN Order 1: ECT(1), CE, ECT(0). */ @@ -256,6 +267,7 @@ static inline void tcp_ecn_received_counters(struct sock *sk, u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK; u8 is_ce = INET_ECN_is_ce(ecnfield); struct tcp_sock *tp = tcp_sk(sk); + bool ecn_edge; if (!INET_ECN_is_not_ect(ecnfield)) { u32 pcount = is_ce * max_t(u16, 1, skb_shinfo(skb)->gso_segs); @@ -274,9 +286,34 @@ static inline void tcp_ecn_received_counters(struct sock *sk, if (len > 0) { u8 minlen = tcp_ecnfield_to_accecn_optfield(ecnfield); + u32 oldbytes = tp->received_ecn_bytes[ecnfield - 1]; + u32 bytes_mask = GENMASK_U32(31, 22); + tp->received_ecn_bytes[ecnfield - 1] += len; tp->accecn_minlen = max_t(u8, tp->accecn_minlen, minlen); + + /* Send AccECN option at least once per 2^22-byte + * increase in any ECN byte counter. + */ + if ((tp->received_ecn_bytes[ecnfield - 1] ^ oldbytes) & + bytes_mask) { + tcp_accecn_opt_demand_min(sk, 1); + } + } + } + + ecn_edge = tp->prev_ecnfield != ecnfield; + if (ecn_edge || is_ce) { + tp->prev_ecnfield = ecnfield; + /* Demand Accurate ECN change-triggered ACKs. Two ACK are + * demanded to indicate unambiguously the ecnfield value + * in the latter ACK. + */ + if (tcp_ecn_mode_accecn(tp)) { + if (ecn_edge) + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; + tp->accecn_opt_demand = 2; } } } @@ -349,6 +386,7 @@ static inline void tcp_accecn_init_counters(struct tcp_sock *tp) __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes); __tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes); tp->accecn_minlen = 0; + tp->accecn_opt_demand = 0; tp->est_ecnfield = 0; } @@ -431,6 +469,7 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct tcphdr *th, default: tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK; + tp->accecn_opt_demand = 2; if (INET_ECN_is_ce(ip_dsfield) && tcp_accecn_validate_syn_feedback(sk, ace, tp->syn_ect_snt)) { @@ -451,6 +490,7 @@ static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th, } else { tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK; + tp->prev_ecnfield = tp->syn_ect_rcv; tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); } } @@ -542,4 +582,16 @@ tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) th->ece = 1; } +static inline bool tcp_accecn_option_beacon_check(const struct sock *sk) +{ + u32 ecn_beacon = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option_beacon); + const struct tcp_sock *tp = tcp_sk(sk); + + if (!ecn_beacon) + return false; + + return tcp_stamp_us_delta(tp->tcp_mstamp, tp->accecn_opt_tstamp) * ecn_beacon >= + (tp->srtt_us >> 3); +} + #endif /* _LINUX_TCP_ECN_H */ |
