summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2026-02-03 15:13:30 +0100
committerPaolo Abeni <pabeni@redhat.com>2026-02-03 15:13:31 +0100
commit667539f6dce27aa7db0a711375f94e14e714a698 (patch)
treeb2d27d10227c9b688f9467ac7549e336cff4897b /include
parent72163a19093193e71755d8a2f26b2fdde6aea671 (diff)
parentf85d9c45f1d48a146f37cfd3d244aac4157ea390 (diff)
Merge branch 'accecn-protocol-case-handling-series'
Chia-Yu Chang says: ==================== AccECN protocol case handling series Plesae find the v13 AccECN case handling patch series, which covers several excpetional case handling of Accurate ECN spec (RFC9768), adds new identifiers to be used by CC modules, adds ecn_delta into rate_sample, and keeps the ACE counter for computation, etc. This patch series is part of the full AccECN patch series, which is at https://github.com/L4STeam/linux-net-next/commits/upstream_l4steam/ --- Chia-Yu Chang (13): selftests/net: gro: add self-test for TCP CWR flag tcp: ECT_1_NEGOTIATION and NEEDS_ACCECN identifiers tcp: disable RFC3168 fallback identifier for CC modules tcp: accecn: handle unexpected AccECN negotiation feedback tcp: accecn: retransmit downgraded SYN in AccECN negotiation tcp: add TCP_SYNACK_RETRANS synack_type tcp: accecn: retransmit SYN/ACK without AccECN option or non-AccECN SYN/ACK tcp: accecn: unset ECT if receive or send ACE=0 in AccECN negotiaion tcp: accecn: fallback outgoing half link to non-AccECN tcp: accecn: detect loss ACK w/ AccECN option and add TCP_ACCECN_OPTION_PERSIST tcp: accecn: add tcpi_ecn_mode and tcpi_option2 in tcp_info tcp: accecn: enable AccECN selftests/net: packetdrill: add TCP Accurate ECN cases Ilpo Järvinen (2): tcp: try to avoid safer when ACKs are thinned gro: flushing when CWR is set negatively affects AccECN Documentation/networking/ip-sysctl.rst | 4 +- .../networking/net_cachelines/tcp_sock.rst | 1 + include/linux/tcp.h | 4 +- include/net/inet_ecn.h | 20 +++- include/net/tcp.h | 32 +++++- include/net/tcp_ecn.h | 103 ++++++++++++------ include/uapi/linux/tcp.h | 26 ++++- net/ipv4/inet_connection_sock.c | 3 + net/ipv4/sysctl_net_ipv4.c | 4 +- net/ipv4/tcp.c | 10 ++ net/ipv4/tcp_cong.c | 5 +- net/ipv4/tcp_input.c | 40 ++++++- net/ipv4/tcp_minisocks.c | 43 +++++--- net/ipv4/tcp_offload.c | 3 +- net/ipv4/tcp_output.c | 34 ++++-- net/ipv4/tcp_timer.c | 3 + tools/testing/selftests/drivers/net/gro.c | 81 ++++++++++---- tools/testing/selftests/drivers/net/gro.py | 3 +- .../tcp_accecn_2nd_data_as_first.pkt | 24 ++++ .../tcp_accecn_2nd_data_as_first_connect.pkt | 30 +++++ .../tcp_accecn_3rd_ack_after_synack_rxmt.pkt | 19 ++++ ..._accecn_3rd_ack_ce_updates_received_ce.pkt | 18 +++ .../tcp_accecn_3rd_ack_lost_data_ce.pkt | 22 ++++ .../net/packetdrill/tcp_accecn_3rd_dups.pkt | 26 +++++ .../tcp_accecn_acc_ecn_disabled.pkt | 13 +++ .../tcp_accecn_accecn_then_notecn_syn.pkt | 28 +++++ .../tcp_accecn_accecn_to_rfc3168.pkt | 18 +++ .../tcp_accecn_client_accecn_options_drop.pkt | 34 ++++++ .../tcp_accecn_client_accecn_options_lost.pkt | 38 +++++++ .../tcp_accecn_clientside_disabled.pkt | 12 ++ ...cecn_close_local_close_then_remote_fin.pkt | 25 +++++ .../tcp_accecn_delivered_2ndlargeack.pkt | 25 +++++ ..._accecn_delivered_falseoverflow_detect.pkt | 31 ++++++ .../tcp_accecn_delivered_largeack.pkt | 24 ++++ .../tcp_accecn_delivered_largeack2.pkt | 25 +++++ .../tcp_accecn_delivered_maxack.pkt | 25 +++++ .../tcp_accecn_delivered_updates.pkt | 70 ++++++++++++ .../net/packetdrill/tcp_accecn_ecn3.pkt | 12 ++ .../tcp_accecn_ecn_field_updates_opt.pkt | 35 ++++++ .../packetdrill/tcp_accecn_ipflags_drop.pkt | 14 +++ .../tcp_accecn_listen_opt_drop.pkt | 16 +++ .../tcp_accecn_multiple_syn_ack_drop.pkt | 28 +++++ .../tcp_accecn_multiple_syn_drop.pkt | 18 +++ .../tcp_accecn_negotiation_bleach.pkt | 23 ++++ .../tcp_accecn_negotiation_connect.pkt | 23 ++++ .../tcp_accecn_negotiation_listen.pkt | 26 +++++ .../tcp_accecn_negotiation_noopt_connect.pkt | 23 ++++ .../tcp_accecn_negotiation_optenable.pkt | 23 ++++ .../tcp_accecn_no_ecn_after_accecn.pkt | 20 ++++ .../net/packetdrill/tcp_accecn_noopt.pkt | 27 +++++ .../net/packetdrill/tcp_accecn_noprogress.pkt | 27 +++++ .../tcp_accecn_notecn_then_accecn_syn.pkt | 28 +++++ .../tcp_accecn_rfc3168_to_fallback.pkt | 18 +++ .../tcp_accecn_rfc3168_to_rfc3168.pkt | 18 +++ .../tcp_accecn_sack_space_grab.pkt | 28 +++++ .../tcp_accecn_sack_space_grab_with_ts.pkt | 39 +++++++ ...tcp_accecn_serverside_accecn_disabled1.pkt | 20 ++++ ...tcp_accecn_serverside_accecn_disabled2.pkt | 20 ++++ .../tcp_accecn_serverside_broken.pkt | 19 ++++ .../tcp_accecn_serverside_ecn_disabled.pkt | 19 ++++ .../tcp_accecn_serverside_only.pkt | 18 +++ ...n_syn_ace_flags_acked_after_retransmit.pkt | 18 +++ .../tcp_accecn_syn_ace_flags_drop.pkt | 16 +++ ...n_ack_ace_flags_acked_after_retransmit.pkt | 27 +++++ .../tcp_accecn_syn_ack_ace_flags_drop.pkt | 26 +++++ .../net/packetdrill/tcp_accecn_syn_ce.pkt | 13 +++ .../net/packetdrill/tcp_accecn_syn_ect0.pkt | 13 +++ .../net/packetdrill/tcp_accecn_syn_ect1.pkt | 13 +++ .../net/packetdrill/tcp_accecn_synack_ce.pkt | 27 +++++ ..._accecn_synack_ce_updates_delivered_ce.pkt | 22 ++++ .../packetdrill/tcp_accecn_synack_ect0.pkt | 24 ++++ .../packetdrill/tcp_accecn_synack_ect1.pkt | 24 ++++ .../packetdrill/tcp_accecn_synack_rexmit.pkt | 15 +++ .../packetdrill/tcp_accecn_synack_rxmt.pkt | 25 +++++ .../packetdrill/tcp_accecn_tsnoprogress.pkt | 26 +++++ .../net/packetdrill/tcp_accecn_tsprogress.pkt | 25 +++++ 76 files changed, 1680 insertions(+), 102 deletions(-) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt ==================== Link: https://patch.msgid.link/20260131222515.8485-1-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Diffstat (limited to 'include')
-rw-r--r--include/linux/tcp.h4
-rw-r--r--include/net/inet_ecn.h20
-rw-r--r--include/net/tcp.h32
-rw-r--r--include/net/tcp_ecn.h103
-rw-r--r--include/uapi/linux/tcp.h26
5 files changed, 142 insertions, 43 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 367b491ddf97..f72eef31fa23 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -291,7 +291,8 @@ struct tcp_sock {
u8 nonagle : 4,/* Disable Nagle algorithm? */
rate_app_limited:1; /* rate_{delivered,interval_us} limited? */
u8 received_ce_pending:4, /* Not yet transmit cnt of received_ce */
- unused2:4;
+ accecn_opt_sent_w_dsack:1,/* Sent ACCECN opt in previous ACK w/ D-SACK */
+ unused2:3;
u8 accecn_minlen:2,/* Minimum length of AccECN option sent */
est_ecnfield:2,/* ECN field for AccECN delivered estimates */
accecn_opt_demand:2,/* Demand AccECN option for n next ACKs */
@@ -342,6 +343,7 @@ struct tcp_sock {
u32 rate_interval_us; /* saved rate sample: time elapsed */
u32 rcv_rtt_last_tsecr;
u32 delivered_ecn_bytes[3];
+ u16 pkts_acked_ewma;/* Pkts acked EWMA for AccECN cep heuristic */
u64 first_tx_mstamp; /* start of window send phase */
u64 delivered_mstamp; /* time we reached "delivered" */
u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index ea32393464a2..827b87a95dab 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -51,11 +51,25 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
return outer;
}
+/* Apply either ECT(0) or ECT(1) */
+static inline void __INET_ECN_xmit(struct sock *sk, bool use_ect_1)
+{
+ __u8 ect = use_ect_1 ? INET_ECN_ECT_1 : INET_ECN_ECT_0;
+
+ /* Mask the complete byte in case the connection alternates between
+ * ECT(0) and ECT(1).
+ */
+ inet_sk(sk)->tos &= ~INET_ECN_MASK;
+ inet_sk(sk)->tos |= ect;
+ if (inet6_sk(sk)) {
+ inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
+ inet6_sk(sk)->tclass |= ect;
+ }
+}
+
static inline void INET_ECN_xmit(struct sock *sk)
{
- inet_sk(sk)->tos |= INET_ECN_ECT_0;
- if (inet6_sk(sk) != NULL)
- inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
+ __INET_ECN_xmit(sk, false);
}
static inline void INET_ECN_dontxmit(struct sock *sk)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index cecec1a92d5e..6c12be2cdd4d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -552,6 +552,7 @@ enum tcp_synack_type {
TCP_SYNACK_NORMAL,
TCP_SYNACK_FASTOPEN,
TCP_SYNACK_COOKIE,
+ TCP_SYNACK_RETRANS,
};
struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
@@ -1215,7 +1216,15 @@ enum tcp_ca_ack_event_flags {
#define TCP_CONG_NON_RESTRICTED BIT(0)
/* Requires ECN/ECT set on all packets */
#define TCP_CONG_NEEDS_ECN BIT(1)
-#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
+/* Require successfully negotiated AccECN capability */
+#define TCP_CONG_NEEDS_ACCECN BIT(2)
+/* Use ECT(1) instead of ECT(0) while the CA is uninitialized */
+#define TCP_CONG_ECT_1_NEGOTIATION BIT(3)
+/* Cannot fallback to RFC3168 during AccECN negotiation */
+#define TCP_CONG_NO_FALLBACK_RFC3168 BIT(4)
+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN | \
+ TCP_CONG_NEEDS_ACCECN | TCP_CONG_ECT_1_NEGOTIATION | \
+ TCP_CONG_NO_FALLBACK_RFC3168)
union tcp_cc_info;
@@ -1356,6 +1365,27 @@ static inline bool tcp_ca_needs_ecn(const struct sock *sk)
return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN;
}
+static inline bool tcp_ca_needs_accecn(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ACCECN;
+}
+
+static inline bool tcp_ca_ect_1_negotiation(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ return icsk->icsk_ca_ops->flags & TCP_CONG_ECT_1_NEGOTIATION;
+}
+
+static inline bool tcp_ca_no_fallback_rfc3168(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ return icsk->icsk_ca_ops->flags & TCP_CONG_NO_FALLBACK_RFC3168;
+}
+
static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
index f13e5cd2b1ac..e9a933641636 100644
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -29,8 +29,15 @@ enum tcp_accecn_option {
TCP_ACCECN_OPTION_DISABLED = 0,
TCP_ACCECN_OPTION_MINIMUM = 1,
TCP_ACCECN_OPTION_FULL = 2,
+ TCP_ACCECN_OPTION_PERSIST = 3,
};
+/* Apply either ECT(0) or ECT(1) based on TCP_CONG_ECT_1_NEGOTIATION flag */
+static inline void INET_ECN_xmit_ect_1_negotiation(struct sock *sk)
+{
+ __INET_ECN_xmit(sk, tcp_ca_ect_1_negotiation(sk));
+}
+
static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp)
{
/* Do not set CWR if in AccECN mode! */
@@ -60,12 +67,6 @@ static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
}
-/* tp->accecn_fail_mode */
-#define TCP_ACCECN_ACE_FAIL_SEND BIT(0)
-#define TCP_ACCECN_ACE_FAIL_RECV BIT(1)
-#define TCP_ACCECN_OPT_FAIL_SEND BIT(2)
-#define TCP_ACCECN_OPT_FAIL_RECV BIT(3)
-
static inline bool tcp_accecn_ace_fail_send(const struct tcp_sock *tp)
{
return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_SEND;
@@ -91,11 +92,6 @@ static inline void tcp_accecn_fail_mode_set(struct tcp_sock *tp, u8 mode)
tp->accecn_fail_mode |= mode;
}
-#define TCP_ACCECN_OPT_NOT_SEEN 0x0
-#define TCP_ACCECN_OPT_EMPTY_SEEN 0x1
-#define TCP_ACCECN_OPT_COUNTER_SEEN 0x2
-#define TCP_ACCECN_OPT_FAIL_SEEN 0x3
-
static inline u8 tcp_accecn_ace(const struct tcphdr *th)
{
return (th->ae << 2) | (th->cwr << 1) | th->ece;
@@ -169,7 +165,9 @@ static inline void tcp_accecn_third_ack(struct sock *sk,
switch (ace) {
case 0x0:
/* Invalid value */
- tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV);
+ if (!TCP_SKB_CB(skb)->sacked)
+ tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV |
+ TCP_ACCECN_OPT_FAIL_RECV);
break;
case 0x7:
case 0x5:
@@ -398,6 +396,7 @@ static inline void tcp_accecn_init_counters(struct tcp_sock *tp)
tp->received_ce_pending = 0;
__tcp_accecn_init_bytes_counters(tp->received_ecn_bytes);
__tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes);
+ tp->accecn_opt_sent_w_dsack = 0;
tp->accecn_minlen = 0;
tp->accecn_opt_demand = 0;
tp->est_ecnfield = 0;
@@ -467,6 +466,26 @@ static inline u8 tcp_accecn_option_init(const struct sk_buff *skb,
return TCP_ACCECN_OPT_COUNTER_SEEN;
}
+static inline void tcp_ecn_rcv_synack_accecn(struct sock *sk,
+ const struct sk_buff *skb, u8 dsf)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
+ tp->syn_ect_rcv = dsf & INET_ECN_MASK;
+ /* Demand Accurate ECN option in response to the SYN on the SYN/ACK
+ * and the TCP server will try to send one more packet with an AccECN
+ * Option at a later point during the connection.
+ */
+ if (tp->rx_opt.accecn &&
+ tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
+ u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn);
+
+ tcp_accecn_saw_opt_fail_recv(tp, saw_opt);
+ tp->accecn_opt_demand = 2;
+ }
+}
+
/* See Table 2 of the AccECN draft */
static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb,
const struct tcphdr *th, u8 ip_dsfield)
@@ -489,32 +508,32 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
break;
case 0x1:
- case 0x5:
/* +========+========+============+=============+
* | A | B | SYN/ACK | Feedback |
* | | | B->A | Mode of A |
* | | | AE CWR ECE | |
* +========+========+============+=============+
- * | AccECN | Nonce | 1 0 1 | (Reserved) |
* | AccECN | ECN | 0 0 1 | Classic ECN |
* | Nonce | AccECN | 0 0 1 | Classic ECN |
* | ECN | AccECN | 0 0 1 | Classic ECN |
* +========+========+============+=============+
*/
- if (tcp_ecn_mode_pending(tp))
- /* Downgrade from AccECN, or requested initially */
+ if (tcp_ca_no_fallback_rfc3168(sk))
+ tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
+ else
tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
break;
- default:
- tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
- tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK;
- if (tp->rx_opt.accecn &&
- tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
- u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn);
-
- tcp_accecn_saw_opt_fail_recv(tp, saw_opt);
- tp->accecn_opt_demand = 2;
+ case 0x5:
+ if (tcp_ecn_mode_pending(tp)) {
+ tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield);
+ if (INET_ECN_is_ce(ip_dsfield)) {
+ tp->received_ce++;
+ tp->received_ce_pending++;
+ }
}
+ break;
+ default:
+ tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield);
if (INET_ECN_is_ce(ip_dsfield) &&
tcp_accecn_validate_syn_feedback(sk, ace,
tp->syn_ect_snt)) {
@@ -525,9 +544,11 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb
}
}
-static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th,
+static inline void tcp_ecn_rcv_syn(struct sock *sk, const struct tcphdr *th,
const struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (tcp_ecn_mode_pending(tp)) {
if (!tcp_accecn_syn_requested(th)) {
/* Downgrade to classic ECN feedback */
@@ -539,7 +560,8 @@ static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th,
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
}
}
- if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr))
+ if (tcp_ecn_mode_rfc3168(tp) &&
+ (!th->ece || !th->cwr || tcp_ca_no_fallback_rfc3168(sk)))
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
}
@@ -561,7 +583,7 @@ static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
else if (tcp_ca_needs_ecn(sk) ||
tcp_bpf_ca_needs_ecn(sk))
- INET_ECN_xmit(sk);
+ INET_ECN_xmit_ect_1_negotiation(sk);
if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) {
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
@@ -579,7 +601,8 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
bool use_ecn, use_accecn;
u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn);
- use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN;
+ use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN ||
+ tcp_ca_needs_accecn(sk);
use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN ||
tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN ||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn;
@@ -595,7 +618,7 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
if (use_ecn) {
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
- INET_ECN_xmit(sk);
+ INET_ECN_xmit_ect_1_negotiation(sk);
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
if (use_accecn) {
@@ -619,12 +642,22 @@ static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
}
static inline void
-tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
+tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th,
+ enum tcp_synack_type synack_type)
{
- if (tcp_rsk(req)->accecn_ok)
- tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv);
- else if (inet_rsk(req)->ecn_ok)
- th->ece = 1;
+ /* Accurate ECN shall retransmit SYN/ACK with ACE=0 if the
+ * previously retransmitted SYN/ACK also times out.
+ */
+ if (!req->num_timeout || synack_type != TCP_SYNACK_RETRANS) {
+ if (tcp_rsk(req)->accecn_ok)
+ tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv);
+ else if (inet_rsk(req)->ecn_ok)
+ th->ece = 1;
+ } else if (tcp_rsk(req)->accecn_ok) {
+ th->ae = 0;
+ th->cwr = 0;
+ th->ece = 0;
+ }
}
static inline bool tcp_accecn_option_beacon_check(const struct sock *sk)
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index dce3113787a7..03772dd4d399 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -226,6 +226,24 @@ enum tcp_ca_state {
#define TCPF_CA_Loss (1<<TCP_CA_Loss)
};
+/* Values for tcpi_ecn_mode after negotiation */
+#define TCPI_ECN_MODE_DISABLED 0x0
+#define TCPI_ECN_MODE_RFC3168 0x1
+#define TCPI_ECN_MODE_ACCECN 0x2
+#define TCPI_ECN_MODE_PENDING 0x3
+
+/* Values for accecn_opt_seen */
+#define TCP_ACCECN_OPT_NOT_SEEN 0x0
+#define TCP_ACCECN_OPT_EMPTY_SEEN 0x1
+#define TCP_ACCECN_OPT_COUNTER_SEEN 0x2
+#define TCP_ACCECN_OPT_FAIL_SEEN 0x3
+
+/* Values for accecn_fail_mode */
+#define TCP_ACCECN_ACE_FAIL_SEND BIT(0)
+#define TCP_ACCECN_ACE_FAIL_RECV BIT(1)
+#define TCP_ACCECN_OPT_FAIL_SEND BIT(2)
+#define TCP_ACCECN_OPT_FAIL_RECV BIT(3)
+
struct tcp_info {
__u8 tcpi_state;
__u8 tcpi_ca_state;
@@ -316,15 +334,17 @@ struct tcp_info {
* in milliseconds, including any
* unfinished recovery.
*/
- __u32 tcpi_received_ce; /* # of CE marks received */
+ __u32 tcpi_received_ce; /* # of CE marked segments received */
__u32 tcpi_delivered_e1_bytes; /* Accurate ECN byte counters */
__u32 tcpi_delivered_e0_bytes;
__u32 tcpi_delivered_ce_bytes;
__u32 tcpi_received_e1_bytes;
__u32 tcpi_received_e0_bytes;
__u32 tcpi_received_ce_bytes;
- __u16 tcpi_accecn_fail_mode;
- __u16 tcpi_accecn_opt_seen;
+ __u32 tcpi_ecn_mode:2,
+ tcpi_accecn_opt_seen:2,
+ tcpi_accecn_fail_mode:4,
+ tcpi_options2:24;
};
/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */