From 70e939ddea7f014b94fe001db65c3efc986e4add Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Oct 2021 10:59:17 -0700 Subject: net: add skb_get_dsfield() helper skb_get_dsfield(skb) gets dsfield from skb, or -1 if an error was found. This is basically a wrapper around ipv4_get_dsfield() and ipv6_get_dsfield(). Used by following patch for fq_codel. Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Ingemar Johansson S Cc: Tom Henderson Signed-off-by: David S. Miller --- include/net/inet_ecn.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index ba77f47ef61e..ea32393464a2 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -188,6 +188,23 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) return 0; } +static inline int skb_get_dsfield(struct sk_buff *skb) +{ + switch (skb_protocol(skb, true)) { + case cpu_to_be16(ETH_P_IP): + if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) + break; + return ipv4_get_dsfield(ip_hdr(skb)); + + case cpu_to_be16(ETH_P_IPV6): + if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) + break; + return ipv6_get_dsfield(ipv6_hdr(skb)); + } + + return -1; +} + static inline int INET_ECN_set_ect1(struct sk_buff *skb) { switch (skb_protocol(skb, true)) { -- cgit v1.2.3 From e72aeb9ee0e34c57dc90793d0bf82cab9624d64e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Oct 2021 10:59:18 -0700 Subject: fq_codel: implement L4S style ce_threshold_ect1 marking Add TCA_FQ_CODEL_CE_THRESHOLD_ECT1 boolean option to select Low Latency, Low Loss, Scalable Throughput (L4S) style marking, along with ce_threshold. If enabled, only packets with ECT(1) can be transformed to CE if their sojourn time is above the ce_threshold. Note that this new option does not change rules for codel law. 
In particular, if TCA_FQ_CODEL_ECN is left enabled (this is the default when fq_codel qdisc is created), ECT(0) packets can still get CE if codel law (as governed by limit/target) decides so. Section 4.3.b of current draft [1] states: b. A scheduler with per-flow queues such as FQ-CoDel or FQ-PIE can be used for L4S. For instance within each queue of an FQ-CoDel system, as well as a CoDel AQM, there is typically also ECN marking at an immediate (unsmoothed) shallow threshold to support use in data centres (see Sec.5.2.7 of [RFC8290]). This can be modified so that the shallow threshold is solely applied to ECT(1) packets. Then if there is a flow of non-ECN or ECT(0) packets in the per-flow-queue, the Classic AQM (e.g. CoDel) is applied; while if there is a flow of ECT(1) packets in the queue, the shallower (typically sub-millisecond) threshold is applied. Tested: tc qd replace dev eth1 root fq_codel ce_threshold_ect1 50usec netperf ... -t TCP_STREAM -- -K dctcp tc -s -d qd sh dev eth1 qdisc fq_codel 8022: root refcnt 32 limit 10240p flows 1024 quantum 9212 target 5ms ce_threshold_ect1 49us interval 100ms memory_limit 32Mb ecn drop_batch 64 Sent 14388596616 bytes 9543449 pkt (dropped 0, overlimits 0 requeues 152013) backlog 0b 0p requeues 152013 maxpacket 68130 drop_overlimit 0 new_flow_count 95678 ecn_mark 0 ce_mark 7639 new_flows_len 0 old_flows_len 0 [1] L4S current draft: https://datatracker.ietf.org/doc/html/draft-ietf-tsvwg-l4s-arch Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Ingemar Johansson S Cc: Tom Henderson Cc: Bob Briscoe Signed-off-by: David S.
Miller --- include/net/codel.h | 2 ++ include/net/codel_impl.h | 18 +++++++++++++++--- include/uapi/linux/pkt_sched.h | 1 + 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/codel.h b/include/net/codel.h index a6e428f80135..5e8b181b76b8 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -102,6 +102,7 @@ static inline u32 codel_time_to_us(codel_time_t val) * @interval: width of moving time window * @mtu: device mtu, or minimal queue backlog in bytes. * @ecn: is Explicit Congestion Notification enabled + * @ce_threshold_ect1: if ce_threshold only marks ECT(1) packets */ struct codel_params { codel_time_t target; @@ -109,6 +110,7 @@ struct codel_params { codel_time_t interval; u32 mtu; bool ecn; + bool ce_threshold_ect1; }; /** diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h index d289b91dcd65..7af2c3eb3c43 100644 --- a/include/net/codel_impl.h +++ b/include/net/codel_impl.h @@ -54,6 +54,7 @@ static void codel_params_init(struct codel_params *params) params->interval = MS2TIME(100); params->target = MS2TIME(5); params->ce_threshold = CODEL_DISABLED_THRESHOLD; + params->ce_threshold_ect1 = false; params->ecn = false; } @@ -246,9 +247,20 @@ static struct sk_buff *codel_dequeue(void *ctx, vars->rec_inv_sqrt); } end: - if (skb && codel_time_after(vars->ldelay, params->ce_threshold) && - INET_ECN_set_ce(skb)) - stats->ce_mark++; + if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) { + bool set_ce = true; + + if (params->ce_threshold_ect1) { + /* Note: if skb_get_dsfield() returns -1, following + * gives INET_ECN_MASK, which is != INET_ECN_ECT_1. 
+ */ + u8 ecn = skb_get_dsfield(skb) & INET_ECN_MASK; + + set_ce = (ecn == INET_ECN_ECT_1); + } + if (set_ce && INET_ECN_set_ce(skb)) + stats->ce_mark++; + } return skb; } diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index ec88590b3198..6be9a84cccfa 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -840,6 +840,7 @@ enum { TCA_FQ_CODEL_CE_THRESHOLD, TCA_FQ_CODEL_DROP_BATCH_SIZE, TCA_FQ_CODEL_MEMORY_LIMIT, + TCA_FQ_CODEL_CE_THRESHOLD_ECT1, __TCA_FQ_CODEL_MAX }; -- cgit v1.2.3