summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/net/pkt_sched.h8
-rw-r--r--include/uapi/linux/pkt_sched.h1
-rw-r--r--net/sched/sch_fq.c148
3 files changed, 110 insertions, 47 deletions
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 15960564e0c3..9fa1d0794dfa 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -20,10 +20,10 @@ struct qdisc_walker {
int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *);
};
-static inline void *qdisc_priv(struct Qdisc *q)
-{
- return &q->privdata;
-}
+#define qdisc_priv(q) \
+ _Generic(q, \
+ const struct Qdisc * : (const void *)&q->privdata, \
+ struct Qdisc * : (void *)&q->privdata)
static inline struct Qdisc *qdisc_from_priv(void *priv)
{
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 3f85ae578056..579f641846b8 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -962,6 +962,7 @@ struct tc_fq_qd_stats {
__u64 ce_mark; /* packets above ce_threshold */
__u64 horizon_drops;
__u64 horizon_caps;
+ __u64 fastpath_packets;
};
/* Heavy-Hitter Filter */
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index f59a2cb2c803..681bbf34b707 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -2,7 +2,7 @@
/*
* net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing)
*
- * Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com>
+ * Copyright (C) 2013-2023 Eric Dumazet <edumazet@google.com>
*
* Meant to be mostly used for locally generated traffic :
* Fast classification depends on skb->sk being set before reaching us.
@@ -73,7 +73,13 @@ struct fq_flow {
struct sk_buff *tail; /* last skb in the list */
unsigned long age; /* (jiffies | 1UL) when flow was emptied, for gc */
};
- struct rb_node fq_node; /* anchor in fq_root[] trees */
+ union {
+ struct rb_node fq_node; /* anchor in fq_root[] trees */
+ /* Following field is only used for q->internal,
+ * because q->internal is not hashed in fq_root[]
+ */
+ u64 stat_fastpath_packets;
+ };
struct sock *sk;
u32 socket_hash; /* sk_hash */
int qlen; /* number of packets in flow queue */
@@ -104,6 +110,9 @@ struct fq_sched_data {
unsigned long unthrottle_latency_ns;
struct fq_flow internal; /* for non classified or high prio packets */
+
+/* Read mostly cache line */
+
u32 quantum;
u32 initial_quantum;
u32 flow_refill_delay;
@@ -117,22 +126,27 @@ struct fq_sched_data {
u8 rate_enable;
u8 fq_trees_log;
u8 horizon_drop;
+ u32 timer_slack; /* hrtimer slack in ns */
+
+/* Read/Write fields. */
+
u32 flows;
- u32 inactive_flows;
+ u32 inactive_flows; /* Flows with no packet to send. */
u32 throttled_flows;
- u64 stat_gc_flows;
- u64 stat_internal_packets;
u64 stat_throttled;
+ struct qdisc_watchdog watchdog;
+ u64 stat_gc_flows;
+
+/* Seldom used fields. */
+
+ u64 stat_internal_packets; /* aka highprio */
u64 stat_ce_mark;
u64 stat_horizon_drops;
u64 stat_horizon_caps;
u64 stat_flows_plimit;
u64 stat_pkts_too_long;
u64 stat_allocation_errors;
-
- u32 timer_slack; /* hrtimer slack in ns */
- struct qdisc_watchdog watchdog;
};
/*
@@ -258,17 +272,64 @@ static void fq_gc(struct fq_sched_data *q,
kmem_cache_free_bulk(fq_flow_cachep, fcnt, tofree);
}
-static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
+/* Fast path can be used if :
+ * 1) Packet tstamp is in the past.
+ * 2) FQ qlen == 0 OR
+ * (no flow is currently eligible for transmit,
+ * AND fast path queue has less than 8 packets)
+ * 3) No SO_MAX_PACING_RATE on the socket (if any).
+ * 4) No @maxrate attribute on this qdisc,
+ *
+ * FQ can not use generic TCQ_F_CAN_BYPASS infrastructure.
+ */
+static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb)
+{
+ const struct fq_sched_data *q = qdisc_priv(sch);
+ const struct sock *sk;
+
+ if (fq_skb_cb(skb)->time_to_send > q->ktime_cache)
+ return false;
+
+ if (sch->q.qlen != 0) {
+ /* Even if some packets are stored in this qdisc,
+ * we can still enable fast path if all of them are
+ * scheduled in the future (ie no flows are eligible)
+ * or in the fast path queue.
+ */
+ if (q->flows != q->inactive_flows + q->throttled_flows)
+ return false;
+
+ /* Do not allow fast path queue to explode, we want Fair Queue mode
+ * under pressure.
+ */
+ if (q->internal.qlen >= 8)
+ return false;
+ }
+
+ sk = skb->sk;
+ if (sk && sk_fullsock(sk) && !sk_is_tcp(sk) &&
+ sk->sk_max_pacing_rate != ~0UL)
+ return false;
+
+ if (q->flow_max_rate != ~0UL)
+ return false;
+
+ return true;
+}
+
+static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb)
{
+ struct fq_sched_data *q = qdisc_priv(sch);
struct rb_node **p, *parent;
struct sock *sk = skb->sk;
struct rb_root *root;
struct fq_flow *f;
/* warning: no starvation prevention... */
- if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
+ if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) {
+ q->stat_internal_packets++; /* highprio packet */
return &q->internal;
-
+ }
/* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket
* or a listener (SYNCOOKIE mode)
* 1) request sockets are not full blown,
@@ -299,11 +360,14 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
sk = (struct sock *)((hash << 1) | 1UL);
}
+ if (fq_fastpath_check(sch, skb)) {
+ q->internal.stat_fastpath_packets++;
+ return &q->internal;
+ }
+
root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)];
- if (q->flows >= (2U << q->fq_trees_log) &&
- q->inactive_flows > q->flows/2)
- fq_gc(q, root, sk);
+ fq_gc(q, root, sk);
p = &root->rb_node;
parent = NULL;
@@ -396,7 +460,6 @@ static void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow,
{
fq_erase_head(sch, flow, skb);
skb_mark_not_on_list(skb);
- flow->qlen--;
qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
}
@@ -448,49 +511,45 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (unlikely(sch->q.qlen >= sch->limit))
return qdisc_drop(skb, sch, to_free);
+ q->ktime_cache = ktime_get_ns();
if (!skb->tstamp) {
- fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns();
+ fq_skb_cb(skb)->time_to_send = q->ktime_cache;
} else {
- /* Check if packet timestamp is too far in the future.
- * Try first if our cached value, to avoid ktime_get_ns()
- * cost in most cases.
- */
+ /* Check if packet timestamp is too far in the future. */
if (fq_packet_beyond_horizon(skb, q)) {
- /* Refresh our cache and check another time */
- q->ktime_cache = ktime_get_ns();
- if (fq_packet_beyond_horizon(skb, q)) {
- if (q->horizon_drop) {
+ if (q->horizon_drop) {
q->stat_horizon_drops++;
return qdisc_drop(skb, sch, to_free);
- }
- q->stat_horizon_caps++;
- skb->tstamp = q->ktime_cache + q->horizon;
}
+ q->stat_horizon_caps++;
+ skb->tstamp = q->ktime_cache + q->horizon;
}
fq_skb_cb(skb)->time_to_send = skb->tstamp;
}
- f = fq_classify(skb, q);
- if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
- q->stat_flows_plimit++;
- return qdisc_drop(skb, sch, to_free);
- }
+ f = fq_classify(sch, skb);
- f->qlen++;
- qdisc_qstats_backlog_inc(sch, skb);
- if (fq_flow_is_detached(f)) {
- fq_flow_add_tail(&q->new_flows, f);
- if (time_after(jiffies, f->age + q->flow_refill_delay))
- f->credit = max_t(u32, f->credit, q->quantum);
- q->inactive_flows--;
+ if (f != &q->internal) {
+ if (unlikely(f->qlen >= q->flow_plimit)) {
+ q->stat_flows_plimit++;
+ return qdisc_drop(skb, sch, to_free);
+ }
+
+ if (fq_flow_is_detached(f)) {
+ fq_flow_add_tail(&q->new_flows, f);
+ if (time_after(jiffies, f->age + q->flow_refill_delay))
+ f->credit = max_t(u32, f->credit, q->quantum);
+ }
+
+ if (f->qlen == 0)
+ q->inactive_flows--;
}
+ f->qlen++;
/* Note: this overwrites f->age */
flow_queue_add(f, skb);
- if (unlikely(f == &q->internal)) {
- q->stat_internal_packets++;
- }
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
@@ -538,6 +597,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
skb = fq_peek(&q->internal);
if (unlikely(skb)) {
+ q->internal.qlen--;
fq_dequeue_skb(sch, &q->internal, skb);
goto out;
}
@@ -581,6 +641,8 @@ begin:
INET_ECN_set_ce(skb);
q->stat_ce_mark++;
}
+ if (--f->qlen == 0)
+ q->inactive_flows++;
fq_dequeue_skb(sch, f, skb);
} else {
head->first = f->next;
@@ -589,7 +651,6 @@ begin:
fq_flow_add_tail(&q->old_flows, f);
} else {
fq_flow_set_detached(f);
- q->inactive_flows++;
}
goto begin;
}
@@ -1014,6 +1075,7 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.gc_flows = q->stat_gc_flows;
st.highprio_packets = q->stat_internal_packets;
+ st.fastpath_packets = q->internal.stat_fastpath_packets;
st.tcp_retrans = 0;
st.throttled = q->stat_throttled;
st.flows_plimit = q->stat_flows_plimit;