diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2011-12-21 03:30:11 +0000 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-12-21 15:44:34 -0500 |
commit | 225d9b89c937633dfeec502741a174fe0bab5b9f (patch) | |
tree | d9a25ffaa6b4f14c0f3d624d3c654e9b0f7ec3bd /net/sched | |
parent | 4e68ea26e76273cc62a981a414a8319a7f4c1077 (diff) |
sch_sfq: rehash queues in perturb timer
A known Out Of Order (OOO) problem hurts SFQ when the timer changes the
perturbation value, since all new packets delivered to SFQ enqueue might
end up in different slots than previous in-flight packets.
With round robin delivery, we can thus deliver packets in a different
order.
Since SFQ is limited to a small number of in-flight packets, we can rehash
the queued packets so that this OOO problem is fixed.
This rehashing is performed only if the internal flow classifier is in use.
We now store the "struct flow_keys" in skb->cb[] so that we don't call
skb_flow_dissect() again while rehashing.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/sch_sfq.c | 87 |
1 files changed, 81 insertions, 6 deletions
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 30cda707e400..d329a8a72357 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -136,16 +136,30 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
 	return &q->dep[val - SFQ_SLOTS];
 }
 
+/*
+ * In order to be able to quickly rehash our queue when timer changes
+ * q->perturbation, we store flow_keys in skb->cb[]
+ */
+struct sfq_skb_cb {
+	struct flow_keys keys;
+};
+
+static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(skb->cb) <
+		sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb));
+	return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
+}
+
 static unsigned int sfq_hash(const struct sfq_sched_data *q,
 			     const struct sk_buff *skb)
 {
-	struct flow_keys keys;
+	const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
 	unsigned int hash;
 
-	skb_flow_dissect(skb, &keys);
-	hash = jhash_3words((__force u32)keys.dst,
-			    (__force u32)keys.src ^ keys.ip_proto,
-			    (__force u32)keys.ports, q->perturbation);
+	hash = jhash_3words((__force u32)keys->dst,
+			    (__force u32)keys->src ^ keys->ip_proto,
+			    (__force u32)keys->ports, q->perturbation);
 	return hash & (q->divisor - 1);
 }
 
@@ -161,8 +175,10 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 	    TC_H_MIN(skb->priority) <= q->divisor)
 		return TC_H_MIN(skb->priority);
 
-	if (!q->filter_list)
+	if (!q->filter_list) {
+		skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
 		return sfq_hash(q, skb) + 1;
+	}
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	result = tc_classify(skb, q->filter_list, &res);
@@ -423,12 +439,71 @@ sfq_reset(struct Qdisc *sch)
 		kfree_skb(skb);
 }
 
+/*
+ * When q->perturbation is changed, we rehash all queued skbs
+ * to avoid OOO (Out Of Order) effects.
+ * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
+ * counters.
+ */
+static void sfq_rehash(struct sfq_sched_data *q)
+{
+	struct sk_buff *skb;
+	int i;
+	struct sfq_slot *slot;
+	struct sk_buff_head list;
+
+	__skb_queue_head_init(&list);
+
+	for (i = 0; i < SFQ_SLOTS; i++) {
+		slot = &q->slots[i];
+		if (!slot->qlen)
+			continue;
+		while (slot->qlen) {
+			skb = slot_dequeue_head(slot);
+			sfq_dec(q, i);
+			__skb_queue_tail(&list, skb);
+		}
+		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+	}
+	q->tail = NULL;
+
+	while ((skb = __skb_dequeue(&list)) != NULL) {
+		unsigned int hash = sfq_hash(q, skb);
+		sfq_index x = q->ht[hash];
+
+		slot = &q->slots[x];
+		if (x == SFQ_EMPTY_SLOT) {
+			x = q->dep[0].next; /* get a free slot */
+			q->ht[hash] = x;
+			slot = &q->slots[x];
+			slot->hash = hash;
+		}
+		slot_queue_add(slot, skb);
+		sfq_inc(q, x);
+		if (slot->qlen == 1) { /* The flow is new */
+			if (q->tail == NULL) { /* It is the first flow */
+				slot->next = x;
+			} else {
+				slot->next = q->tail->next;
+				q->tail->next = x;
+			}
+			q->tail = slot;
+			slot->allot = q->scaled_quantum;
+		}
+	}
+}
+
 static void sfq_perturbation(unsigned long arg)
 {
 	struct Qdisc *sch = (struct Qdisc *)arg;
 	struct sfq_sched_data *q = qdisc_priv(sch);
+	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
 
+	spin_lock(root_lock);
 	q->perturbation = net_random();
+	if (!q->filter_list && q->tail)
+		sfq_rehash(q);
+	spin_unlock(root_lock);
 
 	if (q->perturb_period)
 		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);