diff options
author | Patrick McHardy <kaber@trash.net> | 2007-03-28 21:31:36 +0200 |
---|---|---|
committer | Adrian Bunk <bunk@stusta.de> | 2007-03-28 21:31:36 +0200 |
commit | 39740872332b2dd28b479d9d8333c42c32465953 (patch) | |
tree | 9cd54571212e6cffc911f9e8724d27c141e036cf /net | |
parent | 921e8ebfc8b0e41dc724ea52f4b95b7c44f6e880 (diff) |
[NET_SCHED]: Fix endless loops caused by inaccurate qlen counters
There are multiple problems related to qlen adjustment that can lead
to an upper qdisc getting out of sync with the real number of packets
queued, leading to endless dequeueing attempts by the upper layer code.
All qdiscs must maintain an accurate q.qlen counter. There are basically
two groups of operations affecting the qlen: operations that propagate
down the tree (enqueue, dequeue, requeue, drop, reset) beginning at the
root qdisc and operations only affecting a subtree or single qdisc
(change, graft, delete class). Since qlen changes during operations from
the second group don't propagate to ancestor qdiscs, their qlen values
become desynchronized.
This patch adds a function to propagate qlen changes up the qdisc tree,
optionally calling a callback function to perform qdisc-internal
maintenance when the child qdisc is deactivated, and converts all
qdiscs to use this where necessary.
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Diffstat (limited to 'net')
-rw-r--r-- | net/sched/sch_api.c | 39 | ||||
-rw-r--r-- | net/sched/sch_atm.c | 5 | ||||
-rw-r--r-- | net/sched/sch_cbq.c | 10 | ||||
-rw-r--r-- | net/sched/sch_dsmark.c | 7 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 7 | ||||
-rw-r--r-- | net/sched/sch_hfsc.c | 30 | ||||
-rw-r--r-- | net/sched/sch_htb.c | 39 | ||||
-rw-r--r-- | net/sched/sch_netem.c | 5 | ||||
-rw-r--r-- | net/sched/sch_prio.c | 14 | ||||
-rw-r--r-- | net/sched/sch_sfq.c | 3 | ||||
-rw-r--r-- | net/sched/sch_tbf.c | 15 |
11 files changed, 126 insertions, 48 deletions
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 9e315bcbc072..c7f17dd08c9e 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -192,21 +192,28 @@ int unregister_qdisc(struct Qdisc_ops *qops) (root qdisc, all its children, children of children etc.) */ -struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) +static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle) { struct Qdisc *q; - read_lock(&qdisc_tree_lock); list_for_each_entry(q, &dev->qdisc_list, list) { - if (q->handle == handle) { - read_unlock(&qdisc_tree_lock); + if (q->handle == handle) return q; - } } - read_unlock(&qdisc_tree_lock); return NULL; } + +struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) +{ + struct Qdisc *q; + + read_lock(&qdisc_tree_lock); + q = __qdisc_lookup(dev, handle); + read_unlock(&qdisc_tree_lock); + return q; +} + static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; @@ -349,6 +356,26 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) return oqdisc; } +void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) +{ + struct Qdisc_class_ops *cops; + unsigned long cl; + u32 parentid; + + if (n == 0) + return; + while ((parentid = sch->parent)) { + sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid)); + cops = sch->ops->cl_ops; + if (cops->qlen_notify) { + cl = cops->get(sch, parentid); + cops->qlen_notify(sch, cl); + cops->put(sch, cl); + } + sch->q.qlen -= n; + } +} +EXPORT_SYMBOL(qdisc_tree_decrease_qlen); /* Graft qdisc "new" to class "classid" of qdisc "parent" or to device "dev". diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 93ebce40acac..6c14971a0ddb 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -317,7 +317,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, } memset(flow,0,sizeof(*flow)); flow->filter_list = NULL; - if (!(flow->q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops))) + if (!(flow->q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops,classid))) flow->q = &noop_qdisc; DPRINTK("atm_tc_change: qdisc %p\n",flow->q); flow->sock = sock; @@ -577,7 +577,8 @@ static int atm_tc_init(struct Qdisc *sch,struct rtattr *opt) DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); p->flows = &p->link; - if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops))) + if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops, + sch->handle))) p->link.q = &noop_qdisc; DPRINTK("atm_tc_init: link (%p) qdisc %p\n",&p->link,p->link.q); p->link.filter_list = NULL; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 6cd81708bf71..9cda4064741c 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1430,7 +1430,8 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt) q->link.sibling = &q->link; q->link.classid = sch->handle; q->link.qdisc = sch; - if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops))) + if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + sch->handle))) q->link.q = &noop_qdisc; q->link.priority = TC_CBQ_MAXPRIO-1; @@ -1675,7 +1676,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl) { if (new == NULL) { - if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL) + if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + cl->classid)) == NULL) return -ENOBUFS; } else { #ifdef CONFIG_NET_CLS_POLICE @@ -1686,7 +1688,7 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); *old = cl->q; cl->q = new; - sch->q.qlen -= (*old)->q.qlen; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -1934,7 +1936,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cl->R_tab = rtab; rtab = NULL; cl->refcnt = 1; - if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops))) + if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid))) cl->q = &noop_qdisc; cl->classid = classid; cl->tparent = parent; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 13e0e7b3856b..3e0e5b91cb4b 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -89,15 +89,16 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, sch, p, new, old); if (new == NULL) { - new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + sch->handle); if (new == NULL) new = &noop_qdisc; } sch_tree_lock(sch); *old = xchg(&p->q, new); + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); - sch->q.qlen = 0; sch_tree_unlock(sch); return 0; @@ -388,7 +389,7 @@ static int dsmark_init(struct Qdisc *sch, struct rtattr *opt) p->default_index = default_index; p->set_tc_index = RTA_GET_FLAG(tb[TCA_DSMARK_SET_TC_INDEX-1]); - p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle); if (p->q == NULL) p->q = &noop_qdisc; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0326752bd0ec..7814562aa4c4 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -440,13 +440,15 @@ errout: return ERR_PTR(-err); } -struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops) +struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops, + unsigned int parentid) { struct Qdisc *sch; sch = qdisc_alloc(dev, ops); if (IS_ERR(sch)) goto errout; + sch->parent = parentid; if (!ops->init || ops->init(sch, NULL) == 0) return sch; @@ -510,7 +512,8 @@ void dev_activate(struct net_device *dev) if (dev->qdisc_sleeping == &noop_qdisc) { struct Qdisc *qdisc; if (dev->tx_queue_len) { - qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops); + qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops, + TC_H_ROOT); if (qdisc == NULL) { printk(KERN_INFO "%s: activation failed\n", dev->name); return; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 91132f6871d7..9aef64e560ae 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -958,11 +958,7 @@ hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) unsigned int len = cl->qdisc->q.qlen; qdisc_reset(cl->qdisc); - if (len > 0) { - update_vf(cl, 0, 0); - set_passive(cl); - sch->q.qlen -= len; - } + qdisc_tree_decrease_qlen(cl->qdisc, len); } static void @@ -1140,7 +1136,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->classid = classid; cl->sched = q; cl->cl_parent = parent; - cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; cl->stats_lock = &sch->dev->queue_lock; @@ -1202,10 +1198,12 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - list_del(&cl->hlist); list_del(&cl->siblings); hfsc_adjust_levels(cl->cl_parent); + hfsc_purge_queue(sch, cl); + list_del(&cl->hlist); + if (--cl->refcnt == 0) hfsc_destroy_class(sch, cl); @@ -1273,7 +1271,8 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl->level > 0) return -EINVAL; if (new == NULL) { - new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + cl->classid); if (new == NULL) new = &noop_qdisc; } @@ -1296,6 +1295,17 @@ hfsc_class_leaf(struct Qdisc *sch, unsigned long arg) return NULL; } +static void +hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg) +{ + struct hfsc_class *cl = (struct hfsc_class *)arg; + + if (cl->qdisc->q.qlen == 0) { + update_vf(cl, 0, 0); + set_passive(cl); + } +} + static unsigned long hfsc_get_class(struct Qdisc *sch, u32 classid) { @@ -1516,7 +1526,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) q->root.refcnt = 1; q->root.classid = sch->handle; q->root.sched = q; - q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + sch->handle); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; q->root.stats_lock = &sch->dev->queue_lock; @@ -1779,6 +1790,7 @@ static struct Qdisc_class_ops hfsc_class_ops = { .delete = hfsc_delete_class, .graft = hfsc_graft_class, .leaf = hfsc_class_leaf, + .qlen_notify = hfsc_qlen_notify, .get = hfsc_get_class, .put = hfsc_put_class, .bind_tcf = hfsc_bind_tcf, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 3ec95df4a85e..f04e0a875c98 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1385,16 +1385,13 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct htb_class *cl = (struct htb_class*)arg; if (cl && !cl->level) { - if (new == NULL && (new = qdisc_create_dflt(sch->dev, - &pfifo_qdisc_ops)) == NULL) + if (new == NULL && + (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + cl->classid)) == NULL) return -ENOBUFS; sch_tree_lock(sch); if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) { - if (cl->prio_activity) - htb_deactivate (qdisc_priv(sch),cl); - - /* TODO: is it correct ? Why CBQ doesn't do it ? */ - sch->q.qlen -= (*old)->q.qlen; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); } sch_tree_unlock(sch); @@ -1409,6 +1406,14 @@ static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg) return (cl && !cl->level) ? cl->un.leaf.q : NULL; } +static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) +{ + struct htb_class *cl = (struct htb_class *)arg; + + if (cl->un.leaf.q->q.qlen == 0) + htb_deactivate(qdisc_priv(sch), cl); +} + static unsigned long htb_get(struct Qdisc *sch, u32 classid) { #ifdef HTB_DEBUG @@ -1434,10 +1439,10 @@ static void htb_destroy_filters(struct tcf_proto **fl) static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl) { struct htb_sched *q = qdisc_priv(sch); + HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0); if (!cl->level) { BUG_TRAP(cl->un.leaf.q); - sch->q.qlen -= cl->un.leaf.q->q.qlen; qdisc_destroy(cl->un.leaf.q); } qdisc_put_rtab(cl->rate); @@ -1489,6 +1494,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class*)arg; + unsigned int qlen; + HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0); // TODO: why don't allow to delete subtree ? references ? does @@ -1498,7 +1505,13 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) return -EBUSY; sch_tree_lock(sch); - + + if (!cl->level) { + qlen = cl->un.leaf.q->q.qlen; + qdisc_reset(cl->un.leaf.q); + qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen); + } + /* delete from hash and active; remainder in destroy_class */ list_del_init(&cl->hlist); if (cl->prio_activity) @@ -1576,11 +1589,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) so that can't be used inside of sch_tree_lock -- thanks to Karlis Peisenieks */ - new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); sch_tree_lock(sch); if (parent && !parent->level) { + unsigned int qlen = parent->un.leaf.q->q.qlen; + /* turn parent into inner node */ - sch->q.qlen -= parent->un.leaf.q->q.qlen; + qdisc_reset(parent->un.leaf.q); + qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen); qdisc_destroy (parent->un.leaf.q); if (parent->prio_activity) htb_deactivate (q,parent); @@ -1721,6 +1737,7 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) static struct Qdisc_class_ops htb_class_ops = { .graft = htb_graft, .leaf = htb_leaf, + .qlen_notify = htb_qlen_notify, .get = htb_get, .put = htb_put, .change = htb_change_class, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index ba5283204837..7c71a78775d8 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -571,7 +571,8 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt) q->timer.function = netem_watchdog; q->timer.data = (unsigned long) sch; - q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops); + q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops, + TC_H_MAKE(sch->handle, 1)); if (!q->qdisc) { pr_debug("netem: qdisc create failed\n"); return -ENOMEM; @@ -658,8 +659,8 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); *old = xchg(&q->qdisc, new); + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); - sch->q.qlen = 0; sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 1641db33a994..337a050895ac 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -223,21 +223,27 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt) for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); - if (child != &noop_qdisc) + if (child != &noop_qdisc) { + qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); + } } sch_tree_unlock(sch); for (i=0; i<q->bands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child; - child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, i + 1)); if (child) { sch_tree_lock(sch); child = xchg(&q->queues[i], child); - if (child != &noop_qdisc) + if (child != &noop_qdisc) { + qdisc_tree_decrease_qlen(child, + child->q.qlen); qdisc_destroy(child); + } sch_tree_unlock(sch); } } @@ -295,7 +301,7 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); *old = q->queues[band]; q->queues[band] = new; - sch->q.qlen -= (*old)->q.qlen; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 86d8da0cbd02..a7dfcdb94459 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -389,6 +389,7 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt) { struct sfq_sched_data *q = qdisc_priv(sch); struct tc_sfq_qopt *ctl = RTA_DATA(opt); + unsigned int qlen; if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) return -EINVAL; @@ -399,8 +400,10 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt) if (ctl->limit) q->limit = min_t(u32, ctl->limit, SFQ_DEPTH); + qlen = sch->q.qlen; while (sch->q.qlen >= q->limit-1) sfq_drop(sch); + qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); del_timer(&q->perturb_timer); if (q->perturb_period) { diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index cb9711ea8c6c..539b08f93c3f 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -274,12 +274,14 @@ static void tbf_reset(struct Qdisc* sch) del_timer(&q->wd_timer); } -static struct Qdisc *tbf_create_dflt_qdisc(struct net_device *dev, u32 limit) +static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit) { - struct Qdisc *q = qdisc_create_dflt(dev, &bfifo_qdisc_ops); + struct Qdisc *q; struct rtattr *rta; int ret; + q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops, + TC_H_MAKE(sch->handle, 1)); if (q) { rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); if (rta) { @@ -342,12 +344,15 @@ static int tbf_change(struct Qdisc* sch, struct rtattr *opt) goto done; if (q->qdisc == &noop_qdisc) { - if ((child = tbf_create_dflt_qdisc(sch->dev, qopt->limit)) == NULL) + if ((child = tbf_create_dflt_qdisc(sch, qopt->limit)) == NULL) goto done; } sch_tree_lock(sch); - if (child) q->qdisc = child; + if (child) { + qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); + qdisc_destroy(xchg(&q->qdisc, child)); + } q->limit = qopt->limit; q->mtu = qopt->mtu; q->max_size = max_size; @@ -449,8 +454,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); *old = xchg(&q->qdisc, new); + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); - sch->q.qlen = 0; sch_tree_unlock(sch); return 0; |