diff options
Diffstat (limited to 'net/sched')
40 files changed, 420 insertions, 194 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 929218a47620..2f691fb180d1 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -328,13 +328,16 @@ config NET_CLS_FLOW module will be called cls_flow. config NET_CLS_CGROUP - bool "Control Group Classifier" + tristate "Control Group Classifier" select NET_CLS depends on CGROUPS ---help--- Say Y here if you want to classify packets based on the control cgroup of their process. + To compile this code as a module, choose M here: the + module will be called cls_cgroup. + config NET_EMATCH bool "Extended Matches" select NET_CLS @@ -433,7 +436,7 @@ config NET_ACT_POLICE module. To compile this code as a module, choose M here: the - module will be called police. + module will be called act_police. config NET_ACT_GACT tristate "Generic actions" @@ -443,7 +446,7 @@ config NET_ACT_GACT accepting packets. To compile this code as a module, choose M here: the - module will be called gact. + module will be called act_gact. config GACT_PROB bool "Probability support" @@ -459,7 +462,7 @@ config NET_ACT_MIRRED other devices. To compile this code as a module, choose M here: the - module will be called mirred. + module will be called act_mirred. config NET_ACT_IPT tristate "IPtables targets" @@ -469,7 +472,7 @@ config NET_ACT_IPT classification. To compile this code as a module, choose M here: the - module will be called ipt. + module will be called act_ipt. config NET_ACT_NAT tristate "Stateless NAT" @@ -479,7 +482,7 @@ config NET_ACT_NAT netfilter for NAT unless you know what you are doing. To compile this code as a module, choose M here: the - module will be called nat. + module will be called act_nat. config NET_ACT_PEDIT tristate "Packet Editing" @@ -488,7 +491,7 @@ config NET_ACT_PEDIT Say Y here if you want to mangle the content of packets. To compile this code as a module, choose M here: the - module will be called pedit. + module will be called act_pedit. config NET_ACT_SIMP tristate "Simple Example (Debug)" @@ -502,7 +505,7 @@ config NET_ACT_SIMP If unsure, say N. To compile this code as a module, choose M here: the - module will be called simple. + module will be called act_simple. config NET_ACT_SKBEDIT tristate "SKB Editing" @@ -513,7 +516,7 @@ config NET_ACT_SKBEDIT If unsure, say N. To compile this code as a module, choose M here: the - module will be called skbedit. + module will be called act_skbedit. config NET_CLS_IND bool "Incoming device classification" diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 64f5e328cee9..972378f47f3c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> +#include <linux/slab.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> @@ -152,7 +153,7 @@ int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, } else if (type == RTM_GETACTION) { return tcf_dump_walker(skb, cb, a, hinfo); } else { - printk("tcf_generic_walker: unknown action %d\n", type); + WARN(1, "tcf_generic_walker: unknown action %d\n", type); return -EINVAL; } } @@ -402,8 +403,9 @@ void tcf_action_destroy(struct tc_action *act, int bind) module_put(a->ops->owner); act = act->next; kfree(a); - } else { /*FIXME: Remove later - catch insertion bugs*/ - printk("tcf_action_destroy: BUG? destroying NULL ops\n"); + } else { + /*FIXME: Remove later - catch insertion bugs*/ + WARN(1, "tcf_action_destroy: BUG? destroying NULL ops\n"); act = act->next; kfree(a); } @@ -667,7 +669,8 @@ nlmsg_failure: } static int -act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) +act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, + struct tc_action *a, int event) { struct sk_buff *skb; @@ -679,7 +682,7 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) return -EINVAL; } - return rtnl_unicast(skb, &init_net, pid); + return rtnl_unicast(skb, net, pid); } static struct tc_action * @@ -742,14 +745,15 @@ static struct tc_action *create_a(int i) act = kzalloc(sizeof(*act), GFP_KERNEL); if (act == NULL) { - printk("create_a: failed to alloc!\n"); + pr_debug("create_a: failed to alloc!\n"); return NULL; } act->order = i; return act; } -static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid) +static int tca_action_flush(struct net *net, struct nlattr *nla, + struct nlmsghdr *n, u32 pid) { struct sk_buff *skb; unsigned char *b; @@ -763,13 +767,13 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid) int err = -ENOMEM; if (a == NULL) { - printk("tca_action_flush: couldnt create tc_action\n"); + pr_debug("tca_action_flush: couldnt create tc_action\n"); return err; } skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { - printk("tca_action_flush: failed skb alloc\n"); + pr_debug("tca_action_flush: failed skb alloc\n"); kfree(a); return err; } @@ -808,7 +812,7 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid) nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); - err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (err > 0) return 0; @@ -825,7 +829,8 @@ noflush_out: } static int -tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) +tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, + u32 pid, int event) { int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; @@ -837,7 +842,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { if (tb[1] != NULL) - return tca_action_flush(tb[1], n, pid); + return tca_action_flush(net, tb[1], n, pid); else return -EINVAL; } @@ -858,7 +863,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) } if (event == RTM_GETACTION) - ret = act_get_notify(pid, n, head, event); + ret = act_get_notify(net, pid, n, head, event); else { /* delete */ struct sk_buff *skb; @@ -877,7 +882,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) /* now do the delete */ tcf_action_destroy(head, 0); - ret = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, + ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (ret > 0) return 0; @@ -888,8 +893,8 @@ err: return ret; } -static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, - u16 flags) +static int tcf_add_notify(struct net *net, struct tc_action *a, + u32 pid, u32 seq, int event, u16 flags) { struct tcamsg *t; struct nlmsghdr *nlh; @@ -922,7 +927,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, nlh->nlmsg_len = skb_tail_pointer(skb) - b; NETLINK_CB(skb).dst_group = RTNLGRP_TC; - err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); + err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); if (err > 0) err = 0; return err; @@ -935,7 +940,8 @@ nlmsg_failure: static int -tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr) +tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, + u32 pid, int ovr) { int ret = 0; struct tc_action *act; @@ -953,7 +959,7 @@ tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr) /* dump then free all the actions after update; inserted policy * stays intact * */ - ret = tcf_add_notify(act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); + ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); for (a = act; a; a = act) { act = a->next; kfree(a); @@ -969,15 +975,12 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 pid = skb ? NETLINK_CB(skb).pid : 0; int ret = 0, ovr = 0; - if (!net_eq(net, &init_net)) - return -EINVAL; - ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); if (ret < 0) return ret; if (tca[TCA_ACT_TAB] == NULL) { - printk("tc_ctl_action: received NO action attribs\n"); + pr_notice("tc_ctl_action: received NO action attribs\n"); return -EINVAL; } @@ -994,15 +997,17 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (n->nlmsg_flags&NLM_F_REPLACE) ovr = 1; replay: - ret = tcf_action_add(tca[TCA_ACT_TAB], n, pid, ovr); + ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); if (ret == -EAGAIN) goto replay; break; case RTM_DELACTION: - ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_DELACTION); + ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, + pid, RTM_DELACTION); break; case RTM_GETACTION: - ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_GETACTION); + ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, + pid, RTM_GETACTION); break; default: BUG(); @@ -1042,7 +1047,6 @@ find_dump_kind(const struct nlmsghdr *n) static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = sock_net(skb->sk); struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -1052,11 +1056,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); struct nlattr *kind = find_dump_kind(cb->nlh); - if (!net_eq(net, &init_net)) - return 0; - if (kind == NULL) { - printk("tc_dump_action: action bad kind\n"); + pr_info("tc_dump_action: action bad kind\n"); return 0; } @@ -1069,7 +1070,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) a.ops = a_o; if (a_o->walk == NULL) { - printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind); + WARN(1, "tc_dump_action: %s !capable of dumping table\n", + a_o->kind); goto nla_put_failure; } diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e7f796aec657..8406c6654990 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -202,9 +202,9 @@ MODULE_LICENSE("GPL"); static int __init gact_init_module(void) { #ifdef CONFIG_GACT_PROB - printk("GACT probability on\n"); + printk(KERN_INFO "GACT probability on\n"); #else - printk("GACT probability NOT on\n"); + printk(KERN_INFO "GACT probability NOT on\n"); #endif return tcf_register_action(&act_gact_ops); } diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 082c520b0def..c7e59e6ec349 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -19,6 +19,7 @@ #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/slab.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_ipt.h> @@ -46,8 +47,8 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int target = xt_request_find_target(AF_INET, t->u.user.name, t->u.user.revision); - if (!target) - return -ENOENT; + if (IS_ERR(target)) + return PTR_ERR(target); t->u.kernel.target = target; par.table = table; @@ -198,7 +199,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, { int ret = 0, result = 0; struct tcf_ipt *ipt = a->priv; - struct xt_target_param par; + struct xt_action_param par; if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) @@ -234,7 +235,8 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, break; default: if (net_ratelimit()) - printk("Bogus netfilter code %d assume ACCEPT\n", ret); + pr_notice("tc filter: Bogus netfilter code" + " %d assume ACCEPT\n", ret); result = TC_POLICE_OK; break; } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index d329170243cb..1980b71c283f 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -20,6 +20,7 @@ #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/gfp.h> #include <net/net_namespace.h> #include <net/netlink.h> #include <net/pkt_sched.h> @@ -32,6 +33,7 @@ static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1]; static u32 mirred_idx_gen; static DEFINE_RWLOCK(mirred_lock); +static LIST_HEAD(mirred_list); static struct tcf_hashinfo mirred_hash_info = { .htab = tcf_mirred_ht, @@ -46,7 +48,9 @@ static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) m->tcf_bindcnt--; m->tcf_refcnt--; if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { - dev_put(m->tcfm_dev); + list_del(&m->tcfm_list); + if (m->tcfm_dev) + dev_put(m->tcfm_dev); tcf_hash_destroy(&m->common, &mirred_hash_info); return 1; } @@ -133,8 +137,10 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, m->tcfm_ok_push = ok_push; } spin_unlock_bh(&m->tcf_lock); - if (ret == ACT_P_CREATED) + if (ret == ACT_P_CREATED) { + list_add(&m->tcfm_list, &mirred_list); tcf_hash_insert(pc, &mirred_hash_info); + } return ret; } @@ -161,10 +167,15 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, m->tcf_tm.lastuse = jiffies; dev = m->tcfm_dev; + if (!dev) { + printk_once(KERN_NOTICE "tc mirred: target device is gone\n"); + goto out; + } + if (!(dev->flags & IFF_UP)) { if (net_ratelimit()) - printk("mirred to Houston: device %s is gone!\n", - dev->name); + pr_notice("tc mirred to Houston: device %s is down\n", + dev->name); goto out; } @@ -231,6 +242,28 @@ nla_put_failure: return -1; } +static int mirred_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + struct tcf_mirred *m; + + if (event == NETDEV_UNREGISTER) + list_for_each_entry(m, &mirred_list, tcfm_list) { + if (m->tcfm_dev == dev) { + dev_put(dev); + m->tcfm_dev = NULL; + } + } + + return NOTIFY_DONE; +} + +static struct notifier_block mirred_device_notifier = { + .notifier_call = mirred_device_event, +}; + + static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .hinfo = &mirred_hash_info, @@ -251,12 +284,17 @@ MODULE_LICENSE("GPL"); static int __init mirred_init_module(void) { - printk("Mirror/redirect action on\n"); + int err = register_netdevice_notifier(&mirred_device_notifier); + if (err) + return err; + + pr_info("Mirror/redirect action on\n"); return tcf_register_action(&act_mirred_ops); } static void __exit mirred_cleanup_module(void) { + unregister_netdevice_notifier(&mirred_device_notifier); tcf_unregister_action(&act_mirred_ops); } diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index d885ba311564..724553e8ed7b 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -159,6 +159,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, iph->daddr = new_addr; csum_replace4(&iph->check, addr, new_addr); + } else if ((iph->frag_off & htons(IP_OFFSET)) || + iph->protocol != IPPROTO_ICMP) { + goto out; } ihl = iph->ihl * 4; @@ -202,7 +205,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, { struct icmphdr *icmph; - if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph))) + if (!pskb_may_pull(skb, ihl + sizeof(*icmph))) goto drop; icmph = (void *)(skb_network_header(skb) + ihl); @@ -212,6 +215,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, (icmph->type != ICMP_PARAMETERPROB)) break; + if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph))) + goto drop; + iph = (void *)(icmph + 1); if (egress) addr = iph->daddr; @@ -247,6 +253,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, break; } +out: return action; drop: diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 6b0359a500e6..50e3d945e1f4 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -17,6 +17,7 @@ #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/slab.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_pedit.h> @@ -124,7 +125,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, { struct tcf_pedit *p = a->priv; int i, munged = 0; - u8 *pptr; + unsigned int off; if (!(skb->tc_verd & TC_OK2MUNGE)) { /* should we set skb->cloned? */ @@ -133,7 +134,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, } } - pptr = skb_network_header(skb); + off = skb_network_offset(skb); spin_lock(&p->tcf_lock); @@ -143,41 +144,46 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tc_pedit_key *tkey = p->tcfp_keys; for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { - u32 *ptr; + u32 *ptr, _data; int offset = tkey->off; if (tkey->offmask) { - if (skb->len > tkey->at) { - char *j = pptr + tkey->at; - offset += ((*j & tkey->offmask) >> - tkey->shift); - } else { + char *d, _d; + + d = skb_header_pointer(skb, off + tkey->at, 1, + &_d); + if (!d) goto bad; - } + offset += (*d & tkey->offmask) >> tkey->shift; } if (offset % 4) { - printk("offset must be on 32 bit boundaries\n"); + pr_info("tc filter pedit" + " offset must be on 32 bit boundaries\n"); goto bad; } if (offset > 0 && offset > skb->len) { - printk("offset %d cant exceed pkt length %d\n", + pr_info("tc filter pedit" + " offset %d cant exceed pkt length %d\n", offset, skb->len); goto bad; } - ptr = (u32 *)(pptr+offset); + ptr = skb_header_pointer(skb, off + offset, 4, &_data); + if (!ptr) + goto bad; /* just do it, baby */ *ptr = ((*ptr & tkey->mask) ^ tkey->val); + if (ptr == &_data) + skb_store_bits(skb, off + offset, ptr, 4); munged++; } if (munged) skb->tc_verd = SET_TC_MUNGED(skb->tc_verd); goto done; - } else { - printk("pedit BUG: index %d\n", p->tcf_index); - } + } else + WARN(1, "pedit BUG: index %d\n", p->tcf_index); bad: p->tcf_qstats.overlimits++; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 723964c3ee4f..654f73dff7c1 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -18,6 +18,7 @@ #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/init.h> +#include <linux/slab.h> #include <net/act_api.h> #include <net/netlink.h> diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 8daa1ebc7413..1b4bc691d7d1 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -11,6 +11,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/skbuff.h> @@ -48,7 +49,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result * Example if this was the 3rd packet and the string was "hello" * then it would look like "hello_3" (without quotes) **/ - printk("simple: %s_%d\n", + pr_info("simple: %s_%d\n", (char *)d->tcfd_defdata, d->tcf_bstats.packets); spin_unlock(&d->tcf_lock); return d->tcf_action; @@ -204,7 +205,7 @@ static int __init simp_init_module(void) { int ret = tcf_register_action(&act_simp_ops); if (!ret) - printk("Simple TC action Loaded\n"); + pr_info("Simple TC action Loaded\n"); return ret; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3725d8fa29db..5fd0c28ef79a 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -24,6 +24,7 @@ #include <linux/kmod.h> #include <linux/netlink.h> #include <linux/err.h> +#include <linux/slab.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/netlink.h> @@ -98,8 +99,9 @@ out: } EXPORT_SYMBOL(unregister_tcf_proto_ops); -static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, - struct tcf_proto *tp, unsigned long fh, int event); +static int tfilter_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct tcf_proto *tp, + unsigned long fh, int event); /* Select new prio value from the range, managed by kernel. */ @@ -137,9 +139,6 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) int err; int tp_created = 0; - if (!net_eq(net, &init_net)) - return -EINVAL; - replay: t = NLMSG_DATA(n); protocol = TC_H_MIN(t->tcm_info); @@ -158,7 +157,7 @@ replay: /* Find head of filter chain. */ /* Find link */ - dev = __dev_get_by_index(&init_net, t->tcm_ifindex); + dev = __dev_get_by_index(net, t->tcm_ifindex); if (dev == NULL) return -ENODEV; @@ -282,7 +281,7 @@ replay: *back = tp->next; spin_unlock_bh(root_lock); - tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER); + tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); tcf_destroy(tp); err = 0; goto errout; @@ -305,10 +304,10 @@ replay: case RTM_DELTFILTER: err = tp->ops->delete(tp, fh); if (err == 0) - tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER); + tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); goto errout; case RTM_GETTFILTER: - err = tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); + err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); goto errout; default: err = -EINVAL; @@ -324,7 +323,7 @@ replay: *back = tp; spin_unlock_bh(root_lock); } - tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); + tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); } else { if (tp_created) tcf_destroy(tp); @@ -370,8 +369,9 @@ nla_put_failure: return -1; } -static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, - struct tcf_proto *tp, unsigned long fh, int event) +static int tfilter_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct tcf_proto *tp, + unsigned long fh, int event) { struct sk_buff *skb; u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; @@ -385,7 +385,7 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, return -EINVAL; } - return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } @@ -418,12 +418,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) const struct Qdisc_class_ops *cops; struct tcf_dump_args arg; - if (!net_eq(net, &init_net)) - return 0; - if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return skb->len; - if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return skb->len; if (!tcm->tcm_parent) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 4e2bda854119..efd4f95fd050 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -10,6 +10,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index e4877ca6727c..78ef2c5e130b 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -10,20 +10,37 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/cgroup.h> +#include <linux/rcupdate.h> #include <net/rtnetlink.h> #include <net/pkt_cls.h> +#include <net/sock.h> +#include <net/cls_cgroup.h> -struct cgroup_cls_state -{ - struct cgroup_subsys_state css; - u32 classid; +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, + struct cgroup *cgrp); +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp); + +struct cgroup_subsys net_cls_subsys = { + .name = "net_cls", + .create = cgrp_create, + .destroy = cgrp_destroy, + .populate = cgrp_populate, +#ifdef CONFIG_NET_CLS_CGROUP + .subsys_id = net_cls_subsys_id, +#else +#define net_cls_subsys_id net_cls_subsys.subsys_id +#endif + .module = THIS_MODULE, }; + static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp) { return container_of(cgroup_subsys_state(cgrp, net_cls_subsys_id), @@ -79,14 +96,6 @@ static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); } -struct cgroup_subsys net_cls_subsys = { - .name = "net_cls", - .create = cgrp_create, - .destroy = cgrp_destroy, - .populate = cgrp_populate, - .subsys_id = net_cls_subsys_id, -}; - struct cls_cgroup_head { u32 handle; @@ -100,6 +109,10 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, struct cls_cgroup_head *head = tp->root; u32 classid; + rcu_read_lock(); + classid = task_cls_state(current)->classid; + rcu_read_unlock(); + /* * Due to the nature of the classifier it is required to ignore all * packets originating from softirq context as accessing `current' @@ -110,12 +123,12 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, * calls by looking at the number of nested bh disable calls because * softirqs always disables bh. */ - if (softirq_count() != SOFTIRQ_OFFSET) - return -1; - - rcu_read_lock(); - classid = task_cls_state(current)->classid; - rcu_read_unlock(); + if (softirq_count() != SOFTIRQ_OFFSET) { + /* If there is an sk_classid we'll use that. */ + if (!skb->sk) + return -1; + classid = skb->sk->sk_classid; + } if (!classid) return -1; @@ -277,12 +290,36 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { static int __init init_cgroup_cls(void) { - return register_tcf_proto_ops(&cls_cgroup_ops); + int ret; + + ret = cgroup_load_subsys(&net_cls_subsys); + if (ret) + goto out; + +#ifndef CONFIG_NET_CLS_CGROUP + /* We can't use rcu_assign_pointer because this is an int. */ + smp_wmb(); + net_cls_subsys_id = net_cls_subsys.subsys_id; +#endif + + ret = register_tcf_proto_ops(&cls_cgroup_ops); + if (ret) + cgroup_unload_subsys(&net_cls_subsys); + +out: + return ret; } static void __exit exit_cgroup_cls(void) { unregister_tcf_proto_ops(&cls_cgroup_ops); + +#ifndef CONFIG_NET_CLS_CGROUP + net_cls_subsys_id = -1; + synchronize_rcu(); +#endif + + cgroup_unload_subsys(&net_cls_subsys); } module_init(init_cgroup_cls); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index e054c62857e1..f73542d2cdd0 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -20,6 +20,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/if_vlan.h> +#include <linux/slab.h> #include <net/pkt_cls.h> #include <net/ip.h> @@ -601,7 +602,6 @@ static unsigned long flow_get(struct tcf_proto *tp, u32 handle) static void flow_put(struct tcf_proto *tp, unsigned long f) { - return; } static int flow_dump(struct tcf_proto *tp, unsigned long fh, diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 6d6e87585fb1..93b0a7b6f9b4 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -19,6 +19,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index dd872d5383ef..694dcd85dec8 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -10,6 +10,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index e806f2314b5e..20ef330bb918 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/errno.h> +#include <linux/slab.h> #include <net/act_api.h> #include <net/netlink.h> #include <net/pkt_cls.h> diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 07372f60bee3..4f522143811e 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -31,6 +31,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> @@ -97,11 +98,11 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re { struct { struct tc_u_knode *knode; - u8 *ptr; + unsigned int off; } stack[TC_U32_MAXDEPTH]; struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; - u8 *ptr = skb_network_header(skb); + unsigned int off = skb_network_offset(skb); struct tc_u_knode *n; int sdepth = 0; int off2 = 0; @@ -133,8 +134,14 @@ next_knode: #endif for (i = n->sel.nkeys; i>0; i--, key++) { - - if ((*(__be32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) { + unsigned int toff; + __be32 *data, _data; + + toff = off + key->off + (off2 & key->offmask); + data = skb_header_pointer(skb, toff, 4, &_data); + if (!data) + goto out; + if ((*data ^ key->val) & key->mask) { n = n->next; goto next_knode; } @@ -173,29 +180,45 @@ check_terminal: if (sdepth >= TC_U32_MAXDEPTH) goto deadloop; stack[sdepth].knode = n; - stack[sdepth].ptr = ptr; + stack[sdepth].off = off; sdepth++; ht = n->ht_down; sel = 0; - if (ht->divisor) - sel = ht->divisor&u32_hash_fold(*(__be32*)(ptr+n->sel.hoff), &n->sel,n->fshift); - + if (ht->divisor) { + __be32 *data, _data; + + data = skb_header_pointer(skb, off + n->sel.hoff, 4, + &_data); + if (!data) + goto out; + sel = ht->divisor & u32_hash_fold(*data, &n->sel, + n->fshift); + } if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) goto next_ht; if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { off2 = n->sel.off + 3; - if (n->sel.flags&TC_U32_VAROFFSET) - off2 += ntohs(n->sel.offmask & *(__be16*)(ptr+n->sel.offoff)) >>n->sel.offshift; + if (n->sel.flags & TC_U32_VAROFFSET) { + __be16 *data, _data; + + data = skb_header_pointer(skb, + off + n->sel.offoff, + 2, &_data); + if (!data) + goto out; + off2 += ntohs(n->sel.offmask & *data) >> + n->sel.offshift; + } off2 &= ~3; } if (n->sel.flags&TC_U32_EAT) { - ptr += off2; + off += off2; off2 = 0; } - if (ptr < skb_tail_pointer(skb)) + if (off < skb->len) goto next_ht; } @@ -203,14 +226,15 @@ check_terminal: if (sdepth--) { n = stack[sdepth].knode; ht = n->ht_up; - ptr = stack[sdepth].ptr; + off = stack[sdepth].off; goto check_terminal; } +out: return -1; deadloop: if (net_ratelimit()) - printk("cls_u32: dead loop\n"); + printk(KERN_WARNING "cls_u32: dead loop\n"); return -1; } @@ -767,15 +791,15 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = { static int __init init_u32(void) { - printk("u32 classifier\n"); + pr_info("u32 classifier\n"); #ifdef CONFIG_CLS_U32_PERF - printk(" Performance counters on\n"); + pr_info(" Performance counters on\n"); #endif #ifdef CONFIG_NET_CLS_IND - printk(" input device check on \n"); + pr_info(" input device check on\n"); #endif #ifdef CONFIG_NET_CLS_ACT - printk(" Actions configured \n"); + pr_info(" Actions configured\n"); #endif return register_tcf_proto_ops(&cls_u32_ops); } diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 24dce8b648a4..3bcac8aa333c 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -58,6 +58,7 @@ * only available if that subsystem is enabled in the kernel. */ +#include <linux/slab.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index 370a1b2ea317..1a4176aee6e5 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -9,6 +9,7 @@ * Authors: Thomas Graf <tgraf@suug.ch> */ +#include <linux/gfp.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/em_text.c b/net/sched/em_text.c index 853c5ead87fd..763253257411 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -9,6 +9,7 @@ * Authors: Thomas Graf <tgraf@suug.ch> */ +#include <linux/slab.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/ematch.c b/net/sched/ematch.c index aab59409728b..5e37da961f80 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -82,6 +82,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -526,7 +527,8 @@ pop_stack: stack_overflow: if (net_ratelimit()) - printk("Local stack overflow, increase NET_EMATCH_STACK\n"); + printk(KERN_WARNING "tc ematch: local stack overflow," + " increase NET_EMATCH_STACK\n"); return -1; } EXPORT_SYMBOL(__tcf_em_tree_match); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 75fd1c672c61..b9e8c3b7d406 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -28,16 +28,19 @@ #include <linux/list.h> #include <linux/hrtimer.h> #include <linux/lockdep.h> +#include <linux/slab.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/netlink.h> #include <net/pkt_sched.h> -static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, +static int qdisc_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new); -static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, - struct Qdisc *q, unsigned long cl, int event); +static int tclass_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct Qdisc *q, + unsigned long cl, int event); /* @@ -638,11 +641,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) } EXPORT_SYMBOL(qdisc_tree_decrease_qlen); -static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid, +static void notify_and_destroy(struct net *net, struct sk_buff *skb, + struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new) { if (new || old) - qdisc_notify(skb, n, clid, old, new); + qdisc_notify(net, skb, n, clid, old, new); if (old) qdisc_destroy(old); @@ -662,6 +666,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, struct Qdisc *new, struct Qdisc *old) { struct Qdisc *q = old; + struct net *net = dev_net(dev); int err = 0; if (parent == NULL) { @@ -698,12 +703,13 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, } if (!ingress) { - notify_and_destroy(skb, n, classid, dev->qdisc, new); + notify_and_destroy(net, skb, n, classid, + dev->qdisc, new); if (new && !new->ops->attach) atomic_inc(&new->refcnt); dev->qdisc = new ? : &noop_qdisc; } else { - notify_and_destroy(skb, n, classid, old, new); + notify_and_destroy(net, skb, n, classid, old, new); } if (dev->flags & IFF_UP) @@ -721,7 +727,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, err = -ENOENT; } if (!err) - notify_and_destroy(skb, n, classid, old, new); + notify_and_destroy(net, skb, n, classid, old, new); } return err; } @@ -947,10 +953,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *p = NULL; int err; - if (!net_eq(net, &init_net)) - return -EINVAL; - - if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return -ENODEV; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -990,7 +993,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) return err; } else { - qdisc_notify(skb, n, clid, NULL, q); + qdisc_notify(net, skb, n, clid, NULL, q); } return 0; } @@ -1009,16 +1012,13 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *q, *p; int err; - if (!net_eq(net, &init_net)) - return -EINVAL; - replay: /* Reinit, just in case something touches this. */ tcm = NLMSG_DATA(n); clid = tcm->tcm_parent; q = p = NULL; - if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return -ENODEV; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -1105,7 +1105,7 @@ replay: return -EINVAL; err = qdisc_change(q, tca); if (err == 0) - qdisc_notify(skb, n, clid, NULL, q); + qdisc_notify(net, skb, n, clid, NULL, q); return err; create_n_graft: @@ -1195,8 +1195,14 @@ nla_put_failure: return -1; } -static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, - u32 clid, struct Qdisc *old, struct Qdisc *new) +static bool tc_qdisc_dump_ignore(struct Qdisc *q) +{ + return (q->flags & TCQ_F_BUILTIN) ? true : false; +} + +static int qdisc_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, u32 clid, + struct Qdisc *old, struct Qdisc *new) { struct sk_buff *skb; u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; @@ -1205,28 +1211,23 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, if (!skb) return -ENOBUFS; - if (old && old->handle) { + if (old && !tc_qdisc_dump_ignore(old)) { if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) goto err_out; } - if (new) { + if (new && !tc_qdisc_dump_ignore(new)) { if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) goto err_out; } if (skb->len) - return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); err_out: kfree_skb(skb); return -EINVAL; } -static bool tc_qdisc_dump_ignore(struct Qdisc *q) -{ - return (q->flags & TCQ_F_BUILTIN) ? true : false; -} - static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, int *q_idx_p, int s_q_idx) @@ -1274,15 +1275,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) int s_idx, s_q_idx; struct net_device *dev; - if (!net_eq(net, &init_net)) - return 0; - s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; rcu_read_lock(); idx = 0; - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { struct netdev_queue *dev_queue; if (idx < s_idx) @@ -1334,10 +1332,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 qid = TC_H_MAJ(clid); int err; - if (!net_eq(net, &init_net)) - return -EINVAL; - - if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return -ENODEV; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -1418,10 +1413,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (cops->delete) err = cops->delete(q, cl); if (err == 0) - tclass_notify(skb, n, q, cl, RTM_DELTCLASS); + tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS); goto out; case RTM_GETTCLASS: - err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS); + err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS); goto out; default: err = -EINVAL; @@ -1434,7 +1429,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (cops->change) err = cops->change(q, clid, pid, tca, &new_cl); if (err == 0) - tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS); + tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); out: if (cl) @@ -1486,8 +1481,9 @@ nla_put_failure: return -1; } -static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, - struct Qdisc *q, unsigned long cl, int event) +static int tclass_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct Qdisc *q, + unsigned long cl, int event) { struct sk_buff *skb; u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; @@ -1501,7 +1497,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, return -EINVAL; } - return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct qdisc_dump_args @@ -1576,12 +1572,9 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; int t, s_t; - if (!net_eq(net, &init_net)) - return 0; - if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return 0; - if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return 0; s_t = cb->args[0]; @@ -1644,9 +1637,12 @@ reclassify: tp = otp; if (verd++ >= MAX_REC_LOOP) { - printk("rule prio %u protocol %02x reclassify loop, " - "packet dropped\n", - tp->prio&0xffff, ntohs(tp->protocol)); + if (net_ratelimit()) + printk(KERN_NOTICE + "%s: packet reclassify loop" + " rule prio %u protocol %02x\n", + tp->q->ops->id, + tp->prio & 0xffff, ntohs(tp->protocol)); return TC_ACT_SHOT; } skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); @@ -1691,7 +1687,7 @@ static int psched_show(struct seq_file *seq, void *v) static int psched_open(struct inode *inode, struct file *file) { - return single_open(file, psched_show, PDE(inode)->data); + return single_open(file, psched_show, NULL); } static const struct file_operations psched_fops = { @@ -1701,14 +1697,53 @@ static const struct file_operations psched_fops = { .llseek = seq_lseek, .release = single_release, }; + +static int __net_init psched_net_init(struct net *net) +{ + struct proc_dir_entry *e; + + e = proc_net_fops_create(net, "psched", 0, &psched_fops); + if (e == NULL) + return -ENOMEM; + + return 0; +} + +static void __net_exit psched_net_exit(struct net *net) +{ + proc_net_remove(net, "psched"); +} +#else +static int __net_init psched_net_init(struct net *net) +{ + return 0; +} + +static void __net_exit psched_net_exit(struct net *net) +{ +} #endif +static struct pernet_operations psched_net_ops = { + .init = psched_net_init, + .exit = psched_net_exit, +}; + static int __init pktsched_init(void) { + int err; + + err = register_pernet_subsys(&psched_net_ops); + if (err) { + printk(KERN_ERR "pktsched_init: " + "cannot initialize per netns operations\n"); + return err; + } + register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); + register_qdisc(&pfifo_head_drop_qdisc_ops); register_qdisc(&mq_qdisc_ops); - proc_net_fops_create(&init_net, "psched", 0, &psched_fops); rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ab82f145f689..fcbb86a486a2 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -3,6 +3,7 @@ /* Written 1998-2000 by Werner Almesberger, EPFL ICA */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/string.h> #include <linux/errno.h> diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 3846d65bc03e..28c01ef5abc8 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -11,6 +11,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index a65604f8f2b8..b74046a95397 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -9,6 +9,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/netdevice.h> diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index d303daa45d49..63d41f86679c 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -5,6 +5,7 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/string.h> #include <linux/errno.h> diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 69188e8358b4..5948bafa8ce2 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -10,6 +10,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -43,6 +44,26 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) return qdisc_reshape_fail(skb, sch); } +static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct sk_buff *skb_head; + struct fifo_sched_data *q = qdisc_priv(sch); + + if (likely(skb_queue_len(&sch->q) < q->limit)) + return qdisc_enqueue_tail(skb, sch); + + /* queue full, remove one skb to fulfill the limit */ + skb_head = qdisc_dequeue_head(sch); + sch->bstats.bytes -= qdisc_pkt_len(skb_head); + sch->bstats.packets--; + sch->qstats.drops++; + kfree_skb(skb_head); + + qdisc_enqueue_tail(skb, sch); + + return NET_XMIT_CN; +} + static int fifo_init(struct Qdisc *sch, struct nlattr *opt) { struct fifo_sched_data *q = qdisc_priv(sch); @@ -108,6 +129,20 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { }; EXPORT_SYMBOL(bfifo_qdisc_ops); +struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { + .id = "pfifo_head_drop", + .priv_size = sizeof(struct fifo_sched_data), + .enqueue = pfifo_tail_enqueue, + .dequeue = qdisc_dequeue_head, + .peek = qdisc_peek_head, + .drop = qdisc_queue_drop_head, + .init = fifo_init, + .reset = qdisc_reset_queue, + .change = fifo_init, + .dump = fifo_dump, + .owner = THIS_MODULE, +}; + /* Pass size change message down to embedded FIFO */ int fifo_set_limit(struct Qdisc *q, unsigned int limit) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5173c1e1b19c..a63029ef3edd 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -24,7 +24,9 @@ #include <linux/init.h> #include <linux/rcupdate.h> #include <linux/list.h> +#include <linux/slab.h> #include <net/pkt_sched.h> +#include <net/dst.h> /* Main transmission queue. */ @@ -39,6 +41,7 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { + skb_dst_force(skb); q->gso_skb = skb; q->qstats.requeues++; q->q.qlen++; /* it's still part of the queue */ @@ -93,7 +96,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * Another cpu is holding lock, requeue & delay xmits for * some time. */ - __get_cpu_var(netdev_rx_stat).cpu_collision++; + __get_cpu_var(softnet_data).cpu_collision++; ret = dev_requeue_skb(skb, q); } @@ -178,7 +181,7 @@ static inline int qdisc_restart(struct Qdisc *q) skb = dequeue_skb(q); if (unlikely(!skb)) return 0; - + WARN_ON_ONCE(skb_dst_is_noref(skb)); root_lock = qdisc_lock(q); dev = qdisc_dev(q); txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); @@ -528,7 +531,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, unsigned int size; int err = -ENOBUFS; - /* ensure that the Qdisc and the private data are 32-byte aligned */ + /* ensure that the Qdisc and the private data are 64-byte aligned */ size = QDISC_ALIGN(sizeof(*sch)); size += ops->priv_size + (QDISC_ALIGNTO - 1); @@ -590,6 +593,13 @@ void qdisc_reset(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_reset); +static void qdisc_rcu_free(struct rcu_head *head) +{ + struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head); + + kfree((char *) qdisc - qdisc->padded); +} + void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; @@ -613,7 +623,11 @@ void qdisc_destroy(struct Qdisc *qdisc) dev_put(qdisc_dev(qdisc)); kfree_skb(qdisc->gso_skb); - kfree((char *) qdisc - qdisc->padded); + /* + * gen_estimator est_timer() might access qdisc->q.lock, + * wait a RCU grace period before freeing qdisc. + */ + call_rcu(&qdisc->rcu_head, qdisc_rcu_free); } EXPORT_SYMBOL(qdisc_destroy); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 40408d595c08..51dcc2aa5c92 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -18,6 +18,7 @@ * For all the glorious comments look at include/net/red.h */ +#include <linux/slab.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b38b39c60752..abd904be4287 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -617,7 +617,6 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) rtsc->y = y; rtsc->dx = dx; rtsc->dy = dy; - return; } static void @@ -1155,7 +1154,7 @@ static struct hfsc_class * hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) { struct hfsc_sched *q = qdisc_priv(sch); - struct hfsc_class *cl; + struct hfsc_class *head, *cl; struct tcf_result res; struct tcf_proto *tcf; int result; @@ -1166,6 +1165,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) return cl; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + head = &q->root; tcf = q->root.filter_list; while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { #ifdef CONFIG_NET_CLS_ACT @@ -1180,6 +1180,8 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) if ((cl = (struct hfsc_class *)res.class) == NULL) { if ((cl = hfsc_find_class(res.classid, sch)) == NULL) break; /* filter selected invalid classid */ + if (cl->level >= head->level) + break; /* filter may only point downwards */ } if (cl->level == 0) @@ -1187,6 +1189,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) /* apply inner filter chain */ tcf = cl->filter_list; + head = cl; } /* classification failed, try default class */ diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 508cf5f3a6d5..0b52b8de562c 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -36,6 +36,7 @@ #include <linux/compiler.h> #include <linux/rbtree.h> #include <linux/workqueue.h> +#include <linux/slab.h> #include <net/netlink.h> #include <net/pkt_sched.h> diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index a9e646bdb605..f10e34a68445 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -44,7 +44,6 @@ static void ingress_put(struct Qdisc *sch, unsigned long cl) static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker) { - return; } static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl) diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index d1dea3d5dc92..fe91e50f9d98 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -9,6 +9,7 @@ */ #include <linux/types.h> +#include <linux/slab.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> @@ -173,7 +174,6 @@ static unsigned long mq_get(struct Qdisc *sch, u32 classid) static void mq_put(struct Qdisc *sch, unsigned long cl) { - return; } static int mq_dump_class(struct Qdisc *sch, unsigned long cl, diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 7db2c88ce585..6ae251279fc2 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -18,6 +18,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> @@ -339,7 +340,6 @@ static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent, static void multiq_put(struct Qdisc *q, unsigned long cl) { - return; } static int multiq_dump_class(struct Qdisc *sch, unsigned long cl, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index d8b10e054627..4714ff162bbd 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -14,6 +14,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 93285cecb246..0748fb1e3a49 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -12,6 +12,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> @@ -302,7 +303,6 @@ static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 clas static void prio_put(struct Qdisc *q, unsigned long cl) { - return; } static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 072cdf442f8e..8d42bb3ba540 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -303,7 +303,6 @@ static unsigned long red_get(struct Qdisc *sch, u32 classid) static void red_put(struct Qdisc *sch, unsigned long arg) { - return; } static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index cb21380c0605..c65762823f5e 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -20,6 +20,7 @@ #include <linux/ipv6.h> #include <linux/skbuff.h> #include <linux/jhash.h> +#include <linux/slab.h> #include <net/ip.h> #include <net/netlink.h> #include <net/pkt_sched.h> @@ -122,8 +123,8 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) case htons(ETH_P_IP): { const struct iphdr *iph = ip_hdr(skb); - h = iph->daddr; - h2 = iph->saddr ^ iph->protocol; + h = (__force u32)iph->daddr; + h2 = (__force u32)iph->saddr ^ iph->protocol; if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP || @@ -137,8 +138,8 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); - h = iph->daddr.s6_addr32[3]; - h2 = iph->saddr.s6_addr32[3] ^ iph->nexthdr; + h = (__force u32)iph->daddr.s6_addr32[3]; + h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr; if (iph->nexthdr == IPPROTO_TCP || iph->nexthdr == IPPROTO_UDP || iph->nexthdr == IPPROTO_UDPLITE || @@ -149,7 +150,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) break; } default: - h = (unsigned long)skb_dst(skb) ^ skb->protocol; + h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol; h2 = (unsigned long)skb->sk; } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 8fb8107ab188..0991c640cd3e 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -273,7 +273,11 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) if (max_size < 0) goto done; - if (qopt->limit > 0) { + if (q->qdisc != &noop_qdisc) { + err = fifo_set_limit(q->qdisc, qopt->limit); + if (err) + goto done; + } else if (qopt->limit > 0) { child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit); if (IS_ERR(child)) { err = PTR_ERR(child); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index db69637069c4..807643bdcbac 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> +#include <linux/slab.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/if_arp.h> @@ -448,6 +449,7 @@ static __init void teql_master_setup(struct net_device *dev) dev->tx_queue_len = 100; dev->flags = IFF_NOARP; dev->hard_header_len = LL_MAX_HEADER; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } static LIST_HEAD(master_dev_list); |