summaryrefslogtreecommitdiff
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig24
-rw-r--r--net/sched/Makefile2
-rw-r--r--net/sched/act_bpf.c208
-rw-r--r--net/sched/act_connmark.c192
-rw-r--r--net/sched/act_csum.c2
-rw-r--r--net/sched/cls_api.c7
-rw-r--r--net/sched/cls_basic.c7
-rw-r--r--net/sched/cls_bpf.c33
-rw-r--r--net/sched/cls_flow.c8
-rw-r--r--net/sched/em_ipset.c2
-rw-r--r--net/sched/em_meta.c4
-rw-r--r--net/sched/sch_api.c2
-rw-r--r--net/sched/sch_dsmark.c6
-rw-r--r--net/sched/sch_fq.c43
-rw-r--r--net/sched/sch_teql.c11
15 files changed, 506 insertions, 45 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 706af73c969f..2274e723a3df 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -698,6 +698,30 @@ config NET_ACT_VLAN
To compile this code as a module, choose M here: the
module will be called act_vlan.
+config NET_ACT_BPF
+ tristate "BPF based action"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here to execute BPF code on packets. The BPF code will decide
+ if the packet should be dropped or not.
+
+ If unsure, say N.
+
+ To compile this code as a module, choose M here: the
+ module will be called act_bpf.
+
+config NET_ACT_CONNMARK
+ tristate "Netfilter Connection Mark Retriever"
+ depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+ depends on NF_CONNTRACK && NF_CONNTRACK_MARK
+ ---help---
+ Say Y here to allow retrieving the netfilter connection mark
+ (connmark) of a packet and storing it in the skb mark.
+
+ If unsure, say N.
+
+ To compile this code as a module, choose M here: the
+ module will be called act_connmark.
+
config NET_CLS_IND
bool "Incoming device classification"
depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 679f24ae7f93..7ca7f4c1b8c2 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -17,6 +17,8 @@ obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o
+obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o
+obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o
obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
new file mode 100644
index 000000000000..82c5d7fc1988
--- /dev/null
+++ b/net/sched/act_bpf.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/filter.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#include <linux/tc_act/tc_bpf.h>
+#include <net/tc_act/tc_bpf.h>
+
+#define BPF_TAB_MASK 15
+
+/* Run the attached BPF program on @skb while holding the action lock.
+ *
+ * Refreshes the last-use timestamp and the basic stats, then returns the
+ * configured action code. If the program returns 0 the packet is counted
+ * as a drop and TC_ACT_SHOT is returned instead.
+ */
+static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a,
+		   struct tcf_result *res)
+{
+	struct tcf_bpf *prog = a->priv;
+	int verdict;
+
+	spin_lock(&prog->tcf_lock);
+
+	prog->tcf_tm.lastuse = jiffies;
+	bstats_update(&prog->tcf_bstats, skb);
+
+	verdict = prog->tcf_action;
+	if (!BPF_PROG_RUN(prog->filter, skb)) {
+		/* A zero result from the filter is treated as "drop". */
+		verdict = TC_ACT_SHOT;
+		prog->tcf_qstats.drops++;
+	}
+
+	spin_unlock(&prog->tcf_lock);
+
+	return verdict;
+}
+
+/* Dump the action's configuration into the netlink message @skb.
+ *
+ * Emits, in order: the tc_act_bpf parameters (with @ref and @bind
+ * subtracted from the reference and bind counts), the instruction count,
+ * the raw instruction array and the timing information.
+ *
+ * Returns skb->len on success. On failure the message is trimmed back to
+ * where this dump started and -1 is returned.
+ */
+static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a,
+			int bind, int ref)
+{
+	unsigned char *start = skb_tail_pointer(skb);
+	struct tcf_bpf *prog = a->priv;
+	struct tc_act_bpf parms = {
+		.index   = prog->tcf_index,
+		.refcnt  = prog->tcf_refcnt - ref,
+		.bindcnt = prog->tcf_bindcnt - bind,
+		.action  = prog->tcf_action,
+	};
+	struct nlattr *ops_attr;
+	struct tcf_t tm;
+
+	if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(parms), &parms) ||
+	    nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops))
+		goto nla_put_failure;
+
+	/* Reserve room for the instructions, then copy them in place. */
+	ops_attr = nla_reserve(skb, TCA_ACT_BPF_OPS,
+			       prog->bpf_num_ops * sizeof(struct sock_filter));
+	if (!ops_attr)
+		goto nla_put_failure;
+	memcpy(nla_data(ops_attr), prog->bpf_ops, nla_len(ops_attr));
+
+	tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install);
+	tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse);
+	tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires);
+	if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm))
+		goto nla_put_failure;
+
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, start);
+	return -1;
+}
+
+static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { /* attribute policy handed to nla_parse_nested() in tcf_bpf_init() */
+ [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, /* action parameters, a struct tc_act_bpf */
+ [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, /* number of BPF instructions */
+ [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, /* raw array of struct sock_filter instructions */
+ .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, /* length limit: BPF_MAXINSNS instructions */
+};
+
+/* Create a new BPF action, or replace the program of an existing one.
+ *
+ * @nla carries the nested TCA_ACT_BPF_* attributes. PARMS, OPS_LEN and
+ * OPS are all mandatory; OPS_LEN must be in 1..BPF_MAXINSNS and must
+ * match the byte length of OPS exactly.
+ *
+ * Returns ACT_P_CREATED when a new action was created, 0 when an
+ * existing action was bound to or had its program replaced, or a
+ * negative errno (-EINVAL for malformed attributes, -ENOMEM, -EEXIST
+ * when the action exists and @ovr is not set, or the error reported by
+ * bpf_prog_create() / tcf_hash_create()).
+ */
+static int tcf_bpf_init(struct net *net, struct nlattr *nla,
+			struct nlattr *est, struct tc_action *a,
+			int ovr, int bind)
+{
+	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+	struct sock_filter *bpf_ops, *old_ops = NULL;
+	struct bpf_prog *fp, *old_fp = NULL;
+	struct sock_fprog_kern tmp;
+	struct tc_act_bpf *parm;
+	u16 bpf_size, bpf_num_ops;
+	struct tcf_bpf *b;
+	int ret;
+
+	if (!nla)
+		return -EINVAL;
+
+	ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[TCA_ACT_BPF_PARMS] ||
+	    !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS])
+		return -EINVAL;
+	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
+
+	bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]);
+	if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
+		return -EINVAL;
+
+	/* The declared instruction count must match the payload size. */
+	bpf_size = bpf_num_ops * sizeof(*bpf_ops);
+	if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS]))
+		return -EINVAL;
+
+	/* Keep a private copy of the instructions; the dump path reads it. */
+	bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
+	if (!bpf_ops)
+		return -ENOMEM;
+
+	memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size);
+
+	tmp.len = bpf_num_ops;
+	tmp.filter = bpf_ops;
+
+	ret = bpf_prog_create(&fp, &tmp);
+	if (ret)
+		goto free_bpf_ops;
+
+	if (!tcf_hash_check(parm->index, a, bind)) {
+		ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind);
+		if (ret)
+			goto destroy_fp;
+
+		ret = ACT_P_CREATED;
+	} else {
+		/* Binding to an existing action keeps its current program;
+		 * the freshly built one is discarded and 0 is returned.
+		 */
+		if (bind)
+			goto destroy_fp;
+		tcf_hash_release(a, bind);
+		if (!ovr) {
+			ret = -EEXIST;
+			goto destroy_fp;
+		}
+	}
+
+	b = to_bpf(a);
+	spin_lock_bh(&b->tcf_lock);
+	if (ret != ACT_P_CREATED) {
+		/* Replacing: remember the old program and instruction copy
+		 * so they can be released once they are no longer installed.
+		 */
+		old_fp = b->filter;
+		old_ops = b->bpf_ops;
+	}
+	b->tcf_action = parm->action;
+	b->bpf_num_ops = bpf_num_ops;
+	b->bpf_ops = bpf_ops;
+	b->filter = fp;
+	spin_unlock_bh(&b->tcf_lock);
+
+	if (ret == ACT_P_CREATED) {
+		tcf_hash_insert(a);
+	} else {
+		/* The packet path runs the filter under tcf_lock, so after
+		 * the swap above nothing can still be using the old one.
+		 */
+		if (old_fp)
+			bpf_prog_destroy(old_fp);
+		kfree(old_ops);
+	}
+	return ret;
+
+destroy_fp:
+	bpf_prog_destroy(fp);
+free_bpf_ops:
+	kfree(bpf_ops);
+	return ret;
+}
+
+/* Release everything the action owns when it is destroyed: the BPF
+ * program itself and the private copy of its instructions that
+ * tcf_bpf_init() allocated (and that the dump path reads).
+ */
+static void tcf_bpf_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_bpf *b = a->priv;
+
+	bpf_prog_destroy(b->filter);
+	kfree(b->bpf_ops);
+}
+
+static struct tc_action_ops act_bpf_ops = { /* callback table registered for the "bpf" action kind */
+ .kind = "bpf",
+ .type = TCA_ACT_BPF,
+ .owner = THIS_MODULE,
+ .act = tcf_bpf, /* per-packet handler: runs the BPF program */
+ .dump = tcf_bpf_dump, /* writes the configuration into a netlink message */
+ .cleanup = tcf_bpf_cleanup, /* destroys the BPF program */
+ .init = tcf_bpf_init, /* creates or replaces an action from netlink attributes */
+};
+
+static int __init bpf_init_module(void) /* module init: registers act_bpf_ops and returns tcf_register_action()'s result */
+{
+ return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK);
+}
+
+static void __exit bpf_cleanup_module(void) /* module exit: unregisters act_bpf_ops */
+{
+ tcf_unregister_action(&act_bpf_ops);
+}
+
+module_init(bpf_init_module);
+module_exit(bpf_cleanup_module);
+
+MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
+MODULE_DESCRIPTION("TC BPF based action");
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
new file mode 100644
index 000000000000..8e472518f9f6
--- /dev/null
+++ b/net/sched/act_connmark.c
@@ -0,0 +1,192 @@
+/*
+ * net/sched/act_connmark.c netfilter connmark retriever action
+ * skb mark is over-written
+ *
+ * Copyright (c) 2011 Felix Fietkau <nbd@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_cls.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/act_api.h>
+#include <uapi/linux/tc_act/tc_connmark.h>
+#include <net/tc_act/tc_connmark.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+#define CONNMARK_TAB_MASK 3
+
+/* Copy the conntrack mark of the connection @skb belongs to into
+ * skb->mark.
+ *
+ * Only IPv4 and IPv6 packets are considered. If the skb already has a
+ * conntrack entry attached, its mark is used directly; otherwise the
+ * connection is looked up by tuple in zone ca->zone. Each packet that
+ * gets marked is counted in the overlimits stat.
+ *
+ * This action is passive with respect to conntrack: it never detaches
+ * or releases the conntrack entry that is attached to the skb.
+ *
+ * Always returns the configured action code.
+ */
+static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
+			struct tcf_result *res)
+{
+	const struct nf_conntrack_tuple_hash *thash;
+	struct nf_conntrack_tuple tuple;
+	enum ip_conntrack_info ctinfo;
+	struct tcf_connmark_info *ca = a->priv;
+	struct nf_conn *c;
+	int proto;
+
+	spin_lock(&ca->tcf_lock);
+	ca->tcf_tm.lastuse = jiffies;
+	bstats_update(&ca->tcf_bstats, skb);
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (skb->len < sizeof(struct iphdr))
+			goto out;
+
+		proto = NFPROTO_IPV4;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (skb->len < sizeof(struct ipv6hdr))
+			goto out;
+
+		proto = NFPROTO_IPV6;
+	} else {
+		goto out;
+	}
+
+	c = nf_ct_get(skb, &ctinfo);
+	if (c) {
+		/* nf_ct_get() only hands back the entry the skb already
+		 * references; no extra reference is taken, so there is
+		 * nothing to put here.
+		 */
+		skb->mark = c->mark;
+		/* using overlimits stats to count how many packets marked */
+		ca->tcf_qstats.overlimits++;
+		goto out;
+	}
+
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+			       proto, &tuple))
+		goto out;
+
+	thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple);
+	if (!thash)
+		goto out;
+
+	c = nf_ct_tuplehash_to_ctrack(thash);
+	/* using overlimits stats to count how many packets marked */
+	ca->tcf_qstats.overlimits++;
+	skb->mark = c->mark;
+	/* Drop the reference obtained by the lookup above. */
+	nf_ct_put(c);
+
+out:
+	spin_unlock(&ca->tcf_lock);
+	return ca->tcf_action;
+}
+
+static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { /* attribute policy handed to nla_parse_nested() in tcf_connmark_init() */
+ [TCA_CONNMARK_PARMS] = { .len = sizeof(struct tc_connmark) }, /* action parameters, a struct tc_connmark */
+};
+
+/* Create a new connmark action, or update the action code and zone of
+ * an existing one.
+ *
+ * @nla carries the nested TCA_CONNMARK_* attributes; TCA_CONNMARK_PARMS
+ * is mandatory.
+ *
+ * Returns ACT_P_CREATED when a new action was created, 0 when an
+ * existing action was bound to or updated, -EINVAL when @nla or the
+ * PARMS attribute is missing, -EEXIST when the action exists and @ovr
+ * is not set, or the error reported by nla_parse_nested() /
+ * tcf_hash_create().
+ */
+static int tcf_connmark_init(struct net *net, struct nlattr *nla,
+			     struct nlattr *est, struct tc_action *a,
+			     int ovr, int bind)
+{
+	struct nlattr *tb[TCA_CONNMARK_MAX + 1];
+	struct tcf_connmark_info *ci;
+	struct tc_connmark *parm;
+	int ret = 0;
+
+	if (!nla)
+		return -EINVAL;
+
+	ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy);
+	if (ret < 0)
+		return ret;
+
+	/* The policy only validates attributes that are present, so a
+	 * message without PARMS must be rejected before dereferencing it.
+	 */
+	if (!tb[TCA_CONNMARK_PARMS])
+		return -EINVAL;
+
+	parm = nla_data(tb[TCA_CONNMARK_PARMS]);
+
+	if (!tcf_hash_check(parm->index, a, bind)) {
+		ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind);
+		if (ret)
+			return ret;
+
+		ci = to_connmark(a);
+		ci->tcf_action = parm->action;
+		ci->zone = parm->zone;
+
+		tcf_hash_insert(a);
+		ret = ACT_P_CREATED;
+	} else {
+		ci = to_connmark(a);
+		/* Binding to an existing action leaves it unchanged. */
+		if (bind)
+			return 0;
+		tcf_hash_release(a, bind);
+		if (!ovr)
+			return -EEXIST;
+		/* replacing action and zone */
+		ci->tcf_action = parm->action;
+		ci->zone = parm->zone;
+	}
+
+	return ret;
+}
+
+/* Dump the connmark action into the netlink message @skb.
+ *
+ * Emits the tc_connmark parameters (with @ref and @bind subtracted from
+ * the reference and bind counts) followed by the timing information.
+ *
+ * Returns skb->len on success. On failure the message is trimmed back to
+ * where this dump started and -1 is returned.
+ */
+static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
+				    int bind, int ref)
+{
+	unsigned char *start = skb_tail_pointer(skb);
+	struct tcf_connmark_info *info = a->priv;
+	struct tc_connmark parms = {
+		.index   = info->tcf_index,
+		.refcnt  = info->tcf_refcnt - ref,
+		.bindcnt = info->tcf_bindcnt - bind,
+		.action  = info->tcf_action,
+		.zone    = info->zone,
+	};
+	struct tcf_t tm;
+
+	if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(parms), &parms))
+		goto nla_put_failure;
+
+	tm.install = jiffies_to_clock_t(jiffies - info->tcf_tm.install);
+	tm.lastuse = jiffies_to_clock_t(jiffies - info->tcf_tm.lastuse);
+	tm.expires = jiffies_to_clock_t(info->tcf_tm.expires);
+	if (nla_put(skb, TCA_CONNMARK_TM, sizeof(tm), &tm))
+		goto nla_put_failure;
+
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, start);
+	return -1;
+}
+
+static struct tc_action_ops act_connmark_ops = { /* callback table registered for the "connmark" action kind */
+ .kind = "connmark",
+ .type = TCA_ACT_CONNMARK,
+ .owner = THIS_MODULE,
+ .act = tcf_connmark, /* per-packet handler: copies the conntrack mark into skb->mark */
+ .dump = tcf_connmark_dump, /* writes the configuration into a netlink message */
+ .init = tcf_connmark_init, /* creates or updates an action from netlink attributes */
+};
+
+static int __init connmark_init_module(void) /* module init: registers act_connmark_ops and returns tcf_register_action()'s result */
+{
+ return tcf_register_action(&act_connmark_ops, CONNMARK_TAB_MASK);
+}
+
+static void __exit connmark_cleanup_module(void) /* module exit: unregisters act_connmark_ops */
+{
+ tcf_unregister_action(&act_connmark_ops);
+}
+
+module_init(connmark_init_module);
+module_exit(connmark_cleanup_module);
+MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>");
+MODULE_DESCRIPTION("Connection tracking mark restoring");
+MODULE_LICENSE("GPL");
+
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index edbf40dac709..4cd5cf1aedf8 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -509,7 +509,7 @@ static int tcf_csum(struct sk_buff *skb,
if (unlikely(action == TC_ACT_SHOT))
goto drop;
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case cpu_to_be16(ETH_P_IP):
if (!tcf_csum_ipv4(skb, update_flags))
goto drop;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index aad6a679fb13..baef987fe2c0 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -556,8 +556,9 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
}
EXPORT_SYMBOL(tcf_exts_change);
-#define tcf_exts_first_act(ext) \
- list_first_entry(&(exts)->actions, struct tc_action, list)
+/* First action on @ext's action list, or NULL when the list is empty.
+ * The expansion uses the macro argument itself, so it does not depend on
+ * the caller having a variable with any particular name.
+ */
+#define tcf_exts_first_act(ext)					\
+		list_first_entry_or_null(&(ext)->actions,	\
+					 struct tc_action, list)
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
@@ -603,7 +604,7 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
struct tc_action *a = tcf_exts_first_act(exts);
- if (tcf_action_copy_stats(skb, a, 1) < 0)
+ if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
return -1;
#endif
return 0;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 5aed341406c2..fc399db86f11 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -65,9 +65,12 @@ static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
if (head == NULL)
return 0UL;
- list_for_each_entry(f, &head->flist, link)
- if (f->handle == handle)
+ list_for_each_entry(f, &head->flist, link) {
+ if (f->handle == handle) {
l = (unsigned long) f;
+ break;
+ }
+ }
return l;
}
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 84c8219c3e1c..5f3ee9e4b5bf 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -37,7 +37,7 @@ struct cls_bpf_prog {
struct tcf_result res;
struct list_head link;
u32 handle;
- u16 bpf_len;
+ u16 bpf_num_ops;
struct tcf_proto *tp;
struct rcu_head rcu;
};
@@ -160,7 +160,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
struct tcf_exts exts;
struct sock_fprog_kern tmp;
struct bpf_prog *fp;
- u16 bpf_size, bpf_len;
+ u16 bpf_size, bpf_num_ops;
u32 classid;
int ret;
@@ -173,13 +173,18 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
return ret;
classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
- bpf_len = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
- if (bpf_len > BPF_MAXINSNS || bpf_len == 0) {
+ bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
+ if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) {
+ ret = -EINVAL;
+ goto errout;
+ }
+
+ bpf_size = bpf_num_ops * sizeof(*bpf_ops);
+ if (bpf_size != nla_len(tb[TCA_BPF_OPS])) {
ret = -EINVAL;
goto errout;
}
- bpf_size = bpf_len * sizeof(*bpf_ops);
bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
if (bpf_ops == NULL) {
ret = -ENOMEM;
@@ -188,14 +193,14 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
- tmp.len = bpf_len;
+ tmp.len = bpf_num_ops;
tmp.filter = bpf_ops;
ret = bpf_prog_create(&fp, &tmp);
if (ret)
goto errout_free;
- prog->bpf_len = bpf_len;
+ prog->bpf_num_ops = bpf_num_ops;
prog->bpf_ops = bpf_ops;
prog->filter = fp;
prog->res.classid = classid;
@@ -215,15 +220,21 @@ static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
struct cls_bpf_head *head)
{
unsigned int i = 0x80000000;
+ u32 handle;
do {
if (++head->hgen == 0x7FFFFFFF)
head->hgen = 1;
} while (--i > 0 && cls_bpf_get(tp, head->hgen));
- if (i == 0)
+
+ if (unlikely(i == 0)) {
pr_err("Insufficient number of handles\n");
+ handle = 0;
+ } else {
+ handle = head->hgen;
+ }
- return i;
+ return handle;
}
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
@@ -303,10 +314,10 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
goto nla_put_failure;
- if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_len))
+ if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
goto nla_put_failure;
- nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_len *
+ nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
sizeof(struct sock_filter));
if (nla == NULL)
goto nla_put_failure;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 15d68f24a521..461410394d08 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -77,7 +77,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
if (flow->dst)
return ntohl(flow->dst);
- return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
+ return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
}
static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
@@ -98,7 +98,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys
if (flow->ports)
return ntohs(flow->port16[1]);
- return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
+ return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
}
static u32 flow_get_iif(const struct sk_buff *skb)
@@ -144,7 +144,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, src.u3.ip));
case htons(ETH_P_IPV6):
@@ -156,7 +156,7 @@ fallback:
static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, dst.u3.ip));
case htons(ETH_P_IPV6):
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index 5b4a4efe468c..a3d79c8bf3b8 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
struct net_device *dev, *indev = NULL;
int ret, network_offset;
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
acpar.family = NFPROTO_IPV4;
if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index c8f8c399b99a..b5294ce20cd4 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -176,7 +176,7 @@ META_COLLECTOR(int_vlan_tag)
{
unsigned short tag;
- tag = vlan_tx_tag_get(skb);
+ tag = skb_vlan_tag_get(skb);
if (!tag && __vlan_get_tag(skb, &tag))
*err = -1;
else
@@ -197,7 +197,7 @@ META_COLLECTOR(int_priority)
META_COLLECTOR(int_protocol)
{
/* Let userspace take care of the byte ordering */
- dst->value = skb->protocol;
+ dst->value = tc_skb_protocol(skb);
}
META_COLLECTOR(int_pkttype)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 76f402e05bd6..243b7d169d61 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1807,7 +1807,7 @@ done:
int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- __be16 protocol = skb->protocol;
+ __be16 protocol = tc_skb_protocol(skb);
int err;
for (; tp; tp = rcu_dereference_bh(tp->next)) {
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 227114f27f94..66700a6116aa 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -203,7 +203,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
if (p->set_tc_index) {
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
if (skb_cow_head(skb, sizeof(struct iphdr)))
goto drop;
@@ -289,7 +289,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
index = skb->tc_index & (p->indices - 1);
pr_debug("index %d->%d\n", skb->tc_index, index);
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
p->value[index]);
@@ -306,7 +306,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
*/
if (p->mask[index] != 0xff || p->value[index])
pr_warn("%s: unsupported protocol %d\n",
- __func__, ntohs(skb->protocol));
+ __func__, ntohs(tc_skb_protocol(skb)));
break;
}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 9b05924cc386..dfcea20e3171 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -1,7 +1,7 @@
/*
* net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing)
*
- * Copyright (C) 2013 Eric Dumazet <edumazet@google.com>
+ * Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -52,6 +52,7 @@
#include <net/pkt_sched.h>
#include <net/sock.h>
#include <net/tcp_states.h>
+#include <net/tcp.h>
/*
* Per flow structure, dynamically allocated
@@ -92,6 +93,7 @@ struct fq_sched_data {
u32 flow_refill_delay;
u32 flow_max_rate; /* optional max rate per flow */
u32 flow_plimit; /* max packets per flow */
+ u32 orphan_mask; /* mask for orphaned skb */
struct rb_root *fq_root;
u8 rate_enable;
u8 fq_trees_log;
@@ -222,11 +224,20 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
return &q->internal;
- if (unlikely(!sk)) {
+ /* SYNACK messages are attached to a listener socket.
+ * 1) They are not part of a 'flow' yet
+ * 2) We do not want to rate limit them (eg SYNFLOOD attack),
+ * especially if the listener set SO_MAX_PACING_RATE
+ * 3) We pretend they are orphaned
+ */
+ if (!sk || sk->sk_state == TCP_LISTEN) {
+ unsigned long hash = skb_get_hash(skb) & q->orphan_mask;
+
/* By forcing low order bit to 1, we make sure to not
* collide with a local flow (socket pointers are word aligned)
*/
- sk = (struct sock *)(skb_get_hash(skb) | 1L);
+ sk = (struct sock *)((hash << 1) | 1UL);
+ skb_orphan(skb);
}
root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];
@@ -445,7 +456,9 @@ begin:
goto begin;
}
- if (unlikely(f->head && now < f->time_next_packet)) {
+ skb = f->head;
+ if (unlikely(skb && now < f->time_next_packet &&
+ !skb_is_tcp_pure_ack(skb))) {
head->first = f->next;
fq_flow_set_throttled(q, f);
goto begin;
@@ -464,14 +477,17 @@ begin:
goto begin;
}
prefetch(&skb->end);
- f->time_next_packet = now;
f->credit -= qdisc_pkt_len(skb);
if (f->credit > 0 || !q->rate_enable)
goto out;
+ /* Do not pace locally generated ack packets */
+ if (skb_is_tcp_pure_ack(skb))
+ goto out;
+
rate = q->flow_max_rate;
- if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT)
+ if (skb->sk)
rate = min(skb->sk->sk_pacing_rate, rate);
if (rate != ~0U) {
@@ -670,8 +686,14 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_FQ_FLOW_PLIMIT])
q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]);
- if (tb[TCA_FQ_QUANTUM])
- q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
+ if (tb[TCA_FQ_QUANTUM]) {
+ u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
+
+ if (quantum > 0)
+ q->quantum = quantum;
+ else
+ err = -EINVAL;
+ }
if (tb[TCA_FQ_INITIAL_QUANTUM])
q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
@@ -698,6 +720,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
}
+ if (tb[TCA_FQ_ORPHAN_MASK])
+ q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]);
+
if (!err) {
sch_tree_unlock(sch);
err = fq_resize(sch, fq_log);
@@ -743,6 +768,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
q->delayed = RB_ROOT;
q->fq_root = NULL;
q->fq_trees_log = ilog2(1024);
+ q->orphan_mask = 1024 - 1;
qdisc_watchdog_init(&q->watchdog, sch);
if (opt)
@@ -772,6 +798,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
jiffies_to_usecs(q->flow_refill_delay)) ||
+ nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
goto nla_put_failure;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 6ada42396a24..e02687185a59 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -122,13 +122,6 @@ teql_peek(struct Qdisc *sch)
return NULL;
}
-static inline void
-teql_neigh_release(struct neighbour *n)
-{
- if (n)
- neigh_release(n);
-}
-
static void
teql_reset(struct Qdisc *sch)
{
@@ -249,8 +242,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
char haddr[MAX_ADDR_LEN];
neigh_ha_snapshot(haddr, n, dev);
- err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
- NULL, skb->len);
+ err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)),
+ haddr, NULL, skb->len);
if (err < 0)
err = -EINVAL;