21 files changed, 834 insertions, 175 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2274e723a3df..daa33432b716 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -312,6 +312,7 @@ config NET_SCH_PIE
 config NET_SCH_INGRESS
 	tristate "Ingress Qdisc"
 	depends on NET_CLS_ACT
+	select NET_INGRESS
 	---help---
 	  Say Y here if you want to use classifiers for incoming packets.
 	  If unsure, say Y.
@@ -477,6 +478,16 @@ config NET_CLS_BPF
 	  To compile this code as a module, choose M here: the module will
 	  be called cls_bpf.
 
+config NET_CLS_FLOWER
+	tristate "Flower classifier"
+	select NET_CLS
+	---help---
+	  If you say Y here, you will be able to classify packets based on
+	  a configurable combination of packet keys and masks.
+
+	  To compile this code as a module, choose M here: the module will
+	  be called cls_flower.
+
 config NET_EMATCH
 	bool "Extended Matches"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 7ca7f4c1b8c2..690c1689e090 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
 obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
 obj-$(CONFIG_NET_CLS_CGROUP)	+= cls_cgroup.o
 obj-$(CONFIG_NET_CLS_BPF)	+= cls_bpf.o
+obj-$(CONFIG_NET_CLS_FLOWER)	+= cls_flower.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3d43e4979f27..af427a3dbcba 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -392,11 +392,6 @@ int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
 	list_for_each_entry(a, actions, list) {
 repeat:
 		ret = a->ops->act(skb, a, res);
-		if (TC_MUNGED & skb->tc_verd) {
-			/* copied already, allow trampling */
-			skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
-			skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
-		}
 		if (ret == TC_ACT_REPEAT)
 			goto repeat;	/* we need a ttl - JHS */
 		if (ret != TC_ACT_PIPE)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index dc6a2d324bd8..1d56903fd4c7 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -37,6 +37,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 {
 	struct tcf_bpf *prog = act->priv;
 	int action, filter_res;
+	bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
 
 	if (unlikely(!skb_mac_header_was_set(skb)))
 		return TC_ACT_UNSPEC;
@@ -48,7 +49,13 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 
 	/* Needed here for accessing maps. */
 	rcu_read_lock();
-	filter_res = BPF_PROG_RUN(prog->filter, skb);
+	if (at_ingress) {
+		__skb_push(skb, skb->mac_len);
+		filter_res = BPF_PROG_RUN(prog->filter, skb);
+		__skb_pull(skb, skb->mac_len);
+	} else {
+		filter_res = BPF_PROG_RUN(prog->filter, skb);
+	}
 	rcu_read_unlock();
 
 	/* A BPF program may overwrite the default action opcode.
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 3f63ceac8e01..a42a3b257226 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -151,7 +151,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 	}
 
 	at = G_TC_AT(skb->tc_verd);
-	skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action);
+	skb2 = skb_clone(skb, GFP_ATOMIC);
 	if (skb2 == NULL)
 		goto out;
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 59649d588d79..17e6d6669c7f 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -108,7 +108,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
 		     struct tcf_result *res)
 {
 	struct tcf_pedit *p = a->priv;
-	int i, munged = 0;
+	int i;
 	unsigned int off;
 
 	if (skb_unclone(skb, GFP_ATOMIC))
@@ -156,11 +156,8 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
 			*ptr = ((*ptr & tkey->mask) ^ tkey->val);
 			if (ptr == &_data)
 				skb_store_bits(skb, off + offset, ptr, 4);
-			munged++;
 		}
 
-		if (munged)
-			skb->tc_verd = SET_TC_MUNGED(skb->tc_verd);
 		goto done;
 	} else
 		WARN(1, "pedit BUG: index %d\n", p->tcf_index);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 91bd9c19471d..c79ecfd36e0f 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -64,6 +64,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 {
 	struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
 	struct cls_bpf_prog *prog;
+#ifdef CONFIG_NET_CLS_ACT
+	bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
+#else
+	bool at_ingress = false;
+#endif
 	int ret = -1;
 
 	if (unlikely(!skb_mac_header_was_set(skb)))
@@ -72,7 +77,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	/* Needed here for accessing maps. */
 	rcu_read_lock();
 	list_for_each_entry_rcu(prog, &head->plist, link) {
-		int filter_res = BPF_PROG_RUN(prog->filter, skb);
+		int filter_res;
+
+		if (at_ingress) {
+			/* It is safe to push/pull even if skb_shared() */
+			__skb_push(skb, skb->mac_len);
+			filter_res = BPF_PROG_RUN(prog->filter, skb);
+			__skb_pull(skb, skb->mac_len);
+		} else {
+			filter_res = BPF_PROG_RUN(prog->filter, skb);
+		}
 
 		if (filter_res == 0)
 			continue;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index a620c4e288a5..76bc3a20ffdb 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -26,7 +26,7 @@
 #include <net/pkt_cls.h>
 #include <net/ip.h>
 #include <net/route.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netfilter/nf_conntrack.h>
@@ -68,35 +68,41 @@ static inline u32 addr_fold(void *addr)
 
 static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	if (flow->src)
-		return ntohl(flow->src);
+	__be32 src = flow_get_u32_src(flow);
+
+	if (src)
+		return ntohl(src);
+
 	return addr_fold(skb->sk);
 }
 
 static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	if (flow->dst)
-		return ntohl(flow->dst);
+	__be32 dst = flow_get_u32_dst(flow);
+
+	if (dst)
+		return ntohl(dst);
+
 	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
 }
 
 static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	return flow->ip_proto;
+	return flow->basic.ip_proto;
 }
 
 static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	if (flow->ports)
-		return ntohs(flow->port16[0]);
+	if (flow->ports.ports)
+		return ntohs(flow->ports.src);
 
 	return addr_fold(skb->sk);
 }
 
 static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	if (flow->ports)
-		return ntohs(flow->port16[1]);
+	if (flow->ports.ports)
+		return ntohs(flow->ports.dst);
 
 	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
 }
@@ -295,7 +301,7 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 
 		keymask = f->keymask;
 		if (keymask & FLOW_KEYS_NEEDED)
-			skb_flow_dissect(skb, &flow_keys);
+			skb_flow_dissect_flow_keys(skb, &flow_keys);
 
 		for (n = 0; n < f->nkeys; n++) {
 			key = ffs(keymask) - 1;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
new file mode 100644
index 000000000000..b92d3f49c23e
--- /dev/null
+++ b/net/sched/cls_flower.c
@@ -0,0 +1,691 @@
+/*
+ * net/sched/cls_flower.c		Flower classifier
+ *
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/rhashtable.h>
+
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+
+#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
+#include <net/ip.h>
+#include <net/flow_dissector.h>
+
+struct fl_flow_key {
+	int	indev_ifindex;
+	struct flow_dissector_key_control control;
+	struct flow_dissector_key_basic basic;
+	struct flow_dissector_key_eth_addrs eth;
+	struct flow_dissector_key_addrs ipaddrs;
+	union {
+		struct flow_dissector_key_ipv4_addrs ipv4;
+		struct flow_dissector_key_ipv6_addrs ipv6;
+	};
+	struct flow_dissector_key_ports tp;
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+
+struct fl_flow_mask_range {
+	unsigned short int start;
+	unsigned short int end;
+};
+
+struct fl_flow_mask {
+	struct fl_flow_key key;
+	struct fl_flow_mask_range range;
+	struct rcu_head	rcu;
+};
+
+struct cls_fl_head {
+	struct rhashtable ht;
+	struct fl_flow_mask mask;
+	struct flow_dissector dissector;
+	u32 hgen;
+	bool mask_assigned;
+	struct list_head filters;
+	struct rhashtable_params ht_params;
+	struct rcu_head rcu;
+};
+
+struct cls_fl_filter {
+	struct rhash_head ht_node;
+	struct fl_flow_key mkey;
+	struct tcf_exts exts;
+	struct tcf_result res;
+	struct fl_flow_key key;
+	struct list_head list;
+	u32 handle;
+	struct rcu_head	rcu;
+};
+
+static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
+{
+	return mask->range.end - mask->range.start;
+}
+
+static void fl_mask_update_range(struct fl_flow_mask *mask)
+{
+	const u8 *bytes = (const u8 *) &mask->key;
+	size_t size = sizeof(mask->key);
+	size_t i, first = 0, last = size - 1;
+
+	for (i = 0; i < sizeof(mask->key); i++) {
+		if (bytes[i]) {
+			if (!first && i)
+				first = i;
+			last = i;
+		}
+	}
+	mask->range.start = rounddown(first, sizeof(long));
+	mask->range.end = roundup(last + 1, sizeof(long));
+}
+
+static void *fl_key_get_start(struct fl_flow_key *key,
+			      const struct fl_flow_mask *mask)
+{
+	return (u8 *) key + mask->range.start;
+}
+
+static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
+			      struct fl_flow_mask *mask)
+{
+	const long *lkey = fl_key_get_start(key, mask);
+	const long *lmask = fl_key_get_start(&mask->key, mask);
+	long *lmkey = fl_key_get_start(mkey, mask);
+	int i;
+
+	for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
+		*lmkey++ = *lkey++ & *lmask++;
+}
+
+static void fl_clear_masked_range(struct fl_flow_key *key,
+				  struct fl_flow_mask *mask)
+{
+	memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
+}
+
+static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+		       struct tcf_result *res)
+{
+	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
+	struct cls_fl_filter *f;
+	struct fl_flow_key skb_key;
+	struct fl_flow_key skb_mkey;
+
+	fl_clear_masked_range(&skb_key, &head->mask);
+	skb_key.indev_ifindex = skb->skb_iif;
+	/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
+	 * so do it rather here.
+	 */
+	skb_key.basic.n_proto = skb->protocol;
+	skb_flow_dissect(skb, &head->dissector, &skb_key);
+
+	fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
+
+	f = rhashtable_lookup_fast(&head->ht,
+				   fl_key_get_start(&skb_mkey, &head->mask),
+				   head->ht_params);
+	if (f) {
+		*res = f->res;
+		return tcf_exts_exec(skb, &f->exts, res);
+	}
+	return -1;
+}
+
+static int fl_init(struct tcf_proto *tp)
+{
+	struct cls_fl_head *head;
+
+	head = kzalloc(sizeof(*head), GFP_KERNEL);
+	if (!head)
+		return -ENOBUFS;
+
+	INIT_LIST_HEAD_RCU(&head->filters);
+	rcu_assign_pointer(tp->root, head);
+
+	return 0;
+}
+
+static void fl_destroy_filter(struct rcu_head *head)
+{
+	struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+
+	tcf_exts_destroy(&f->exts);
+	kfree(f);
+}
+
+static bool fl_destroy(struct tcf_proto *tp, bool force)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f, *next;
+
+	if (!force && !list_empty(&head->filters))
+		return false;
+
+	list_for_each_entry_safe(f, next, &head->filters, list) {
+		list_del_rcu(&f->list);
+		call_rcu(&f->rcu, fl_destroy_filter);
+	}
+	RCU_INIT_POINTER(tp->root, NULL);
+	if (head->mask_assigned)
+		rhashtable_destroy(&head->ht);
+	kfree_rcu(head, rcu);
+	return true;
+}
+
+static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f;
+
+	list_for_each_entry(f, &head->filters, list)
+		if (f->handle == handle)
+			return (unsigned long) f;
+	return 0;
+}
+
+static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
+	[TCA_FLOWER_UNSPEC]		= { .type = NLA_UNSPEC },
+	[TCA_FLOWER_CLASSID]		= { .type = NLA_U32 },
+	[TCA_FLOWER_INDEV]		= { .type = NLA_STRING,
+					    .len = IFNAMSIZ },
+	[TCA_FLOWER_KEY_ETH_DST]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_DST_MASK]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_SRC]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_SRC_MASK]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_TYPE]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_IP_PROTO]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_IPV4_SRC]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV4_SRC_MASK]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV4_DST]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV4_DST_MASK]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_IPV6_SRC_MASK]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_IPV6_DST_MASK]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_TCP_SRC]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_TCP_DST]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_TCP_SRC]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_TCP_DST]	= { .type = NLA_U16 },
+};
+
+static void fl_set_key_val(struct nlattr **tb,
+			   void *val, int val_type,
+			   void *mask, int mask_type, int len)
+{
+	if (!tb[val_type])
+		return;
+	memcpy(val, nla_data(tb[val_type]), len);
+	if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
+		memset(mask, 0xff, len);
+	else
+		memcpy(mask, nla_data(tb[mask_type]), len);
+}
+
+static int fl_set_key(struct net *net, struct nlattr **tb,
+		      struct fl_flow_key *key, struct fl_flow_key *mask)
+{
+#ifdef CONFIG_NET_CLS_IND
+	if (tb[TCA_FLOWER_INDEV]) {
+		int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
+		if (err < 0)
+			return err;
+		key->indev_ifindex = err;
+		mask->indev_ifindex = 0xffffffff;
+	}
+#endif
+
+	fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+		       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+		       sizeof(key->eth.dst));
+	fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+		       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+		       sizeof(key->eth.src));
+	fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+		       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+		       sizeof(key->basic.n_proto));
+	if (key->basic.n_proto == htons(ETH_P_IP) ||
+	    key->basic.n_proto == htons(ETH_P_IPV6)) {
+		fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+			       &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+			       sizeof(key->basic.ip_proto));
+	}
+	if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+			       &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+			       sizeof(key->ipv4.src));
+		fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+			       &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+			       sizeof(key->ipv4.dst));
+	} else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+			       &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+			       sizeof(key->ipv6.src));
+		fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+			       &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+			       sizeof(key->ipv6.dst));
+	}
+	if (key->basic.ip_proto == IPPROTO_TCP) {
+		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+			       &mask->tp.src, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.src));
+		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.dst));
+	} else if (key->basic.ip_proto == IPPROTO_UDP) {
+		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+			       &mask->tp.src, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.src));
+		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.dst));
+	}
+
+	return 0;
+}
+
+static bool fl_mask_eq(struct fl_flow_mask *mask1,
+		       struct fl_flow_mask *mask2)
+{
+	const long *lmask1 = fl_key_get_start(&mask1->key, mask1);
+	const long *lmask2 = fl_key_get_start(&mask2->key, mask2);
+
+	return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) &&
+	       !memcmp(lmask1, lmask2, fl_mask_range(mask1));
+}
+
+static const struct rhashtable_params fl_ht_params = {
+	.key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
+	.head_offset = offsetof(struct cls_fl_filter, ht_node),
+	.automatic_shrinking = true,
+};
+
+static int fl_init_hashtable(struct cls_fl_head *head,
+			     struct fl_flow_mask *mask)
+{
+	head->ht_params = fl_ht_params;
+	head->ht_params.key_len = fl_mask_range(mask);
+	head->ht_params.key_offset += mask->range.start;
+
+	return rhashtable_init(&head->ht, &head->ht_params);
+}
+
+#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
+#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
+#define FL_KEY_MEMBER_END_OFFSET(member)					\
+	(FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))
+
+#define FL_KEY_IN_RANGE(mask, member)						\
+        (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end &&			\
+         FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)
+
+#define FL_KEY_SET(keys, cnt, id, member)					\
+	do {									\
+		keys[cnt].key_id = id;						\
+		keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member);		\
+		cnt++;								\
+	} while(0);
+
+#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member)			\
+	do {									\
+		if (FL_KEY_IN_RANGE(mask, member))				\
+			FL_KEY_SET(keys, cnt, id, member);			\
+	} while(0);
+
+static void fl_init_dissector(struct cls_fl_head *head,
+			      struct fl_flow_mask *mask)
+{
+	struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
+	size_t cnt = 0;
+
+	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
+	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_PORTS, tp);
+
+	skb_flow_dissector_init(&head->dissector, keys, cnt);
+}
+
+static int fl_check_assign_mask(struct cls_fl_head *head,
+				struct fl_flow_mask *mask)
+{
+	int err;
+
+	if (head->mask_assigned) {
+		if (!fl_mask_eq(&head->mask, mask))
+			return -EINVAL;
+		else
+			return 0;
+	}
+
+	/* Mask is not assigned yet. So assign it and init hashtable
+	 * according to that.
+	 */
+	err = fl_init_hashtable(head, mask);
+	if (err)
+		return err;
+	memcpy(&head->mask, mask, sizeof(head->mask));
+	head->mask_assigned = true;
+
+	fl_init_dissector(head, mask);
+
+	return 0;
+}
+
+static int fl_set_parms(struct net *net, struct tcf_proto *tp,
+			struct cls_fl_filter *f, struct fl_flow_mask *mask,
+			unsigned long base, struct nlattr **tb,
+			struct nlattr *est, bool ovr)
+{
+	struct tcf_exts e;
+	int err;
+
+	tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_FLOWER_CLASSID]) {
+		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
+		tcf_bind_filter(tp, &f->res, base);
+	}
+
+	err = fl_set_key(net, tb, &f->key, &mask->key);
+	if (err)
+		goto errout;
+
+	fl_mask_update_range(mask);
+	fl_set_masked_key(&f->mkey, &f->key, mask);
+
+	tcf_exts_change(tp, &f->exts, &e);
+
+	return 0;
+errout:
+	tcf_exts_destroy(&e);
+	return err;
+}
+
+static u32 fl_grab_new_handle(struct tcf_proto *tp,
+			      struct cls_fl_head *head)
+{
+	unsigned int i = 0x80000000;
+	u32 handle;
+
+	do {
+		if (++head->hgen == 0x7FFFFFFF)
+			head->hgen = 1;
+	} while (--i > 0 && fl_get(tp, head->hgen));
+
+	if (unlikely(i == 0)) {
+		pr_err("Insufficient number of handles\n");
+		handle = 0;
+	} else {
+		handle = head->hgen;
+	}
+
+	return handle;
+}
+
+static int fl_change(struct net *net, struct sk_buff *in_skb,
+		     struct tcf_proto *tp, unsigned long base,
+		     u32 handle, struct nlattr **tca,
+		     unsigned long *arg, bool ovr)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
+	struct cls_fl_filter *fnew;
+	struct nlattr *tb[TCA_FLOWER_MAX + 1];
+	struct fl_flow_mask mask = {};
+	int err;
+
+	if (!tca[TCA_OPTIONS])
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
+	if (err < 0)
+		return err;
+
+	if (fold && handle && fold->handle != handle)
+		return -EINVAL;
+
+	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+	if (!fnew)
+		return -ENOBUFS;
+
+	tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+
+	if (!handle) {
+		handle = fl_grab_new_handle(tp, head);
+		if (!handle) {
+			err = -EINVAL;
+			goto errout;
+		}
+	}
+	fnew->handle = handle;
+
+	err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
+	if (err)
+		goto errout;
+
+	err = fl_check_assign_mask(head, &mask);
+	if (err)
+		goto errout;
+
+	err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
+				     head->ht_params);
+	if (err)
+		goto errout;
+	if (fold)
+		rhashtable_remove_fast(&head->ht, &fold->ht_node,
+				       head->ht_params);
+
+	*arg = (unsigned long) fnew;
+
+	if (fold) {
+		list_replace_rcu(&fnew->list, &fold->list);
+		tcf_unbind_filter(tp, &fold->res);
+		call_rcu(&fold->rcu, fl_destroy_filter);
+	} else {
+		list_add_tail_rcu(&fnew->list, &head->filters);
+	}
+
+	return 0;
+
+errout:
+	kfree(fnew);
+	return err;
+}
+
+static int fl_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
+
+	rhashtable_remove_fast(&head->ht, &f->ht_node,
+			       head->ht_params);
+	list_del_rcu(&f->list);
+	tcf_unbind_filter(tp, &f->res);
+	call_rcu(&f->rcu, fl_destroy_filter);
+	return 0;
+}
+
+static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f;
+
+	list_for_each_entry_rcu(f, &head->filters, list) {
+		if (arg->count < arg->skip)
+			goto skip;
+		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+skip:
+		arg->count++;
+	}
+}
+
+static int fl_dump_key_val(struct sk_buff *skb,
+			   void *val, int val_type,
+			   void *mask, int mask_type, int len)
+{
+	int err;
+
+	if (!memchr_inv(mask, 0, len))
+		return 0;
+	err = nla_put(skb, val_type, len, val);
+	if (err)
+		return err;
+	if (mask_type != TCA_FLOWER_UNSPEC) {
+		err = nla_put(skb, mask_type, len, mask);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+		   struct sk_buff *skb, struct tcmsg *t)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
+	struct nlattr *nest;
+	struct fl_flow_key *key, *mask;
+
+	if (!f)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (!nest)
+		goto nla_put_failure;
+
+	if (f->res.classid &&
+	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
+		goto nla_put_failure;
+
+	key = &f->key;
+	mask = &head->mask.key;
+
+	if (mask->indev_ifindex) {
+		struct net_device *dev;
+
+		dev = __dev_get_by_index(net, key->indev_ifindex);
+		if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
+			goto nla_put_failure;
+	}
+
+	if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+			    mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+			    sizeof(key->eth.dst)) ||
+	    fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+			    mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+			    sizeof(key->eth.src)) ||
+	    fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+			    &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+			    sizeof(key->basic.n_proto)))
+		goto nla_put_failure;
+	if ((key->basic.n_proto == htons(ETH_P_IP) ||
+	     key->basic.n_proto == htons(ETH_P_IPV6)) &&
+	    fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+			    &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+			    sizeof(key->basic.ip_proto)))
+		goto nla_put_failure;
+
+	if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
+	    (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+			     &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+			     sizeof(key->ipv4.src)) ||
+	     fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+			     &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+			     sizeof(key->ipv4.dst))))
+		goto nla_put_failure;
+	else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
+		 (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+				  &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+				  sizeof(key->ipv6.src)) ||
+		  fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+				  &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+				  sizeof(key->ipv6.dst))))
+		goto nla_put_failure;
+
+	if (key->basic.ip_proto == IPPROTO_TCP &&
+	    (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+			     &mask->tp.src, TCA_FLOWER_UNSPEC,
+			     sizeof(key->tp.src)) ||
+	     fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+			     &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			     sizeof(key->tp.dst))))
+		goto nla_put_failure;
+	else if (key->basic.ip_proto == IPPROTO_UDP &&
+		 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+				  &mask->tp.src, TCA_FLOWER_UNSPEC,
+				  sizeof(key->tp.src)) ||
+		  fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+				  &mask->tp.dst, TCA_FLOWER_UNSPEC,
+				  sizeof(key->tp.dst))))
+		goto nla_put_failure;
+
+	if (tcf_exts_dump(skb, &f->exts))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+
+	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
+		goto nla_put_failure;
+
+	return skb->len;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_fl_ops __read_mostly = {
+	.kind		= "flower",
+	.classify	= fl_classify,
+	.init		= fl_init,
+	.destroy	= fl_destroy,
+	.get		= fl_get,
+	.change		= fl_change,
+	.delete		= fl_delete,
+	.walk		= fl_walk,
+	.dump		= fl_dump,
+	.owner		= THIS_MODULE,
+};
+
+static int __init cls_fl_init(void)
+{
+	return register_tcf_proto_ops(&cls_fl_ops);
+}
+
+static void __exit cls_fl_exit(void)
+{
+	unregister_tcf_proto_ops(&cls_fl_ops);
+}
+
+module_init(cls_fl_init);
+module_exit(cls_fl_exit);
+
+MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
+MODULE_DESCRIPTION("Flower classifier");
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index a3d79c8bf3b8..df0328ba6a48 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -92,8 +92,8 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
 
 	rcu_read_lock();
 
-	if (dev && skb->skb_iif)
-		indev = dev_get_by_index_rcu(dev_net(dev), skb->skb_iif);
+	if (skb->skb_iif)
+		indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
 
 	acpar.in      = indev ? indev : dev;
 	acpar.out     = dev;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 73a123daa2cc..f06aa01d60fd 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1818,13 +1818,8 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
 			continue;
 		err = tp->classify(skb, tp, res);
 
-		if (err >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
-			if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
-				skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
-#endif
+		if (err >= 0)
 			return err;
-		}
 	}
 	return -1;
 }
@@ -1836,23 +1831,22 @@ int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	int err = 0;
 #ifdef CONFIG_NET_CLS_ACT
 	const struct tcf_proto *otp = tp;
+	int limit = 0;
 reclassify:
 #endif
 
 	err = tc_classify_compat(skb, tp, res);
 #ifdef CONFIG_NET_CLS_ACT
 	if (err == TC_ACT_RECLASSIFY) {
-		u32 verd = G_TC_VERD(skb->tc_verd);
 		tp = otp;
 
-		if (verd++ >= MAX_REC_LOOP) {
+		if (unlikely(limit++ >= MAX_REC_LOOP)) {
 			net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n",
 					       tp->q->ops->id,
 					       tp->prio & 0xffff,
 					       ntohs(tp->protocol));
 			return TC_ACT_SHOT;
 		}
-		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
 		goto reclassify;
 	}
 #endif
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index c009eb9045ce..93d5742dc7e0 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -18,7 +18,7 @@
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
 #include <net/red.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
 
 /*
    CHOKe stateless AQM for fair bandwidth allocation
@@ -133,16 +133,10 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
 	--sch->q.qlen;
 }
 
-/* private part of skb->cb[] that a qdisc is allowed to use
- * is limited to QDISC_CB_PRIV_LEN bytes.
- * As a flow key might be too large, we store a part of it only.
- */
-#define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3)
-
 struct choke_skb_cb {
 	u16			classid;
 	u8			keys_valid;
-	u8			keys[QDISC_CB_PRIV_LEN - 3];
+	struct			flow_keys_digest keys;
 };
 
 static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
@@ -176,19 +170,19 @@ static bool choke_match_flow(struct sk_buff *skb1,
 
 	if (!choke_skb_cb(skb1)->keys_valid) {
 		choke_skb_cb(skb1)->keys_valid = 1;
-		skb_flow_dissect(skb1, &temp);
-		memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN);
+		skb_flow_dissect_flow_keys(skb1, &temp);
+		make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
 	}
 
 	if (!choke_skb_cb(skb2)->keys_valid) {
 		choke_skb_cb(skb2)->keys_valid = 1;
-		skb_flow_dissect(skb2, &temp);
-		memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN);
+		skb_flow_dissect_flow_keys(skb2, &temp);
+		make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
 	}
 
 	return !memcmp(&choke_skb_cb(skb1)->keys,
 		       &choke_skb_cb(skb2)->keys,
-		       CHOKE_K_LEN);
+		       sizeof(choke_skb_cb(skb1)->keys));
 }
 
 /*
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 7a0bdb16ac92..535007d5f0b5 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -6,7 +6,7 @@
  *
  *  Implemented on linux by :
  *  Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
- *  Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ *  Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -109,6 +109,7 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
 	[TCA_CODEL_LIMIT]	= { .type = NLA_U32 },
 	[TCA_CODEL_INTERVAL]	= { .type = NLA_U32 },
 	[TCA_CODEL_ECN]		= { .type = NLA_U32 },
+	[TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 },
 };
 
 static int codel_change(struct Qdisc *sch, struct nlattr *opt)
@@ -133,6 +134,12 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
 		q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
 	}
 
+	if (tb[TCA_CODEL_CE_THRESHOLD]) {
+		u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
+
+		q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+	}
+
 	if (tb[TCA_CODEL_INTERVAL]) {
 		u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
 
@@ -201,7 +208,10 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 	    nla_put_u32(skb, TCA_CODEL_ECN,
 			q->params.ecn))
 		goto nla_put_failure;
-
+	if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+	    nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD,
+			codel_time_to_us(q->params.ce_threshold)))
+		goto nla_put_failure;
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
@@ -220,6 +230,7 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 		.ldelay		= codel_time_to_us(q->vars.ldelay),
 		.dropping	= q->vars.dropping,
 		.ecn_mark	= q->stats.ecn_mark,
+		.ce_mark	= q->stats.ce_mark,
 	};
 
 	if (q->vars.dropping) {
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index c244c45b78d7..d75993f89fac 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -6,7 +6,7 @@
  *	as published by the Free Software Foundation; either version
  *	2 of the License, or (at your option) any later version.
  *
- *  Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ *  Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
  */
 
 #include <linux/module.h>
@@ -23,7 +23,6 @@
 #include <linux/vmalloc.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
-#include <net/flow_keys.h>
 #include <net/codel.h>
 
 /*	Fair Queue CoDel.
@@ -68,15 +67,9 @@ struct fq_codel_sched_data {
 };
 
 static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
-				  const struct sk_buff *skb)
+				  struct sk_buff *skb)
 {
-	struct flow_keys keys;
-	unsigned int hash;
-
-	skb_flow_dissect(skb, &keys);
-	hash = jhash_3words((__force u32)keys.dst,
-			    (__force u32)keys.src ^ keys.ip_proto,
-			    (__force u32)keys.ports, q->perturbation);
+	u32 hash = skb_get_hash_perturb(skb, q->perturbation);
 
 	return reciprocal_scale(hash, q->flows_cnt);
 }
@@ -299,6 +292,7 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
 	[TCA_FQ_CODEL_ECN]	= { .type = NLA_U32 },
 	[TCA_FQ_CODEL_FLOWS]	= { .type = NLA_U32 },
 	[TCA_FQ_CODEL_QUANTUM]	= { .type = NLA_U32 },
+	[TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 },
 };
 
 static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
@@ -329,6 +323,12 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
 		q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT;
 	}
 
+	if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) {
+		u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]);
+
+		q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+	}
+
 	if (tb[TCA_FQ_CODEL_INTERVAL]) {
 		u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);
 
@@ -448,6 +448,11 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 			q->flows_cnt))
 		goto nla_put_failure;
 
+	if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+	    nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
+			codel_time_to_us(q->cparams.ce_threshold)))
+		goto nla_put_failure;
+
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
@@ -466,6 +471,7 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 	st.qdisc_stats.drop_overlimit = q->drop_overlimit;
 	st.qdisc_stats.ecn_mark = q->cstats.ecn_mark;
 	st.qdisc_stats.new_flow_count = q->new_flow_count;
+	st.qdisc_stats.ce_mark = q->cstats.ce_mark;
 
 	list_for_each(pos, &q->new_flows)
 		st.qdisc_stats.new_flows_len++;
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 634529e0ce6b..abb9f2fec28f 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -165,7 +165,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			 * if no default DP has been configured. This
 			 * allows for DP flows to be left untouched.
 			 */
-			if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len)
+			if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <=
+					sch->limit))
 				return qdisc_enqueue_tail(skb, sch);
 			else
 				goto drop;
@@ -397,7 +398,10 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
 
 	q->DP = dp;
 	q->prio = prio;
-	q->limit = ctl->limit;
+	if (ctl->limit > sch->limit)
+		q->limit = sch->limit;
+	else
+		q->limit = ctl->limit;
 
 	if (q->backlog == 0)
 		red_end_of_idle_period(&q->vars);
@@ -414,6 +418,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
 	[TCA_GRED_STAB]		= { .len = 256 },
 	[TCA_GRED_DPS]		= { .len = sizeof(struct tc_gred_sopt) },
 	[TCA_GRED_MAX_P]	= { .type = NLA_U32 },
+	[TCA_GRED_LIMIT]	= { .type = NLA_U32 },
 };
 
 static int gred_change(struct Qdisc *sch, struct nlattr *opt)
@@ -433,11 +438,15 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 	if (err < 0)
 		return err;
 
-	if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL)
+	if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
+		if (tb[TCA_GRED_LIMIT] != NULL)
+			sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
 		return gred_change_table_def(sch, opt);
+	}
 
 	if (tb[TCA_GRED_PARMS] == NULL ||
-	    tb[TCA_GRED_STAB] == NULL)
+	    tb[TCA_GRED_STAB] == NULL ||
+	    tb[TCA_GRED_LIMIT] != NULL)
 		return -EINVAL;
 
 	max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
@@ -501,6 +510,14 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt)
 	if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
 		return -EINVAL;
 
+	if (tb[TCA_GRED_LIMIT])
+		sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
+	else {
+		u32 qlen = qdisc_dev(sch)->tx_queue_len ? : 1;
+
+		sch->limit = qlen * psched_mtu(qdisc_dev(sch));
+	}
+
 	return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
 }
 
@@ -531,6 +548,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p))
 		goto nla_put_failure;
 
+	if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit))
+		goto nla_put_failure;
+
 	parms = nla_nest_start(skb, TCA_GRED_PARMS);
 	if (parms == NULL)
 		goto nla_put_failure;
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 15d3aabfe250..9d15cb6b8cb1 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -9,7 +9,6 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
-#include <net/flow_keys.h>
 #include <net/pkt_sched.h>
 #include <net/sock.h>
 
@@ -176,22 +175,6 @@ static u32 hhf_time_stamp(void)
 	return jiffies;
 }
 
-static unsigned int skb_hash(const struct hhf_sched_data *q,
-			     const struct sk_buff *skb)
-{
-	struct flow_keys keys;
-	unsigned int hash;
-
-	if (skb->sk && skb->sk->sk_hash)
-		return skb->sk->sk_hash;
-
-	skb_flow_dissect(skb, &keys);
-	hash = jhash_3words((__force u32)keys.dst,
-			    (__force u32)keys.src ^ keys.ip_proto,
-			    (__force u32)keys.ports, q->perturbation);
-	return hash;
-}
-
 /* Looks up a heavy-hitter flow in a chaining list of table T. */
 static struct hh_flow_state *seek_list(const u32 hash,
 				       struct list_head *head,
@@ -280,7 +263,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	/* Get hashed flow-id of the skb. */
-	hash = skb_hash(q, skb);
+	hash = skb_get_hash_perturb(skb, q->perturbation);
 
 	/* Check if this packet belongs to an already established HH flow. */
 	flow_pos = hash & HHF_BIT_MASK;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 4cdbfb85686a..e7c648fa9dc3 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -12,16 +12,10 @@
 #include <linux/list.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
+
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 
-
-struct ingress_qdisc_data {
-	struct tcf_proto __rcu	*filter_list;
-};
-
-/* ------------------------- Class/flow operations ------------------------- */
-
 static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
 {
 	return NULL;
@@ -49,57 +43,24 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch,
 						 unsigned long cl)
 {
-	struct ingress_qdisc_data *p = qdisc_priv(sch);
-
-	return &p->filter_list;
-}
-
-/* --------------------------- Qdisc operations ---------------------------- */
+	struct net_device *dev = qdisc_dev(sch);
 
-static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct ingress_qdisc_data *p = qdisc_priv(sch);
-	struct tcf_result res;
-	struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
-	int result;
-
-	result = tc_classify(skb, fl, &res);
-
-	qdisc_bstats_update(sch, skb);
-	switch (result) {
-	case TC_ACT_SHOT:
-		result = TC_ACT_SHOT;
-		qdisc_qstats_drop(sch);
-		break;
-	case TC_ACT_STOLEN:
-	case TC_ACT_QUEUED:
-		result = TC_ACT_STOLEN;
-		break;
-	case TC_ACT_RECLASSIFY:
-	case TC_ACT_OK:
-		skb->tc_index = TC_H_MIN(res.classid);
-	default:
-		result = TC_ACT_OK;
-		break;
-	}
-
-	return result;
+	return &dev->ingress_cl_list;
 }
 
-/* ------------------------------------------------------------- */
-
 static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	net_inc_ingress_queue();
+	sch->flags |= TCQ_F_CPUSTATS;
 
 	return 0;
 }
 
 static void ingress_destroy(struct Qdisc *sch)
 {
-	struct ingress_qdisc_data *p = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
 
-	tcf_destroy_chain(&p->filter_list);
+	tcf_destroy_chain(&dev->ingress_cl_list);
 	net_dec_ingress_queue();
 }
 
@@ -110,6 +71,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
 	nest = nla_nest_start(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
+
 	return nla_nest_end(skb, nest);
 
 nla_put_failure:
@@ -130,8 +92,6 @@ static const struct Qdisc_class_ops ingress_class_ops = {
 static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
 	.cl_ops		=	&ingress_class_ops,
 	.id		=	"ingress",
-	.priv_size	=	sizeof(struct ingress_qdisc_data),
-	.enqueue	=	ingress_enqueue,
 	.init		=	ingress_init,
 	.destroy	=	ingress_destroy,
 	.dump		=	ingress_dump,
@@ -148,6 +108,7 @@ static void __exit ingress_module_exit(void)
 	unregister_qdisc(&ingress_qdisc_ops);
 }
 
-module_init(ingress_module_init)
-module_exit(ingress_module_exit)
+module_init(ingress_module_init);
+module_exit(ingress_module_exit);
+
 MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 956ead2cab9a..5abd1d9de989 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -440,9 +440,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
 		struct Qdisc *rootq = qdisc_root(sch);
 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
-		q->duplicate = 0;
 
-		qdisc_enqueue_root(skb2, rootq);
+		q->duplicate = 0;
+		rootq->enqueue(skb2, rootq);
 		q->duplicate = dupsave;
 	}
 
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 3ec7e88a43ca..b8d73bca683c 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -339,8 +339,7 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);
 
 static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
 {
-	if (!hlist_unhashed(&agg->nonfull_next))
-		hlist_del_init(&agg->nonfull_next);
+	hlist_del_init(&agg->nonfull_next);
 	q->wsum -= agg->class_weight;
 	if (q->wsum != 0)
 		q->iwsum = ONE_FP / q->wsum;
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 5819dd82630d..4b815193326c 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -26,7 +26,6 @@
 #include <net/ip.h>
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
-#include <net/flow_keys.h>
 
 /*
  * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level)
@@ -285,9 +284,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	int i;
 	u32 p_min = ~0;
 	u32 minqlen = ~0;
-	u32 r, slot, salt, sfbhash;
+	u32 r, sfbhash;
+	u32 slot = q->slot;
 	int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-	struct flow_keys keys;
 
 	if (unlikely(sch->q.qlen >= q->limit)) {
 		qdisc_qstats_overlimit(sch);
@@ -309,22 +308,17 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	fl = rcu_dereference_bh(q->filter_list);
 	if (fl) {
+		u32 salt;
+
 		/* If using external classifiers, get result and record it. */
 		if (!sfb_classify(skb, fl, &ret, &salt))
 			goto other_drop;
-		keys.src = salt;
-		keys.dst = 0;
-		keys.ports = 0;
+		sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
 	} else {
-		skb_flow_dissect(skb, &keys);
+		sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation);
 	}
 
-	slot = q->slot;
 
-	sfbhash = jhash_3words((__force u32)keys.dst,
-			       (__force u32)keys.src,
-			       (__force u32)keys.ports,
-			       q->bins[slot].perturbation);
 	if (!sfbhash)
 		sfbhash = 1;
 	sfb_skb_cb(skb)->hashes[slot] = sfbhash;
@@ -356,10 +350,8 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (unlikely(p_min >= SFB_MAX_PROB)) {
 		/* Inelastic flow */
 		if (q->double_buffering) {
-			sfbhash = jhash_3words((__force u32)keys.dst,
-					       (__force u32)keys.src,
-					       (__force u32)keys.ports,
-					       q->bins[slot].perturbation);
+			sfbhash = skb_get_hash_perturb(skb,
+			    q->bins[slot].perturbation);
 			if (!sfbhash)
 				sfbhash = 1;
 			sfb_skb_cb(skb)->hashes[slot] = sfbhash;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b877140beda5..7d1492663360 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -23,7 +23,6 @@
 #include <linux/vmalloc.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
-#include <net/flow_keys.h>
 #include <net/red.h>
 
 
@@ -156,30 +155,10 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
 	return &q->dep[val - SFQ_MAX_FLOWS];
 }
 
-/*
- * In order to be able to quickly rehash our queue when timer changes
- * q->perturbation, we store flow_keys in skb->cb[]
- */
-struct sfq_skb_cb {
-       struct flow_keys        keys;
-};
-
-static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
-{
-	qdisc_cb_private_validate(skb, sizeof(struct sfq_skb_cb));
-	return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
-}
-
 static unsigned int sfq_hash(const struct sfq_sched_data *q,
 			     const struct sk_buff *skb)
 {
-	const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
-	unsigned int hash;
-
-	hash = jhash_3words((__force u32)keys->dst,
-			    (__force u32)keys->src ^ keys->ip_proto,
-			    (__force u32)keys->ports, q->perturbation);
-	return hash & (q->divisor - 1);
+	return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
 }
 
 static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -196,10 +175,8 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 		return TC_H_MIN(skb->priority);
 
 	fl = rcu_dereference_bh(q->filter_list);
-	if (!fl) {
-		skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
+	if (!fl)
 		return sfq_hash(q, skb) + 1;
-	}
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	result = tc_classify(skb, fl, &res);