Merge branch 'cls_flower'

Jiri Pirko says: ==================== introduce programmable flow dissector and cls_flower Per Davem's request, I prepared this patchset which introduces programmable flow dissector. For current users of flow_keys, there is a wrapper skb_flow_dissect_flow_keys which maintains the previous behaviour. For purposes of cls_flower, couple of new dissection keys were introduced. Note that this dissector can be also eventually used by openvswitch code. Also, as a next step, I plan to get rid of *skb_flow_get_ports(export) and *__skb_get_poff as their functionality can be now implemented by skb_flow_dissect as well. v2->v3: - remove TCA_FLOWER_POLICE attr suggested by Jamal v1->v2: - move __skb_tx_hash rather to dev.c as suggested by Alex ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
author: David S. Miller <davem@davemloft.net> 2015-05-13 15:19:48 -0400
committer: David S. Miller <davem@davemloft.net> 2015-05-13 15:19:48 -0400
commit: dd58c6359b38cff91100593ea4926adb19e85636 (patch)
tree: c3923d418d9323bfb7cf123eb57bea2f7c63cbdd
parent: 212da1fa60a822f5a7f20d52d11524dadd192a91 (diff)
parent: 77b9900ef53ae047e36a37d13a2aa33bb2d60641 (diff)
20 files changed, 1285 insertions, 298 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index a2e25de98bde..ef26e0147050 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -76,7 +76,7 @@
 #include <net/netns/generic.h>
 #include <net/pkt_sched.h>
 #include <linux/rculist.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
 #include <net/switchdev.h>
 #include <net/bonding.h>
 #include <net/bond_3ad.h>
@@ -3051,16 +3051,16 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
 	int noff, proto = -1;
 
 	if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
-		return skb_flow_dissect(skb, fk);
+		return skb_flow_dissect_flow_keys(skb, fk);
 
-	fk->ports = 0;
+	fk->ports.ports = 0;
 	noff = skb_network_offset(skb);
 	if (skb->protocol == htons(ETH_P_IP)) {
 		if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph))))
 			return false;
 		iph = ip_hdr(skb);
-		fk->src = iph->saddr;
-		fk->dst = iph->daddr;
+		fk->addrs.src = iph->saddr;
+		fk->addrs.dst = iph->daddr;
 		noff += iph->ihl << 2;
 		if (!ip_is_fragment(iph))
 			proto = iph->protocol;
@@ -3068,15 +3068,15 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
 		if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6))))
 			return false;
 		iph6 = ipv6_hdr(skb);
-		fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
-		fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
+		fk->addrs.src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
+		fk->addrs.dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
 		noff += sizeof(*iph6);
 		proto = iph6->nexthdr;
 	} else {
 		return false;
 	}
 	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0)
-		fk->ports = skb_flow_get_ports(skb, noff, proto);
+		fk->ports.ports = skb_flow_get_ports(skb, noff, proto);
 
 	return true;
 }
@@ -3102,8 +3102,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
 	    bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
 		hash = bond_eth_hash(skb);
 	else
-		hash = (__force u32)flow.ports;
-	hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
+		hash = (__force u32)flow.ports.ports;
+	hash ^= (__force u32)flow.addrs.dst ^ (__force u32)flow.addrs.src;
 	hash ^= (hash >> 16);
 	hash ^= (hash >> 8);
 
diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c
index 0be6850be8a2..6739ebc08c47 100644
--- a/drivers/net/ethernet/cisco/enic/enic_clsf.c
+++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c
@@ -5,7 +5,7 @@
 #include <linux/in.h>
 #include <linux/types.h>
 #include <linux/skbuff.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
 #include "enic_res.h"
 #include "enic_clsf.h"
 
@@ -22,7 +22,7 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq)
 	int res;
 	struct filter data;
 
-	switch (keys->ip_proto) {
+	switch (keys->basic.ip_proto) {
 	case IPPROTO_TCP:
 		data.u.ipv4.protocol = PROTO_TCP;
 		break;
@@ -33,10 +33,10 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq)
 		return -EPROTONOSUPPORT;
 	};
 	data.type = FILTER_IPV4_5TUPLE;
-	data.u.ipv4.src_addr = ntohl(keys->src);
-	data.u.ipv4.dst_addr = ntohl(keys->dst);
-	data.u.ipv4.src_port = ntohs(keys->port16[0]);
-	data.u.ipv4.dst_port = ntohs(keys->port16[1]);
+	data.u.ipv4.src_addr = ntohl(keys->addrs.src);
+	data.u.ipv4.dst_addr = ntohl(keys->addrs.dst);
+	data.u.ipv4.src_port = ntohs(keys->ports.src);
+	data.u.ipv4.dst_port = ntohs(keys->ports.dst);
 	data.u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE;
 
 	spin_lock_bh(&enic->devcmd_lock);
@@ -158,11 +158,11 @@ static struct enic_rfs_fltr_node *htbl_key_search(struct hlist_head *h,
 	struct enic_rfs_fltr_node *tpos;
 
 	hlist_for_each_entry(tpos, h, node)
-		if (tpos->keys.src == k->src &&
-		    tpos->keys.dst == k->dst &&
-		    tpos->keys.ports == k->ports &&
-		    tpos->keys.ip_proto == k->ip_proto &&
-		    tpos->keys.n_proto == k->n_proto)
+		if (tpos->keys.addrs.src == k->addrs.src &&
+		    tpos->keys.addrs.dst == k->addrs.dst &&
+		    tpos->keys.ports.ports == k->ports.ports &&
+		    tpos->keys.basic.ip_proto == k->basic.ip_proto &&
+		    tpos->keys.basic.n_proto == k->basic.n_proto)
 			return tpos;
 	return NULL;
 }
@@ -177,9 +177,10 @@ int enic_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
 	int res, i;
 
 	enic = netdev_priv(dev);
-	res = skb_flow_dissect(skb, &keys);
-	if (!res || keys.n_proto != htons(ETH_P_IP) ||
-	    (keys.ip_proto != IPPROTO_TCP && keys.ip_proto != IPPROTO_UDP))
+	res = skb_flow_dissect_flow_keys(skb, &keys);
+	if (!res || keys.basic.n_proto != htons(ETH_P_IP) ||
+	    (keys.basic.ip_proto != IPPROTO_TCP &&
+	     keys.basic.ip_proto != IPPROTO_UDP))
 		return -EPROTONOSUPPORT;
 
 	tbl_idx = skb_get_hash_raw(skb) & ENIC_RFS_FLW_MASK;
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index 28d9ca675a27..117c0968dd0b 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -334,7 +334,7 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd)
 	n = htbl_fltr_search(enic, (u16)fsp->location);
 	if (!n)
 		return -EINVAL;
-	switch (n->keys.ip_proto) {
+	switch (n->keys.basic.ip_proto) {
 	case IPPROTO_TCP:
 		fsp->flow_type = TCP_V4_FLOW;
 		break;
@@ -346,16 +346,16 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd)
 		break;
 	}
 
-	fsp->h_u.tcp_ip4_spec.ip4src = n->keys.src;
+	fsp->h_u.tcp_ip4_spec.ip4src = n->keys.addrs.src;
 	fsp->m_u.tcp_ip4_spec.ip4src = (__u32)~0;
 
-	fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.dst;
+	fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.addrs.dst;
 	fsp->m_u.tcp_ip4_spec.ip4dst = (__u32)~0;
 
-	fsp->h_u.tcp_ip4_spec.psrc = n->keys.port16[0];
+	fsp->h_u.tcp_ip4_spec.psrc = n->keys.ports.src;
 	fsp->m_u.tcp_ip4_spec.psrc = (__u16)~0;
 
-	fsp->h_u.tcp_ip4_spec.pdst = n->keys.port16[1];
+	fsp->h_u.tcp_ip4_spec.pdst = n->keys.ports.dst;
 	fsp->m_u.tcp_ip4_spec.pdst = (__u16)~0;
 
 	fsp->ring_cookie = n->rq_id;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 5993c7e2d723..8e5fe888a0ec 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -196,12 +196,12 @@ static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
 	struct flow_keys flow;
 	int data_len;
 
-	if (!skb_flow_dissect(skb, &flow) ||
-	    !(flow.n_proto == htons(ETH_P_IP) ||
-	      flow.n_proto == htons(ETH_P_IPV6)))
+	if (!skb_flow_dissect_flow_keys(skb, &flow) ||
+	    !(flow.basic.n_proto == htons(ETH_P_IP) ||
+	      flow.basic.n_proto == htons(ETH_P_IPV6)))
 		return false;
 
-	if (flow.ip_proto == IPPROTO_TCP)
+	if (flow.basic.ip_proto == IPPROTO_TCP)
 		data_len = 12;
 	else
 		data_len = 8;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cd0951c1893d..d3ed01c18247 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2832,6 +2832,9 @@ static inline int netif_set_xps_queue(struct net_device *dev,
 }
 #endif
 
+u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
+		  unsigned int num_tx_queues);
+
 /*
  * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used
  * as a distribution range limit for the returned value.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c0b574a414e7..f83aa6568cbf 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -34,7 +34,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/netdev_features.h>
 #include <linux/sched.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
 
 /* A. Checksumming of received packets by device.
  *
@@ -918,7 +918,6 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type)
 	skb->hash = hash;
 }
 
-void __skb_get_hash(struct sk_buff *skb);
 static inline __u32 skb_get_hash(struct sk_buff *skb)
 {
 	if (!skb->l4_hash && !skb->sw_hash)
@@ -1936,8 +1935,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb,
 
 	if (skb_transport_header_was_set(skb))
 		return;
-	else if (skb_flow_dissect(skb, &keys))
-		skb_set_transport_header(skb, keys.thoff);
+	else if (skb_flow_dissect_flow_keys(skb, &keys))
+		skb_set_transport_header(skb, keys.basic.thoff);
 	else
 		skb_set_transport_header(skb, offset_hint);
 }
@@ -3300,9 +3299,6 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
 	return skb->queue_mapping != 0;
 }
 
-u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
-		  unsigned int num_tx_queues);
-
 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 {
 #ifdef CONFIG_XFRM
@@ -3424,10 +3420,6 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
 				     unsigned int transport_len,
 				     __sum16(*skb_chkf)(struct sk_buff *skb));
 
-u32 skb_get_poff(const struct sk_buff *skb);
-u32 __skb_get_poff(const struct sk_buff *skb, void *data,
-		   const struct flow_keys *keys, int hlen);
-
 /**
  * skb_head_is_locked - Determine if the skb->head is locked down
  * @skb: skb to check
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
new file mode 100644
index 000000000000..bac9c1421f58
--- /dev/null
+++ b/include/net/flow_dissector.h
@@ -0,0 +1,167 @@
+#ifndef _NET_FLOW_DISSECTOR_H
+#define _NET_FLOW_DISSECTOR_H
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/in6.h>
+#include <uapi/linux/if_ether.h>
+
+/**
+ * struct flow_dissector_key_basic:
+ * @thoff: Transport header offset
+ * @n_proto: Network header protocol (eg. IPv4/IPv6)
+ * @ip_proto: Transport header protocol (eg. TCP/UDP)
+ */
+struct flow_dissector_key_basic {
+	u16	thoff;
+	__be16	n_proto;
+	u8	ip_proto;
+};
+
+/**
+ * struct flow_dissector_key_addrs:
+ * @src: source ip address in case of IPv4
+ *	 For IPv6 it contains 32bit hash of src address
+ * @dst: destination ip address in case of IPv4
+ *	 For IPv6 it contains 32bit hash of dst address
+ */
+struct flow_dissector_key_addrs {
+	/* (src,dst) must be grouped, in the same way than in IP header */
+	__be32 src;
+	__be32 dst;
+};
+
+/**
+ * struct flow_dissector_key_ports:
+ *	@ports: port numbers of Transport header
+ *		src: source port number
+ *		dst: destination port number
+ */
+struct flow_dissector_key_ports {
+	union {
+		__be32 ports;
+		struct {
+			__be16 src;
+			__be16 dst;
+		};
+	};
+};
+
+/**
+ * struct flow_dissector_key_ipv6_addrs:
+ * @src: source ip address
+ * @dst: destination ip address
+ */
+struct flow_dissector_key_ipv6_addrs {
+	/* (src,dst) must be grouped, in the same way than in IP header */
+	struct in6_addr src;
+	struct in6_addr dst;
+};
+
+/**
+ * struct flow_dissector_key_eth_addrs:
+ * @src: source Ethernet address
+ * @dst: destination Ethernet address
+ */
+struct flow_dissector_key_eth_addrs {
+	/* (dst,src) must be grouped, in the same way than in ETH header */
+	unsigned char dst[ETH_ALEN];
+	unsigned char src[ETH_ALEN];
+};
+
+enum flow_dissector_key_id {
+	FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
+	FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */
+	FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */
+	FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
+	FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */
+	FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */
+
+	FLOW_DISSECTOR_KEY_MAX,
+};
+
+struct flow_dissector_key {
+	enum flow_dissector_key_id key_id;
+	size_t offset; /* offset of struct flow_dissector_key_*
+			  in the target struct */
+};
+
+struct flow_dissector {
+	unsigned int used_keys; /* each bit represents presence of one key id */
+	unsigned short int offset[FLOW_DISSECTOR_KEY_MAX];
+};
+
+void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
+			     const struct flow_dissector_key *key,
+			     unsigned int key_count);
+
+bool __skb_flow_dissect(const struct sk_buff *skb,
+			struct flow_dissector *flow_dissector,
+			void *target_container,
+			void *data, __be16 proto, int nhoff, int hlen);
+
+static inline bool skb_flow_dissect(const struct sk_buff *skb,
+				    struct flow_dissector *flow_dissector,
+				    void *target_container)
+{
+	return __skb_flow_dissect(skb, flow_dissector, target_container,
+				  NULL, 0, 0, 0);
+}
+
+struct flow_keys {
+	struct flow_dissector_key_addrs addrs;
+	struct flow_dissector_key_ports ports;
+	struct flow_dissector_key_basic basic;
+};
+
+extern struct flow_dissector flow_keys_dissector;
+extern struct flow_dissector flow_keys_buf_dissector;
+
+static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
+					      struct flow_keys *flow)
+{
+	memset(flow, 0, sizeof(*flow));
+	return __skb_flow_dissect(skb, &flow_keys_dissector, flow,
+				  NULL, 0, 0, 0);
+}
+
+static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow,
+						  void *data, __be16 proto,
+						  int nhoff, int hlen)
+{
+	memset(flow, 0, sizeof(*flow));
+	return __skb_flow_dissect(NULL, &flow_keys_buf_dissector, flow,
+				  data, proto, nhoff, hlen);
+}
+
+__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
+			    void *data, int hlen_proto);
+
+static inline __be32 skb_flow_get_ports(const struct sk_buff *skb,
+					int thoff, u8 ip_proto)
+{
+	return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0);
+}
+
+u32 flow_hash_from_keys(struct flow_keys *keys);
+void __skb_get_hash(struct sk_buff *skb);
+u32 skb_get_poff(const struct sk_buff *skb);
+u32 __skb_get_poff(const struct sk_buff *skb, void *data,
+		   const struct flow_keys *keys, int hlen);
+
+/* struct flow_keys_digest:
+ *
+ * This structure is used to hold a digest of the full flow keys. This is a
+ * larger "hash" of a flow to allow definitively matching specific flows where
+ * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so
+ * that it can by used in CB of skb (see sch_choke for an example).
+ */
+#define FLOW_KEYS_DIGEST_LEN	16
+struct flow_keys_digest {
+	u8	data[FLOW_KEYS_DIGEST_LEN];
+};
+
+void make_flow_keys_digest(struct flow_keys_digest *digest,
+			   const struct flow_keys *flow);
+
+#endif
diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h
deleted file mode 100644
index 6d6ef626811a..000000000000
--- a/include/net/flow_keys.h
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef _NET_FLOW_KEYS_H
-#define _NET_FLOW_KEYS_H
-
-/* struct flow_keys:
- *	@src: source ip address in case of IPv4
- *	      For IPv6 it contains 32bit hash of src address
- *	@dst: destination ip address in case of IPv4
- *	      For IPv6 it contains 32bit hash of dst address
- *	@ports: port numbers of Transport header
- *		port16[0]: src port number
- *		port16[1]: dst port number
- *	@thoff: Transport header offset
- *	@n_proto: Network header protocol (eg. IPv4/IPv6)
- *	@ip_proto: Transport header protocol (eg. TCP/UDP)
- * All the members, except thoff, are in network byte order.
- */
-struct flow_keys {
-	/* (src,dst) must be grouped, in the same way than in IP header */
-	__be32 src;
-	__be32 dst;
-	union {
-		__be32 ports;
-		__be16 port16[2];
-	};
-	u16	thoff;
-	__be16	n_proto;
-	u8	ip_proto;
-};
-
-bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
-			void *data, __be16 proto, int nhoff, int hlen);
-static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
-{
-	return __skb_flow_dissect(skb, flow, NULL, 0, 0, 0);
-}
-__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
-			    void *data, int hlen_proto);
-static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto)
-{
-	return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0);
-}
-u32 flow_hash_from_keys(struct flow_keys *keys);
-unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len,
-			   __be16 protocol);
-
-/* struct flow_keys_digest:
- *
- * This structure is used to hold a digest of the full flow keys. This is a
- * larger "hash" of a flow to allow definitively matching specific flows where
- * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so
- * that it can by used in CB of skb (see sch_choke for an example).
- */
-#define FLOW_KEYS_DIGEST_LEN	16
-struct flow_keys_digest {
-	u8	data[FLOW_KEYS_DIGEST_LEN];
-};
-
-void make_flow_keys_digest(struct flow_keys_digest *digest,
-			   const struct flow_keys *flow);
-
-#endif
diff --git a/include/net/ip.h b/include/net/ip.h
index d14af7edd197..0ed6d768e606 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -31,7 +31,7 @@
 #include <net/route.h>
 #include <net/snmp.h>
 #include <net/flow.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
 
 struct sock;
 
@@ -360,10 +360,10 @@ static inline void inet_set_txhash(struct sock *sk)
 	struct inet_sock *inet = inet_sk(sk);
 	struct flow_keys keys;
 
-	keys.src = inet->inet_saddr;
-	keys.dst = inet->inet_daddr;
-	keys.port16[0] = inet->inet_sport;
-	keys.port16[1] = inet->inet_dport;
+	keys.addrs.src = inet->inet_saddr;
+	keys.addrs.dst = inet->inet_daddr;
+	keys.ports.src = inet->inet_sport;
+	keys.ports.dst = inet->inet_dport;
 
 	sk->sk_txhash = flow_hash_from_keys(&keys);
 }
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 53d25ef1699a..aab8190d16e8 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -19,7 +19,7 @@
 #include <net/if_inet6.h>
 #include <net/ndisc.h>
 #include <net/flow.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
 #include <net/snmp.h>
 
 #define SIN6_LEN_RFC2133	24
@@ -698,10 +698,10 @@ static inline void ip6_set_txhash(struct sock *sk)
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct flow_keys keys;
 
-	keys.src = (__force __be32)ipv6_addr_hash(&np->saddr);
-	keys.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr);
-	keys.port16[0] = inet->inet_sport;
-	keys.port16[1] = inet->inet_dport;
+	keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr);
+	keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr);
+	keys.ports.src = inet->inet_sport;
+	keys.ports.dst = inet->inet_dport;
 
 	sk->sk_txhash = flow_hash_from_keys(&keys);
 }
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index ffc112c8e1c2..39fb53d67b11 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -409,6 +409,36 @@ enum {
 
 #define TCA_BPF_MAX (__TCA_BPF_MAX - 1)
 
+/* Flower classifier */
+
+enum {
+	TCA_FLOWER_UNSPEC,
+	TCA_FLOWER_CLASSID,
+	TCA_FLOWER_INDEV,
+	TCA_FLOWER_ACT,
+	TCA_FLOWER_KEY_ETH_DST,		/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_DST_MASK,	/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_SRC,		/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_SRC_MASK,	/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_TYPE,	/* be16 */
+	TCA_FLOWER_KEY_IP_PROTO,	/* u8 */
+	TCA_FLOWER_KEY_IPV4_SRC,	/* be32 */
+	TCA_FLOWER_KEY_IPV4_SRC_MASK,	/* be32 */
+	TCA_FLOWER_KEY_IPV4_DST,	/* be32 */
+	TCA_FLOWER_KEY_IPV4_DST_MASK,	/* be32 */
+	TCA_FLOWER_KEY_IPV6_SRC,	/* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_SRC_MASK,	/* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_DST,	/* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_DST_MASK,	/* struct in6_addr */
+	TCA_FLOWER_KEY_TCP_SRC,		/* be16 */
+	TCA_FLOWER_KEY_TCP_DST,		/* be16 */
+	TCA_FLOWER_KEY_UDP_SRC,		/* be16 */
+	TCA_FLOWER_KEY_UDP_DST,		/* be16 */
+	__TCA_FLOWER_MAX,
+};
+
+#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
+
 /* Extended Matches */
 
 struct tcf_ematch_tree_hdr {
diff --git a/net/core/dev.c b/net/core/dev.c
index 90a568a150b4..af549062ae8e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2350,6 +2350,34 @@ void netif_device_attach(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_device_attach);
 
+/*
+ * Returns a Tx hash based on the given packet descriptor and a Tx queue count
+ * to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
+		  unsigned int num_tx_queues)
+{
+	u32 hash;
+	u16 qoffset = 0;
+	u16 qcount = num_tx_queues;
+
+	if (skb_rx_queue_recorded(skb)) {
+		hash = skb_get_rx_queue(skb);
+		while (unlikely(hash >= num_tx_queues))
+			hash -= num_tx_queues;
+		return hash;
+	}
+
+	if (dev->num_tc) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+		qoffset = dev->tc_to_txq[tc].offset;
+		qcount = dev->tc_to_txq[tc].count;
+	}
+
+	return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
+}
+EXPORT_SYMBOL(__skb_tx_hash);
+
 static void skb_warn_bad_offload(const struct sk_buff *skb)
 {
 	static const netdev_features_t null_features = 0;
@@ -2908,6 +2936,84 @@ int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dev_loopback_xmit);
 
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+	struct xps_dev_maps *dev_maps;
+	struct xps_map *map;
+	int queue_index = -1;
+
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_maps);
+	if (dev_maps) {
+		map = rcu_dereference(
+		    dev_maps->cpu_map[skb->sender_cpu - 1]);
+		if (map) {
+			if (map->len == 1)
+				queue_index = map->queues[0];
+			else
+				queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
+									   map->len)];
+			if (unlikely(queue_index >= dev->real_num_tx_queues))
+				queue_index = -1;
+		}
+	}
+	rcu_read_unlock();
+
+	return queue_index;
+#else
+	return -1;
+#endif
+}
+
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+	int queue_index = sk_tx_queue_get(sk);
+
+	if (queue_index < 0 || skb->ooo_okay ||
+	    queue_index >= dev->real_num_tx_queues) {
+		int new_index = get_xps_queue(dev, skb);
+		if (new_index < 0)
+			new_index = skb_tx_hash(dev, skb);
+
+		if (queue_index != new_index && sk &&
+		    rcu_access_pointer(sk->sk_dst_cache))
+			sk_tx_queue_set(sk, new_index);
+
+		queue_index = new_index;
+	}
+
+	return queue_index;
+}
+
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+				    struct sk_buff *skb,
+				    void *accel_priv)
+{
+	int queue_index = 0;
+
+#ifdef CONFIG_XPS
+	if (skb->sender_cpu == 0)
+		skb->sender_cpu = raw_smp_processor_id() + 1;
+#endif
+
+	if (dev->real_num_tx_queues != 1) {
+		const struct net_device_ops *ops = dev->netdev_ops;
+		if (ops->ndo_select_queue)
+			queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+							    __netdev_pick_tx);
+		else
+			queue_index = __netdev_pick_tx(dev, skb);
+
+		if (!accel_priv)
+			queue_index = netdev_cap_txqueue(dev, queue_index);
+	}
+
+	skb_set_queue_mapping(skb, queue_index);
+	return netdev_get_tx_queue(dev, queue_index);
+}
+
 /**
  *	__dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
diff --git a/net/core/filter.c b/net/core/filter.c
index a831f193e2c7..6805717be614 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -36,6 +36,7 @@
 #include <net/netlink.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
+#include <net/flow_dissector.h>
 #include <linux/errno.h>
 #include <linux/timer.h>
 #include <asm/uaccess.h>
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d3acc4dff4ae..204d09c42510 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1,3 +1,4 @@
+#include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/export.h>
 #include <linux/ip.h>
@@ -12,19 +13,57 @@
 #include <linux/if_tunnel.h>
 #include <linux/if_pppox.h>
 #include <linux/ppp_defs.h>
-#include <net/flow_keys.h>
+#include <linux/stddef.h>
+#include <linux/if_ether.h>
+#include <net/flow_dissector.h>
 #include <scsi/fc/fc_fcoe.h>
 
-/* copy saddr & daddr, possibly using 64bit load/store
- * Equivalent to :	flow->src = iph->saddr;
- *			flow->dst = iph->daddr;
- */
-static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph)
+static bool skb_flow_dissector_uses_key(struct flow_dissector *flow_dissector,
+					enum flow_dissector_key_id key_id)
 {
-	BUILD_BUG_ON(offsetof(typeof(*flow), dst) !=
-		     offsetof(typeof(*flow), src) + sizeof(flow->src));
-	memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
+	return flow_dissector->used_keys & (1 << key_id);
+}
+
+static void skb_flow_dissector_set_key(struct flow_dissector *flow_dissector,
+				       enum flow_dissector_key_id key_id)
+{
+	flow_dissector->used_keys |= (1 << key_id);
+}
+
+static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
+				       enum flow_dissector_key_id key_id,
+				       void *target_container)
+{
+	return ((char *) target_container) + flow_dissector->offset[key_id];
+}
+
+void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
+			     const struct flow_dissector_key *key,
+			     unsigned int key_count)
+{
+	unsigned int i;
+
+	memset(flow_dissector, 0, sizeof(*flow_dissector));
+
+	for (i = 0; i < key_count; i++, key++) {
+		/* User should make sure that every key target offset is within
+		 * boundaries of unsigned short.
+		 */
+		BUG_ON(key->offset > USHRT_MAX);
+		BUG_ON(skb_flow_dissector_uses_key(flow_dissector,
+						   key->key_id));
+
+		skb_flow_dissector_set_key(flow_dissector, key->key_id);
+		flow_dissector->offset[key->key_id] = key->offset;
+	}
+
+	/* Ensure that the dissector always includes basic key. That way
+	 * we are able to avoid handling lack of it in fast path.
+	 */
+	BUG_ON(!skb_flow_dissector_uses_key(flow_dissector,
+					    FLOW_DISSECTOR_KEY_BASIC));
 }
+EXPORT_SYMBOL(skb_flow_dissector_init);
 
 /**
  * __skb_flow_get_ports - extract the upper layer ports and return them
@@ -63,17 +102,27 @@ EXPORT_SYMBOL(__skb_flow_get_ports);
 /**
  * __skb_flow_dissect - extract the flow_keys struct and return it
  * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
+ * @flow_dissector: list of keys to dissect
+ * @target_container: target structure to put dissected values into
  * @data: raw buffer pointer to the packet, if NULL use skb->data
  * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
  * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
  * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
  *
- * The function will try to retrieve the struct flow_keys from either the skbuff
- * or a raw buffer specified by the rest parameters
+ * The function will try to retrieve individual keys into target specified
+ * by flow_dissector from either the skbuff or a raw buffer specified by the
+ * rest parameters.
+ *
+ * Caller must take care of zeroing target container memory.
  */
-bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
+bool __skb_flow_dissect(const struct sk_buff *skb,
+			struct flow_dissector *flow_dissector,
+			void *target_container,
 			void *data, __be16 proto, int nhoff, int hlen)
 {
+	struct flow_dissector_key_basic *key_basic;
+	struct flow_dissector_key_addrs *key_addrs;
+	struct flow_dissector_key_ports *key_ports;
 	u8 ip_proto;
 
 	if (!data) {
@@ -83,7 +132,23 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
 		hlen = skb_headlen(skb);
 	}
 
-	memset(flow, 0, sizeof(*flow));
+	/* It is ensured by skb_flow_dissector_init() that basic key will
+	 * be always present.
+	 */
+	key_basic = skb_flow_dissector_target(flow_dissector,
+					      FLOW_DISSECTOR_KEY_BASIC,
+					      target_container);
+
+	if (skb_flow_dissector_uses_key(flow_dissector,
+					FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct ethhdr *eth = eth_hdr(skb);
+		struct flow_dissector_key_eth_addrs *key_eth_addrs;
+
+		key_eth_addrs = skb_flow_dissector_target(flow_dissector,
+							  FLOW_DISSECTOR_KEY_ETH_ADDRS,
+							  target_container);
+		memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
+	}
 
 again:
 	switch (proto) {
@@ -100,14 +165,13 @@ ip:
 		if (ip_is_fragment(iph))
 			ip_proto = 0;
 
-		/* skip the address processing if skb is NULL.  The assumption
-		 * here is that if there is no skb we are not looking for flow
-		 * info but lengths and protocols.
-		 */
-		if (!skb)
+		if (!skb_flow_dissector_uses_key(flow_dissector,
+						 FLOW_DISSECTOR_KEY_IPV4_ADDRS))
 			break;
-
-		iph_to_flow_copy_addrs(flow, iph);
+		key_addrs = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						      target_container);
+		memcpy(key_addrs, &iph->saddr, sizeof(*key_addrs));
 		break;
 	}
 	case htons(ETH_P_IPV6): {
@@ -123,23 +187,47 @@ ipv6:
 		ip_proto = iph->nexthdr;
 		nhoff += sizeof(struct ipv6hdr);
 
-		/* see comment above in IPv4 section */
-		if (!skb)
-			break;
+		if (skb_flow_dissector_uses_key(flow_dissector,
+						FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) {
+			key_addrs = skb_flow_dissector_target(flow_dissector,
+							      FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
+							      target_container);
+
+			key_addrs->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
+			key_addrs->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
+			goto flow_label;
+		}
+		if (skb_flow_dissector_uses_key(flow_dissector,
+						FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+			struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs;
 
-		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
-		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
+			key_ipv6_addrs = skb_flow_dissector_target(flow_dissector,
+								   FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+								   target_container);
 
+			memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs));
+			goto flow_label;
+		}
+		break;
+flow_label:
 		flow_label = ip6_flowlabel(iph);
 		if (flow_label) {
 			/* Awesome, IPv6 packet has a flow label so we can
 			 * use that to represent the ports without any
 			 * further dissection.
 			 */
-			flow->n_proto = proto;
-			flow->ip_proto = ip_proto;
-			flow->ports = flow_label;
-			flow->thoff = (u16)nhoff;
+
+			key_basic->n_proto = proto;
+			key_basic->ip_proto = ip_proto;
+			key_basic->thoff = (u16)nhoff;
+
+			if (!skb_flow_dissector_uses_key(flow_dissector,
+							 FLOW_DISSECTOR_KEY_PORTS))
+				break;
+			key_ports = skb_flow_dissector_target(flow_dissector,
+							      FLOW_DISSECTOR_KEY_PORTS,
+							      target_container);
+			key_ports->ports = flow_label;
 
 			return true;
 		}
@@ -186,14 +274,22 @@ ipv6:
 		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
 		if (!hdr)
 			return false;
-		flow->src = hdr->srcnode;
-		flow->dst = 0;
-		flow->n_proto = proto;
-		flow->thoff = (u16)nhoff;
+		key_basic->n_proto = proto;
+		key_basic->thoff = (u16)nhoff;
+
+		if (skb_flow_dissector_uses_key(flow_dissector,
+						FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) {
+			/* record the TIPC source node as the src address key */
+			key_addrs = skb_flow_dissector_target(flow_dissector,
+							      FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
+							      target_container);
+			key_addrs->src = hdr->srcnode;
+			key_addrs->dst = 0;
+		}
 		return true;
 	}
 	case htons(ETH_P_FCOE):
-		flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
+		key_basic->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
 		/* fall through */
 	default:
 		return false;
@@ -248,14 +344,24 @@ ipv6:
 		break;
 	}
 
-	flow->n_proto = proto;
-	flow->ip_proto = ip_proto;
-	flow->thoff = (u16) nhoff;
-
-	/* unless skb is set we don't need to record port info */
-	if (skb)
-		flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
-						   data, hlen);
+	/* skb_flow_dissector_init() ensures that the basic key is always
+	 * present.
+	 */
+	key_basic = skb_flow_dissector_target(flow_dissector,
+					      FLOW_DISSECTOR_KEY_BASIC,
+					      target_container);
+	key_basic->n_proto = proto;
+	key_basic->ip_proto = ip_proto;
+	key_basic->thoff = (u16) nhoff;
+
+	if (skb_flow_dissector_uses_key(flow_dissector,
+					FLOW_DISSECTOR_KEY_PORTS)) {
+		key_ports = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_PORTS,
+						      target_container);
+		key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+							data, hlen);
+	}
 
 	return true;
 }
@@ -277,16 +383,16 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
 	u32 hash;
 
 	/* get a consistent hash (same value on both flow directions) */
-	if (((__force u32)keys->dst < (__force u32)keys->src) ||
-	    (((__force u32)keys->dst == (__force u32)keys->src) &&
-	     ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) {
-		swap(keys->dst, keys->src);
-		swap(keys->port16[0], keys->port16[1]);
+	if (((__force u32)keys->addrs.dst < (__force u32)keys->addrs.src) ||
+	    (((__force u32)keys->addrs.dst == (__force u32)keys->addrs.src) &&
+	     ((__force u16)keys->ports.dst < (__force u16)keys->ports.src))) {
+		swap(keys->addrs.dst, keys->addrs.src);
+		swap(keys->ports.src, keys->ports.dst);
 	}
 
-	hash = __flow_hash_3words((__force u32)keys->dst,
-				  (__force u32)keys->src,
-				  (__force u32)keys->ports,
+	hash = __flow_hash_3words((__force u32)keys->addrs.dst,
+				  (__force u32)keys->addrs.src,
+				  (__force u32)keys->ports.ports,
 				  keyval);
 	if (!hash)
 		hash = 1;
@@ -304,7 +410,7 @@ EXPORT_SYMBOL(flow_hash_from_keys);
 static inline u32 ___skb_get_hash(const struct sk_buff *skb,
 				  struct flow_keys *keys, u32 keyval)
 {
-	if (!skb_flow_dissect(skb, keys))
+	if (!skb_flow_dissect_flow_keys(skb, keys))
 		return 0;
 
 	return __flow_hash_from_keys(keys, keyval);
@@ -329,16 +435,19 @@ void make_flow_keys_digest(struct flow_keys_digest *digest,
 
 	memset(digest, 0, sizeof(*digest));
 
-	data->n_proto = flow->n_proto;
-	data->ip_proto = flow->ip_proto;
-	data->ports = flow->ports;
-	data->src = flow->src;
-	data->dst = flow->dst;
+	data->n_proto = flow->basic.n_proto;
+	data->ip_proto = flow->basic.ip_proto;
+	data->ports = flow->ports.ports;
+	data->src = flow->addrs.src;
+	data->dst = flow->addrs.dst;
 }
 EXPORT_SYMBOL(make_flow_keys_digest);
 
-/*
- * __skb_get_hash: calculate a flow hash based on src/dst addresses
+/**
+ * __skb_get_hash: calculate a flow hash
+ * @skb: sk_buff to calculate flow hash from
+ *
+ * This function calculates a flow hash based on src/dst addresses
  * and src/dst port numbers.  Sets hash in skb to non-zero hash value
  * on success, zero indicates no valid hash.  Also, sets l4_hash in skb
  * if hash is a canonical 4-tuple hash over transport ports.
@@ -353,12 +462,9 @@ void __skb_get_hash(struct sk_buff *skb)
 	hash = ___skb_get_hash(skb, &keys, hashrnd);
 	if (!hash)
 		return;
-
-	if (keys.ports)
+	if (keys.ports.ports)
 		skb->l4_hash = 1;
-
 	skb->sw_hash = 1;
-
 	skb->hash = hash;
 }
 EXPORT_SYMBOL(__skb_get_hash);
@@ -371,40 +477,12 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
 }
 EXPORT_SYMBOL(skb_get_hash_perturb);
 
-/*
- * Returns a Tx hash based on the given packet descriptor a Tx queues' number
- * to be used as a distribution range.
- */
-u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
-		  unsigned int num_tx_queues)
-{
-	u32 hash;
-	u16 qoffset = 0;
-	u16 qcount = num_tx_queues;
-
-	if (skb_rx_queue_recorded(skb)) {
-		hash = skb_get_rx_queue(skb);
-		while (unlikely(hash >= num_tx_queues))
-			hash -= num_tx_queues;
-		return hash;
-	}
-
-	if (dev->num_tc) {
-		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
-		qoffset = dev->tc_to_txq[tc].offset;
-		qcount = dev->tc_to_txq[tc].count;
-	}
-
-	return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
-}
-EXPORT_SYMBOL(__skb_tx_hash);
-
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
 		   const struct flow_keys *keys, int hlen)
 {
-	u32 poff = keys->thoff;
+	u32 poff = keys->basic.thoff;
 
-	switch (keys->ip_proto) {
+	switch (keys->basic.ip_proto) {
 	case IPPROTO_TCP: {
 		/* access doff as u8 to avoid unaligned access */
 		const u8 *doff;
@@ -445,8 +523,12 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
 	return poff;
 }
 
-/* skb_get_poff() returns the offset to the payload as far as it could
- * be dissected. The main user is currently BPF, so that we can dynamically
+/**
+ * skb_get_poff - get the offset to the payload
+ * @skb: sk_buff to get the payload offset from
+ *
+ * The function will get the offset to the payload as far as it could
+ * be dissected.  The main user is currently BPF, so that we can dynamically
  * truncate packets without needing to push actual payload to the user
  * space and can analyze headers only, instead.
  */
@@ -454,86 +536,52 @@ u32 skb_get_poff(const struct sk_buff *skb)
 {
 	struct flow_keys keys;
 
-	if (!skb_flow_dissect(skb, &keys))
+	if (!skb_flow_dissect_flow_keys(skb, &keys))
 		return 0;
 
 	return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
 }
 
-static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
-{
-#ifdef CONFIG_XPS
-	struct xps_dev_maps *dev_maps;
-	struct xps_map *map;
-	int queue_index = -1;
-
-	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
-	if (dev_maps) {
-		map = rcu_dereference(
-		    dev_maps->cpu_map[skb->sender_cpu - 1]);
-		if (map) {
-			if (map->len == 1)
-				queue_index = map->queues[0];
-			else
-				queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
-									   map->len)];
-			if (unlikely(queue_index >= dev->real_num_tx_queues))
-				queue_index = -1;
-		}
-	}
-	rcu_read_unlock();
-
-	return queue_index;
-#else
-	return -1;
-#endif
-}
-
-static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
-{
-	struct sock *sk = skb->sk;
-	int queue_index = sk_tx_queue_get(sk);
-
-	if (queue_index < 0 || skb->ooo_okay ||
-	    queue_index >= dev->real_num_tx_queues) {
-		int new_index = get_xps_queue(dev, skb);
-		if (new_index < 0)
-			new_index = skb_tx_hash(dev, skb);
+static const struct flow_dissector_key flow_keys_dissector_keys[] = {
+	{
+		.key_id = FLOW_DISSECTOR_KEY_BASIC,
+		.offset = offsetof(struct flow_keys, basic),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+		.offset = offsetof(struct flow_keys, addrs),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
+		.offset = offsetof(struct flow_keys, addrs),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_PORTS,
+		.offset = offsetof(struct flow_keys, ports),
+	},
+};
 
-		if (queue_index != new_index && sk &&
-		    rcu_access_pointer(sk->sk_dst_cache))
-			sk_tx_queue_set(sk, new_index);
+static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
+	{
+		.key_id = FLOW_DISSECTOR_KEY_BASIC,
+		.offset = offsetof(struct flow_keys, basic),
+	},
+};
 
-		queue_index = new_index;
-	}
+struct flow_dissector flow_keys_dissector __read_mostly;
+EXPORT_SYMBOL(flow_keys_dissector);
 
-	return queue_index;
-}
+struct flow_dissector flow_keys_buf_dissector __read_mostly;
 
-struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-				    struct sk_buff *skb,
-				    void *accel_priv)
+static int __init init_default_flow_dissectors(void)
 {
-	int queue_index = 0;
-
-#ifdef CONFIG_XPS
-	if (skb->sender_cpu == 0)
-		skb->sender_cpu = raw_smp_processor_id() + 1;
-#endif
-
-	if (dev->real_num_tx_queues != 1) {
-		const struct net_device_ops *ops = dev->netdev_ops;
-		if (ops->ndo_select_queue)
-			queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
-							    __netdev_pick_tx);
-		else
-			queue_index = __netdev_pick_tx(dev, skb);
-
-		if (!accel_priv)
-			queue_index = netdev_cap_txqueue(dev, queue_index);
-	}
-
-	skb_set_queue_mapping(skb, queue_index);
-	return netdev_get_tx_queue(dev, queue_index);
+	skb_flow_dissector_init(&flow_keys_dissector,
+				flow_keys_dissector_keys,
+				ARRAY_SIZE(flow_keys_dissector_keys));
+	skb_flow_dissector_init(&flow_keys_buf_dissector,
+				flow_keys_buf_dissector_keys,
+				ARRAY_SIZE(flow_keys_buf_dissector_keys));
+	return 0;
 }
+
+late_initcall_sync(init_default_flow_dissectors);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 9045e2a1108f..c3325bd2f3fb 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -58,6 +58,7 @@
 #include <net/ipv6.h>
 #include <net/ip.h>
 #include <net/dsa.h>
+#include <net/flow_dissector.h>
 #include <linux/uaccess.h>
 
 __setup("ether=", netdev_boot_setup);
@@ -130,9 +131,9 @@ u32 eth_get_headlen(void *data, unsigned int len)
 		return len;
 
 	/* parse any remaining L2/L3 headers, check for L4 */
-	if (!__skb_flow_dissect(NULL, &keys, data,
-				eth->h_proto, sizeof(*eth), len))
-		return max_t(u32, keys.thoff, sizeof(*eth));
+	if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto,
+					    sizeof(*eth), len))
+		return max_t(u32, keys.basic.thoff, sizeof(*eth));
 
 	/* parse for any L4 headers */
 	return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2274e723a3df..5fd1c2f487d2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -477,6 +477,16 @@ config NET_CLS_BPF
 	  To compile this code as a module, choose M here: the module will
 	  be called cls_bpf.
 
+config NET_CLS_FLOWER
+	tristate "Flower classifier"
+	select NET_CLS
+	---help---
+	  If you say Y here, you will be able to classify packets based on
+	  a configurable combination of packet keys and masks.
+
+	  To compile this code as a module, choose M here: the module will
+	  be called cls_flower.
+
 config NET_EMATCH
 	bool "Extended Matches"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 7ca7f4c1b8c2..690c1689e090 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
 obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
 obj-$(CONFIG_NET_CLS_CGROUP)	+= cls_cgroup.o
 obj-$(CONFIG_NET_CLS_BPF)	+= cls_bpf.o
+obj-$(CONFIG_NET_CLS_FLOWER)	+= cls_flower.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index a620c4e288a5..b4359924846c 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -26,7 +26,7 @@
 #include <net/pkt_cls.h>
 #include <net/ip.h>
 #include <net/route.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netfilter/nf_conntrack.h>
@@ -68,35 +68,35 @@ static inline u32 addr_fold(void *addr)
 
 static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	if (flow->src)
-		return ntohl(flow->src);
+	if (flow->addrs.src)
+		return ntohl(flow->addrs.src);
 	return addr_fold(skb->sk);
 }
 
 static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	if (flow->dst)
-		return ntohl(flow->dst);
+	if (flow->addrs.dst)
+		return ntohl(flow->addrs.dst);
 	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
 }
 
 static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	return flow->ip_proto;
+	return flow->basic.ip_proto;
 }
 
 static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	if (flow->ports)
-		return ntohs(flow->port16[0]);
+	if (flow->ports.ports)
+		return ntohs(flow->ports.src);
 
 	return addr_fold(skb->sk);
 }
 
 static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	if (flow->ports)
-		return ntohs(flow->port16[1]);
+	if (flow->ports.ports)
+		return ntohs(flow->ports.dst);
 
 	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
 }
@@ -295,7 +295,7 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 
 		keymask = f->keymask;
 		if (keymask & FLOW_KEYS_NEEDED)
-			skb_flow_dissect(skb, &flow_keys);
+			skb_flow_dissect_flow_keys(skb, &flow_keys);
 
 		for (n = 0; n < f->nkeys; n++) {
 			key = ffs(keymask) - 1;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
new file mode 100644
index 000000000000..9bc654c764cd
--- /dev/null
+++ b/net/sched/cls_flower.c
@@ -0,0 +1,688 @@
+/*
+ * net/sched/cls_flower.c		Flower classifier
+ *
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/rhashtable.h>
+
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+
+#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
+#include <net/ip.h>
+#include <net/flow_dissector.h>
+
+struct fl_flow_key {
+	int	indev_ifindex;
+	struct flow_dissector_key_basic basic;
+	struct flow_dissector_key_eth_addrs eth;
+	union {
+		struct flow_dissector_key_addrs ipv4;
+		struct flow_dissector_key_ipv6_addrs ipv6;
+	};
+	struct flow_dissector_key_ports tp;
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+
+struct fl_flow_mask_range {
+	unsigned short int start;	/* byte offset into the key, rounded down to a long */
+	unsigned short int end;		/* byte offset past the last set byte, rounded up to a long */
+};
+
+struct fl_flow_mask {
+	struct fl_flow_key key;
+	struct fl_flow_mask_range range;
+	struct rcu_head	rcu;
+};
+
+struct cls_fl_head {
+	struct rhashtable ht;
+	struct fl_flow_mask mask;
+	struct flow_dissector dissector;
+	u32 hgen;
+	bool mask_assigned;
+	struct list_head filters;
+	struct rhashtable_params ht_params;
+	struct rcu_head rcu;
+};
+
+struct cls_fl_filter {
+	struct rhash_head ht_node;
+	struct fl_flow_key mkey;
+	struct tcf_exts exts;
+	struct tcf_result res;
+	struct fl_flow_key key;
+	struct list_head list;
+	u32 handle;
+	struct rcu_head	rcu;
+};
+
+static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
+{
+	return mask->range.end - mask->range.start;	/* length in bytes */
+}
+
+static void fl_mask_update_range(struct fl_flow_mask *mask)
+{
+	const u8 *bytes = (const u8 *) &mask->key;
+	size_t size = sizeof(mask->key);
+	size_t i, first = size, last = size - 1; /* first == size: no set byte yet */
+
+	for (i = 0; i < size; i++) {
+		if (bytes[i]) {
+			if (first == size)	/* remember only the first hit */
+				first = i;
+			last = i;
+		}
+	}
+	mask->range.start = first < size ? rounddown(first, sizeof(long)) : 0;
+	mask->range.end = roundup(last + 1, sizeof(long));
+}
+
+static void *fl_key_get_start(struct fl_flow_key *key,
+			      const struct fl_flow_mask *mask)
+{
+	return (u8 *) key + mask->range.start;
+}
+
+static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
+			      struct fl_flow_mask *mask)
+{
+	const long *src = fl_key_get_start(key, mask);
+	const long *bits = fl_key_get_start(&mask->key, mask);
+	long *dst = fl_key_get_start(mkey, mask);
+	int left = fl_mask_range(mask);	/* bytes still to be masked */
+
+	for (; left > 0; left -= sizeof(long))	/* one long per step */
+		*dst++ = *src++ & *bits++;
+}
+
+static void fl_clear_masked_range(struct fl_flow_key *key,
+				  struct fl_flow_mask *mask)
+{
+	memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
+}
+
+static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+		       struct tcf_result *res)
+{
+	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
+	struct cls_fl_filter *f;
+	struct fl_flow_key skb_key;
+	struct fl_flow_key skb_mkey;
+
+	fl_clear_masked_range(&skb_key, &head->mask);	/* zero only the masked bytes */
+	skb_key.indev_ifindex = skb->skb_iif;
+	/* skb_flow_dissect() does not set n_proto in case of an unknown
+	 * protocol, so set it here beforehand.
+	 */
+	skb_key.basic.n_proto = skb->protocol;
+	skb_flow_dissect(skb, &head->dissector, &skb_key);
+
+	fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
+
+	f = rhashtable_lookup_fast(&head->ht,
+				   fl_key_get_start(&skb_mkey, &head->mask),
+				   head->ht_params);
+	if (f) {
+		*res = f->res;
+		return tcf_exts_exec(skb, &f->exts, res);
+	}
+	return -1;	/* no filter matched the masked key */
+}
+
+static int fl_init(struct tcf_proto *tp)
+{
+	struct cls_fl_head *head;
+
+	head = kzalloc(sizeof(*head), GFP_KERNEL);
+	if (!head)
+		return -ENOBUFS;
+
+	INIT_LIST_HEAD_RCU(&head->filters);
+	rcu_assign_pointer(tp->root, head);
+
+	return 0;
+}
+
+static void fl_destroy_filter(struct rcu_head *head)
+{
+	struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+
+	tcf_exts_destroy(&f->exts);
+	kfree(f);
+}
+
+static bool fl_destroy(struct tcf_proto *tp, bool force)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f, *next;
+
+	if (!force && !list_empty(&head->filters))
+		return false;
+
+	list_for_each_entry_safe(f, next, &head->filters, list) {
+		list_del_rcu(&f->list);
+		call_rcu(&f->rcu, fl_destroy_filter);
+	}
+	RCU_INIT_POINTER(tp->root, NULL);
+	if (head->mask_assigned)
+		rhashtable_destroy(&head->ht);
+	kfree_rcu(head, rcu);
+	return true;
+}
+
+static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f;
+
+	list_for_each_entry(f, &head->filters, list)
+		if (f->handle == handle)
+			return (unsigned long) f;
+	return 0;
+}
+
+static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
+	[TCA_FLOWER_UNSPEC]		= { .type = NLA_UNSPEC },
+	[TCA_FLOWER_CLASSID]		= { .type = NLA_U32 },
+	[TCA_FLOWER_INDEV]		= { .type = NLA_STRING,
+					    .len = IFNAMSIZ },
+	[TCA_FLOWER_KEY_ETH_DST]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_DST_MASK]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_SRC]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_SRC_MASK]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_TYPE]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_IP_PROTO]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_IPV4_SRC]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV4_SRC_MASK]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV4_DST]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV4_DST_MASK]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_IPV6_SRC_MASK]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_IPV6_DST_MASK]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_TCP_SRC]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_TCP_DST]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_UDP_SRC]	= { .type = NLA_U16 },	/* was a duplicate TCP_SRC */
+	[TCA_FLOWER_KEY_UDP_DST]	= { .type = NLA_U16 },	/* was a duplicate TCP_DST */
+};
+
+static void fl_set_key_val(struct nlattr **tb,
+			   void *val, int val_type,
+			   void *mask, int mask_type, int len)
+{
+	if (!tb[val_type])	/* attribute absent: val and mask stay untouched */
+		return;
+	memcpy(val, nla_data(tb[val_type]), len);
+	if (mask_type != TCA_FLOWER_UNSPEC && tb[mask_type])
+		memcpy(mask, nla_data(tb[mask_type]), len);
+	else	/* no mask attribute available: use an all-ones mask */
+		memset(mask, 0xff, len);
+}
+
+static int fl_set_key(struct net *net, struct nlattr **tb,
+		      struct fl_flow_key *key, struct fl_flow_key *mask)
+{
+	int err;
+
+	if (tb[TCA_FLOWER_INDEV]) {
+		err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
+		if (err < 0)
+			return err;
+		key->indev_ifindex = err;
+		mask->indev_ifindex = 0xffffffff;
+	}
+
+	fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+		       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+		       sizeof(key->eth.dst));
+	fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+		       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+		       sizeof(key->eth.src));
+	fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+		       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+		       sizeof(key->basic.n_proto));
+	if (key->basic.n_proto == htons(ETH_P_IP) ||
+	    key->basic.n_proto == htons(ETH_P_IPV6)) {
+		fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+			       &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+			       sizeof(key->basic.ip_proto));
+	}
+	if (key->basic.n_proto == htons(ETH_P_IP)) {
+		fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+			       &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+			       sizeof(key->ipv4.src));
+		fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+			       &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+			       sizeof(key->ipv4.dst));
+	} else if (key->basic.n_proto == htons(ETH_P_IPV6)) {
+		fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+			       &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+			       sizeof(key->ipv6.src));
+		fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+			       &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+			       sizeof(key->ipv6.dst));
+	}
+	if (key->basic.ip_proto == IPPROTO_TCP) {
+		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+			       &mask->tp.src, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.src));
+		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.dst));
+	} else if (key->basic.ip_proto == IPPROTO_UDP) {
+		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+			       &mask->tp.src, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.src));
+		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.dst));
+	}
+
+	return 0;
+}
+
+static bool fl_mask_eq(struct fl_flow_mask *mask1,
+		       struct fl_flow_mask *mask2)
+{
+	const void *a = fl_key_get_start(&mask1->key, mask1);
+	const void *b = fl_key_get_start(&mask2->key, mask2);
+
+	return memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) == 0 &&
+	       memcmp(a, b, fl_mask_range(mask1)) == 0; /* then the masked bytes */
+}
+
+static const struct rhashtable_params fl_ht_params = {
+	.key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
+	.head_offset = offsetof(struct cls_fl_filter, ht_node),
+	.automatic_shrinking = true,
+};
+
+static int fl_init_hashtable(struct cls_fl_head *head,
+			     struct fl_flow_mask *mask)
+{
+	head->ht_params = fl_ht_params;
+	head->ht_params.key_len = fl_mask_range(mask);
+	head->ht_params.key_offset += mask->range.start;
+
+	return rhashtable_init(&head->ht, &head->ht_params);
+}
+
+#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
+#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
+#define FL_KEY_MEMBER_END_OFFSET(member)					\
+	(FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))
+
+#define FL_KEY_IN_RANGE(mask, member)						\
+        (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end &&			\
+         FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)
+
+#define FL_KEY_SET(keys, cnt, id, member)					\
+	do {	/* fill keys[cnt], then advance cnt */				\
+		keys[cnt].key_id = id;						\
+		keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member);		\
+		cnt++;								\
+	} while (0)
+
+#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member)			\
+	do {	/* add the key only if mask's range touches member */		\
+		if (FL_KEY_IN_RANGE(mask, member))				\
+			FL_KEY_SET(keys, cnt, id, member);			\
+	} while (0)
+
+static void fl_init_dissector(struct cls_fl_head *head,
+			      struct fl_flow_mask *mask)
+{
+	struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
+	size_t cnt = 0;
+
+	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_PORTS, tp);
+
+	skb_flow_dissector_init(&head->dissector, keys, cnt);
+}
+
+static int fl_check_assign_mask(struct cls_fl_head *head,
+				struct fl_flow_mask *mask)
+{
+	int ret;
+
+	if (head->mask_assigned) {
+		/* A head keeps a single mask; any other one is refused. */
+		if (fl_mask_eq(&head->mask, mask))
+			return 0;
+		return -EINVAL;
+	}
+
+	/* First mask seen on this head: set up the hashtable for it,
+	 * then record it and build the matching dissector.
+	 */
+	ret = fl_init_hashtable(head, mask);
+	if (ret)
+		return ret;
+	memcpy(&head->mask, mask, sizeof(head->mask));
+	head->mask_assigned = true;
+
+	fl_init_dissector(head, mask);
+
+	return 0;
+}
+
+static int fl_set_parms(struct net *net, struct tcf_proto *tp,
+			struct cls_fl_filter *f, struct fl_flow_mask *mask,
+			unsigned long base, struct nlattr **tb,
+			struct nlattr *est, bool ovr)
+{
+	struct tcf_exts e;
+	int err;
+
+	tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_FLOWER_CLASSID]) {
+		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
+		tcf_bind_filter(tp, &f->res, base);
+	}
+
+	err = fl_set_key(net, tb, &f->key, &mask->key);
+	if (err)
+		goto errout;
+
+	fl_mask_update_range(mask);
+	fl_set_masked_key(&f->mkey, &f->key, mask);
+
+	tcf_exts_change(tp, &f->exts, &e);
+
+	return 0;
+errout:
+	tcf_exts_destroy(&e);
+	return err;
+}
+
+static u32 fl_grab_new_handle(struct tcf_proto *tp,
+			      struct cls_fl_head *head)
+{
+	unsigned int tries = 0x80000000;	/* upper bound on probes */
+
+	do {
+		/* Advance the generator, wrapping back to 1. */
+		if (++head->hgen == 0x7FFFFFFF)
+			head->hgen = 1;
+	} while (--tries > 0 && fl_get(tp, head->hgen));
+
+	if (unlikely(tries == 0)) {
+		/* Probe budget exhausted without settling on a handle. */
+		pr_err("Insufficient number of handles\n");
+		return 0;
+	}
+
+	/* fl_get() found no filter using hgen, so hand it out. */
+	return head->hgen;
+}
+
+static int fl_change(struct net *net, struct sk_buff *in_skb,
+		     struct tcf_proto *tp, unsigned long base,
+		     u32 handle, struct nlattr **tca,
+		     unsigned long *arg, bool ovr)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
+	struct cls_fl_filter *fnew;
+	struct nlattr *tb[TCA_FLOWER_MAX + 1];
+	struct fl_flow_mask mask = {};
+	int err;
+
+	if (!tca[TCA_OPTIONS])
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
+	if (err < 0)
+		return err;
+
+	if (fold && handle && fold->handle != handle)
+		return -EINVAL;
+
+	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+	if (!fnew)
+		return -ENOBUFS;
+
+	tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+
+	if (!handle) {
+		handle = fl_grab_new_handle(tp, head);
+		if (!handle) {
+			err = -EINVAL;
+			goto errout;
+		}
+	}
+	fnew->handle = handle;
+
+	err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
+	if (err)
+		goto errout;
+
+	err = fl_check_assign_mask(head, &mask);
+	if (err)
+		goto errout;
+
+	err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
+				     head->ht_params);
+	if (err)
+		goto errout;
+	if (fold)
+		rhashtable_remove_fast(&head->ht, &fold->ht_node,
+				       head->ht_params);
+
+	*arg = (unsigned long) fnew;
+
+	if (fold) {
+		list_replace_rcu(&fold->list, &fnew->list);	/* (old, new) */
+		tcf_unbind_filter(tp, &fold->res);
+		call_rcu(&fold->rcu, fl_destroy_filter);
+	} else {
+		list_add_tail_rcu(&fnew->list, &head->filters);
+	}
+
+	return 0;
+errout:
+	tcf_exts_destroy(&fnew->exts);	/* exts may already hold actions */
+	kfree(fnew);
+	return err;
+}
+
+static int fl_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
+
+	rhashtable_remove_fast(&head->ht, &f->ht_node,
+			       head->ht_params);
+	list_del_rcu(&f->list);
+	tcf_unbind_filter(tp, &f->res);
+	call_rcu(&f->rcu, fl_destroy_filter);
+	return 0;
+}
+
+static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f;
+
+	list_for_each_entry_rcu(f, &head->filters, list) {
+		/* Entries before arg->skip are counted but not visited. */
+		if (arg->count >= arg->skip &&
+		    arg->fn(tp, (unsigned long) f, arg) < 0) {
+			/* Callback failed: stop without counting this entry. */
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static int fl_dump_key_val(struct sk_buff *skb,
+			   void *val, int val_type,
+			   void *mask, int mask_type, int len)
+{
+	int err;
+
+	/* All-zero mask: nothing is emitted for this field. */
+	if (!memchr_inv(mask, 0, len))
+		return 0;
+	err = nla_put(skb, val_type, len, val);
+	if (err)
+		return err;
+
+	/* Without a mask attribute type only the value is dumped. */
+	if (mask_type == TCA_FLOWER_UNSPEC)
+		return 0;
+	return nla_put(skb, mask_type, len, mask);
+}
+
+static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+		   struct sk_buff *skb, struct tcmsg *t)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
+	struct nlattr *nest;
+	struct fl_flow_key *key, *mask;
+
+	if (!f)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (!nest)
+		goto nla_put_failure;
+
+	if (f->res.classid &&
+	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
+		goto nla_put_failure;
+
+	key = &f->key;
+	mask = &head->mask.key;
+
+	if (mask->indev_ifindex) {
+		struct net_device *dev;
+
+		dev = __dev_get_by_index(net, key->indev_ifindex);
+		if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
+			goto nla_put_failure;
+	}
+
+	if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+			    mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+			    sizeof(key->eth.dst)) ||
+	    fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+			    mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+			    sizeof(key->eth.src)) ||
+	    fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+			    &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+			    sizeof(key->basic.n_proto)))
+		goto nla_put_failure;
+	if ((key->basic.n_proto == htons(ETH_P_IP) ||
+	     key->basic.n_proto == htons(ETH_P_IPV6)) &&
+	    fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+			    &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+			    sizeof(key->basic.ip_proto)))
+		goto nla_put_failure;
+
+	if (key->basic.n_proto == htons(ETH_P_IP) &&
+	    (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+			     &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+			     sizeof(key->ipv4.src)) ||
+	     fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+			     &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+			     sizeof(key->ipv4.dst))))
+		goto nla_put_failure;
+	else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
+		 (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+				  &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+				  sizeof(key->ipv6.src)) ||
+		  fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+				  &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+				  sizeof(key->ipv6.dst))))
+		goto nla_put_failure;
+
+	if (key->basic.ip_proto == IPPROTO_TCP &&
+	    (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+			     &mask->tp.src, TCA_FLOWER_UNSPEC,
+			     sizeof(key->tp.src)) ||
+	     fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+			     &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			     sizeof(key->tp.dst))))
+		goto nla_put_failure;
+	else if (key->basic.ip_proto == IPPROTO_UDP &&
+		 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+				  &mask->tp.src, TCA_FLOWER_UNSPEC,
+				  sizeof(key->tp.src)) ||
+		  fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+				  &mask->tp.dst, TCA_FLOWER_UNSPEC,
+				  sizeof(key->tp.dst))))
+		goto nla_put_failure;
+
+	if (tcf_exts_dump(skb, &f->exts))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+
+	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
+		goto nla_put_failure;
+
+	return skb->len;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_fl_ops __read_mostly = {
+	.kind		= "flower",
+	.classify	= fl_classify,
+	.init		= fl_init,
+	.destroy	= fl_destroy,
+	.get		= fl_get,
+	.change		= fl_change,
+	.delete		= fl_delete,
+	.walk		= fl_walk,
+	.dump		= fl_dump,
+	.owner		= THIS_MODULE,
+};
+
+static int __init cls_fl_init(void)
+{
+	return register_tcf_proto_ops(&cls_fl_ops);
+}
+
+static void __exit cls_fl_exit(void)
+{
+	unregister_tcf_proto_ops(&cls_fl_ops);
+}
+
+module_init(cls_fl_init);
+module_exit(cls_fl_exit);
+
+MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
+MODULE_DESCRIPTION("Flower classifier");
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index dfe3da75594c..93d5742dc7e0 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -18,7 +18,7 @@
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
 #include <net/red.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
 
 /*
    CHOKe stateless AQM for fair bandwidth allocation
@@ -170,13 +170,13 @@ static bool choke_match_flow(struct sk_buff *skb1,
 
 	if (!choke_skb_cb(skb1)->keys_valid) {
 		choke_skb_cb(skb1)->keys_valid = 1;
-		skb_flow_dissect(skb1, &temp);
+		skb_flow_dissect_flow_keys(skb1, &temp);
 		make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
 	}
 
 	if (!choke_skb_cb(skb2)->keys_valid) {
 		choke_skb_cb(skb2)->keys_valid = 1;
-		skb_flow_dissect(skb2, &temp);
+		skb_flow_dissect_flow_keys(skb2, &temp);
 		make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
 	}
author	David S. Miller <davem@davemloft.net>	2015-05-13 15:19:48 -0400
committer	David S. Miller <davem@davemloft.net>	2015-05-13 15:19:48 -0400
commit	dd58c6359b38cff91100593ea4926adb19e85636 (patch)
tree	c3923d418d9323bfb7cf123eb57bea2f7c63cbdd
parent	212da1fa60a822f5a7f20d52d11524dadd192a91 (diff)
parent	77b9900ef53ae047e36a37d13a2aa33bb2d60641 (diff)