summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-05-13 15:19:48 -0400
committerDavid S. Miller <davem@davemloft.net>2015-05-13 15:19:48 -0400
commitdd58c6359b38cff91100593ea4926adb19e85636 (patch)
treec3923d418d9323bfb7cf123eb57bea2f7c63cbdd
parent212da1fa60a822f5a7f20d52d11524dadd192a91 (diff)
parent77b9900ef53ae047e36a37d13a2aa33bb2d60641 (diff)
Merge branch 'cls_flower'
Jiri Pirko says: ==================== introduce programable flow dissector and cls_flower Per Davem's request, I prepared this patchset which introduces programmable flow dissector. For current users of flow_keys, there is a wrapper skb_flow_dissect_flow_keys which maintains the previous behaviour. For purposes of cls_flower, couple of new dissection keys were introduced. Note that this dissector can be also eventually used by openvswitch code. Also, as a next step, I plan to get rid of *skb_flow_get_ports(export) and *__skb_get_poff as their functionality can be now implemented by skb_flow_dissect as well. v2->v3: - remove TCA_FLOWER_POLICE attr suggested by Jamal v1->v2: - move __skb_tx_hash rather to dev.c as suggested by Alex ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/bonding/bond_main.c20
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_clsf.c29
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_ethtool.c10
-rw-r--r--drivers/net/hyperv/netvsc_drv.c8
-rw-r--r--include/linux/netdevice.h3
-rw-r--r--include/linux/skbuff.h14
-rw-r--r--include/net/flow_dissector.h167
-rw-r--r--include/net/flow_keys.h61
-rw-r--r--include/net/ip.h10
-rw-r--r--include/net/ipv6.h10
-rw-r--r--include/uapi/linux/pkt_cls.h30
-rw-r--r--net/core/dev.c106
-rw-r--r--net/core/filter.c1
-rw-r--r--net/core/flow_dissector.c380
-rw-r--r--net/ethernet/eth.c7
-rw-r--r--net/sched/Kconfig10
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/cls_flow.c22
-rw-r--r--net/sched/cls_flower.c688
-rw-r--r--net/sched/sch_choke.c6
20 files changed, 1285 insertions, 298 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index a2e25de98bde..ef26e0147050 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -76,7 +76,7 @@
#include <net/netns/generic.h>
#include <net/pkt_sched.h>
#include <linux/rculist.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
#include <net/switchdev.h>
#include <net/bonding.h>
#include <net/bond_3ad.h>
@@ -3051,16 +3051,16 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
int noff, proto = -1;
if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
- return skb_flow_dissect(skb, fk);
+ return skb_flow_dissect_flow_keys(skb, fk);
- fk->ports = 0;
+ fk->ports.ports = 0;
noff = skb_network_offset(skb);
if (skb->protocol == htons(ETH_P_IP)) {
if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph))))
return false;
iph = ip_hdr(skb);
- fk->src = iph->saddr;
- fk->dst = iph->daddr;
+ fk->addrs.src = iph->saddr;
+ fk->addrs.dst = iph->daddr;
noff += iph->ihl << 2;
if (!ip_is_fragment(iph))
proto = iph->protocol;
@@ -3068,15 +3068,15 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6))))
return false;
iph6 = ipv6_hdr(skb);
- fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
- fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
+ fk->addrs.src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
+ fk->addrs.dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
noff += sizeof(*iph6);
proto = iph6->nexthdr;
} else {
return false;
}
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0)
- fk->ports = skb_flow_get_ports(skb, noff, proto);
+ fk->ports.ports = skb_flow_get_ports(skb, noff, proto);
return true;
}
@@ -3102,8 +3102,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
hash = bond_eth_hash(skb);
else
- hash = (__force u32)flow.ports;
- hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
+ hash = (__force u32)flow.ports.ports;
+ hash ^= (__force u32)flow.addrs.dst ^ (__force u32)flow.addrs.src;
hash ^= (hash >> 16);
hash ^= (hash >> 8);
diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c
index 0be6850be8a2..6739ebc08c47 100644
--- a/drivers/net/ethernet/cisco/enic/enic_clsf.c
+++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c
@@ -5,7 +5,7 @@
#include <linux/in.h>
#include <linux/types.h>
#include <linux/skbuff.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
#include "enic_res.h"
#include "enic_clsf.h"
@@ -22,7 +22,7 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq)
int res;
struct filter data;
- switch (keys->ip_proto) {
+ switch (keys->basic.ip_proto) {
case IPPROTO_TCP:
data.u.ipv4.protocol = PROTO_TCP;
break;
@@ -33,10 +33,10 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq)
return -EPROTONOSUPPORT;
};
data.type = FILTER_IPV4_5TUPLE;
- data.u.ipv4.src_addr = ntohl(keys->src);
- data.u.ipv4.dst_addr = ntohl(keys->dst);
- data.u.ipv4.src_port = ntohs(keys->port16[0]);
- data.u.ipv4.dst_port = ntohs(keys->port16[1]);
+ data.u.ipv4.src_addr = ntohl(keys->addrs.src);
+ data.u.ipv4.dst_addr = ntohl(keys->addrs.dst);
+ data.u.ipv4.src_port = ntohs(keys->ports.src);
+ data.u.ipv4.dst_port = ntohs(keys->ports.dst);
data.u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE;
spin_lock_bh(&enic->devcmd_lock);
@@ -158,11 +158,11 @@ static struct enic_rfs_fltr_node *htbl_key_search(struct hlist_head *h,
struct enic_rfs_fltr_node *tpos;
hlist_for_each_entry(tpos, h, node)
- if (tpos->keys.src == k->src &&
- tpos->keys.dst == k->dst &&
- tpos->keys.ports == k->ports &&
- tpos->keys.ip_proto == k->ip_proto &&
- tpos->keys.n_proto == k->n_proto)
+ if (tpos->keys.addrs.src == k->addrs.src &&
+ tpos->keys.addrs.dst == k->addrs.dst &&
+ tpos->keys.ports.ports == k->ports.ports &&
+ tpos->keys.basic.ip_proto == k->basic.ip_proto &&
+ tpos->keys.basic.n_proto == k->basic.n_proto)
return tpos;
return NULL;
}
@@ -177,9 +177,10 @@ int enic_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
int res, i;
enic = netdev_priv(dev);
- res = skb_flow_dissect(skb, &keys);
- if (!res || keys.n_proto != htons(ETH_P_IP) ||
- (keys.ip_proto != IPPROTO_TCP && keys.ip_proto != IPPROTO_UDP))
+ res = skb_flow_dissect_flow_keys(skb, &keys);
+ if (!res || keys.basic.n_proto != htons(ETH_P_IP) ||
+ (keys.basic.ip_proto != IPPROTO_TCP &&
+ keys.basic.ip_proto != IPPROTO_UDP))
return -EPROTONOSUPPORT;
tbl_idx = skb_get_hash_raw(skb) & ENIC_RFS_FLW_MASK;
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index 28d9ca675a27..117c0968dd0b 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -334,7 +334,7 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd)
n = htbl_fltr_search(enic, (u16)fsp->location);
if (!n)
return -EINVAL;
- switch (n->keys.ip_proto) {
+ switch (n->keys.basic.ip_proto) {
case IPPROTO_TCP:
fsp->flow_type = TCP_V4_FLOW;
break;
@@ -346,16 +346,16 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd)
break;
}
- fsp->h_u.tcp_ip4_spec.ip4src = n->keys.src;
+ fsp->h_u.tcp_ip4_spec.ip4src = n->keys.addrs.src;
fsp->m_u.tcp_ip4_spec.ip4src = (__u32)~0;
- fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.dst;
+ fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.addrs.dst;
fsp->m_u.tcp_ip4_spec.ip4dst = (__u32)~0;
- fsp->h_u.tcp_ip4_spec.psrc = n->keys.port16[0];
+ fsp->h_u.tcp_ip4_spec.psrc = n->keys.ports.src;
fsp->m_u.tcp_ip4_spec.psrc = (__u16)~0;
- fsp->h_u.tcp_ip4_spec.pdst = n->keys.port16[1];
+ fsp->h_u.tcp_ip4_spec.pdst = n->keys.ports.dst;
fsp->m_u.tcp_ip4_spec.pdst = (__u16)~0;
fsp->ring_cookie = n->rq_id;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 5993c7e2d723..8e5fe888a0ec 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -196,12 +196,12 @@ static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
struct flow_keys flow;
int data_len;
- if (!skb_flow_dissect(skb, &flow) ||
- !(flow.n_proto == htons(ETH_P_IP) ||
- flow.n_proto == htons(ETH_P_IPV6)))
+ if (!skb_flow_dissect_flow_keys(skb, &flow) ||
+ !(flow.basic.n_proto == htons(ETH_P_IP) ||
+ flow.basic.n_proto == htons(ETH_P_IPV6)))
return false;
- if (flow.ip_proto == IPPROTO_TCP)
+ if (flow.basic.ip_proto == IPPROTO_TCP)
data_len = 12;
else
data_len = 8;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cd0951c1893d..d3ed01c18247 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2832,6 +2832,9 @@ static inline int netif_set_xps_queue(struct net_device *dev,
}
#endif
+u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
+ unsigned int num_tx_queues);
+
/*
* Returns a Tx hash for the given packet when dev->real_num_tx_queues is used
* as a distribution range limit for the returned value.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c0b574a414e7..f83aa6568cbf 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -34,7 +34,7 @@
#include <linux/dma-mapping.h>
#include <linux/netdev_features.h>
#include <linux/sched.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
/* A. Checksumming of received packets by device.
*
@@ -918,7 +918,6 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type)
skb->hash = hash;
}
-void __skb_get_hash(struct sk_buff *skb);
static inline __u32 skb_get_hash(struct sk_buff *skb)
{
if (!skb->l4_hash && !skb->sw_hash)
@@ -1936,8 +1935,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb,
if (skb_transport_header_was_set(skb))
return;
- else if (skb_flow_dissect(skb, &keys))
- skb_set_transport_header(skb, keys.thoff);
+ else if (skb_flow_dissect_flow_keys(skb, &keys))
+ skb_set_transport_header(skb, keys.basic.thoff);
else
skb_set_transport_header(skb, offset_hint);
}
@@ -3300,9 +3299,6 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
return skb->queue_mapping != 0;
}
-u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
- unsigned int num_tx_queues);
-
static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
@@ -3424,10 +3420,6 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
unsigned int transport_len,
__sum16(*skb_chkf)(struct sk_buff *skb));
-u32 skb_get_poff(const struct sk_buff *skb);
-u32 __skb_get_poff(const struct sk_buff *skb, void *data,
- const struct flow_keys *keys, int hlen);
-
/**
* skb_head_is_locked - Determine if the skb->head is locked down
* @skb: skb to check
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
new file mode 100644
index 000000000000..bac9c1421f58
--- /dev/null
+++ b/include/net/flow_dissector.h
@@ -0,0 +1,167 @@
+#ifndef _NET_FLOW_DISSECTOR_H
+#define _NET_FLOW_DISSECTOR_H
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/in6.h>
+#include <uapi/linux/if_ether.h>
+
+/**
+ * struct flow_dissector_key_basic:
+ * @thoff: Transport header offset
+ * @n_proto: Network header protocol (eg. IPv4/IPv6)
+ * @ip_proto: Transport header protocol (eg. TCP/UDP)
+ */
+struct flow_dissector_key_basic {
+ u16 thoff;
+ __be16 n_proto;
+ u8 ip_proto;
+};
+
+/**
+ * struct flow_dissector_key_addrs:
+ * @src: source ip address in case of IPv4
+ * For IPv6 it contains 32bit hash of src address
+ * @dst: destination ip address in case of IPv4
+ * For IPv6 it contains 32bit hash of dst address
+ */
+struct flow_dissector_key_addrs {
+ /* (src,dst) must be grouped, in the same way than in IP header */
+ __be32 src;
+ __be32 dst;
+};
+
+/**
+ * flow_dissector_key_tp_ports:
+ * @ports: port numbers of Transport header
+ * src: source port number
+ * dst: destination port number
+ */
+struct flow_dissector_key_ports {
+ union {
+ __be32 ports;
+ struct {
+ __be16 src;
+ __be16 dst;
+ };
+ };
+};
+
+/**
+ * struct flow_dissector_key_ipv6_addrs:
+ * @src: source ip address
+ * @dst: destination ip address
+ */
+struct flow_dissector_key_ipv6_addrs {
+ /* (src,dst) must be grouped, in the same way than in IP header */
+ struct in6_addr src;
+ struct in6_addr dst;
+};
+
+/**
+ * struct flow_dissector_key_eth_addrs:
+ * @src: source Ethernet address
+ * @dst: destination Ethernet address
+ */
+struct flow_dissector_key_eth_addrs {
+ /* (dst,src) must be grouped, in the same way than in ETH header */
+ unsigned char dst[ETH_ALEN];
+ unsigned char src[ETH_ALEN];
+};
+
+enum flow_dissector_key_id {
+ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */
+ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */
+ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */
+ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */
+
+ FLOW_DISSECTOR_KEY_MAX,
+};
+
+struct flow_dissector_key {
+ enum flow_dissector_key_id key_id;
+ size_t offset; /* offset of struct flow_dissector_key_*
+ in target the struct */
+};
+
+struct flow_dissector {
+ unsigned int used_keys; /* each bit repesents presence of one key id */
+ unsigned short int offset[FLOW_DISSECTOR_KEY_MAX];
+};
+
+void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
+ const struct flow_dissector_key *key,
+ unsigned int key_count);
+
+bool __skb_flow_dissect(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container,
+ void *data, __be16 proto, int nhoff, int hlen);
+
+static inline bool skb_flow_dissect(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+{
+ return __skb_flow_dissect(skb, flow_dissector, target_container,
+ NULL, 0, 0, 0);
+}
+
+struct flow_keys {
+ struct flow_dissector_key_addrs addrs;
+ struct flow_dissector_key_ports ports;
+ struct flow_dissector_key_basic basic;
+};
+
+extern struct flow_dissector flow_keys_dissector;
+extern struct flow_dissector flow_keys_buf_dissector;
+
+static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
+ struct flow_keys *flow)
+{
+ memset(flow, 0, sizeof(*flow));
+ return __skb_flow_dissect(skb, &flow_keys_dissector, flow,
+ NULL, 0, 0, 0);
+}
+
+static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow,
+ void *data, __be16 proto,
+ int nhoff, int hlen)
+{
+ memset(flow, 0, sizeof(*flow));
+ return __skb_flow_dissect(NULL, &flow_keys_buf_dissector, flow,
+ data, proto, nhoff, hlen);
+}
+
+__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
+ void *data, int hlen_proto);
+
+static inline __be32 skb_flow_get_ports(const struct sk_buff *skb,
+ int thoff, u8 ip_proto)
+{
+ return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0);
+}
+
+u32 flow_hash_from_keys(struct flow_keys *keys);
+void __skb_get_hash(struct sk_buff *skb);
+u32 skb_get_poff(const struct sk_buff *skb);
+u32 __skb_get_poff(const struct sk_buff *skb, void *data,
+ const struct flow_keys *keys, int hlen);
+
+/* struct flow_keys_digest:
+ *
+ * This structure is used to hold a digest of the full flow keys. This is a
+ * larger "hash" of a flow to allow definitively matching specific flows where
+ * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so
+ * that it can by used in CB of skb (see sch_choke for an example).
+ */
+#define FLOW_KEYS_DIGEST_LEN 16
+struct flow_keys_digest {
+ u8 data[FLOW_KEYS_DIGEST_LEN];
+};
+
+void make_flow_keys_digest(struct flow_keys_digest *digest,
+ const struct flow_keys *flow);
+
+#endif
diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h
deleted file mode 100644
index 6d6ef626811a..000000000000
--- a/include/net/flow_keys.h
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef _NET_FLOW_KEYS_H
-#define _NET_FLOW_KEYS_H
-
-/* struct flow_keys:
- * @src: source ip address in case of IPv4
- * For IPv6 it contains 32bit hash of src address
- * @dst: destination ip address in case of IPv4
- * For IPv6 it contains 32bit hash of dst address
- * @ports: port numbers of Transport header
- * port16[0]: src port number
- * port16[1]: dst port number
- * @thoff: Transport header offset
- * @n_proto: Network header protocol (eg. IPv4/IPv6)
- * @ip_proto: Transport header protocol (eg. TCP/UDP)
- * All the members, except thoff, are in network byte order.
- */
-struct flow_keys {
- /* (src,dst) must be grouped, in the same way than in IP header */
- __be32 src;
- __be32 dst;
- union {
- __be32 ports;
- __be16 port16[2];
- };
- u16 thoff;
- __be16 n_proto;
- u8 ip_proto;
-};
-
-bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
- void *data, __be16 proto, int nhoff, int hlen);
-static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
-{
- return __skb_flow_dissect(skb, flow, NULL, 0, 0, 0);
-}
-__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
- void *data, int hlen_proto);
-static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto)
-{
- return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0);
-}
-u32 flow_hash_from_keys(struct flow_keys *keys);
-unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len,
- __be16 protocol);
-
-/* struct flow_keys_digest:
- *
- * This structure is used to hold a digest of the full flow keys. This is a
- * larger "hash" of a flow to allow definitively matching specific flows where
- * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so
- * that it can by used in CB of skb (see sch_choke for an example).
- */
-#define FLOW_KEYS_DIGEST_LEN 16
-struct flow_keys_digest {
- u8 data[FLOW_KEYS_DIGEST_LEN];
-};
-
-void make_flow_keys_digest(struct flow_keys_digest *digest,
- const struct flow_keys *flow);
-
-#endif
diff --git a/include/net/ip.h b/include/net/ip.h
index d14af7edd197..0ed6d768e606 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -31,7 +31,7 @@
#include <net/route.h>
#include <net/snmp.h>
#include <net/flow.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
struct sock;
@@ -360,10 +360,10 @@ static inline void inet_set_txhash(struct sock *sk)
struct inet_sock *inet = inet_sk(sk);
struct flow_keys keys;
- keys.src = inet->inet_saddr;
- keys.dst = inet->inet_daddr;
- keys.port16[0] = inet->inet_sport;
- keys.port16[1] = inet->inet_dport;
+ keys.addrs.src = inet->inet_saddr;
+ keys.addrs.dst = inet->inet_daddr;
+ keys.ports.src = inet->inet_sport;
+ keys.ports.dst = inet->inet_dport;
sk->sk_txhash = flow_hash_from_keys(&keys);
}
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 53d25ef1699a..aab8190d16e8 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -19,7 +19,7 @@
#include <net/if_inet6.h>
#include <net/ndisc.h>
#include <net/flow.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
#include <net/snmp.h>
#define SIN6_LEN_RFC2133 24
@@ -698,10 +698,10 @@ static inline void ip6_set_txhash(struct sock *sk)
struct ipv6_pinfo *np = inet6_sk(sk);
struct flow_keys keys;
- keys.src = (__force __be32)ipv6_addr_hash(&np->saddr);
- keys.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr);
- keys.port16[0] = inet->inet_sport;
- keys.port16[1] = inet->inet_dport;
+ keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr);
+ keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr);
+ keys.ports.src = inet->inet_sport;
+ keys.ports.dst = inet->inet_dport;
sk->sk_txhash = flow_hash_from_keys(&keys);
}
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index ffc112c8e1c2..39fb53d67b11 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -409,6 +409,36 @@ enum {
#define TCA_BPF_MAX (__TCA_BPF_MAX - 1)
+/* Flower classifier */
+
+enum {
+ TCA_FLOWER_UNSPEC,
+ TCA_FLOWER_CLASSID,
+ TCA_FLOWER_INDEV,
+ TCA_FLOWER_ACT,
+ TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */
+ TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */
+ TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */
+ TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */
+ TCA_FLOWER_KEY_ETH_TYPE, /* be16 */
+ TCA_FLOWER_KEY_IP_PROTO, /* u8 */
+ TCA_FLOWER_KEY_IPV4_SRC, /* be32 */
+ TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */
+ TCA_FLOWER_KEY_IPV4_DST, /* be32 */
+ TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */
+ TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */
+ TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */
+ TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */
+ TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */
+ TCA_FLOWER_KEY_TCP_SRC, /* be16 */
+ TCA_FLOWER_KEY_TCP_DST, /* be16 */
+ TCA_FLOWER_KEY_UDP_SRC, /* be16 */
+ TCA_FLOWER_KEY_UDP_DST, /* be16 */
+ __TCA_FLOWER_MAX,
+};
+
+#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
+
/* Extended Matches */
struct tcf_ematch_tree_hdr {
diff --git a/net/core/dev.c b/net/core/dev.c
index 90a568a150b4..af549062ae8e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2350,6 +2350,34 @@ void netif_device_attach(struct net_device *dev)
}
EXPORT_SYMBOL(netif_device_attach);
+/*
+ * Returns a Tx hash based on the given packet descriptor a Tx queues' number
+ * to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
+ unsigned int num_tx_queues)
+{
+ u32 hash;
+ u16 qoffset = 0;
+ u16 qcount = num_tx_queues;
+
+ if (skb_rx_queue_recorded(skb)) {
+ hash = skb_get_rx_queue(skb);
+ while (unlikely(hash >= num_tx_queues))
+ hash -= num_tx_queues;
+ return hash;
+ }
+
+ if (dev->num_tc) {
+ u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+ qoffset = dev->tc_to_txq[tc].offset;
+ qcount = dev->tc_to_txq[tc].count;
+ }
+
+ return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
+}
+EXPORT_SYMBOL(__skb_tx_hash);
+
static void skb_warn_bad_offload(const struct sk_buff *skb)
{
static const netdev_features_t null_features = 0;
@@ -2908,6 +2936,84 @@ int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(dev_loopback_xmit);
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+ struct xps_dev_maps *dev_maps;
+ struct xps_map *map;
+ int queue_index = -1;
+
+ rcu_read_lock();
+ dev_maps = rcu_dereference(dev->xps_maps);
+ if (dev_maps) {
+ map = rcu_dereference(
+ dev_maps->cpu_map[skb->sender_cpu - 1]);
+ if (map) {
+ if (map->len == 1)
+ queue_index = map->queues[0];
+ else
+ queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
+ map->len)];
+ if (unlikely(queue_index >= dev->real_num_tx_queues))
+ queue_index = -1;
+ }
+ }
+ rcu_read_unlock();
+
+ return queue_index;
+#else
+ return -1;
+#endif
+}
+
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+ int queue_index = sk_tx_queue_get(sk);
+
+ if (queue_index < 0 || skb->ooo_okay ||
+ queue_index >= dev->real_num_tx_queues) {
+ int new_index = get_xps_queue(dev, skb);
+ if (new_index < 0)
+ new_index = skb_tx_hash(dev, skb);
+
+ if (queue_index != new_index && sk &&
+ rcu_access_pointer(sk->sk_dst_cache))
+ sk_tx_queue_set(sk, new_index);
+
+ queue_index = new_index;
+ }
+
+ return queue_index;
+}
+
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+ struct sk_buff *skb,
+ void *accel_priv)
+{
+ int queue_index = 0;
+
+#ifdef CONFIG_XPS
+ if (skb->sender_cpu == 0)
+ skb->sender_cpu = raw_smp_processor_id() + 1;
+#endif
+
+ if (dev->real_num_tx_queues != 1) {
+ const struct net_device_ops *ops = dev->netdev_ops;
+ if (ops->ndo_select_queue)
+ queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+ __netdev_pick_tx);
+ else
+ queue_index = __netdev_pick_tx(dev, skb);
+
+ if (!accel_priv)
+ queue_index = netdev_cap_txqueue(dev, queue_index);
+ }
+
+ skb_set_queue_mapping(skb, queue_index);
+ return netdev_get_tx_queue(dev, queue_index);
+}
+
/**
* __dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
diff --git a/net/core/filter.c b/net/core/filter.c
index a831f193e2c7..6805717be614 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -36,6 +36,7 @@
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
+#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/uaccess.h>
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d3acc4dff4ae..204d09c42510 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1,3 +1,4 @@
+#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/export.h>
#include <linux/ip.h>
@@ -12,19 +13,57 @@
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
-#include <net/flow_keys.h>
+#include <linux/stddef.h>
+#include <linux/if_ether.h>
+#include <net/flow_dissector.h>
#include <scsi/fc/fc_fcoe.h>
-/* copy saddr & daddr, possibly using 64bit load/store
- * Equivalent to : flow->src = iph->saddr;
- * flow->dst = iph->daddr;
- */
-static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph)
+static bool skb_flow_dissector_uses_key(struct flow_dissector *flow_dissector,
+ enum flow_dissector_key_id key_id)
{
- BUILD_BUG_ON(offsetof(typeof(*flow), dst) !=
- offsetof(typeof(*flow), src) + sizeof(flow->src));
- memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
+ return flow_dissector->used_keys & (1 << key_id);
+}
+
+static void skb_flow_dissector_set_key(struct flow_dissector *flow_dissector,
+ enum flow_dissector_key_id key_id)
+{
+ flow_dissector->used_keys |= (1 << key_id);
+}
+
+static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
+ enum flow_dissector_key_id key_id,
+ void *target_container)
+{
+ return ((char *) target_container) + flow_dissector->offset[key_id];
+}
+
+void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
+ const struct flow_dissector_key *key,
+ unsigned int key_count)
+{
+ unsigned int i;
+
+ memset(flow_dissector, 0, sizeof(*flow_dissector));
+
+ for (i = 0; i < key_count; i++, key++) {
+ /* User should make sure that every key target offset is withing
+ * boundaries of unsigned short.
+ */
+ BUG_ON(key->offset > USHRT_MAX);
+ BUG_ON(skb_flow_dissector_uses_key(flow_dissector,
+ key->key_id));
+
+ skb_flow_dissector_set_key(flow_dissector, key->key_id);
+ flow_dissector->offset[key->key_id] = key->offset;
+ }
+
+ /* Ensure that the dissector always includes basic key. That way
+ * we are able to avoid handling lack of it in fast path.
+ */
+ BUG_ON(!skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_BASIC));
}
+EXPORT_SYMBOL(skb_flow_dissector_init);
/**
* __skb_flow_get_ports - extract the upper layer ports and return them
@@ -63,17 +102,27 @@ EXPORT_SYMBOL(__skb_flow_get_ports);
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
+ * @flow_dissector: list of keys to dissect
+ * @target_container: target structure to put dissected values into
* @data: raw buffer pointer to the packet, if NULL use skb->data
* @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
* @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
* @hlen: packet header length, if @data is NULL use skb_headlen(skb)
*
- * The function will try to retrieve the struct flow_keys from either the skbuff
- * or a raw buffer specified by the rest parameters
+ * The function will try to retrieve individual keys into target specified
+ * by flow_dissector from either the skbuff or a raw buffer specified by the
+ * rest parameters.
+ *
+ * Caller must take care of zeroing target container memory.
*/
-bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
+bool __skb_flow_dissect(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container,
void *data, __be16 proto, int nhoff, int hlen)
{
+ struct flow_dissector_key_basic *key_basic;
+ struct flow_dissector_key_addrs *key_addrs;
+ struct flow_dissector_key_ports *key_ports;
u8 ip_proto;
if (!data) {
@@ -83,7 +132,23 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
hlen = skb_headlen(skb);
}
- memset(flow, 0, sizeof(*flow));
+ /* It is ensured by skb_flow_dissector_init() that basic key will
+ * be always present.
+ */
+ key_basic = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ target_container);
+
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct ethhdr *eth = eth_hdr(skb);
+ struct flow_dissector_key_eth_addrs *key_eth_addrs;
+
+ key_eth_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ target_container);
+ memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
+ }
again:
switch (proto) {
@@ -100,14 +165,13 @@ ip:
if (ip_is_fragment(iph))
ip_proto = 0;
- /* skip the address processing if skb is NULL. The assumption
- * here is that if there is no skb we are not looking for flow
- * info but lengths and protocols.
- */
- if (!skb)
+ if (!skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS))
break;
-
- iph_to_flow_copy_addrs(flow, iph);
+ key_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ target_container);
+ memcpy(key_addrs, &iph->saddr, sizeof(*key_addrs));
break;
}
case htons(ETH_P_IPV6): {
@@ -123,23 +187,47 @@ ipv6:
ip_proto = iph->nexthdr;
nhoff += sizeof(struct ipv6hdr);
- /* see comment above in IPv4 section */
- if (!skb)
- break;
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) {
+ key_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
+ target_container);
+
+ key_addrs->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
+ key_addrs->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
+ goto flow_label;
+ }
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+ struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs;
- flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
- flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
+ key_ipv6_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ target_container);
+ memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs));
+ goto flow_label;
+ }
+ break;
+flow_label:
flow_label = ip6_flowlabel(iph);
if (flow_label) {
/* Awesome, IPv6 packet has a flow label so we can
* use that to represent the ports without any
* further dissection.
*/
- flow->n_proto = proto;
- flow->ip_proto = ip_proto;
- flow->ports = flow_label;
- flow->thoff = (u16)nhoff;
+
+ key_basic->n_proto = proto;
+ key_basic->ip_proto = ip_proto;
+ key_basic->thoff = (u16)nhoff;
+
+ if (!skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS))
+ break;
+ key_ports = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ target_container);
+ key_ports->ports = flow_label;
return true;
}
@@ -186,14 +274,22 @@ ipv6:
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr)
return false;
- flow->src = hdr->srcnode;
- flow->dst = 0;
- flow->n_proto = proto;
- flow->thoff = (u16)nhoff;
+ key_basic->n_proto = proto;
+ key_basic->thoff = (u16)nhoff;
+
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) {
+ return true;
+ key_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
+ target_container);
+ key_addrs->src = hdr->srcnode;
+ key_addrs->dst = 0;
+ }
return true;
}
case htons(ETH_P_FCOE):
- flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
+ key_basic->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
/* fall through */
default:
return false;
@@ -248,14 +344,24 @@ ipv6:
break;
}
- flow->n_proto = proto;
- flow->ip_proto = ip_proto;
- flow->thoff = (u16) nhoff;
-
- /* unless skb is set we don't need to record port info */
- if (skb)
- flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
- data, hlen);
+ /* It is ensured by skb_flow_dissector_init() that basic key will
+ * be always present.
+ */
+ key_basic = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ target_container);
+ key_basic->n_proto = proto;
+ key_basic->ip_proto = ip_proto;
+ key_basic->thoff = (u16) nhoff;
+
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS)) {
+ key_ports = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ target_container);
+ key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+ data, hlen);
+ }
return true;
}
@@ -277,16 +383,16 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
u32 hash;
/* get a consistent hash (same value on both flow directions) */
- if (((__force u32)keys->dst < (__force u32)keys->src) ||
- (((__force u32)keys->dst == (__force u32)keys->src) &&
- ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) {
- swap(keys->dst, keys->src);
- swap(keys->port16[0], keys->port16[1]);
+ if (((__force u32)keys->addrs.dst < (__force u32)keys->addrs.src) ||
+ (((__force u32)keys->addrs.dst == (__force u32)keys->addrs.src) &&
+ ((__force u16)keys->ports.dst < (__force u16)keys->ports.src))) {
+ swap(keys->addrs.dst, keys->addrs.src);
+ swap(keys->ports.src, keys->ports.dst);
}
- hash = __flow_hash_3words((__force u32)keys->dst,
- (__force u32)keys->src,
- (__force u32)keys->ports,
+ hash = __flow_hash_3words((__force u32)keys->addrs.dst,
+ (__force u32)keys->addrs.src,
+ (__force u32)keys->ports.ports,
keyval);
if (!hash)
hash = 1;
@@ -304,7 +410,7 @@ EXPORT_SYMBOL(flow_hash_from_keys);
static inline u32 ___skb_get_hash(const struct sk_buff *skb,
struct flow_keys *keys, u32 keyval)
{
- if (!skb_flow_dissect(skb, keys))
+ if (!skb_flow_dissect_flow_keys(skb, keys))
return 0;
return __flow_hash_from_keys(keys, keyval);
@@ -329,16 +435,19 @@ void make_flow_keys_digest(struct flow_keys_digest *digest,
memset(digest, 0, sizeof(*digest));
- data->n_proto = flow->n_proto;
- data->ip_proto = flow->ip_proto;
- data->ports = flow->ports;
- data->src = flow->src;
- data->dst = flow->dst;
+ data->n_proto = flow->basic.n_proto;
+ data->ip_proto = flow->basic.ip_proto;
+ data->ports = flow->ports.ports;
+ data->src = flow->addrs.src;
+ data->dst = flow->addrs.dst;
}
EXPORT_SYMBOL(make_flow_keys_digest);
-/*
- * __skb_get_hash: calculate a flow hash based on src/dst addresses
+/**
+ * __skb_get_hash: calculate a flow hash
+ * @skb: sk_buff to calculate flow hash from
+ *
+ * This function calculates a flow hash based on src/dst addresses
* and src/dst port numbers. Sets hash in skb to non-zero hash value
* on success, zero indicates no valid hash. Also, sets l4_hash in skb
* if hash is a canonical 4-tuple hash over transport ports.
@@ -353,12 +462,9 @@ void __skb_get_hash(struct sk_buff *skb)
hash = ___skb_get_hash(skb, &keys, hashrnd);
if (!hash)
return;
-
- if (keys.ports)
+ if (keys.ports.ports)
skb->l4_hash = 1;
-
skb->sw_hash = 1;
-
skb->hash = hash;
}
EXPORT_SYMBOL(__skb_get_hash);
@@ -371,40 +477,12 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
}
EXPORT_SYMBOL(skb_get_hash_perturb);
-/*
- * Returns a Tx hash based on the given packet descriptor a Tx queues' number
- * to be used as a distribution range.
- */
-u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
- unsigned int num_tx_queues)
-{
- u32 hash;
- u16 qoffset = 0;
- u16 qcount = num_tx_queues;
-
- if (skb_rx_queue_recorded(skb)) {
- hash = skb_get_rx_queue(skb);
- while (unlikely(hash >= num_tx_queues))
- hash -= num_tx_queues;
- return hash;
- }
-
- if (dev->num_tc) {
- u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
- qoffset = dev->tc_to_txq[tc].offset;
- qcount = dev->tc_to_txq[tc].count;
- }
-
- return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
-}
-EXPORT_SYMBOL(__skb_tx_hash);
-
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
const struct flow_keys *keys, int hlen)
{
- u32 poff = keys->thoff;
+ u32 poff = keys->basic.thoff;
- switch (keys->ip_proto) {
+ switch (keys->basic.ip_proto) {
case IPPROTO_TCP: {
/* access doff as u8 to avoid unaligned access */
const u8 *doff;
@@ -445,8 +523,12 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
return poff;
}
-/* skb_get_poff() returns the offset to the payload as far as it could
- * be dissected. The main user is currently BPF, so that we can dynamically
+/**
+ * skb_get_poff - get the offset to the payload
+ * @skb: sk_buff to get the payload offset from
+ *
+ * The function will get the offset to the payload as far as it could
+ * be dissected. The main user is currently BPF, so that we can dynamically
* truncate packets without needing to push actual payload to the user
* space and can analyze headers only, instead.
*/
@@ -454,86 +536,52 @@ u32 skb_get_poff(const struct sk_buff *skb)
{
struct flow_keys keys;
- if (!skb_flow_dissect(skb, &keys))
+ if (!skb_flow_dissect_flow_keys(skb, &keys))
return 0;
return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
}
-static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
-{
-#ifdef CONFIG_XPS
- struct xps_dev_maps *dev_maps;
- struct xps_map *map;
- int queue_index = -1;
-
- rcu_read_lock();
- dev_maps = rcu_dereference(dev->xps_maps);
- if (dev_maps) {
- map = rcu_dereference(
- dev_maps->cpu_map[skb->sender_cpu - 1]);
- if (map) {
- if (map->len == 1)
- queue_index = map->queues[0];
- else
- queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
- map->len)];
- if (unlikely(queue_index >= dev->real_num_tx_queues))
- queue_index = -1;
- }
- }
- rcu_read_unlock();
-
- return queue_index;
-#else
- return -1;
-#endif
-}
-
-static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
- int queue_index = sk_tx_queue_get(sk);
-
- if (queue_index < 0 || skb->ooo_okay ||
- queue_index >= dev->real_num_tx_queues) {
- int new_index = get_xps_queue(dev, skb);
- if (new_index < 0)
- new_index = skb_tx_hash(dev, skb);
+static const struct flow_dissector_key flow_keys_dissector_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_BASIC,
+ .offset = offsetof(struct flow_keys, basic),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_PORTS,
+ .offset = offsetof(struct flow_keys, ports),
+ },
+};
- if (queue_index != new_index && sk &&
- rcu_access_pointer(sk->sk_dst_cache))
- sk_tx_queue_set(sk, new_index);
+static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_BASIC,
+ .offset = offsetof(struct flow_keys, basic),
+ },
+};
- queue_index = new_index;
- }
+struct flow_dissector flow_keys_dissector __read_mostly;
+EXPORT_SYMBOL(flow_keys_dissector);
- return queue_index;
-}
+struct flow_dissector flow_keys_buf_dissector __read_mostly;
-struct netdev_queue *netdev_pick_tx(struct net_device *dev,
- struct sk_buff *skb,
- void *accel_priv)
+static int __init init_default_flow_dissectors(void)
{
- int queue_index = 0;
-
-#ifdef CONFIG_XPS
- if (skb->sender_cpu == 0)
- skb->sender_cpu = raw_smp_processor_id() + 1;
-#endif
-
- if (dev->real_num_tx_queues != 1) {
- const struct net_device_ops *ops = dev->netdev_ops;
- if (ops->ndo_select_queue)
- queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
- __netdev_pick_tx);
- else
- queue_index = __netdev_pick_tx(dev, skb);
-
- if (!accel_priv)
- queue_index = netdev_cap_txqueue(dev, queue_index);
- }
-
- skb_set_queue_mapping(skb, queue_index);
- return netdev_get_tx_queue(dev, queue_index);
+ skb_flow_dissector_init(&flow_keys_dissector,
+ flow_keys_dissector_keys,
+ ARRAY_SIZE(flow_keys_dissector_keys));
+ skb_flow_dissector_init(&flow_keys_buf_dissector,
+ flow_keys_buf_dissector_keys,
+ ARRAY_SIZE(flow_keys_buf_dissector_keys));
+ return 0;
}
+
+late_initcall_sync(init_default_flow_dissectors);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 9045e2a1108f..c3325bd2f3fb 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -58,6 +58,7 @@
#include <net/ipv6.h>
#include <net/ip.h>
#include <net/dsa.h>
+#include <net/flow_dissector.h>
#include <linux/uaccess.h>
__setup("ether=", netdev_boot_setup);
@@ -130,9 +131,9 @@ u32 eth_get_headlen(void *data, unsigned int len)
return len;
/* parse any remaining L2/L3 headers, check for L4 */
- if (!__skb_flow_dissect(NULL, &keys, data,
- eth->h_proto, sizeof(*eth), len))
- return max_t(u32, keys.thoff, sizeof(*eth));
+ if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto,
+ sizeof(*eth), len))
+ return max_t(u32, keys.basic.thoff, sizeof(*eth));
/* parse for any L4 headers */
return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2274e723a3df..5fd1c2f487d2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -477,6 +477,16 @@ config NET_CLS_BPF
To compile this code as a module, choose M here: the module will
be called cls_bpf.
+config NET_CLS_FLOWER
+ tristate "Flower classifier"
+ select NET_CLS
+ ---help---
+ If you say Y here, you will be able to classify packets based on
+ a configurable combination of packet keys and masks.
+
+ To compile this code as a module, choose M here: the module will
+ be called cls_flower.
+
config NET_EMATCH
bool "Extended Matches"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 7ca7f4c1b8c2..690c1689e090 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
obj-$(CONFIG_NET_CLS_BPF) += cls_bpf.o
+obj-$(CONFIG_NET_CLS_FLOWER) += cls_flower.o
obj-$(CONFIG_NET_EMATCH) += ematch.o
obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index a620c4e288a5..b4359924846c 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -26,7 +26,7 @@
#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h>
@@ -68,35 +68,35 @@ static inline u32 addr_fold(void *addr)
static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->src)
- return ntohl(flow->src);
+ if (flow->addrs.src)
+ return ntohl(flow->addrs.src);
return addr_fold(skb->sk);
}
static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->dst)
- return ntohl(flow->dst);
+ if (flow->addrs.dst)
+ return ntohl(flow->addrs.dst);
return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
}
static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
{
- return flow->ip_proto;
+ return flow->basic.ip_proto;
}
static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->ports)
- return ntohs(flow->port16[0]);
+ if (flow->ports.ports)
+ return ntohs(flow->ports.src);
return addr_fold(skb->sk);
}
static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->ports)
- return ntohs(flow->port16[1]);
+ if (flow->ports.ports)
+ return ntohs(flow->ports.dst);
return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
}
@@ -295,7 +295,7 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
keymask = f->keymask;
if (keymask & FLOW_KEYS_NEEDED)
- skb_flow_dissect(skb, &flow_keys);
+ skb_flow_dissect_flow_keys(skb, &flow_keys);
for (n = 0; n < f->nkeys; n++) {
key = ffs(keymask) - 1;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
new file mode 100644
index 000000000000..9bc654c764cd
--- /dev/null
+++ b/net/sched/cls_flower.c
@@ -0,0 +1,688 @@
+/*
+ * net/sched/cls_flower.c Flower classifier
+ *
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/rhashtable.h>
+
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+
+#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
+#include <net/ip.h>
+#include <net/flow_dissector.h>
+
+struct fl_flow_key {
+ int indev_ifindex;
+ struct flow_dissector_key_basic basic;
+ struct flow_dissector_key_eth_addrs eth;
+ union {
+ struct flow_dissector_key_addrs ipv4;
+ struct flow_dissector_key_ipv6_addrs ipv6;
+ };
+ struct flow_dissector_key_ports tp;
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+
+struct fl_flow_mask_range {
+ unsigned short int start;
+ unsigned short int end;
+};
+
+struct fl_flow_mask {
+ struct fl_flow_key key;
+ struct fl_flow_mask_range range;
+ struct rcu_head rcu;
+};
+
+struct cls_fl_head {
+ struct rhashtable ht;
+ struct fl_flow_mask mask;
+ struct flow_dissector dissector;
+ u32 hgen;
+ bool mask_assigned;
+ struct list_head filters;
+ struct rhashtable_params ht_params;
+ struct rcu_head rcu;
+};
+
+struct cls_fl_filter {
+ struct rhash_head ht_node;
+ struct fl_flow_key mkey;
+ struct tcf_exts exts;
+ struct tcf_result res;
+ struct fl_flow_key key;
+ struct list_head list;
+ u32 handle;
+ struct rcu_head rcu;
+};
+
+static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
+{
+ return mask->range.end - mask->range.start;
+}
+
+static void fl_mask_update_range(struct fl_flow_mask *mask)
+{
+ const u8 *bytes = (const u8 *) &mask->key;
+ size_t size = sizeof(mask->key);
+ size_t i, first = 0, last = size - 1;
+
+ for (i = 0; i < sizeof(mask->key); i++) {
+ if (bytes[i]) {
+ if (!first && i)
+ first = i;
+ last = i;
+ }
+ }
+ mask->range.start = rounddown(first, sizeof(long));
+ mask->range.end = roundup(last + 1, sizeof(long));
+}
+
+static void *fl_key_get_start(struct fl_flow_key *key,
+ const struct fl_flow_mask *mask)
+{
+ return (u8 *) key + mask->range.start;
+}
+
+static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
+ struct fl_flow_mask *mask)
+{
+ const long *lkey = fl_key_get_start(key, mask);
+ const long *lmask = fl_key_get_start(&mask->key, mask);
+ long *lmkey = fl_key_get_start(mkey, mask);
+ int i;
+
+ for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
+ *lmkey++ = *lkey++ & *lmask++;
+}
+
+static void fl_clear_masked_range(struct fl_flow_key *key,
+ struct fl_flow_mask *mask)
+{
+ memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
+}
+
+static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ struct cls_fl_head *head = rcu_dereference_bh(tp->root);
+ struct cls_fl_filter *f;
+ struct fl_flow_key skb_key;
+ struct fl_flow_key skb_mkey;
+
+ fl_clear_masked_range(&skb_key, &head->mask);
+ skb_key.indev_ifindex = skb->skb_iif;
+ /* skb_flow_dissect() does not set n_proto in case an unknown protocol,
+ * so do it rather here.
+ */
+ skb_key.basic.n_proto = skb->protocol;
+ skb_flow_dissect(skb, &head->dissector, &skb_key);
+
+ fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
+
+ f = rhashtable_lookup_fast(&head->ht,
+ fl_key_get_start(&skb_mkey, &head->mask),
+ head->ht_params);
+ if (f) {
+ *res = f->res;
+ return tcf_exts_exec(skb, &f->exts, res);
+ }
+ return -1;
+}
+
+static int fl_init(struct tcf_proto *tp)
+{
+ struct cls_fl_head *head;
+
+ head = kzalloc(sizeof(*head), GFP_KERNEL);
+ if (!head)
+ return -ENOBUFS;
+
+ INIT_LIST_HEAD_RCU(&head->filters);
+ rcu_assign_pointer(tp->root, head);
+
+ return 0;
+}
+
+static void fl_destroy_filter(struct rcu_head *head)
+{
+ struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+
+ tcf_exts_destroy(&f->exts);
+ kfree(f);
+}
+
+static bool fl_destroy(struct tcf_proto *tp, bool force)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f, *next;
+
+ if (!force && !list_empty(&head->filters))
+ return false;
+
+ list_for_each_entry_safe(f, next, &head->filters, list) {
+ list_del_rcu(&f->list);
+ call_rcu(&f->rcu, fl_destroy_filter);
+ }
+ RCU_INIT_POINTER(tp->root, NULL);
+ if (head->mask_assigned)
+ rhashtable_destroy(&head->ht);
+ kfree_rcu(head, rcu);
+ return true;
+}
+
+static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f;
+
+ list_for_each_entry(f, &head->filters, list)
+ if (f->handle == handle)
+ return (unsigned long) f;
+ return 0;
+}
+
+static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
+ [TCA_FLOWER_UNSPEC] = { .type = NLA_UNSPEC },
+ [TCA_FLOWER_CLASSID] = { .type = NLA_U32 },
+ [TCA_FLOWER_INDEV] = { .type = NLA_STRING,
+ .len = IFNAMSIZ },
+ [TCA_FLOWER_KEY_ETH_DST] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_DST_MASK] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_SRC] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_SRC_MASK] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_IP_PROTO] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV4_DST] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_IPV6_DST] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 },
+};
+
+static void fl_set_key_val(struct nlattr **tb,
+ void *val, int val_type,
+ void *mask, int mask_type, int len)
+{
+ if (!tb[val_type])
+ return;
+ memcpy(val, nla_data(tb[val_type]), len);
+ if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
+ memset(mask, 0xff, len);
+ else
+ memcpy(mask, nla_data(tb[mask_type]), len);
+}
+
+static int fl_set_key(struct net *net, struct nlattr **tb,
+ struct fl_flow_key *key, struct fl_flow_key *mask)
+{
+ int err;
+
+ if (tb[TCA_FLOWER_INDEV]) {
+ err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
+ if (err < 0)
+ return err;
+ key->indev_ifindex = err;
+ mask->indev_ifindex = 0xffffffff;
+ }
+
+ fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+ mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+ sizeof(key->eth.dst));
+ fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+ mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+ sizeof(key->eth.src));
+ fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+ &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.n_proto));
+ if (key->basic.n_proto == htons(ETH_P_IP) ||
+ key->basic.n_proto == htons(ETH_P_IPV6)) {
+ fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+ &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.ip_proto));
+ }
+ if (key->basic.n_proto == htons(ETH_P_IP)) {
+ fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+ &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+ sizeof(key->ipv4.src));
+ fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+ &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+ sizeof(key->ipv4.dst));
+ } else if (key->basic.n_proto == htons(ETH_P_IPV6)) {
+ fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+ &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+ sizeof(key->ipv6.src));
+ fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+ &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+ sizeof(key->ipv6.dst));
+ }
+ if (key->basic.ip_proto == IPPROTO_TCP) {
+ fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src));
+ fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst));
+ } else if (key->basic.ip_proto == IPPROTO_UDP) {
+ fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src));
+ fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst));
+ }
+
+ return 0;
+}
+
+static bool fl_mask_eq(struct fl_flow_mask *mask1,
+ struct fl_flow_mask *mask2)
+{
+ const long *lmask1 = fl_key_get_start(&mask1->key, mask1);
+ const long *lmask2 = fl_key_get_start(&mask2->key, mask2);
+
+ return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) &&
+ !memcmp(lmask1, lmask2, fl_mask_range(mask1));
+}
+
+static const struct rhashtable_params fl_ht_params = {
+ .key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
+ .head_offset = offsetof(struct cls_fl_filter, ht_node),
+ .automatic_shrinking = true,
+};
+
+static int fl_init_hashtable(struct cls_fl_head *head,
+ struct fl_flow_mask *mask)
+{
+ head->ht_params = fl_ht_params;
+ head->ht_params.key_len = fl_mask_range(mask);
+ head->ht_params.key_offset += mask->range.start;
+
+ return rhashtable_init(&head->ht, &head->ht_params);
+}
+
+#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
+#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
+#define FL_KEY_MEMBER_END_OFFSET(member) \
+ (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))
+
+#define FL_KEY_IN_RANGE(mask, member) \
+ (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end && \
+ FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)
+
+#define FL_KEY_SET(keys, cnt, id, member) \
+ do { \
+ keys[cnt].key_id = id; \
+ keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member); \
+ cnt++; \
+ } while(0);
+
+#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member) \
+ do { \
+ if (FL_KEY_IN_RANGE(mask, member)) \
+ FL_KEY_SET(keys, cnt, id, member); \
+ } while(0);
+
+static void fl_init_dissector(struct cls_fl_head *head,
+ struct fl_flow_mask *mask)
+{
+ struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
+ size_t cnt = 0;
+
+ FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_PORTS, tp);
+
+ skb_flow_dissector_init(&head->dissector, keys, cnt);
+}
+
+static int fl_check_assign_mask(struct cls_fl_head *head,
+ struct fl_flow_mask *mask)
+{
+ int err;
+
+ if (head->mask_assigned) {
+ if (!fl_mask_eq(&head->mask, mask))
+ return -EINVAL;
+ else
+ return 0;
+ }
+
+ /* Mask is not assigned yet. So assign it and init hashtable
+ * according to that.
+ */
+ err = fl_init_hashtable(head, mask);
+ if (err)
+ return err;
+ memcpy(&head->mask, mask, sizeof(head->mask));
+ head->mask_assigned = true;
+
+ fl_init_dissector(head, mask);
+
+ return 0;
+}
+
+static int fl_set_parms(struct net *net, struct tcf_proto *tp,
+ struct cls_fl_filter *f, struct fl_flow_mask *mask,
+ unsigned long base, struct nlattr **tb,
+ struct nlattr *est, bool ovr)
+{
+ struct tcf_exts e;
+ int err;
+
+ tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_FLOWER_CLASSID]) {
+ f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
+ tcf_bind_filter(tp, &f->res, base);
+ }
+
+ err = fl_set_key(net, tb, &f->key, &mask->key);
+ if (err)
+ goto errout;
+
+ fl_mask_update_range(mask);
+ fl_set_masked_key(&f->mkey, &f->key, mask);
+
+ tcf_exts_change(tp, &f->exts, &e);
+
+ return 0;
+errout:
+ tcf_exts_destroy(&e);
+ return err;
+}
+
+static u32 fl_grab_new_handle(struct tcf_proto *tp,
+ struct cls_fl_head *head)
+{
+ unsigned int i = 0x80000000;
+ u32 handle;
+
+ do {
+ if (++head->hgen == 0x7FFFFFFF)
+ head->hgen = 1;
+ } while (--i > 0 && fl_get(tp, head->hgen));
+
+ if (unlikely(i == 0)) {
+ pr_err("Insufficient number of handles\n");
+ handle = 0;
+ } else {
+ handle = head->hgen;
+ }
+
+ return handle;
+}
+
+static int fl_change(struct net *net, struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
+ u32 handle, struct nlattr **tca,
+ unsigned long *arg, bool ovr)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
+ struct cls_fl_filter *fnew;
+ struct nlattr *tb[TCA_FLOWER_MAX + 1];
+ struct fl_flow_mask mask = {};
+ int err;
+
+ if (!tca[TCA_OPTIONS])
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
+ if (err < 0)
+ return err;
+
+ if (fold && handle && fold->handle != handle)
+ return -EINVAL;
+
+ fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+ if (!fnew)
+ return -ENOBUFS;
+
+ tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+
+ if (!handle) {
+ handle = fl_grab_new_handle(tp, head);
+ if (!handle) {
+ err = -EINVAL;
+ goto errout;
+ }
+ }
+ fnew->handle = handle;
+
+ err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
+ if (err)
+ goto errout;
+
+ err = fl_check_assign_mask(head, &mask);
+ if (err)
+ goto errout;
+
+ err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
+ head->ht_params);
+ if (err)
+ goto errout;
+ if (fold)
+ rhashtable_remove_fast(&head->ht, &fold->ht_node,
+ head->ht_params);
+
+ *arg = (unsigned long) fnew;
+
+ if (fold) {
+ list_replace_rcu(&fnew->list, &fold->list);
+ tcf_unbind_filter(tp, &fold->res);
+ call_rcu(&fold->rcu, fl_destroy_filter);
+ } else {
+ list_add_tail_rcu(&fnew->list, &head->filters);
+ }
+
+ return 0;
+
+errout:
+ kfree(fnew);
+ return err;
+}
+
+static int fl_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
+
+ rhashtable_remove_fast(&head->ht, &f->ht_node,
+ head->ht_params);
+ list_del_rcu(&f->list);
+ tcf_unbind_filter(tp, &f->res);
+ call_rcu(&f->rcu, fl_destroy_filter);
+ return 0;
+}
+
+static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f;
+
+ list_for_each_entry_rcu(f, &head->filters, list) {
+ if (arg->count < arg->skip)
+ goto skip;
+ if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+skip:
+ arg->count++;
+ }
+}
+
+static int fl_dump_key_val(struct sk_buff *skb,
+ void *val, int val_type,
+ void *mask, int mask_type, int len)
+{
+ int err;
+
+ if (!memchr_inv(mask, 0, len))
+ return 0;
+ err = nla_put(skb, val_type, len, val);
+ if (err)
+ return err;
+ if (mask_type != TCA_FLOWER_UNSPEC) {
+ err = nla_put(skb, mask_type, len, mask);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
+ struct nlattr *nest;
+ struct fl_flow_key *key, *mask;
+
+ if (!f)
+ return skb->len;
+
+ t->tcm_handle = f->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (!nest)
+ goto nla_put_failure;
+
+ if (f->res.classid &&
+ nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
+ goto nla_put_failure;
+
+ key = &f->key;
+ mask = &head->mask.key;
+
+ if (mask->indev_ifindex) {
+ struct net_device *dev;
+
+ dev = __dev_get_by_index(net, key->indev_ifindex);
+ if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
+ goto nla_put_failure;
+ }
+
+ if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+ mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+ sizeof(key->eth.dst)) ||
+ fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+ mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+ sizeof(key->eth.src)) ||
+ fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+ &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.n_proto)))
+ goto nla_put_failure;
+ if ((key->basic.n_proto == htons(ETH_P_IP) ||
+ key->basic.n_proto == htons(ETH_P_IPV6)) &&
+ fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+ &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.ip_proto)))
+ goto nla_put_failure;
+
+ if (key->basic.n_proto == htons(ETH_P_IP) &&
+ (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+ &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+ sizeof(key->ipv4.src)) ||
+ fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+ &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+ sizeof(key->ipv4.dst))))
+ goto nla_put_failure;
+ else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
+ (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+ &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+ sizeof(key->ipv6.src)) ||
+ fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+ &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+ sizeof(key->ipv6.dst))))
+ goto nla_put_failure;
+
+ if (key->basic.ip_proto == IPPROTO_TCP &&
+ (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src)) ||
+ fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst))))
+ goto nla_put_failure;
+ else if (key->basic.ip_proto == IPPROTO_UDP &&
+ (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src)) ||
+ fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst))))
+ goto nla_put_failure;
+
+ if (tcf_exts_dump(skb, &f->exts))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &f->exts) < 0)
+ goto nla_put_failure;
+
+ return skb->len;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static struct tcf_proto_ops cls_fl_ops __read_mostly = {
+ .kind = "flower",
+ .classify = fl_classify,
+ .init = fl_init,
+ .destroy = fl_destroy,
+ .get = fl_get,
+ .change = fl_change,
+ .delete = fl_delete,
+ .walk = fl_walk,
+ .dump = fl_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init cls_fl_init(void)
+{
+ return register_tcf_proto_ops(&cls_fl_ops);
+}
+
+static void __exit cls_fl_exit(void)
+{
+ unregister_tcf_proto_ops(&cls_fl_ops);
+}
+
+module_init(cls_fl_init);
+module_exit(cls_fl_exit);
+
+MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
+MODULE_DESCRIPTION("Flower classifier");
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index dfe3da75594c..93d5742dc7e0 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -18,7 +18,7 @@
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
/*
CHOKe stateless AQM for fair bandwidth allocation
@@ -170,13 +170,13 @@ static bool choke_match_flow(struct sk_buff *skb1,
if (!choke_skb_cb(skb1)->keys_valid) {
choke_skb_cb(skb1)->keys_valid = 1;
- skb_flow_dissect(skb1, &temp);
+ skb_flow_dissect_flow_keys(skb1, &temp);
make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
}
if (!choke_skb_cb(skb2)->keys_valid) {
choke_skb_cb(skb2)->keys_valid = 1;
- skb_flow_dissect(skb2, &temp);
+ skb_flow_dissect_flow_keys(skb2, &temp);
make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
}