diff options
author | David S. Miller <davem@davemloft.net> | 2015-05-13 15:19:48 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-05-13 15:19:48 -0400 |
commit | dd58c6359b38cff91100593ea4926adb19e85636 (patch) | |
tree | c3923d418d9323bfb7cf123eb57bea2f7c63cbdd | |
parent | 212da1fa60a822f5a7f20d52d11524dadd192a91 (diff) | |
parent | 77b9900ef53ae047e36a37d13a2aa33bb2d60641 (diff) |
Merge branch 'cls_flower'
Jiri Pirko says:
====================
introduce programable flow dissector and cls_flower
Per Davem's request, I prepared this patchset which introduces programmable
flow dissector. For current users of flow_keys, there is a wrapper
skb_flow_dissect_flow_keys which maintains the previous behaviour.
For purposes of cls_flower, couple of new dissection keys were introduced.
Note that this dissector can be also eventually used by openvswitch code.
Also, as a next step, I plan to get rid of *skb_flow_get_ports(export)
and *__skb_get_poff as their functionality can be now implemented by
skb_flow_dissect as well.
v2->v3:
- remove TCA_FLOWER_POLICE attr suggested by Jamal
v1->v2:
- move __skb_tx_hash rather to dev.c as suggested by Alex
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/bonding/bond_main.c | 20 | ||||
-rw-r--r-- | drivers/net/ethernet/cisco/enic/enic_clsf.c | 29 | ||||
-rw-r--r-- | drivers/net/ethernet/cisco/enic/enic_ethtool.c | 10 | ||||
-rw-r--r-- | drivers/net/hyperv/netvsc_drv.c | 8 | ||||
-rw-r--r-- | include/linux/netdevice.h | 3 | ||||
-rw-r--r-- | include/linux/skbuff.h | 14 | ||||
-rw-r--r-- | include/net/flow_dissector.h | 167 | ||||
-rw-r--r-- | include/net/flow_keys.h | 61 | ||||
-rw-r--r-- | include/net/ip.h | 10 | ||||
-rw-r--r-- | include/net/ipv6.h | 10 | ||||
-rw-r--r-- | include/uapi/linux/pkt_cls.h | 30 | ||||
-rw-r--r-- | net/core/dev.c | 106 | ||||
-rw-r--r-- | net/core/filter.c | 1 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 380 | ||||
-rw-r--r-- | net/ethernet/eth.c | 7 | ||||
-rw-r--r-- | net/sched/Kconfig | 10 | ||||
-rw-r--r-- | net/sched/Makefile | 1 | ||||
-rw-r--r-- | net/sched/cls_flow.c | 22 | ||||
-rw-r--r-- | net/sched/cls_flower.c | 688 | ||||
-rw-r--r-- | net/sched/sch_choke.c | 6 |
20 files changed, 1285 insertions, 298 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a2e25de98bde..ef26e0147050 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -76,7 +76,7 @@ #include <net/netns/generic.h> #include <net/pkt_sched.h> #include <linux/rculist.h> -#include <net/flow_keys.h> +#include <net/flow_dissector.h> #include <net/switchdev.h> #include <net/bonding.h> #include <net/bond_3ad.h> @@ -3051,16 +3051,16 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, int noff, proto = -1; if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) - return skb_flow_dissect(skb, fk); + return skb_flow_dissect_flow_keys(skb, fk); - fk->ports = 0; + fk->ports.ports = 0; noff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP)) { if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) return false; iph = ip_hdr(skb); - fk->src = iph->saddr; - fk->dst = iph->daddr; + fk->addrs.src = iph->saddr; + fk->addrs.dst = iph->daddr; noff += iph->ihl << 2; if (!ip_is_fragment(iph)) proto = iph->protocol; @@ -3068,15 +3068,15 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6)))) return false; iph6 = ipv6_hdr(skb); - fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr); - fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); + fk->addrs.src = (__force __be32)ipv6_addr_hash(&iph6->saddr); + fk->addrs.dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); noff += sizeof(*iph6); proto = iph6->nexthdr; } else { return false; } if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) - fk->ports = skb_flow_get_ports(skb, noff, proto); + fk->ports.ports = skb_flow_get_ports(skb, noff, proto); return true; } @@ -3102,8 +3102,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) hash = bond_eth_hash(skb); else - hash = (__force u32)flow.ports; - hash ^= (__force u32)flow.dst ^ (__force u32)flow.src; + hash = (__force u32)flow.ports.ports; + hash ^= (__force u32)flow.addrs.dst ^ (__force u32)flow.addrs.src; hash ^= (hash >> 16); hash ^= (hash >> 8); diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c index 0be6850be8a2..6739ebc08c47 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.c +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c @@ -5,7 +5,7 @@ #include <linux/in.h> #include <linux/types.h> #include <linux/skbuff.h> -#include <net/flow_keys.h> +#include <net/flow_dissector.h> #include "enic_res.h" #include "enic_clsf.h" @@ -22,7 +22,7 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq) int res; struct filter data; - switch (keys->ip_proto) { + switch (keys->basic.ip_proto) { case IPPROTO_TCP: data.u.ipv4.protocol = PROTO_TCP; break; @@ -33,10 +33,10 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq) return -EPROTONOSUPPORT; }; data.type = FILTER_IPV4_5TUPLE; - data.u.ipv4.src_addr = ntohl(keys->src); - data.u.ipv4.dst_addr = ntohl(keys->dst); - data.u.ipv4.src_port = ntohs(keys->port16[0]); - data.u.ipv4.dst_port = ntohs(keys->port16[1]); + data.u.ipv4.src_addr = ntohl(keys->addrs.src); + data.u.ipv4.dst_addr = ntohl(keys->addrs.dst); + data.u.ipv4.src_port = ntohs(keys->ports.src); + data.u.ipv4.dst_port = ntohs(keys->ports.dst); data.u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE; spin_lock_bh(&enic->devcmd_lock); @@ -158,11 +158,11 @@ static struct enic_rfs_fltr_node *htbl_key_search(struct hlist_head *h, struct enic_rfs_fltr_node *tpos; hlist_for_each_entry(tpos, h, node) - if (tpos->keys.src == k->src && - tpos->keys.dst == k->dst && - tpos->keys.ports == k->ports && - tpos->keys.ip_proto == k->ip_proto && - tpos->keys.n_proto == k->n_proto) + if (tpos->keys.addrs.src == k->addrs.src && + tpos->keys.addrs.dst == k->addrs.dst && + tpos->keys.ports.ports == k->ports.ports && + tpos->keys.basic.ip_proto == k->basic.ip_proto && + tpos->keys.basic.n_proto == k->basic.n_proto) return tpos; return NULL; } @@ -177,9 +177,10 @@ int enic_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, int res, i; enic = netdev_priv(dev); - res = skb_flow_dissect(skb, &keys); - if (!res || keys.n_proto != htons(ETH_P_IP) || - (keys.ip_proto != IPPROTO_TCP && keys.ip_proto != IPPROTO_UDP)) + res = skb_flow_dissect_flow_keys(skb, &keys); + if (!res || keys.basic.n_proto != htons(ETH_P_IP) || + (keys.basic.ip_proto != IPPROTO_TCP && + keys.basic.ip_proto != IPPROTO_UDP)) return -EPROTONOSUPPORT; tbl_idx = skb_get_hash_raw(skb) & ENIC_RFS_FLW_MASK; diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 28d9ca675a27..117c0968dd0b 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -334,7 +334,7 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd) n = htbl_fltr_search(enic, (u16)fsp->location); if (!n) return -EINVAL; - switch (n->keys.ip_proto) { + switch (n->keys.basic.ip_proto) { case IPPROTO_TCP: fsp->flow_type = TCP_V4_FLOW; break; @@ -346,16 +346,16 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd) break; } - fsp->h_u.tcp_ip4_spec.ip4src = n->keys.src; + fsp->h_u.tcp_ip4_spec.ip4src = n->keys.addrs.src; fsp->m_u.tcp_ip4_spec.ip4src = (__u32)~0; - fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.dst; + fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.addrs.dst; fsp->m_u.tcp_ip4_spec.ip4dst = (__u32)~0; - fsp->h_u.tcp_ip4_spec.psrc = n->keys.port16[0]; + fsp->h_u.tcp_ip4_spec.psrc = n->keys.ports.src; fsp->m_u.tcp_ip4_spec.psrc = (__u16)~0; - fsp->h_u.tcp_ip4_spec.pdst = n->keys.port16[1]; + fsp->h_u.tcp_ip4_spec.pdst = n->keys.ports.dst; fsp->m_u.tcp_ip4_spec.pdst = (__u16)~0; fsp->ring_cookie = n->rq_id; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5993c7e2d723..8e5fe888a0ec 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -196,12 +196,12 @@ static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb) struct flow_keys flow; int data_len; - if (!skb_flow_dissect(skb, &flow) || - !(flow.n_proto == htons(ETH_P_IP) || - flow.n_proto == htons(ETH_P_IPV6))) + if (!skb_flow_dissect_flow_keys(skb, &flow) || + !(flow.basic.n_proto == htons(ETH_P_IP) || + flow.basic.n_proto == htons(ETH_P_IPV6))) return false; - if (flow.ip_proto == IPPROTO_TCP) + if (flow.basic.ip_proto == IPPROTO_TCP) data_len = 12; else data_len = 8; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cd0951c1893d..d3ed01c18247 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2832,6 +2832,9 @@ static inline int netif_set_xps_queue(struct net_device *dev, } #endif +u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, + unsigned int num_tx_queues); + /* * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used * as a distribution range limit for the returned value. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c0b574a414e7..f83aa6568cbf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,7 +34,7 @@ #include <linux/dma-mapping.h> #include <linux/netdev_features.h> #include <linux/sched.h> -#include <net/flow_keys.h> +#include <net/flow_dissector.h> /* A. Checksumming of received packets by device. * @@ -918,7 +918,6 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) skb->hash = hash; } -void __skb_get_hash(struct sk_buff *skb); static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) @@ -1936,8 +1935,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb, if (skb_transport_header_was_set(skb)) return; - else if (skb_flow_dissect(skb, &keys)) - skb_set_transport_header(skb, keys.thoff); + else if (skb_flow_dissect_flow_keys(skb, &keys)) + skb_set_transport_header(skb, keys.basic.thoff); else skb_set_transport_header(skb, offset_hint); } @@ -3300,9 +3299,6 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) return skb->queue_mapping != 0; } -u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, - unsigned int num_tx_queues); - static inline struct sec_path *skb_sec_path(struct sk_buff *skb) { #ifdef CONFIG_XFRM @@ -3424,10 +3420,6 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, unsigned int transport_len, __sum16(*skb_chkf)(struct sk_buff *skb)); -u32 skb_get_poff(const struct sk_buff *skb); -u32 __skb_get_poff(const struct sk_buff *skb, void *data, - const struct flow_keys *keys, int hlen); - /** * skb_head_is_locked - Determine if the skb->head is locked down * @skb: skb to check diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h new file mode 100644 index 000000000000..bac9c1421f58 --- /dev/null +++ b/include/net/flow_dissector.h @@ -0,0 +1,167 @@ +#ifndef _NET_FLOW_DISSECTOR_H +#define _NET_FLOW_DISSECTOR_H + +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/in6.h> +#include <uapi/linux/if_ether.h> + +/** + * struct flow_dissector_key_basic: + * @thoff: Transport header offset + * @n_proto: Network header protocol (eg. IPv4/IPv6) + * @ip_proto: Transport header protocol (eg. TCP/UDP) + */ +struct flow_dissector_key_basic { + u16 thoff; + __be16 n_proto; + u8 ip_proto; +}; + +/** + * struct flow_dissector_key_addrs: + * @src: source ip address in case of IPv4 + * For IPv6 it contains 32bit hash of src address + * @dst: destination ip address in case of IPv4 + * For IPv6 it contains 32bit hash of dst address + */ +struct flow_dissector_key_addrs { + /* (src,dst) must be grouped, in the same way than in IP header */ + __be32 src; + __be32 dst; +}; + +/** + * flow_dissector_key_tp_ports: + * @ports: port numbers of Transport header + * src: source port number + * dst: destination port number + */ +struct flow_dissector_key_ports { + union { + __be32 ports; + struct { + __be16 src; + __be16 dst; + }; + }; +}; + +/** + * struct flow_dissector_key_ipv6_addrs: + * @src: source ip address + * @dst: destination ip address + */ +struct flow_dissector_key_ipv6_addrs { + /* (src,dst) must be grouped, in the same way than in IP header */ + struct in6_addr src; + struct in6_addr dst; +}; + +/** + * struct flow_dissector_key_eth_addrs: + * @src: source Ethernet address + * @dst: destination Ethernet address + */ +struct flow_dissector_key_eth_addrs { + /* (dst,src) must be grouped, in the same way than in ETH header */ + unsigned char dst[ETH_ALEN]; + unsigned char src[ETH_ALEN]; +}; + +enum flow_dissector_key_id { + FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ + FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */ + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ + FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ + FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ + FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ + + FLOW_DISSECTOR_KEY_MAX, +}; + +struct flow_dissector_key { + enum flow_dissector_key_id key_id; + size_t offset; /* offset of struct flow_dissector_key_* + in target the struct */ +}; + +struct flow_dissector { + unsigned int used_keys; /* each bit repesents presence of one key id */ + unsigned short int offset[FLOW_DISSECTOR_KEY_MAX]; +}; + +void skb_flow_dissector_init(struct flow_dissector *flow_dissector, + const struct flow_dissector_key *key, + unsigned int key_count); + +bool __skb_flow_dissect(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, + void *data, __be16 proto, int nhoff, int hlen); + +static inline bool skb_flow_dissect(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container) +{ + return __skb_flow_dissect(skb, flow_dissector, target_container, + NULL, 0, 0, 0); +} + +struct flow_keys { + struct flow_dissector_key_addrs addrs; + struct flow_dissector_key_ports ports; + struct flow_dissector_key_basic basic; +}; + +extern struct flow_dissector flow_keys_dissector; +extern struct flow_dissector flow_keys_buf_dissector; + +static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, + struct flow_keys *flow) +{ + memset(flow, 0, sizeof(*flow)); + return __skb_flow_dissect(skb, &flow_keys_dissector, flow, + NULL, 0, 0, 0); +} + +static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow, + void *data, __be16 proto, + int nhoff, int hlen) +{ + memset(flow, 0, sizeof(*flow)); + return __skb_flow_dissect(NULL, &flow_keys_buf_dissector, flow, + data, proto, nhoff, hlen); +} + +__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + void *data, int hlen_proto); + +static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, + int thoff, u8 ip_proto) +{ + return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); +} + +u32 flow_hash_from_keys(struct flow_keys *keys); +void __skb_get_hash(struct sk_buff *skb); +u32 skb_get_poff(const struct sk_buff *skb); +u32 __skb_get_poff(const struct sk_buff *skb, void *data, + const struct flow_keys *keys, int hlen); + +/* struct flow_keys_digest: + * + * This structure is used to hold a digest of the full flow keys. This is a + * larger "hash" of a flow to allow definitively matching specific flows where + * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so + * that it can by used in CB of skb (see sch_choke for an example). + */ +#define FLOW_KEYS_DIGEST_LEN 16 +struct flow_keys_digest { + u8 data[FLOW_KEYS_DIGEST_LEN]; +}; + +void make_flow_keys_digest(struct flow_keys_digest *digest, + const struct flow_keys *flow); + +#endif diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h deleted file mode 100644 index 6d6ef626811a..000000000000 --- a/include/net/flow_keys.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _NET_FLOW_KEYS_H -#define _NET_FLOW_KEYS_H - -/* struct flow_keys: - * @src: source ip address in case of IPv4 - * For IPv6 it contains 32bit hash of src address - * @dst: destination ip address in case of IPv4 - * For IPv6 it contains 32bit hash of dst address - * @ports: port numbers of Transport header - * port16[0]: src port number - * port16[1]: dst port number - * @thoff: Transport header offset - * @n_proto: Network header protocol (eg. IPv4/IPv6) - * @ip_proto: Transport header protocol (eg. TCP/UDP) - * All the members, except thoff, are in network byte order. - */ -struct flow_keys { - /* (src,dst) must be grouped, in the same way than in IP header */ - __be32 src; - __be32 dst; - union { - __be32 ports; - __be16 port16[2]; - }; - u16 thoff; - __be16 n_proto; - u8 ip_proto; -}; - -bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, - void *data, __be16 proto, int nhoff, int hlen); -static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) -{ - return __skb_flow_dissect(skb, flow, NULL, 0, 0, 0); -} -__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, - void *data, int hlen_proto); -static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) -{ - return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); -} -u32 flow_hash_from_keys(struct flow_keys *keys); -unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, - __be16 protocol); - -/* struct flow_keys_digest: - * - * This structure is used to hold a digest of the full flow keys. This is a - * larger "hash" of a flow to allow definitively matching specific flows where - * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so - * that it can by used in CB of skb (see sch_choke for an example). - */ -#define FLOW_KEYS_DIGEST_LEN 16 -struct flow_keys_digest { - u8 data[FLOW_KEYS_DIGEST_LEN]; -}; - -void make_flow_keys_digest(struct flow_keys_digest *digest, - const struct flow_keys *flow); - -#endif diff --git a/include/net/ip.h b/include/net/ip.h index d14af7edd197..0ed6d768e606 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -31,7 +31,7 @@ #include <net/route.h> #include <net/snmp.h> #include <net/flow.h> -#include <net/flow_keys.h> +#include <net/flow_dissector.h> struct sock; @@ -360,10 +360,10 @@ static inline void inet_set_txhash(struct sock *sk) struct inet_sock *inet = inet_sk(sk); struct flow_keys keys; - keys.src = inet->inet_saddr; - keys.dst = inet->inet_daddr; - keys.port16[0] = inet->inet_sport; - keys.port16[1] = inet->inet_dport; + keys.addrs.src = inet->inet_saddr; + keys.addrs.dst = inet->inet_daddr; + keys.ports.src = inet->inet_sport; + keys.ports.dst = inet->inet_dport; sk->sk_txhash = flow_hash_from_keys(&keys); } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 53d25ef1699a..aab8190d16e8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -19,7 +19,7 @@ #include <net/if_inet6.h> #include <net/ndisc.h> #include <net/flow.h> -#include <net/flow_keys.h> +#include <net/flow_dissector.h> #include <net/snmp.h> #define SIN6_LEN_RFC2133 24 @@ -698,10 +698,10 @@ static inline void ip6_set_txhash(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct flow_keys keys; - keys.src = (__force __be32)ipv6_addr_hash(&np->saddr); - keys.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); - keys.port16[0] = inet->inet_sport; - keys.port16[1] = inet->inet_dport; + keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr); + keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); + keys.ports.src = inet->inet_sport; + keys.ports.dst = inet->inet_dport; sk->sk_txhash = flow_hash_from_keys(&keys); } diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index ffc112c8e1c2..39fb53d67b11 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -409,6 +409,36 @@ enum { #define TCA_BPF_MAX (__TCA_BPF_MAX - 1) +/* Flower classifier */ + +enum { + TCA_FLOWER_UNSPEC, + TCA_FLOWER_CLASSID, + TCA_FLOWER_INDEV, + TCA_FLOWER_ACT, + TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_TYPE, /* be16 */ + TCA_FLOWER_KEY_IP_PROTO, /* u8 */ + TCA_FLOWER_KEY_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC, /* be16 */ + TCA_FLOWER_KEY_TCP_DST, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC, /* be16 */ + TCA_FLOWER_KEY_UDP_DST, /* be16 */ + __TCA_FLOWER_MAX, +}; + +#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) + /* Extended Matches */ struct tcf_ematch_tree_hdr { diff --git a/net/core/dev.c b/net/core/dev.c index 90a568a150b4..af549062ae8e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2350,6 +2350,34 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); +/* + * Returns a Tx hash based on the given packet descriptor a Tx queues' number + * to be used as a distribution range. + */ +u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, + unsigned int num_tx_queues) +{ + u32 hash; + u16 qoffset = 0; + u16 qcount = num_tx_queues; + + if (skb_rx_queue_recorded(skb)) { + hash = skb_get_rx_queue(skb); + while (unlikely(hash >= num_tx_queues)) + hash -= num_tx_queues; + return hash; + } + + if (dev->num_tc) { + u8 tc = netdev_get_prio_tc_map(dev, skb->priority); + qoffset = dev->tc_to_txq[tc].offset; + qcount = dev->tc_to_txq[tc].count; + } + + return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; +} +EXPORT_SYMBOL(__skb_tx_hash); + static void skb_warn_bad_offload(const struct sk_buff *skb) { static const netdev_features_t null_features = 0; @@ -2908,6 +2936,84 @@ int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(dev_loopback_xmit); +static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) +{ +#ifdef CONFIG_XPS + struct xps_dev_maps *dev_maps; + struct xps_map *map; + int queue_index = -1; + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { + map = rcu_dereference( + dev_maps->cpu_map[skb->sender_cpu - 1]); + if (map) { + if (map->len == 1) + queue_index = map->queues[0]; + else + queue_index = map->queues[reciprocal_scale(skb_get_hash(skb), + map->len)]; + if (unlikely(queue_index >= dev->real_num_tx_queues)) + queue_index = -1; + } + } + rcu_read_unlock(); + + return queue_index; +#else + return -1; +#endif +} + +static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + int queue_index = sk_tx_queue_get(sk); + + if (queue_index < 0 || skb->ooo_okay || + queue_index >= dev->real_num_tx_queues) { + int new_index = get_xps_queue(dev, skb); + if (new_index < 0) + new_index = skb_tx_hash(dev, skb); + + if (queue_index != new_index && sk && + rcu_access_pointer(sk->sk_dst_cache)) + sk_tx_queue_set(sk, new_index); + + queue_index = new_index; + } + + return queue_index; +} + +struct netdev_queue *netdev_pick_tx(struct net_device *dev, + struct sk_buff *skb, + void *accel_priv) +{ + int queue_index = 0; + +#ifdef CONFIG_XPS + if (skb->sender_cpu == 0) + skb->sender_cpu = raw_smp_processor_id() + 1; +#endif + + if (dev->real_num_tx_queues != 1) { + const struct net_device_ops *ops = dev->netdev_ops; + if (ops->ndo_select_queue) + queue_index = ops->ndo_select_queue(dev, skb, accel_priv, + __netdev_pick_tx); + else + queue_index = __netdev_pick_tx(dev, skb); + + if (!accel_priv) + queue_index = netdev_cap_txqueue(dev, queue_index); + } + + skb_set_queue_mapping(skb, queue_index); + return netdev_get_tx_queue(dev, queue_index); +} + /** * __dev_queue_xmit - transmit a buffer * @skb: buffer to transmit diff --git a/net/core/filter.c b/net/core/filter.c index a831f193e2c7..6805717be614 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -36,6 +36,7 @@ #include <net/netlink.h> #include <linux/skbuff.h> #include <net/sock.h> +#include <net/flow_dissector.h> #include <linux/errno.h> #include <linux/timer.h> #include <asm/uaccess.h> diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d3acc4dff4ae..204d09c42510 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1,3 +1,4 @@ +#include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/export.h> #include <linux/ip.h> @@ -12,19 +13,57 @@ #include <linux/if_tunnel.h> #include <linux/if_pppox.h> #include <linux/ppp_defs.h> -#include <net/flow_keys.h> +#include <linux/stddef.h> +#include <linux/if_ether.h> +#include <net/flow_dissector.h> #include <scsi/fc/fc_fcoe.h> -/* copy saddr & daddr, possibly using 64bit load/store - * Equivalent to : flow->src = iph->saddr; - * flow->dst = iph->daddr; - */ -static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph) +static bool skb_flow_dissector_uses_key(struct flow_dissector *flow_dissector, + enum flow_dissector_key_id key_id) { - BUILD_BUG_ON(offsetof(typeof(*flow), dst) != - offsetof(typeof(*flow), src) + sizeof(flow->src)); - memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst)); + return flow_dissector->used_keys & (1 << key_id); +} + +static void skb_flow_dissector_set_key(struct flow_dissector *flow_dissector, + enum flow_dissector_key_id key_id) +{ + flow_dissector->used_keys |= (1 << key_id); +} + +static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector, + enum flow_dissector_key_id key_id, + void *target_container) +{ + return ((char *) target_container) + flow_dissector->offset[key_id]; +} + +void skb_flow_dissector_init(struct flow_dissector *flow_dissector, + const struct flow_dissector_key *key, + unsigned int key_count) +{ + unsigned int i; + + memset(flow_dissector, 0, sizeof(*flow_dissector)); + + for (i = 0; i < key_count; i++, key++) { + /* User should make sure that every key target offset is withing + * boundaries of unsigned short. + */ + BUG_ON(key->offset > USHRT_MAX); + BUG_ON(skb_flow_dissector_uses_key(flow_dissector, + key->key_id)); + + skb_flow_dissector_set_key(flow_dissector, key->key_id); + flow_dissector->offset[key->key_id] = key->offset; + } + + /* Ensure that the dissector always includes basic key. That way + * we are able to avoid handling lack of it in fast path. + */ + BUG_ON(!skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_BASIC)); } +EXPORT_SYMBOL(skb_flow_dissector_init); /** * __skb_flow_get_ports - extract the upper layer ports and return them @@ -63,17 +102,27 @@ EXPORT_SYMBOL(__skb_flow_get_ports); /** * __skb_flow_dissect - extract the flow_keys struct and return it * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified + * @flow_dissector: list of keys to dissect + * @target_container: target structure to put dissected values into * @data: raw buffer pointer to the packet, if NULL use skb->data * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb) * @hlen: packet header length, if @data is NULL use skb_headlen(skb) * - * The function will try to retrieve the struct flow_keys from either the skbuff - * or a raw buffer specified by the rest parameters + * The function will try to retrieve individual keys into target specified + * by flow_dissector from either the skbuff or a raw buffer specified by the + * rest parameters. + * + * Caller must take care of zeroing target container memory. */ -bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, +bool __skb_flow_dissect(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, __be16 proto, int nhoff, int hlen) { + struct flow_dissector_key_basic *key_basic; + struct flow_dissector_key_addrs *key_addrs; + struct flow_dissector_key_ports *key_ports; u8 ip_proto; if (!data) { @@ -83,7 +132,23 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, hlen = skb_headlen(skb); } - memset(flow, 0, sizeof(*flow)); + /* It is ensured by skb_flow_dissector_init() that basic key will + * be always present. + */ + key_basic = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_BASIC, + target_container); + + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct ethhdr *eth = eth_hdr(skb); + struct flow_dissector_key_eth_addrs *key_eth_addrs; + + key_eth_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + target_container); + memcpy(key_eth_addrs, ð->h_dest, sizeof(*key_eth_addrs)); + } again: switch (proto) { @@ -100,14 +165,13 @@ ip: if (ip_is_fragment(iph)) ip_proto = 0; - /* skip the address processing if skb is NULL. The assumption - * here is that if there is no skb we are not looking for flow - * info but lengths and protocols. - */ - if (!skb) + if (!skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS)) break; - - iph_to_flow_copy_addrs(flow, iph); + key_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + target_container); + memcpy(key_addrs, &iph->saddr, sizeof(*key_addrs)); break; } case htons(ETH_P_IPV6): { @@ -123,23 +187,47 @@ ipv6: ip_proto = iph->nexthdr; nhoff += sizeof(struct ipv6hdr); - /* see comment above in IPv4 section */ - if (!skb) - break; + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) { + key_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, + target_container); + + key_addrs->src = (__force __be32)ipv6_addr_hash(&iph->saddr); + key_addrs->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); + goto flow_label; + } + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { + struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs; - flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); - flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); + key_ipv6_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + target_container); + memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs)); + goto flow_label; + } + break; +flow_label: flow_label = ip6_flowlabel(iph); if (flow_label) { /* Awesome, IPv6 packet has a flow label so we can * use that to represent the ports without any * further dissection. */ - flow->n_proto = proto; - flow->ip_proto = ip_proto; - flow->ports = flow_label; - flow->thoff = (u16)nhoff; + + key_basic->n_proto = proto; + key_basic->ip_proto = ip_proto; + key_basic->thoff = (u16)nhoff; + + if (!skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS)) + break; + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS, + target_container); + key_ports->ports = flow_label; return true; } @@ -186,14 +274,22 @@ ipv6: hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return false; - flow->src = hdr->srcnode; - flow->dst = 0; - flow->n_proto = proto; - flow->thoff = (u16)nhoff; + key_basic->n_proto = proto; + key_basic->thoff = (u16)nhoff; + + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) { + return true; + key_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, + target_container); + key_addrs->src = hdr->srcnode; + key_addrs->dst = 0; + } return true; } case htons(ETH_P_FCOE): - flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN); + key_basic->thoff = (u16)(nhoff + FCOE_HEADER_LEN); /* fall through */ default: return false; @@ -248,14 +344,24 @@ ipv6: break; } - flow->n_proto = proto; - flow->ip_proto = ip_proto; - flow->thoff = (u16) nhoff; - - /* unless skb is set we don't need to record port info */ - if (skb) - flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, - data, hlen); + /* It is ensured by skb_flow_dissector_init() that basic key will + * be always present. + */ + key_basic = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_BASIC, + target_container); + key_basic->n_proto = proto; + key_basic->ip_proto = ip_proto; + key_basic->thoff = (u16) nhoff; + + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS)) { + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS, + target_container); + key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, + data, hlen); + } return true; } @@ -277,16 +383,16 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) u32 hash; /* get a consistent hash (same value on both flow directions) */ - if (((__force u32)keys->dst < (__force u32)keys->src) || - (((__force u32)keys->dst == (__force u32)keys->src) && - ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) { - swap(keys->dst, keys->src); - swap(keys->port16[0], keys->port16[1]); + if (((__force u32)keys->addrs.dst < (__force u32)keys->addrs.src) || + (((__force u32)keys->addrs.dst == (__force u32)keys->addrs.src) && + ((__force u16)keys->ports.dst < (__force u16)keys->ports.src))) { + swap(keys->addrs.dst, keys->addrs.src); + swap(keys->ports.src, keys->ports.dst); } - hash = __flow_hash_3words((__force u32)keys->dst, - (__force u32)keys->src, - (__force u32)keys->ports, + hash = __flow_hash_3words((__force u32)keys->addrs.dst, + (__force u32)keys->addrs.src, + (__force u32)keys->ports.ports, keyval); if (!hash) hash = 1; @@ -304,7 +410,7 @@ EXPORT_SYMBOL(flow_hash_from_keys); static inline u32 ___skb_get_hash(const struct sk_buff *skb, struct flow_keys *keys, u32 keyval) { - if (!skb_flow_dissect(skb, keys)) + if (!skb_flow_dissect_flow_keys(skb, keys)) return 0; return __flow_hash_from_keys(keys, keyval); @@ -329,16 +435,19 @@ void make_flow_keys_digest(struct flow_keys_digest *digest, memset(digest, 0, sizeof(*digest)); - data->n_proto = flow->n_proto; - data->ip_proto = flow->ip_proto; - data->ports = flow->ports; - data->src = flow->src; - data->dst = flow->dst; + data->n_proto = flow->basic.n_proto; + data->ip_proto = flow->basic.ip_proto; + data->ports = flow->ports.ports; + data->src = flow->addrs.src; + data->dst = flow->addrs.dst; } EXPORT_SYMBOL(make_flow_keys_digest); -/* - * __skb_get_hash: calculate a flow hash based on src/dst addresses +/** + * __skb_get_hash: calculate a flow hash + * @skb: sk_buff to calculate flow hash from + * + * This function calculates a flow hash based on src/dst addresses * and src/dst port numbers. Sets hash in skb to non-zero hash value * on success, zero indicates no valid hash. Also, sets l4_hash in skb * if hash is a canonical 4-tuple hash over transport ports. @@ -353,12 +462,9 @@ void __skb_get_hash(struct sk_buff *skb) hash = ___skb_get_hash(skb, &keys, hashrnd); if (!hash) return; - - if (keys.ports) + if (keys.ports.ports) skb->l4_hash = 1; - skb->sw_hash = 1; - skb->hash = hash; } EXPORT_SYMBOL(__skb_get_hash); @@ -371,40 +477,12 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) } EXPORT_SYMBOL(skb_get_hash_perturb); -/* - * Returns a Tx hash based on the given packet descriptor a Tx queues' number - * to be used as a distribution range. - */ -u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, - unsigned int num_tx_queues) -{ - u32 hash; - u16 qoffset = 0; - u16 qcount = num_tx_queues; - - if (skb_rx_queue_recorded(skb)) { - hash = skb_get_rx_queue(skb); - while (unlikely(hash >= num_tx_queues)) - hash -= num_tx_queues; - return hash; - } - - if (dev->num_tc) { - u8 tc = netdev_get_prio_tc_map(dev, skb->priority); - qoffset = dev->tc_to_txq[tc].offset; - qcount = dev->tc_to_txq[tc].count; - } - - return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; -} -EXPORT_SYMBOL(__skb_tx_hash); - u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen) { - u32 poff = keys->thoff; + u32 poff = keys->basic.thoff; - switch (keys->ip_proto) { + switch (keys->basic.ip_proto) { case IPPROTO_TCP: { /* access doff as u8 to avoid unaligned access */ const u8 *doff; @@ -445,8 +523,12 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data, return poff; } -/* skb_get_poff() returns the offset to the payload as far as it could - * be dissected. The main user is currently BPF, so that we can dynamically +/** + * skb_get_poff - get the offset to the payload + * @skb: sk_buff to get the payload offset from + * + * The function will get the offset to the payload as far as it could + * be dissected. The main user is currently BPF, so that we can dynamically * truncate packets without needing to push actual payload to the user * space and can analyze headers only, instead. */ @@ -454,86 +536,52 @@ u32 skb_get_poff(const struct sk_buff *skb) { struct flow_keys keys; - if (!skb_flow_dissect(skb, &keys)) + if (!skb_flow_dissect_flow_keys(skb, &keys)) return 0; return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); } -static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) -{ -#ifdef CONFIG_XPS - struct xps_dev_maps *dev_maps; - struct xps_map *map; - int queue_index = -1; - - rcu_read_lock(); - dev_maps = rcu_dereference(dev->xps_maps); - if (dev_maps) { - map = rcu_dereference( - dev_maps->cpu_map[skb->sender_cpu - 1]); - if (map) { - if (map->len == 1) - queue_index = map->queues[0]; - else - queue_index = map->queues[reciprocal_scale(skb_get_hash(skb), - map->len)]; - if (unlikely(queue_index >= dev->real_num_tx_queues)) - queue_index = -1; - } - } - rcu_read_unlock(); - - return queue_index; -#else - return -1; -#endif -} - -static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - int queue_index = sk_tx_queue_get(sk); - - if (queue_index < 0 || skb->ooo_okay || - queue_index >= dev->real_num_tx_queues) { - int new_index = get_xps_queue(dev, skb); - if (new_index < 0) - new_index = skb_tx_hash(dev, skb); +static const struct flow_dissector_key flow_keys_dissector_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, + .offset = offsetof(struct flow_keys, addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, + .offset = offsetof(struct flow_keys, addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_PORTS, + .offset = offsetof(struct flow_keys, ports), + }, +}; - if (queue_index != new_index && sk && - rcu_access_pointer(sk->sk_dst_cache)) - sk_tx_queue_set(sk, new_index); +static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, +}; - queue_index = new_index; - } +struct flow_dissector flow_keys_dissector __read_mostly; +EXPORT_SYMBOL(flow_keys_dissector); - return queue_index; -} +struct flow_dissector flow_keys_buf_dissector __read_mostly; -struct netdev_queue *netdev_pick_tx(struct net_device *dev, - struct sk_buff *skb, - void *accel_priv) +static int __init init_default_flow_dissectors(void) { - int queue_index = 0; - -#ifdef CONFIG_XPS - if (skb->sender_cpu == 0) - skb->sender_cpu = raw_smp_processor_id() + 1; -#endif - - if (dev->real_num_tx_queues != 1) { - const struct net_device_ops *ops = dev->netdev_ops; - if (ops->ndo_select_queue) - queue_index = ops->ndo_select_queue(dev, skb, accel_priv, - __netdev_pick_tx); - else - queue_index = __netdev_pick_tx(dev, skb); - - if (!accel_priv) - queue_index = netdev_cap_txqueue(dev, queue_index); - } - - skb_set_queue_mapping(skb, queue_index); - return netdev_get_tx_queue(dev, queue_index); + skb_flow_dissector_init(&flow_keys_dissector, + flow_keys_dissector_keys, + ARRAY_SIZE(flow_keys_dissector_keys)); + skb_flow_dissector_init(&flow_keys_buf_dissector, + flow_keys_buf_dissector_keys, + ARRAY_SIZE(flow_keys_buf_dissector_keys)); + return 0; } + +late_initcall_sync(init_default_flow_dissectors); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 9045e2a1108f..c3325bd2f3fb 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -58,6 +58,7 @@ #include <net/ipv6.h> #include <net/ip.h> #include <net/dsa.h> +#include <net/flow_dissector.h> #include <linux/uaccess.h> __setup("ether=", netdev_boot_setup); @@ -130,9 +131,9 @@ u32 eth_get_headlen(void *data, unsigned int len) return len; /* parse any remaining L2/L3 headers, check for L4 */ - if (!__skb_flow_dissect(NULL, &keys, data, - eth->h_proto, sizeof(*eth), len)) - return max_t(u32, keys.thoff, sizeof(*eth)); + if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto, + sizeof(*eth), len)) + return max_t(u32, keys.basic.thoff, sizeof(*eth)); /* parse for any L4 headers */ return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 2274e723a3df..5fd1c2f487d2 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -477,6 +477,16 @@ config NET_CLS_BPF To compile this code as a module, choose M here: the module will be called cls_bpf. +config NET_CLS_FLOWER + tristate "Flower classifier" + select NET_CLS + ---help--- + If you say Y here, you will be able to classify packets based on + a configurable combination of packet keys and masks. + + To compile this code as a module, choose M here: the module will + be called cls_flower. + config NET_EMATCH bool "Extended Matches" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index 7ca7f4c1b8c2..690c1689e090 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o obj-$(CONFIG_NET_CLS_BPF) += cls_bpf.o +obj-$(CONFIG_NET_CLS_FLOWER) += cls_flower.o obj-$(CONFIG_NET_EMATCH) += ematch.o obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index a620c4e288a5..b4359924846c 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -26,7 +26,7 @@ #include <net/pkt_cls.h> #include <net/ip.h> #include <net/route.h> -#include <net/flow_keys.h> +#include <net/flow_dissector.h> #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include <net/netfilter/nf_conntrack.h> @@ -68,35 +68,35 @@ static inline u32 addr_fold(void *addr) static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->src) - return ntohl(flow->src); + if (flow->addrs.src) + return ntohl(flow->addrs.src); return addr_fold(skb->sk); } static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->dst) - return ntohl(flow->dst); + if (flow->addrs.dst) + return ntohl(flow->addrs.dst); return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) { - return flow->ip_proto; + return flow->basic.ip_proto; } static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->ports) - return ntohs(flow->port16[0]); + if (flow->ports.ports) + return ntohs(flow->ports.src); return addr_fold(skb->sk); } static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->ports) - return ntohs(flow->port16[1]); + if (flow->ports.ports) + return ntohs(flow->ports.dst); return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } @@ -295,7 +295,7 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, keymask = f->keymask; if (keymask & FLOW_KEYS_NEEDED) - skb_flow_dissect(skb, &flow_keys); + skb_flow_dissect_flow_keys(skb, &flow_keys); for (n = 0; n < f->nkeys; n++) { key = ffs(keymask) - 1; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c new file mode 100644 index 000000000000..9bc654c764cd --- /dev/null +++ b/net/sched/cls_flower.c @@ -0,0 +1,688 @@ +/* + * net/sched/cls_flower.c Flower classifier + * + * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/rhashtable.h> + +#include <linux/if_ether.h> +#include <linux/in6.h> +#include <linux/ip.h> + +#include <net/sch_generic.h> +#include <net/pkt_cls.h> +#include <net/ip.h> +#include <net/flow_dissector.h> + +struct fl_flow_key { + int indev_ifindex; + struct flow_dissector_key_basic basic; + struct flow_dissector_key_eth_addrs eth; + union { + struct flow_dissector_key_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; + }; + struct flow_dissector_key_ports tp; +} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ + +struct fl_flow_mask_range { + unsigned short int start; + unsigned short int end; +}; + +struct fl_flow_mask { + struct fl_flow_key key; + struct fl_flow_mask_range range; + struct rcu_head rcu; +}; + +struct cls_fl_head { + struct rhashtable ht; + struct fl_flow_mask mask; + struct flow_dissector dissector; + u32 hgen; + bool mask_assigned; + struct list_head filters; + struct rhashtable_params ht_params; + struct rcu_head rcu; +}; + +struct cls_fl_filter { + struct rhash_head ht_node; + struct fl_flow_key mkey; + struct tcf_exts exts; + struct tcf_result res; + struct fl_flow_key key; + struct list_head list; + u32 handle; + struct rcu_head rcu; +}; + +static unsigned short int fl_mask_range(const struct fl_flow_mask *mask) +{ + return mask->range.end - mask->range.start; +} + +static void fl_mask_update_range(struct fl_flow_mask *mask) +{ + const u8 *bytes = (const u8 *) &mask->key; + size_t size = sizeof(mask->key); + size_t i, first = 0, last = size - 1; + + for (i = 0; i < sizeof(mask->key); i++) { + if (bytes[i]) { + if (!first && i) + first = i; + last = i; + } + } + mask->range.start = rounddown(first, sizeof(long)); + mask->range.end = roundup(last + 1, sizeof(long)); +} + +static void *fl_key_get_start(struct fl_flow_key *key, + const struct fl_flow_mask *mask) +{ + return (u8 *) key + mask->range.start; +} + +static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key, + struct fl_flow_mask *mask) +{ + const long *lkey = fl_key_get_start(key, mask); + const long *lmask = fl_key_get_start(&mask->key, mask); + long *lmkey = fl_key_get_start(mkey, mask); + int i; + + for (i = 0; i < fl_mask_range(mask); i += sizeof(long)) + *lmkey++ = *lkey++ & *lmask++; +} + +static void fl_clear_masked_range(struct fl_flow_key *key, + struct fl_flow_mask *mask) +{ + memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask)); +} + +static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) +{ + struct cls_fl_head *head = rcu_dereference_bh(tp->root); + struct cls_fl_filter *f; + struct fl_flow_key skb_key; + struct fl_flow_key skb_mkey; + + fl_clear_masked_range(&skb_key, &head->mask); + skb_key.indev_ifindex = skb->skb_iif; + /* skb_flow_dissect() does not set n_proto in case an unknown protocol, + * so do it rather here. + */ + skb_key.basic.n_proto = skb->protocol; + skb_flow_dissect(skb, &head->dissector, &skb_key); + + fl_set_masked_key(&skb_mkey, &skb_key, &head->mask); + + f = rhashtable_lookup_fast(&head->ht, + fl_key_get_start(&skb_mkey, &head->mask), + head->ht_params); + if (f) { + *res = f->res; + return tcf_exts_exec(skb, &f->exts, res); + } + return -1; +} + +static int fl_init(struct tcf_proto *tp) +{ + struct cls_fl_head *head; + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (!head) + return -ENOBUFS; + + INIT_LIST_HEAD_RCU(&head->filters); + rcu_assign_pointer(tp->root, head); + + return 0; +} + +static void fl_destroy_filter(struct rcu_head *head) +{ + struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); + + tcf_exts_destroy(&f->exts); + kfree(f); +} + +static bool fl_destroy(struct tcf_proto *tp, bool force) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f, *next; + + if (!force && !list_empty(&head->filters)) + return false; + + list_for_each_entry_safe(f, next, &head->filters, list) { + list_del_rcu(&f->list); + call_rcu(&f->rcu, fl_destroy_filter); + } + RCU_INIT_POINTER(tp->root, NULL); + if (head->mask_assigned) + rhashtable_destroy(&head->ht); + kfree_rcu(head, rcu); + return true; +} + +static unsigned long fl_get(struct tcf_proto *tp, u32 handle) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f; + + list_for_each_entry(f, &head->filters, list) + if (f->handle == handle) + return (unsigned long) f; + return 0; +} + +static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { + [TCA_FLOWER_UNSPEC] = { .type = NLA_UNSPEC }, + [TCA_FLOWER_CLASSID] = { .type = NLA_U32 }, + [TCA_FLOWER_INDEV] = { .type = NLA_STRING, + .len = IFNAMSIZ }, + [TCA_FLOWER_KEY_ETH_DST] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_DST_MASK] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_SRC] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_SRC_MASK] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_IP_PROTO] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV4_DST] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 }, +}; + +static void fl_set_key_val(struct nlattr **tb, + void *val, int val_type, + void *mask, int mask_type, int len) +{ + if (!tb[val_type]) + return; + memcpy(val, nla_data(tb[val_type]), len); + if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type]) + memset(mask, 0xff, len); + else + memcpy(mask, nla_data(tb[mask_type]), len); +} + +static int fl_set_key(struct net *net, struct nlattr **tb, + struct fl_flow_key *key, struct fl_flow_key *mask) +{ + int err; + + if (tb[TCA_FLOWER_INDEV]) { + err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]); + if (err < 0) + return err; + key->indev_ifindex = err; + mask->indev_ifindex = 0xffffffff; + } + + fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST, + mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK, + sizeof(key->eth.dst)); + fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC, + mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, + sizeof(key->eth.src)); + fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE, + &mask->basic.n_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.n_proto)); + if (key->basic.n_proto == htons(ETH_P_IP) || + key->basic.n_proto == htons(ETH_P_IPV6)) { + fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, + &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.ip_proto)); + } + if (key->basic.n_proto == htons(ETH_P_IP)) { + fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC, + &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK, + sizeof(key->ipv4.src)); + fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST, + &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK, + sizeof(key->ipv4.dst)); + } else if (key->basic.n_proto == htons(ETH_P_IPV6)) { + fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC, + &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK, + sizeof(key->ipv6.src)); + fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST, + &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK, + sizeof(key->ipv6.dst)); + } + if (key->basic.ip_proto == IPPROTO_TCP) { + fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, + &mask->tp.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)); + fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)); + } else if (key->basic.ip_proto == IPPROTO_UDP) { + fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, + &mask->tp.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)); + fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)); + } + + return 0; +} + +static bool fl_mask_eq(struct fl_flow_mask *mask1, + struct fl_flow_mask *mask2) +{ + const long *lmask1 = fl_key_get_start(&mask1->key, mask1); + const long *lmask2 = fl_key_get_start(&mask2->key, mask2); + + return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) && + !memcmp(lmask1, lmask2, fl_mask_range(mask1)); +} + +static const struct rhashtable_params fl_ht_params = { + .key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */ + .head_offset = offsetof(struct cls_fl_filter, ht_node), + .automatic_shrinking = true, +}; + +static int fl_init_hashtable(struct cls_fl_head *head, + struct fl_flow_mask *mask) +{ + head->ht_params = fl_ht_params; + head->ht_params.key_len = fl_mask_range(mask); + head->ht_params.key_offset += mask->range.start; + + return rhashtable_init(&head->ht, &head->ht_params); +} + +#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member) +#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member)) +#define FL_KEY_MEMBER_END_OFFSET(member) \ + (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member)) + +#define FL_KEY_IN_RANGE(mask, member) \ + (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end && \ + FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start) + +#define FL_KEY_SET(keys, cnt, id, member) \ + do { \ + keys[cnt].key_id = id; \ + keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member); \ + cnt++; \ + } while(0); + +#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member) \ + do { \ + if (FL_KEY_IN_RANGE(mask, member)) \ + FL_KEY_SET(keys, cnt, id, member); \ + } while(0); + +static void fl_init_dissector(struct cls_fl_head *head, + struct fl_flow_mask *mask) +{ + struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX]; + size_t cnt = 0; + + FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_ETH_ADDRS, eth); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_PORTS, tp); + + skb_flow_dissector_init(&head->dissector, keys, cnt); +} + +static int fl_check_assign_mask(struct cls_fl_head *head, + struct fl_flow_mask *mask) +{ + int err; + + if (head->mask_assigned) { + if (!fl_mask_eq(&head->mask, mask)) + return -EINVAL; + else + return 0; + } + + /* Mask is not assigned yet. So assign it and init hashtable + * according to that. + */ + err = fl_init_hashtable(head, mask); + if (err) + return err; + memcpy(&head->mask, mask, sizeof(head->mask)); + head->mask_assigned = true; + + fl_init_dissector(head, mask); + + return 0; +} + +static int fl_set_parms(struct net *net, struct tcf_proto *tp, + struct cls_fl_filter *f, struct fl_flow_mask *mask, + unsigned long base, struct nlattr **tb, + struct nlattr *est, bool ovr) +{ + struct tcf_exts e; + int err; + + tcf_exts_init(&e, TCA_FLOWER_ACT, 0); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + return err; + + if (tb[TCA_FLOWER_CLASSID]) { + f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); + tcf_bind_filter(tp, &f->res, base); + } + + err = fl_set_key(net, tb, &f->key, &mask->key); + if (err) + goto errout; + + fl_mask_update_range(mask); + fl_set_masked_key(&f->mkey, &f->key, mask); + + tcf_exts_change(tp, &f->exts, &e); + + return 0; +errout: + tcf_exts_destroy(&e); + return err; +} + +static u32 fl_grab_new_handle(struct tcf_proto *tp, + struct cls_fl_head *head) +{ + unsigned int i = 0x80000000; + u32 handle; + + do { + if (++head->hgen == 0x7FFFFFFF) + head->hgen = 1; + } while (--i > 0 && fl_get(tp, head->hgen)); + + if (unlikely(i == 0)) { + pr_err("Insufficient number of handles\n"); + handle = 0; + } else { + handle = head->hgen; + } + + return handle; +} + +static int fl_change(struct net *net, struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, + u32 handle, struct nlattr **tca, + unsigned long *arg, bool ovr) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg; + struct cls_fl_filter *fnew; + struct nlattr *tb[TCA_FLOWER_MAX + 1]; + struct fl_flow_mask mask = {}; + int err; + + if (!tca[TCA_OPTIONS]) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy); + if (err < 0) + return err; + + if (fold && handle && fold->handle != handle) + return -EINVAL; + + fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); + if (!fnew) + return -ENOBUFS; + + tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0); + + if (!handle) { + handle = fl_grab_new_handle(tp, head); + if (!handle) { + err = -EINVAL; + goto errout; + } + } + fnew->handle = handle; + + err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); + if (err) + goto errout; + + err = fl_check_assign_mask(head, &mask); + if (err) + goto errout; + + err = rhashtable_insert_fast(&head->ht, &fnew->ht_node, + head->ht_params); + if (err) + goto errout; + if (fold) + rhashtable_remove_fast(&head->ht, &fold->ht_node, + head->ht_params); + + *arg = (unsigned long) fnew; + + if (fold) { + list_replace_rcu(&fnew->list, &fold->list); + tcf_unbind_filter(tp, &fold->res); + call_rcu(&fold->rcu, fl_destroy_filter); + } else { + list_add_tail_rcu(&fnew->list, &head->filters); + } + + return 0; + +errout: + kfree(fnew); + return err; +} + +static int fl_delete(struct tcf_proto *tp, unsigned long arg) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f = (struct cls_fl_filter *) arg; + + rhashtable_remove_fast(&head->ht, &f->ht_node, + head->ht_params); + list_del_rcu(&f->list); + tcf_unbind_filter(tp, &f->res); + call_rcu(&f->rcu, fl_destroy_filter); + return 0; +} + +static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f; + + list_for_each_entry_rcu(f, &head->filters, list) { + if (arg->count < arg->skip) + goto skip; + if (arg->fn(tp, (unsigned long) f, arg) < 0) { + arg->stop = 1; + break; + } +skip: + arg->count++; + } +} + +static int fl_dump_key_val(struct sk_buff *skb, + void *val, int val_type, + void *mask, int mask_type, int len) +{ + int err; + + if (!memchr_inv(mask, 0, len)) + return 0; + err = nla_put(skb, val_type, len, val); + if (err) + return err; + if (mask_type != TCA_FLOWER_UNSPEC) { + err = nla_put(skb, mask_type, len, mask); + if (err) + return err; + } + return 0; +} + +static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *t) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f = (struct cls_fl_filter *) fh; + struct nlattr *nest; + struct fl_flow_key *key, *mask; + + if (!f) + return skb->len; + + t->tcm_handle = f->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (!nest) + goto nla_put_failure; + + if (f->res.classid && + nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid)) + goto nla_put_failure; + + key = &f->key; + mask = &head->mask.key; + + if (mask->indev_ifindex) { + struct net_device *dev; + + dev = __dev_get_by_index(net, key->indev_ifindex); + if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name)) + goto nla_put_failure; + } + + if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST, + mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK, + sizeof(key->eth.dst)) || + fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC, + mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, + sizeof(key->eth.src)) || + fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE, + &mask->basic.n_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.n_proto))) + goto nla_put_failure; + if ((key->basic.n_proto == htons(ETH_P_IP) || + key->basic.n_proto == htons(ETH_P_IPV6)) && + fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, + &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.ip_proto))) + goto nla_put_failure; + + if (key->basic.n_proto == htons(ETH_P_IP) && + (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC, + &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK, + sizeof(key->ipv4.src)) || + fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST, + &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK, + sizeof(key->ipv4.dst)))) + goto nla_put_failure; + else if (key->basic.n_proto == htons(ETH_P_IPV6) && + (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC, + &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK, + sizeof(key->ipv6.src)) || + fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST, + &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK, + sizeof(key->ipv6.dst)))) + goto nla_put_failure; + + if (key->basic.ip_proto == IPPROTO_TCP && + (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, + &mask->tp.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)) || + fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)))) + goto nla_put_failure; + else if (key->basic.ip_proto == IPPROTO_UDP && + (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, + &mask->tp.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)) || + fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)))) + goto nla_put_failure; + + if (tcf_exts_dump(skb, &f->exts)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + + if (tcf_exts_dump_stats(skb, &f->exts) < 0) + goto nla_put_failure; + + return skb->len; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -1; +} + +static struct tcf_proto_ops cls_fl_ops __read_mostly = { + .kind = "flower", + .classify = fl_classify, + .init = fl_init, + .destroy = fl_destroy, + .get = fl_get, + .change = fl_change, + .delete = fl_delete, + .walk = fl_walk, + .dump = fl_dump, + .owner = THIS_MODULE, +}; + +static int __init cls_fl_init(void) +{ + return register_tcf_proto_ops(&cls_fl_ops); +} + +static void __exit cls_fl_exit(void) +{ + unregister_tcf_proto_ops(&cls_fl_ops); +} + +module_init(cls_fl_init); +module_exit(cls_fl_exit); + +MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>"); +MODULE_DESCRIPTION("Flower classifier"); +MODULE_LICENSE("GPL v2"); diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index dfe3da75594c..93d5742dc7e0 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -18,7 +18,7 @@ #include <net/pkt_sched.h> #include <net/inet_ecn.h> #include <net/red.h> -#include <net/flow_keys.h> +#include <net/flow_dissector.h> /* CHOKe stateless AQM for fair bandwidth allocation @@ -170,13 +170,13 @@ static bool choke_match_flow(struct sk_buff *skb1, if (!choke_skb_cb(skb1)->keys_valid) { choke_skb_cb(skb1)->keys_valid = 1; - skb_flow_dissect(skb1, &temp); + skb_flow_dissect_flow_keys(skb1, &temp); make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp); } if (!choke_skb_cb(skb2)->keys_valid) { choke_skb_cb(skb2)->keys_valid = 1; - skb_flow_dissect(skb2, &temp); + skb_flow_dissect_flow_keys(skb2, &temp); make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp); } |