diff options
| author | David S. Miller <davem@davemloft.net> | 2015-12-15 23:25:20 -0500 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2015-12-15 23:25:20 -0500 |
| commit | fec65bd4e855c417b8408ed87824f9234fd0d795 (patch) | |
| tree | 207ff2184419083aa1a70fc37ba8a5ce54135ae9 /include | |
| parent | 3026043dc4035a6f6dff68065c9df245ead06344 (diff) | |
| parent | 7f00feaf107645d95a6d87e99b4d141ac0a08efd (diff) | |
Merge branch 'ila-early-demux'
Tom Herbert says:
====================
ila: Optimization to preserve value of early demux
In the current implementation of ILA, LWT is used to perform
translation on both the input and output paths. This is functional,
however there is a big performance hit in the receive path. Early
demux occurs before the routing lookup (a hit actually obviates the
route lookup). Therefore the stack currently performs early
demux before translation so that a local connection with ILA
addresses is never matched. Note that this issue is not just
with ILA, but pretty much any translated or encapsulated packet
handled by LWT would miss the opportunity for early demux. Solving
the general problem seems non trivial since we would need to move
the route lookup before early demx thereby mitigating the value.
This patch set addresses the issue for ILA by adding a fast locator
lookup that occurs before early demux. This done by hooking in to
NF_INET_PRE_ROUTING
For the backend we implement an rhashtable that contains identifier
to locator to mappings. The table also allows more specific matches
that include original locator and interface.
This patch set:
- Add an rhashtable function to atomically replace and element.
This is useful to implement sub-trees from a table entry
without needing to use a special anchor structure as the
table entry.
- Add a start callback for starting a netlink dump.
- Creates an ila directory under net/ipv6 and moves ila.c to it.
ila.c is split into ila_common.c and ila_lwt.c.
- Implement a table to do identifier->locator mapping. This is
an rhashtable (in ila_xlat.c).
- Configuration for the table with netlink.
- Add a hook into NF_INET_PRE_ROUTING to perform ILA translation
before early demux.
Changes in v2:
- Use iptables targets instead of a new xfrm function
Changes in v3:
- Add __rcu to next pointer in struct ila_map
Changes in v4:
- Use hook for NF_INET_PRE_ROUTING
Changed in v5:
- Register hooks per namespace using nf_register_net_hooks
- Only register hooks when first mapping is actually added
Changed in v6:
- Remove gfp argument in alloc_ila_locks, it is unnecessary
- Set registered_hooks properly when hooks are registered
Testing:
Running 200 netperf TCP_RR streams
No ILA, baseline
79.26% CPU utilization
1678282 tps
104/189/390 50/90/99% latencies
ILA before fix (LWT on both input and output)
81.91% CPU utilization
1464723 tps (-14.5% from baseline)
121/215/411 50/90/99% latencies
ILA after fix
80.62% CPU utilization
1622985 (-3.4% from baseline)
110/191/347 50/90/99% latencies
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/netlink.h | 2 | ||||
| -rw-r--r-- | include/linux/rhashtable.h | 82 | ||||
| -rw-r--r-- | include/net/genetlink.h | 2 | ||||
| -rw-r--r-- | include/net/ila.h | 18 | ||||
| -rw-r--r-- | include/uapi/linux/ila.h | 22 |
5 files changed, 126 insertions, 0 deletions
diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 639e9b8b0e4d..0b41959aab9f 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -131,6 +131,7 @@ netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask) struct netlink_callback { struct sk_buff *skb; const struct nlmsghdr *nlh; + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); @@ -153,6 +154,7 @@ struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags); struct netlink_dump_control { + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *skb, struct netlink_callback *); int (*done)(struct netlink_callback *); void *data; diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 843ceca9a21e..77deece15fb3 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -819,4 +819,86 @@ out: return err; } +/* Internal function, please use rhashtable_replace_fast() instead */ +static inline int __rhashtable_replace_fast( + struct rhashtable *ht, struct bucket_table *tbl, + struct rhash_head *obj_old, struct rhash_head *obj_new, + const struct rhashtable_params params) +{ + struct rhash_head __rcu **pprev; + struct rhash_head *he; + spinlock_t *lock; + unsigned int hash; + int err = -ENOENT; + + /* Minimally, the old and new objects must have same hash + * (which should mean identifiers are the same). + */ + hash = rht_head_hashfn(ht, tbl, obj_old, params); + if (hash != rht_head_hashfn(ht, tbl, obj_new, params)) + return -EINVAL; + + lock = rht_bucket_lock(tbl, hash); + + spin_lock_bh(lock); + + pprev = &tbl->buckets[hash]; + rht_for_each(he, tbl, hash) { + if (he != obj_old) { + pprev = &he->next; + continue; + } + + rcu_assign_pointer(obj_new->next, obj_old->next); + rcu_assign_pointer(*pprev, obj_new); + err = 0; + break; + } + + spin_unlock_bh(lock); + + return err; +} + +/** + * rhashtable_replace_fast - replace an object in hash table + * @ht: hash table + * @obj_old: pointer to hash head inside object being replaced + * @obj_new: pointer to hash head inside object which is new + * @params: hash table parameters + * + * Replacing an object doesn't affect the number of elements in the hash table + * or bucket, so we don't need to worry about shrinking or expanding the + * table here. + * + * Returns zero on success, -ENOENT if the entry could not be found, + * -EINVAL if hash is not the same for the old and new objects. + */ +static inline int rhashtable_replace_fast( + struct rhashtable *ht, struct rhash_head *obj_old, + struct rhash_head *obj_new, + const struct rhashtable_params params) +{ + struct bucket_table *tbl; + int err; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + + /* Because we have already taken (and released) the bucket + * lock in old_tbl, if we find that future_tbl is not yet + * visible then that guarantees the entry to still be in + * the old tbl if it exists. + */ + while ((err = __rhashtable_replace_fast(ht, tbl, obj_old, + obj_new, params)) && + (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) + ; + + rcu_read_unlock(); + + return err; +} + #endif /* _LINUX_RHASHTABLE_H */ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 1b6b6dcb018d..43c0e771f417 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -114,6 +114,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) * @flags: flags * @policy: attribute validation policy * @doit: standard command callback + * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps * @ops_list: operations list @@ -122,6 +123,7 @@ struct genl_ops { const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, struct genl_info *info); + int (*start)(struct netlink_callback *cb); int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); diff --git a/include/net/ila.h b/include/net/ila.h new file mode 100644 index 000000000000..9f4f43e94ae4 --- /dev/null +++ b/include/net/ila.h @@ -0,0 +1,18 @@ +/* + * ILA kernel interface + * + * Copyright (c) 2015 Tom Herbert <tom@herbertland.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + */ + +#ifndef _NET_ILA_H +#define _NET_ILA_H + +int ila_xlat_outgoing(struct sk_buff *skb); +int ila_xlat_incoming(struct sk_buff *skb); + +#endif /* _NET_ILA_H */ diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h index 7ed9e670814e..abde7bbd6f3b 100644 --- a/include/uapi/linux/ila.h +++ b/include/uapi/linux/ila.h @@ -3,13 +3,35 @@ #ifndef _UAPI_LINUX_ILA_H #define _UAPI_LINUX_ILA_H +/* NETLINK_GENERIC related info */ +#define ILA_GENL_NAME "ila" +#define ILA_GENL_VERSION 0x1 + enum { ILA_ATTR_UNSPEC, ILA_ATTR_LOCATOR, /* u64 */ + ILA_ATTR_IDENTIFIER, /* u64 */ + ILA_ATTR_LOCATOR_MATCH, /* u64 */ + ILA_ATTR_IFINDEX, /* s32 */ + ILA_ATTR_DIR, /* u32 */ __ILA_ATTR_MAX, }; #define ILA_ATTR_MAX (__ILA_ATTR_MAX - 1) +enum { + ILA_CMD_UNSPEC, + ILA_CMD_ADD, + ILA_CMD_DEL, + ILA_CMD_GET, + + __ILA_CMD_MAX, +}; + +#define ILA_CMD_MAX (__ILA_CMD_MAX - 1) + +#define ILA_DIR_IN (1 << 0) +#define ILA_DIR_OUT (1 << 1) + #endif /* _UAPI_LINUX_ILA_H */ |
