diff options
author | Eric Dumazet <edumazet@google.com> | 2014-06-02 05:26:03 -0700 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2014-08-14 08:42:35 +0800 |
commit | ad52eef552c7896ec6024ee72fc126167fe5c4e2 (patch) | |
tree | ad82cf940ab103a6b51260f681b22d21f6ecdb2c /include | |
parent | 0a9d91dca3b9f797f2fc615486c12afa59f19a3b (diff) |
inetpeer: get rid of ip_id_count
[ Upstream commit 73f156a6e8c1074ac6327e0abd1169e95eb66463 ]
Ideally, we would need to generate IP ID using a per destination IP
generator.
Linux kernels used the inet_peer cache for this purpose, but this had a huge
cost on servers disabling MTU discovery.
1) each inet_peer struct consumes 192 bytes
2) inetpeer cache uses a binary tree of inet_peer structs,
with a nominal size of ~66000 elements under load.
3) lookups in this tree are hitting a lot of cache lines, as tree depth
is about 20.
4) If a server deals with many TCP flows, we have a high probability of
not finding the inet_peer, allocating a fresh one, and inserting it in
the tree with the same initial ip_id_count (cf. secure_ip_id())
5) We garbage collect inet_peer aggressively.
IP ID generation does not have to be 'perfect'.
Goal is trying to avoid duplicates in a short period of time,
so that reassembly units have a chance to complete reassembly of
fragments belonging to one message before receiving other fragments
with a recycled ID.
We simply use an array of generators, and a Jenkins hash using the dst IP
as a key.
ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it
belongs (it is only used from this file)
secure_ip_id() and secure_ipv6_id() are no longer needed.
Rename ip_select_ident_more() to ip_select_ident_segs() to avoid
unnecessary decrement/increment of the number of segments.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/net/inetpeer.h | 14 | ||||
-rw-r--r-- | include/net/ip.h | 40 | ||||
-rw-r--r-- | include/net/ipip.h | 2 | ||||
-rw-r--r-- | include/net/ipv6.h | 9 | ||||
-rw-r--r-- | include/net/secure_seq.h | 2 |
5 files changed, 33 insertions, 34 deletions
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 2d643649f0f8..168d30dfe807 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -46,13 +46,12 @@ struct inet_peer { }; /* * Once inet_peer is queued for deletion (refcnt == -1), following fields - * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp + * are not available: rid, tcp_ts, tcp_ts_stamp * We can share memory with rcu_head to help keep inet_peer small. */ union { struct { atomic_t rid; /* Frag reception counter */ - atomic_t ip_id_count; /* IP ID for the next packet */ __u32 tcp_ts; __u32 tcp_ts_stamp; }; @@ -102,7 +101,7 @@ extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); extern void inetpeer_invalidate_tree(int family); /* - * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, + * temporary check to make sure we dont access rid, tcp_ts, * tcp_ts_stamp if no refcount is taken on inet_peer */ static inline void inet_peer_refcheck(const struct inet_peer *p) @@ -110,13 +109,4 @@ static inline void inet_peer_refcheck(const struct inet_peer *p) WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0); } - -/* can be called with or without local BH being disabled */ -static inline int inet_getid(struct inet_peer *p, int more) -{ - more++; - inet_peer_refcheck(p); - return atomic_add_return(more, &p->ip_id_count) - more; -} - #endif /* _NET_INETPEER_H */ diff --git a/include/net/ip.h b/include/net/ip.h index 6d6b12f4753c..f70e64b786a3 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -264,9 +264,19 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst) !(dst_metric_locked(dst, RTAX_MTU))); } -extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more); +#define IP_IDENTS_SZ 2048u +extern atomic_t *ip_idents; -static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk) +static inline u32 ip_idents_reserve(u32 hash, int segs) +{ + atomic_t *id_ptr = ip_idents + hash % 
IP_IDENTS_SZ; + + return atomic_add_return(segs, id_ptr) - segs; +} + +void __ip_select_ident(struct iphdr *iph, int segs); + +static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs) { struct iphdr *iph = ip_hdr(skb); @@ -276,24 +286,20 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s * does not change, they drop every other packet in * a TCP stream using header compression. */ - iph->id = (sk && inet_sk(sk)->inet_daddr) ? - htons(inet_sk(sk)->inet_id++) : 0; - } else - __ip_select_ident(iph, dst, 0); -} - -static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more) -{ - struct iphdr *iph = ip_hdr(skb); - - if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) { if (sk && inet_sk(sk)->inet_daddr) { iph->id = htons(inet_sk(sk)->inet_id); - inet_sk(sk)->inet_id += 1 + more; - } else + inet_sk(sk)->inet_id += segs; + } else { iph->id = 0; - } else - __ip_select_ident(iph, dst, more); + } + } else { + __ip_select_ident(iph, segs); + } +} + +static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk) +{ + ip_select_ident_segs(skb, sk, 1); } /* diff --git a/include/net/ipip.h b/include/net/ipip.h index 4dccfe3bf731..e8ee3bb1b1ca 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -50,7 +50,7 @@ struct ip_tunnel_prl_entry { int pkt_len = skb->len - skb_transport_offset(skb); \ \ skb->ip_summed = CHECKSUM_NONE; \ - ip_select_ident(skb, &rt->dst, NULL); \ + ip_select_ident(skb, NULL); \ \ err = ip_local_out(skb); \ if (likely(net_xmit_eval(err) == 0)) { \ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index fa7af9183dc9..117eaa578d0d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -392,14 +392,19 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a); int ip6_frag_match(struct inet_frag_queue *q, void *a); /* more secured version of ipv6_addr_hash() */ -static inline u32 ipv6_addr_jhash(const struct in6_addr *a) 
+static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval) { u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1]; return jhash_3words(v, (__force u32)a->s6_addr32[2], (__force u32)a->s6_addr32[3], - ipv6_hash_secret); + initval); +} + +static inline u32 ipv6_addr_jhash(const struct in6_addr *a) +{ + return __ipv6_addr_jhash(a, ipv6_hash_secret); } static inline int ipv6_addr_any(const struct in6_addr *a) diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index c2e542b27a5a..b1c3d1c63c4e 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -3,8 +3,6 @@ #include <linux/types.h> -extern __u32 secure_ip_id(__be32 daddr); -extern __u32 secure_ipv6_id(const __be32 daddr[4]); extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); |