From da5e36308d9f7151845018369148201a5d28b46d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 22 Jan 2013 09:50:24 +0000 Subject: soreuseport: TCP/IPv4 implementation Allow multiple listener sockets to bind to the same port. Motivation for soresuseport would be something like a web server binding to port 80 running with multiple threads, where each thread might have it's own listener socket. This could be done as an alternative to other models: 1) have one listener thread which dispatches completed connections to workers. 2) accept on a single listener socket from multiple threads. In case #1 the listener thread can easily become the bottleneck with high connection turn-over rate. In case #2, the proportion of connections accepted per thread tends to be uneven under high connection load (assuming simple event loop: while (1) { accept(); process() }, wakeup does not promote fairness among the sockets. We have seen the disproportion to be as high as 3:1 ratio between thread accepting most connections and the one accepting the fewest. With so_reusport the distribution is uniform. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/net/inet_hashtables.h') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 67a8fa098e3a..7b2ae9d37076 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -81,7 +81,9 @@ struct inet_bind_bucket { struct net *ib_net; #endif unsigned short port; - signed short fastreuse; + signed char fastreuse; + signed char fastreuseport; + kuid_t fastuid; int num_owners; struct hlist_node node; struct hlist_head owners; @@ -257,15 +259,19 @@ extern void inet_unhash(struct sock *sk); extern struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + const __be32 saddr, + const __be16 sport, const __be32 daddr, const unsigned short hnum, const int dif); static inline struct sock *inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { - return __inet_lookup_listener(net, hashinfo, daddr, ntohs(dport), dif); + return __inet_lookup_listener(net, hashinfo, saddr, sport, + daddr, ntohs(dport), dif); } /* Socket demux engine toys. */ @@ -358,7 +364,8 @@ static inline struct sock *__inet_lookup(struct net *net, struct sock *sk = __inet_lookup_established(net, hashinfo, saddr, sport, daddr, hnum, dif); - return sk ? : __inet_lookup_listener(net, hashinfo, daddr, hnum, dif); + return sk ? : __inet_lookup_listener(net, hashinfo, saddr, sport, + daddr, hnum, dif); } static inline struct sock *inet_lookup(struct net *net, -- cgit v1.2.3 From b67bfe0d42cac56c512dd5da4b1b347a23f4b70a Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 27 Feb 2013 17:06:00 -0800 Subject: hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin Acked-by: Paul E. McKenney Signed-off-by: Sasha Levin Cc: Wu Fengguang Cc: Marcelo Tosatti Cc: Gleb Natapov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/inet_hashtables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net/inet_hashtables.h') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 7b2ae9d37076..ef83d9e844b5 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -94,8 +94,8 @@ static inline struct net *ib_net(struct inet_bind_bucket *ib) return read_pnet(&ib->ib_net); } -#define inet_bind_bucket_for_each(tb, pos, head) \ - hlist_for_each_entry(tb, pos, head, node) +#define inet_bind_bucket_for_each(tb, head) \ + hlist_for_each_entry(tb, head, node) struct inet_bind_hashbucket { spinlock_t lock; -- cgit v1.2.3