From 6648bd7e0e62c0c8c03b15e00c9e7015e232feff Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 21 Jun 2012 13:58:31 +0000 Subject: ipv4: Add sysctl knob to control early socket demux This change is meant to add a control for disabling early socket demux. The main motivation behind this patch is to provide an option to disable the feature as it adds an additional cost to routing that reduces overall throughput by up to 5%. For example one of my systems went from 12.1Mpps to 11.6 after the early socket demux was added. It looks like the reason for the regression is that we are now having to perform two lookups, first the one for an established socket, and then the one for the routing table. By adding this patch and toggling the value for ip_early_demux to 0 I am able to get back to the 12.1Mpps I was previously seeing. [ Move local variables in ip_rcv_finish() down into the basic block in which they are actually used. -DaveM ] Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/net/ip.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net/ip.h') diff --git a/include/net/ip.h b/include/net/ip.h index 83e0619f59d0..50841bd6f10e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -210,6 +210,9 @@ extern int inet_peer_threshold; extern int inet_peer_minttl; extern int inet_peer_maxttl; +/* From ip_input.c */ +extern int sysctl_ip_early_demux; + /* From ip_output.c */ extern int sysctl_ip_dynaddr; -- cgit v1.2.3 From 70e7341673a47fb1525cfc7d6651cc98b5348928 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Jun 2012 03:21:41 -0700 Subject: ipv4: Show that ip_send_reply() is purely unicast routine. Rename it to ip_send_unicast_reply() and add explicit 'saddr' argument. This removed one of the few users of rt->rt_spec_dst. Signed-off-by: David S. Miller --- include/net/ip.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net/ip.h') diff --git a/include/net/ip.h b/include/net/ip.h index 50841bd6f10e..ec5cfde85e9a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -158,8 +158,9 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; } -void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, - const struct ip_reply_arg *arg, unsigned int len); +void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, + __be32 saddr, const struct ip_reply_arg *arg, + unsigned int len); struct ipv4_config { int log_martians; -- cgit v1.2.3 From be9f4a44e7d41cee50ddb5f038fc2391cbbb4046 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Jul 2012 07:34:03 +0000 Subject: ipv4: tcp: remove per net tcp_sock tcp_v4_send_reset() and tcp_v4_send_ack() use a single socket per network namespace. This leads to bad behavior on multiqueue NICS, because many cpus contend for the socket lock and once socket lock is acquired, extra false sharing on various socket fields slow down the operations. To better resist to attacks, we use a percpu socket. Each cpu can run without contention, using appropriate memory (local node) Additional features : 1) We also mirror the queue_mapping of the incoming skb, so that answers use the same queue if possible. 2) Setting SOCK_USE_WRITE_QUEUE socket flag speedup sock_wfree() 3) We now limit the number of in-flight RST/ACK [1] packets per cpu, instead of per namespace, and we honor the sysctl_wmem_default limit dynamically. (Prior to this patch, sysctl_wmem_default value was copied at boot time, so any further change would not affect tcp_sock limit) [1] These packets are only generated when no socket was matched for the incoming packet. Reported-by: Bill Sommerfeld Signed-off-by: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net/ip.h') diff --git a/include/net/ip.h b/include/net/ip.h index ec5cfde85e9a..bd5e444a19ce 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -158,7 +158,7 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; } -void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, +void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, unsigned int len); -- cgit v1.2.3 From b5ec8eeac46a99004c26791f70b15d001e970acf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Aug 2012 02:22:47 +0000 Subject: ipv4: fix ip_send_skb() ip_send_skb() can send orphaned skb, so we must pass the net pointer to avoid possible NULL dereference in error path. Bug added by commit 3a7c384ffd57 (ipv4: tcp: unicast_sock should not land outside of TCP stack) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net/ip.h') diff --git a/include/net/ip.h b/include/net/ip.h index bd5e444a19ce..5a5d84d3d2c6 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -120,7 +120,7 @@ extern struct sk_buff *__ip_make_skb(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, struct inet_cork *cork); -extern int ip_send_skb(struct sk_buff *skb); +extern int ip_send_skb(struct net *net, struct sk_buff *skb); extern int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4); extern void ip_flush_pending_frames(struct sock *sk); extern struct sk_buff *ip_make_skb(struct sock *sk, -- cgit v1.2.3