From bcdce7195e0eab55b37dbd53be53057f38006380 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Oct 2009 17:28:29 -0700 Subject: net: speedup sk_wake_async() An incoming datagram must bring into cpu cache *lot* of cache lines, in particular : (other parts omitted (hash chains, ip route cache...)) On 32bit arches : offsetof(struct sock, sk_rcvbuf) =0x30 (read) offsetof(struct sock, sk_lock) =0x34 (rw) offsetof(struct sock, sk_sleep) =0x50 (read) offsetof(struct sock, sk_rmem_alloc) =0x64 (rw) offsetof(struct sock, sk_receive_queue)=0x74 (rw) offsetof(struct sock, sk_forward_alloc)=0x98 (rw) offsetof(struct sock, sk_callback_lock)=0xcc (rw) offsetof(struct sock, sk_drops) =0xd8 (read if we add dropcount support, rw if frame dropped) offsetof(struct sock, sk_filter) =0xf8 (read) offsetof(struct sock, sk_socket) =0x138 (read) offsetof(struct sock, sk_data_ready) =0x15c (read) We can avoid sk->sk_socket and socket->fasync_list referencing on sockets with no fasync() structures. (socket->fasync_list ptr is probably already in cache because it shares a cache line with socket->wait, ie location pointed by sk->sk_sleep) This avoids one cache line load per incoming packet for common cases (no fasync()) We can leave (or even move in a future patch) sk->sk_socket in a cold location Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 1621935aad5b..98398bdec57d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -504,6 +504,7 @@ enum sock_flags { SOCK_TIMESTAMPING_SOFTWARE, /* %SOF_TIMESTAMPING_SOFTWARE */ SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */ SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */ + SOCK_FASYNC, /* fasync() active */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) @@ -1396,7 +1397,7 @@ static inline unsigned long sock_wspace(struct sock *sk) static inline void sk_wake_async(struct sock *sk, int how, int band) { - if (sk->sk_socket && sk->sk_socket->fasync_list) + if (sock_flag(sk, SOCK_FASYNC)) sock_wake_async(sk->sk_socket, how, band); } -- cgit v1.2.3 From 3b885787ea4112eaa80945999ea0901bf742707f Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 12 Oct 2009 13:26:31 -0700 Subject: net: Generalize socket rx gap / receive queue overflow cmsg Create a new socket level option to report number of queue overflows Recently I augmented the AF_PACKET protocol to report the number of frames lost on the socket receive queue between any two enqueued frames. This value was exported via a SOL_PACKET level cmsg. AFter I completed that work it was requested that this feature be generalized so that any datagram oriented socket could make use of this option. As such I've created this patch, It creates a new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a SOL_SOCKET level cmsg that reports the nubmer of times the sk_receive_queue overflowed between any two given frames. It also augments the AF_PACKET protocol to take advantage of this new feature (as it previously did not touch sk->sk_drops, which this patch uses to record the overflow count). Tested successfully by me. Notes: 1) Unlike my previous patch, this patch simply records the sk_drops value, which is not a number of drops between packets, but rather a total number of drops. Deltas must be computed in user space. 2) While this patch currently works with datagram oriented protocols, it will also be accepted by non-datagram oriented protocols. I'm not sure if thats agreeable to everyone, but my argument in favor of doing so is that, for those protocols which aren't applicable to this option, sk_drops will always be zero, and reporting no drops on a receive queue that isn't used for those non-participating protocols seems reasonable to me. This also saves us having to code in a per-protocol opt in mechanism. 3) This applies cleanly to net-next assuming that commit 977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted Signed-off-by: Neil Horman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 98398bdec57d..10669b01eeab 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -505,6 +505,7 @@ enum sock_flags { SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */ SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */ SOCK_FASYNC, /* fasync() active */ + SOCK_RXQ_OVFL, }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) @@ -1493,6 +1494,8 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) sk->sk_stamp = kt; } +extern void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); + /** * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped * @msg: outgoing packet -- cgit v1.2.3 From e022f0b4a03f4fff9323b509df023b8af635716e Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Mon, 19 Oct 2009 23:46:20 +0000 Subject: net: Introduce sk_tx_queue_mapping Introduce sk_tx_queue_mapping; and functions that set, test and get this value. Reset sk_tx_queue_mapping to -1 whenever the dst cache is set/reset, and in socket alloc. Setting txq to -1 and using valid txq=<0 to n-1> allows the tx path to use the value of sk_tx_queue_mapping directly instead of subtracting 1 on every tx. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- include/net/sock.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 1364428f53f2..55de3bd719a5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -107,6 +107,7 @@ struct net; * @skc_node: main hash linkage for various protocol lookup tables * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol * @skc_refcnt: reference count + * @skc_tx_queue_mapping: tx queue number for this connection * @skc_hash: hash value used with various protocol lookup tables * @skc_family: network address family * @skc_state: Connection state @@ -128,6 +129,7 @@ struct sock_common { struct hlist_nulls_node skc_nulls_node; }; atomic_t skc_refcnt; + int skc_tx_queue_mapping; unsigned int skc_hash; unsigned short skc_family; @@ -215,6 +217,7 @@ struct sock { #define sk_node __sk_common.skc_node #define sk_nulls_node __sk_common.skc_nulls_node #define sk_refcnt __sk_common.skc_refcnt +#define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping #define sk_copy_start __sk_common.skc_hash #define sk_hash __sk_common.skc_hash @@ -1094,8 +1097,29 @@ static inline void sock_put(struct sock *sk) extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested); +static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) +{ + sk->sk_tx_queue_mapping = tx_queue; +} + +static inline void sk_tx_queue_clear(struct sock *sk) +{ + sk->sk_tx_queue_mapping = -1; +} + +static inline int sk_tx_queue_get(const struct sock *sk) +{ + return sk->sk_tx_queue_mapping; +} + +static inline bool sk_tx_queue_recorded(const struct sock *sk) +{ + return (sk && sk->sk_tx_queue_mapping >= 0); +} + static inline void sk_set_socket(struct sock *sk, struct socket *sock) { + sk_tx_queue_clear(sk); sk->sk_socket = sock; } @@ -1152,6 +1176,7 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst) { struct dst_entry *old_dst; + sk_tx_queue_clear(sk); old_dst = sk->sk_dst_cache; sk->sk_dst_cache = dst; dst_release(old_dst); @@ -1170,6 +1195,7 @@ __sk_dst_reset(struct sock *sk) { struct dst_entry *old_dst; + sk_tx_queue_clear(sk); old_dst = sk->sk_dst_cache; sk->sk_dst_cache = NULL; dst_release(old_dst); -- cgit v1.2.3 From d4cada4ae1c012815f95fa507eb86a0ae9d607d7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Nov 2009 10:17:30 +0000 Subject: udp: split sk_hash into two u16 hashes Union sk_hash with two u16 hashes for udp (no extra memory taken) One 16 bits hash on (local port) value (the previous udp 'hash') One 16 bits hash on (local address, local port) values, initialized but not yet used. This second hash is using jenkin hash for better distribution. Because the 'port' is xored later, a partial hash is performed on local address + net_hash_mix(net) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 55de3bd719a5..827366b62680 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -109,6 +109,7 @@ struct net; * @skc_refcnt: reference count * @skc_tx_queue_mapping: tx queue number for this connection * @skc_hash: hash value used with various protocol lookup tables + * @skc_u16hashes: two u16 hash values used by UDP lookup tables * @skc_family: network address family * @skc_state: Connection state * @skc_reuse: %SO_REUSEADDR setting @@ -131,7 +132,10 @@ struct sock_common { atomic_t skc_refcnt; int skc_tx_queue_mapping; - unsigned int skc_hash; + union { + unsigned int skc_hash; + __u16 skc_u16hashes[2]; + }; unsigned short skc_family; volatile unsigned char skc_state; unsigned char skc_reuse; -- cgit v1.2.3 From 512615b6b843ff3ff5ad583f34c39b3f302f5f26 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Nov 2009 10:17:58 +0000 Subject: udp: secondary hash on (local port, local address) Extends udp_table to contain a secondary hash table. socket anchor for this second hash is free, because UDP doesnt use skc_bind_node : We define an union to hold both skc_bind_node & a new hlist_nulls_node udp_portaddr_node udp_lib_get_port() inserts sockets into second hash chain (additional cost of one atomic op) udp_lib_unhash() deletes socket from second hash chain (additional cost of one atomic op) Note : No spinlock lockdep annotation is needed, because lock for the secondary hash chain is always get after lock for primary hash chain. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 827366b62680..3f1a4804bb3f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -105,7 +105,7 @@ struct net; /** * struct sock_common - minimal network layer representation of sockets * @skc_node: main hash linkage for various protocol lookup tables - * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol + * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol * @skc_refcnt: reference count * @skc_tx_queue_mapping: tx queue number for this connection * @skc_hash: hash value used with various protocol lookup tables @@ -115,6 +115,7 @@ struct net; * @skc_reuse: %SO_REUSEADDR setting * @skc_bound_dev_if: bound device index if != 0 * @skc_bind_node: bind hash linkage for various protocol lookup tables + * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol * @skc_prot: protocol handlers inside a network family * @skc_net: reference to the network namespace of this socket * @@ -140,7 +141,10 @@ struct sock_common { volatile unsigned char skc_state; unsigned char skc_reuse; int skc_bound_dev_if; - struct hlist_node skc_bind_node; + union { + struct hlist_node skc_bind_node; + struct hlist_nulls_node skc_portaddr_node; + }; struct proto *skc_prot; #ifdef CONFIG_NET_NS struct net *skc_net; -- cgit v1.2.3