From 3b885787ea4112eaa80945999ea0901bf742707f Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 12 Oct 2009 13:26:31 -0700 Subject: net: Generalize socket rx gap / receive queue overflow cmsg Create a new socket level option to report number of queue overflows Recently I augmented the AF_PACKET protocol to report the number of frames lost on the socket receive queue between any two enqueued frames. This value was exported via a SOL_PACKET level cmsg. AFter I completed that work it was requested that this feature be generalized so that any datagram oriented socket could make use of this option. As such I've created this patch, It creates a new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a SOL_SOCKET level cmsg that reports the nubmer of times the sk_receive_queue overflowed between any two given frames. It also augments the AF_PACKET protocol to take advantage of this new feature (as it previously did not touch sk->sk_drops, which this patch uses to record the overflow count). Tested successfully by me. Notes: 1) Unlike my previous patch, this patch simply records the sk_drops value, which is not a number of drops between packets, but rather a total number of drops. Deltas must be computed in user space. 2) While this patch currently works with datagram oriented protocols, it will also be accepted by non-datagram oriented protocols. I'm not sure if thats agreeable to everyone, but my argument in favor of doing so is that, for those protocols which aren't applicable to this option, sk_drops will always be zero, and reporting no drops on a receive queue that isn't used for those non-participating protocols seems reasonable to me. This also saves us having to code in a per-protocol opt in mechanism. 3) This applies cleanly to net-next assuming that commit 977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted Signed-off-by: Neil Horman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/raw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/raw.c') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 757c9171e7c2..f18172b07611 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -682,7 +682,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (err) goto done; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); /* Copy the address. */ if (sin) { -- cgit v1.2.3 From 766e9037cc139ee25ed93ee5ad11e1450c4b99f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Oct 2009 20:40:11 -0700 Subject: net: sk_drops consolidation sock_queue_rcv_skb() can update sk_drops itself, removing need for callers to take care of it. This is more consistent since sock_queue_rcv_skb() also reads sk_drops when queueing a skb. This adds sk_drops managment to many protocols that not cared yet. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/raw.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv4/raw.c') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index f18172b07611..39e2a6b8752c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -292,7 +292,6 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) /* Charge it to the socket. */ if (sock_queue_rcv_skb(sk, skb) < 0) { - atomic_inc(&sk->sk_drops); kfree_skb(skb); return NET_RX_DROP; } -- cgit v1.2.3 From c720c7e8383aff1cb219bddf474ed89d850336e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Oct 2009 06:30:45 +0000 Subject: inet: rename some inet_sock fields In order to have better cache layouts of struct sock (separate zones for rx/tx paths), we need this preliminary patch. Goal is to transfert fields used at lookup time in the first read-mostly cache line (inside struct sock_common) and move sk_refcnt to a separate cache line (only written by rx path) This patch adds inet_ prefix to daddr, rcv_saddr, dport, num, saddr, sport and id fields. This allows a future patch to define these fields as macros, like sk_refcnt, without name clashes. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/raw.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'net/ipv4/raw.c') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 39e2a6b8752c..9ef8c0829a77 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -87,7 +87,7 @@ void raw_hash_sk(struct sock *sk) struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; struct hlist_head *head; - head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)]; + head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)]; write_lock_bh(&h->lock); sk_add_node(sk, head); @@ -115,9 +115,9 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, sk_for_each_from(sk, node) { struct inet_sock *inet = inet_sk(sk); - if (net_eq(sock_net(sk), net) && inet->num == num && - !(inet->daddr && inet->daddr != raddr) && - !(inet->rcv_saddr && inet->rcv_saddr != laddr) && + if (net_eq(sock_net(sk), net) && inet->inet_num == num && + !(inet->inet_daddr && inet->inet_daddr != raddr) && + !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) goto found; /* gotcha */ } @@ -326,7 +326,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, int err; if (length > rt->u.dst.dev->mtu) { - ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, + ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, rt->u.dst.dev->mtu); return -EMSGSIZE; } @@ -489,10 +489,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = -EDESTADDRREQ; if (sk->sk_state != TCP_ESTABLISHED) goto out; - daddr = inet->daddr; + daddr = inet->inet_daddr; } - ipc.addr = inet->saddr; + ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.shtx.flags = 0; ipc.oif = sk->sk_bound_dev_if; @@ -634,9 +634,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) goto out; - inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; + inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) - inet->saddr = 0; /* Use device */ + inet->inet_saddr = 0; /* Use device */ sk_dst_reset(sk); ret = 0; out: return ret; @@ -706,7 +706,7 @@ static int raw_init(struct sock *sk) { struct raw_sock *rp = raw_sk(sk); - if (inet_sk(sk)->num == IPPROTO_ICMP) + if (inet_sk(sk)->inet_num == IPPROTO_ICMP) memset(&rp->filter, 0, sizeof(rp->filter)); return 0; } @@ -743,7 +743,7 @@ static int do_raw_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { if (optname == ICMP_FILTER) { - if (inet_sk(sk)->num != IPPROTO_ICMP) + if (inet_sk(sk)->inet_num != IPPROTO_ICMP) return -EOPNOTSUPP; else return raw_seticmpfilter(sk, optval, optlen); @@ -773,7 +773,7 @@ static int do_raw_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (optname == ICMP_FILTER) { - if (inet_sk(sk)->num != IPPROTO_ICMP) + if (inet_sk(sk)->inet_num != IPPROTO_ICMP) return -EOPNOTSUPP; else return raw_geticmpfilter(sk, optval, optlen); @@ -932,10 +932,10 @@ EXPORT_SYMBOL_GPL(raw_seq_stop); static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) { struct inet_sock *inet = inet_sk(sp); - __be32 dest = inet->daddr, - src = inet->rcv_saddr; + __be32 dest = inet->inet_daddr, + src = inet->inet_rcv_saddr; __u16 destp = 0, - srcp = inet->num; + srcp = inet->inet_num; seq_printf(seq, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", -- cgit v1.2.3