From edf391ff17232f097d72441c9ad467bcb3b5db18 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 27 Apr 2009 02:45:02 -0700 Subject: snmp: add missing counters for RFC 4293 The IP MIB (RFC 4293) defines stats for InOctets, OutOctets, InMcastOctets and OutMcastOctets: http://tools.ietf.org/html/rfc4293 But it seems we don't track those in any way that easy to separate from other protocols. This patch adds those missing counters to the stats file. Tested successfully by me With help from Eric Dumazet. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net/ipv6/ip6_output.c') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9fb49c3b518a..735a2bf4b5f1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -159,7 +159,8 @@ static int ip6_output2(struct sk_buff *skb) } } - IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCASTPKTS); + IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, + skb->len); } return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, @@ -275,8 +276,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), - IPSTATS_MIB_OUTREQUESTS); + IP6_UPD_PO_STATS(net, ip6_dst_idev(skb->dst), + IPSTATS_MIB_OUT, skb->len); return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); } @@ -1516,7 +1517,7 @@ int ip6_push_pending_frames(struct sock *sk) skb->mark = sk->sk_mark; skb->dst = dst_clone(&rt->u.dst); - IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); + IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb->dst); -- cgit v1.2.3 From adf30907d63893e4208dfe3f5c88ae12bc2f25d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Jun 2009 05:19:30 +0000 Subject: net: skb->dst accessors Define three accessors to get/set dst attached to a skb struct dst_entry *skb_dst(const struct sk_buff *skb) void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) void skb_dst_drop(struct sk_buff *skb) This one should replace occurrences of : dst_release(skb->dst) skb->dst = NULL; Delete skb->dst field Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 60 +++++++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) (limited to 'net/ipv6/ip6_output.c') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 735a2bf4b5f1..c8dc8e5a822f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -78,7 +78,7 @@ int __ip6_local_out(struct sk_buff *skb) len = 0; ipv6_hdr(skb)->payload_len = htons(len); - return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev, + return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, dst_output); } @@ -96,7 +96,7 @@ EXPORT_SYMBOL_GPL(ip6_local_out); static int ip6_output_finish(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); if (dst->hh) return neigh_hh_output(dst->hh, skb); @@ -117,7 +117,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) __skb_pull(newskb, skb_network_offset(newskb)); newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; - WARN_ON(!newskb->dst); + WARN_ON(!skb_dst(newskb)); netif_rx(newskb); return 0; @@ -126,7 +126,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) static int ip6_output2(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; skb->protocol = htons(ETH_P_IPV6); @@ -134,7 +134,7 @@ static int ip6_output2(struct sk_buff *skb) if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL; - struct inet6_dev *idev = ip6_dst_idev(skb->dst); + struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && ((mroute6_socket(dev_net(dev)) && @@ -172,21 +172,21 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb) struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? - skb->dst->dev->mtu : dst_mtu(skb->dst); + skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); } int ip6_output(struct sk_buff *skb) { - struct inet6_dev *idev = ip6_dst_idev(skb->dst); + struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); if (unlikely(idev->cnf.disable_ipv6)) { - IP6_INC_STATS(dev_net(skb->dst->dev), idev, + IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || - dst_allfrag(skb->dst)) + dst_allfrag(skb_dst(skb))) return ip6_fragment(skb, ip6_output2); else return ip6_output2(skb); @@ -202,7 +202,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl->fl6_dst; - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr; u8 proto = fl->proto; int seg_len = skb->len; @@ -222,7 +222,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (skb_headroom(skb) < head_room) { struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); if (skb2 == NULL) { - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -ENOBUFS; @@ -276,7 +276,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { - IP6_UPD_PO_STATS(net, ip6_dst_idev(skb->dst), + IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); @@ -286,7 +286,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } @@ -416,7 +416,7 @@ static inline int ip6_forward_finish(struct sk_buff *skb) int ip6_forward(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); @@ -485,7 +485,7 @@ int ip6_forward(struct sk_buff *skb) IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } - dst = skb->dst; + dst = skb_dst(skb); /* IPv6 specs say nothing about it, but it is clear that we cannot send redirects to source routed frames. @@ -566,8 +566,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->pkt_type = from->pkt_type; to->priority = from->priority; to->protocol = from->protocol; - dst_release(to->dst); - to->dst = dst_clone(from->dst); + skb_dst_drop(to); + skb_dst_set(to, dst_clone(skb_dst(from))); to->dev = from->dev; to->mark = from->mark; @@ -624,7 +624,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; - struct rt6_info *rt = (struct rt6_info*)skb->dst; + struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; @@ -632,7 +632,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) __be32 frag_id = 0; int ptr, offset = 0, err=0; u8 *prevhdr, nexthdr = 0; - struct net *net = dev_net(skb->dst->dev); + struct net *net = dev_net(skb_dst(skb)->dev); hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; @@ -644,9 +644,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) * check should be redundant, but it's free.) */ if (!skb->local_df) { - skb->dev = skb->dst->dev; + skb->dev = skb_dst(skb)->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; @@ -696,7 +696,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) *prevhdr = NEXTHDR_FRAGMENT; tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); if (!tmp_hdr) { - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); return -ENOMEM; } @@ -809,7 +809,7 @@ slow_path: if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; @@ -873,16 +873,16 @@ slow_path: if (err) goto fail; - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGCREATES); } - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGOKS); kfree_skb(skb); return err; fail: - IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return err; @@ -1516,10 +1516,10 @@ int ip6_push_pending_frames(struct sock *sk) skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb->dst = dst_clone(&rt->u.dst); + skb_dst_set(skb, dst_clone(&rt->u.dst)); IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { - struct inet6_dev *idev = ip6_dst_idev(skb->dst); + struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); @@ -1545,8 +1545,8 @@ void ip6_flush_pending_frames(struct sock *sk) struct sk_buff *skb; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { - if (skb->dst) - IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb->dst), + if (skb_dst(skb)) + IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); } -- cgit v1.2.3 From 4d9092bb41c8fdca45513461587d8b4ad3918f74 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Jun 2009 00:20:05 -0700 Subject: ipv6: Use frag list abstraction interfaces. Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6/ip6_output.c') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c8dc8e5a822f..db6c7224a862 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -658,7 +658,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } mtu -= hlen + sizeof(struct frag_hdr); - if (skb_shinfo(skb)->frag_list) { + if (skb_has_frags(skb)) { int first_len = skb_pagelen(skb); int truesizes = 0; @@ -667,7 +667,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb_cloned(skb)) goto slow_path; - for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) { + skb_walk_frags(skb, frag) { /* Correct geometry. */ if (frag->len > mtu || ((frag->len & 7) && frag->next) || @@ -690,7 +690,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) err = 0; offset = 0; frag = skb_shinfo(skb)->frag_list; - skb_shinfo(skb)->frag_list = NULL; + skb_frag_list_init(skb); /* BUILD HEADER */ *prevhdr = NEXTHDR_FRAGMENT; -- cgit v1.2.3 From 2b85a34e911bf483c27cfdd124aeb1605145dc80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Jun 2009 02:55:43 -0700 Subject: net: No more expensive sock_hold()/sock_put() on each tx One of the problem with sock memory accounting is it uses a pair of sock_hold()/sock_put() for each transmitted packet. This slows down bidirectional flows because the receive path also needs to take a refcount on socket and might use a different cpu than transmit path or transmit completion path. So these two atomic operations also trigger cache line bounces. We can see this in tx or tx/rx workloads (media gateways for example), where sock_wfree() can be in top five functions in profiles. We use this sock_hold()/sock_put() so that sock freeing is delayed until all tx packets are completed. As we also update sk_wmem_alloc, we could offset sk_wmem_alloc by one unit at init time, until sk_free() is called. Once sk_free() is called, we atomic_dec_and_test(sk_wmem_alloc) to decrement initial offset and atomicaly check if any packets are in flight. skb_set_owner_w() doesnt call sock_hold() anymore sock_wfree() doesnt call sock_put() anymore, but check if sk_wmem_alloc reached 0 to perform the final freeing. Drawback is that a skb->truesize error could lead to unfreeable sockets, or even worse, prematurely calling __sk_free() on a live socket. Nice speedups on SMP. tbench for example, going from 2691 MB/s to 2711 MB/s on my 8 cpu dev machine, even if tbench was not really hitting sk_refcnt contention point. 5 % speedup on a UDP transmit workload (depends on number of flows), lowering TX completion cpu usage. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv6/ip6_output.c') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index db6c7224a862..7c76e3d18215 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -680,7 +680,6 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) BUG_ON(frag->sk); if (skb->sk) { - sock_hold(skb->sk); frag->sk = skb->sk; frag->destructor = sock_wfree; truesizes += frag->truesize; -- cgit v1.2.3