From b917783c7b350518f8c5d88bb5848aa8064408a6 Mon Sep 17 00:00:00 2001
From: Florian Westphal
Date: Wed, 26 Oct 2016 18:49:46 +0200
Subject: flow_dissector: __skb_get_hash_symmetric arg can be const

Signed-off-by: Florian Westphal
Signed-off-by: David S. Miller
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/skbuff.h')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 601258f6e621..663fda2887f7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1086,7 +1086,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4)
 }
 
 void __skb_get_hash(struct sk_buff *skb);
-u32 __skb_get_hash_symmetric(struct sk_buff *skb);
+u32 __skb_get_hash_symmetric(const struct sk_buff *skb);
 u32 skb_get_poff(const struct sk_buff *skb);
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
 		   const struct flow_keys *keys, int hlen);
--
cgit v1.2.3

From 7c13f97ffde63cc792c49ec1513f3974f2f05229 Mon Sep 17 00:00:00 2001
From: Paolo Abeni
Date: Fri, 4 Nov 2016 11:28:59 +0100
Subject: udp: do fwd memory scheduling on dequeue

A new argument is added to __skb_recv_datagram to provide an explicit
skb destructor, invoked under the receive queue lock. The UDP protocol
uses this argument to perform memory reclaiming on dequeue, so that the
UDP protocol no longer sets skb->destructor. Instead, explicit memory
reclaiming is performed at close() time and when skbs are removed from
the receive queue.

In-kernel UDP protocol users now need to call a skb_recv_udp() variant
instead of skb_recv_datagram() to properly perform memory accounting on
dequeue.

Overall, this allows acquiring the receive queue lock only once per
dequeue.

Tested using pktgen with random src port, 64 byte packets, at wire
speed on a 10G link as sender, and udp_sink as the receiver, using an
l4 tuple rxhash to stress the contention, and one or more udp_sink
instances with reuseport.

nr sinks	vanilla		patched
1		440		560
3		2150		2300
6		3650		3800
9		4450		4600
12		6250		6450

v1 -> v2:
 - do rmem and allocated memory scheduling under the receive lock
 - do bulk scheduling in first_packet_length() and in udp_destruct_sock()
 - avoid the typedef for the dequeue callback

Suggested-by: Eric Dumazet
Acked-by: Hannes Frederic Sowa
Signed-off-by: Paolo Abeni
Acked-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 include/linux/skbuff.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux/skbuff.h')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cc6e23eaac91..a4aeeca7e805 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3033,9 +3033,13 @@ static inline void skb_frag_list_init(struct sk_buff *skb)
 int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
 				const struct sk_buff *skb);
 struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags,
+					void (*destructor)(struct sock *sk,
+							   struct sk_buff *skb),
 					int *peeked, int *off, int *err,
 					struct sk_buff **last);
 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
+				    void (*destructor)(struct sock *sk,
+						       struct sk_buff *skb),
 				    int *peeked, int *off, int *err);
 struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
 				  int *err);
--
cgit v1.2.3
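For illustration only, here is a minimal sketch of how an in-kernel protocol might use the new destructor argument; the function names and the rmem adjustment below are assumptions made for the example, not the actual UDP implementation. The key property is that the callback runs while the receive queue lock is still held, so dequeue-time accounting needs no extra locking.

#include <linux/skbuff.h>
#include <net/sock.h>

/* Hypothetical dequeue destructor: __skb_recv_datagram() invokes it while
 * sk_receive_queue.lock is still held, so per-socket receive accounting
 * can be updated without taking the lock a second time.
 */
static void my_proto_skb_destructor(struct sock *sk, struct sk_buff *skb)
{
	/* Drop the receive-memory charge for this skb (illustrative). */
	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}

/* Hypothetical receive path: pass the destructor explicitly instead of
 * relying on skb->destructor being run at consume_skb() time.
 */
static struct sk_buff *my_proto_recv_skb(struct sock *sk, unsigned int flags,
					 int *off, int *err)
{
	int peeked;

	return __skb_recv_datagram(sk, flags, my_proto_skb_destructor,
				   &peeked, off, err);
}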
From c72d8cdaa5dbd3baf918046ee5149ab69330923e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan
Date: Sat, 19 Nov 2016 04:08:08 +0300
Subject: net: fix bogus cast in skb_pagelen() and use unsigned variables

1) The cast to "int" is unnecessary: u8 is promoted to int before the
   decrement, and small positive numbers fit into "int", so their values
   are not changed by the promotion. Once everything is int, including
   the loop counter, signedness does not matter: 32-bit operations stay
   32-bit operations.

   But! Someone tried to make this loop smart by giving everything the
   same type, apparently in an attempt to optimise it. Do the
   optimization, just differently: do the cast where it matters. :^)

2) Frag size is an unsigned entity and the sum of fragment sizes is
   also unsigned. Make everything unsigned and leave no MOVSX
   instruction behind.

	add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-4 (-4)
	function        old     new     delta
	skb_cow_data    835     834     -1
	ip_do_fragment  2549    2548    -1
	ip6_fragment    3130    3128    -2
	Total: Before=154865032, After=154865028, chg -0.00%

Signed-off-by: Alexey Dobriyan
Signed-off-by: David S. Miller
---
 include/linux/skbuff.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux/skbuff.h')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a4aeeca7e805..9c535fbccf2c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1799,11 +1799,11 @@ static inline unsigned int skb_headlen(const struct sk_buff *skb)
 	return skb->len - skb->data_len;
 }
 
-static inline int skb_pagelen(const struct sk_buff *skb)
+static inline unsigned int skb_pagelen(const struct sk_buff *skb)
 {
-	int i, len = 0;
+	unsigned int i, len = 0;
 
-	for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--)
+	for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--)
 		len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
 	return len + skb_headlen(skb);
 }
--
cgit v1.2.3

From c8c8b127091b758f5768f906bcdeeb88bc9951ca Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 7 Dec 2016 09:19:33 -0800
Subject: udp: under rx pressure, try to condense skbs

Under UDP flood, many softirq producers try to add packets to the UDP
receive queue, while one user thread burns one cpu trying to dequeue
packets as fast as possible.

Two parts of the per-packet cost are:
 - copying the payload from kernel space to user space,
 - freeing memory pieces associated with the skb.

If the socket is under pressure, the softirq handler(s) can try to pull
the packet payload into skb->head if it fits. This means the softirq
handler(s) can free/reuse the page fragment immediately, instead of
letting udp_recvmsg() do this hundreds of usec later, possibly from
another node.

Additional gains:
 - We reduce skb->truesize and thus can store more packets per SO_RCVBUF.
 - We avoid cache line misses at copyout() time and consume_skb() time,
   and avoid one put_page() with potential alien freeing on NUMA hosts.

This comes at the cost of a copy, bounded to the available tail room,
which is usually small. (We might have to fix GRO_MAX_HEAD, which looks
bigger than necessary.)

This patch gave me about a 5 % increase in throughput in my tests.

The skb_condense() helper could probably be used in other contexts.

Signed-off-by: Eric Dumazet
Cc: Paolo Abeni
Signed-off-by: David S. Miller
---
 include/linux/skbuff.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/skbuff.h')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9c535fbccf2c..0cd92b0f2af5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1966,6 +1966,8 @@ static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
 	return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL;
 }
 
+void skb_condense(struct sk_buff *skb);
+
 /**
  *	skb_headroom - bytes at buffer head
  *	@skb: buffer to check
--
cgit v1.2.3
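Below is a rough sketch of the idea behind such a condense helper, written from the description above; it is an illustration under those assumptions, not necessarily the in-tree skb_condense() implementation.

#include <linux/skbuff.h>

/* Illustration: if the paged payload fits into the existing tailroom of
 * skb->head and the skb is not cloned, pull it into the linear area so
 * the page fragment(s) can be freed right away, then shrink truesize to
 * match the now-linear buffer.
 */
static void my_skb_condense(struct sk_buff *skb)
{
	if (skb->data_len) {
		/* Compare against skb->end - skb->tail directly, since
		 * skb_tailroom() reports 0 for nonlinear skbs.
		 */
		if (skb->data_len > skb->end - skb->tail || skb_cloned(skb))
			return;

		/* Fits: copy the frags into the tailroom and release them. */
		__pskb_pull_tail(skb, skb->data_len);
	}

	/* Account only for the linear buffer plus struct overhead. */
	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
}

The copy is bounded by the available tailroom, as the commit message notes, so the extra cost stays small compared to recycling the page fragment immediately.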
From c84d949057cab262b4d3110ead9a42a58c2958f7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 8 Dec 2016 11:41:55 -0800
Subject: udp: copy skb->truesize in the first cache line

In the UDP RX handler, we currently clear skb->dev before the skb is
added to the receive queue, because the device pointer is no longer
available once we exit the RCU section.

Since this first cache line is always hot, let's reuse this space to
store skb->truesize, and thus avoid a cache line miss at
udp_recvmsg()/udp_skb_destructor() time while the receive queue
spinlock is held.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 include/linux/skbuff.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux/skbuff.h')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0cd92b0f2af5..332e76756f54 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -645,8 +645,15 @@ struct sk_buff {
 		struct rb_node	rbnode; /* used in netem & tcp stack */
 	};
 	struct sock		*sk;
-	struct net_device	*dev;
 
+	union {
+		struct net_device	*dev;
+		/* Some protocols might use this space to store information,
+		 * while device pointer would be NULL.
+		 * UDP receive path is one user.
+		 */
+		unsigned long		dev_scratch;
+	};
 	/*
 	 * This is the control buffer. It is free to use for every
 	 * layer. Please put your private variables there. If you
--
cgit v1.2.3
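As an illustration of how a receive path might use this scratch space (helper names hypothetical, not the actual UDP code), the sketch below stores truesize into dev_scratch while the first cache line is still hot; a dequeue-time destructor, like the one sketched earlier in this series, can then read it back without touching the colder cache line that holds skb->truesize.

#include <linux/skbuff.h>
#include <net/sock.h>

/* Producer side (hypothetical; caller holds sk_receive_queue.lock).
 * skb->dev is stale once we have left the RCU section, so reuse its
 * slot in the hot first cache line to remember truesize.
 */
static void my_proto_enqueue(struct sock *sk, struct sk_buff *skb)
{
	skb->dev_scratch = skb->truesize;	/* overwrites the dev pointer */
	__skb_queue_tail(&sk->sk_receive_queue, skb);
}

/* Consumer side (hypothetical destructor run under the same lock):
 * read truesize from the first cache line instead of dereferencing
 * the colder part of the skb that holds skb->truesize.
 */
static void my_proto_skb_destructor(struct sock *sk, struct sk_buff *skb)
{
	unsigned int truesize = skb->dev_scratch;

	atomic_sub(truesize, &sk->sk_rmem_alloc);
}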