From 545cd5e5ec5477c325e4098b6fd21213dceda408 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 24 Mar 2017 10:07:53 -0700 Subject: net: Busy polling should ignore sender CPUs This patch is a cleanup/fix for NAPI IDs following the changes that made it so that sender_cpu and napi_id were doing a better job of sharing the same location in the sk_buff. One issue I found is that we weren't validating the napi_id as being valid before we started trying to setup the busy polling. This change corrects that by using the MIN_NAPI_ID value that is now used in both allocating the NAPI IDs, as well as validating them. Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/busy_poll.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index c0452de83086..3fcda9e70c3f 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -35,6 +35,12 @@ struct napi_struct; extern unsigned int sysctl_net_busy_read __read_mostly; extern unsigned int sysctl_net_busy_poll __read_mostly; +/* 0 - Reserved to indicate value not set + * 1..NR_CPUS - Reserved for sender_cpu + * NR_CPUS+1..~0 - Region available for NAPI IDs + */ +#define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1)) + static inline bool net_busy_loop_on(void) { return sysctl_net_busy_poll; @@ -58,10 +64,9 @@ static inline unsigned long busy_loop_end_time(void) static inline bool sk_can_busy_loop(const struct sock *sk) { - return sk->sk_ll_usec && sk->sk_napi_id && !signal_pending(current); + return sk->sk_ll_usec && !signal_pending(current); } - static inline bool busy_loop_timeout(unsigned long end_time) { unsigned long now = busy_loop_us_clock(); -- cgit v1.2.3 From d2e64dbbe95b2b51eb723134274de1d3f30bce80 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 24 Mar 2017 10:08:06 -0700 Subject: net: Only define skb_mark_napi_id in one spot instead of two Instead of 
defining two versions of skb_mark_napi_id I think it is more readable to just match the format of the sk_mark_napi_id functions and just wrap the contents of the function instead of defining two versions of the function. This way we can save a few lines of code since we only need 2 of the ifdef/endif but needed 5 for the extra function declaration. Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/busy_poll.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 3fcda9e70c3f..b82d6ba70a14 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -76,14 +76,6 @@ static inline bool busy_loop_timeout(unsigned long end_time) bool sk_busy_loop(struct sock *sk, int nonblock); -/* used in the NIC receive handler to mark the skb */ -static inline void skb_mark_napi_id(struct sk_buff *skb, - struct napi_struct *napi) -{ - skb->napi_id = napi->napi_id; -} - - #else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) { @@ -100,11 +92,6 @@ static inline bool sk_can_busy_loop(struct sock *sk) return false; } -static inline void skb_mark_napi_id(struct sk_buff *skb, - struct napi_struct *napi) -{ -} - static inline bool busy_loop_timeout(unsigned long end_time) { return true; @@ -117,6 +104,15 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock) #endif /* CONFIG_NET_RX_BUSY_POLL */ +/* used in the NIC receive handler to mark the skb */ +static inline void skb_mark_napi_id(struct sk_buff *skb, + struct napi_struct *napi) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + skb->napi_id = napi->napi_id; +#endif +} + /* used in the protocol hanlder to propagate the napi_id to the socket */ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { -- cgit v1.2.3 From 2b5cd0dfa384242f78a396b90087368c9440cc9a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: 
Fri, 24 Mar 2017 10:08:12 -0700 Subject: net: Change return type of sk_busy_loop from bool to void From what I can tell there is only a couple spots where we are actually checking the return value of sk_busy_loop. As there are only a few consumers of that data, and the data being checked for can be replaced with a check for !skb_queue_empty() we might as well just pull the code out of sk_busy_loop and place it in the spots that actually need it. Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/busy_poll.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index b82d6ba70a14..c55760f4820f 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -74,7 +74,7 @@ static inline bool busy_loop_timeout(unsigned long end_time) return time_after(now, end_time); } -bool sk_busy_loop(struct sock *sk, int nonblock); +void sk_busy_loop(struct sock *sk, int nonblock); #else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) @@ -97,9 +97,8 @@ static inline bool busy_loop_timeout(unsigned long end_time) return true; } -static inline bool sk_busy_loop(struct sock *sk, int nonblock) +static inline void sk_busy_loop(struct sock *sk, int nonblock) { - return false; } #endif /* CONFIG_NET_RX_BUSY_POLL */ -- cgit v1.2.3 From 37056719bba500d0d2b8216fdf641e5507ec9a0e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 24 Mar 2017 10:08:18 -0700 Subject: net: Track start of busy loop instead of when it should end This patch flips the logic we were using to determine if the busy polling has timed out. The main motivation for this is that we will need to support two different possible timeout values in the future and by recording the start time rather than when we would want to end we can focus on making the end_time specific to the task be it epoll or socket based polling. Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/net/busy_poll.h | 68 +++++++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index c55760f4820f..72c82f2ea536 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -46,62 +46,70 @@ static inline bool net_busy_loop_on(void) return sysctl_net_busy_poll; } -static inline u64 busy_loop_us_clock(void) +static inline bool sk_can_busy_loop(const struct sock *sk) { - return local_clock() >> 10; + return sk->sk_ll_usec && !signal_pending(current); } -static inline unsigned long sk_busy_loop_end_time(struct sock *sk) -{ - return busy_loop_us_clock() + ACCESS_ONCE(sk->sk_ll_usec); -} +void sk_busy_loop(struct sock *sk, int nonblock); -/* in poll/select we use the global sysctl_net_ll_poll value */ -static inline unsigned long busy_loop_end_time(void) +#else /* CONFIG_NET_RX_BUSY_POLL */ +static inline unsigned long net_busy_loop_on(void) { - return busy_loop_us_clock() + ACCESS_ONCE(sysctl_net_busy_poll); + return 0; } -static inline bool sk_can_busy_loop(const struct sock *sk) +static inline bool sk_can_busy_loop(struct sock *sk) { - return sk->sk_ll_usec && !signal_pending(current); + return false; } -static inline bool busy_loop_timeout(unsigned long end_time) +static inline void sk_busy_loop(struct sock *sk, int nonblock) { - unsigned long now = busy_loop_us_clock(); - - return time_after(now, end_time); } -void sk_busy_loop(struct sock *sk, int nonblock); +#endif /* CONFIG_NET_RX_BUSY_POLL */ -#else /* CONFIG_NET_RX_BUSY_POLL */ -static inline unsigned long net_busy_loop_on(void) +static inline unsigned long busy_loop_current_time(void) { +#ifdef CONFIG_NET_RX_BUSY_POLL + return (unsigned long)(local_clock() >> 10); +#else return 0; +#endif } -static inline unsigned long busy_loop_end_time(void) +/* in poll/select we use the global sysctl_net_ll_poll value */ +static inline bool busy_loop_timeout(unsigned 
long start_time) { - return 0; -} +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned long bp_usec = READ_ONCE(sysctl_net_busy_poll); -static inline bool sk_can_busy_loop(struct sock *sk) -{ - return false; -} + if (bp_usec) { + unsigned long end_time = start_time + bp_usec; + unsigned long now = busy_loop_current_time(); -static inline bool busy_loop_timeout(unsigned long end_time) -{ + return time_after(now, end_time); + } +#endif return true; } -static inline void sk_busy_loop(struct sock *sk, int nonblock) +static inline bool sk_busy_loop_timeout(struct sock *sk, + unsigned long start_time) { -} +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned long bp_usec = READ_ONCE(sk->sk_ll_usec); -#endif /* CONFIG_NET_RX_BUSY_POLL */ + if (bp_usec) { + unsigned long end_time = start_time + bp_usec; + unsigned long now = busy_loop_current_time(); + + return time_after(now, end_time); + } +#endif + return true; +} /* used in the NIC receive handler to mark the skb */ static inline void skb_mark_napi_id(struct sk_buff *skb, -- cgit v1.2.3 From 7db6b048da3b9f84fe1d22fb29ff7e7c2ec6c0e5 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Fri, 24 Mar 2017 10:08:24 -0700 Subject: net: Commonize busy polling code to focus on napi_id instead of socket Move the core functionality in sk_busy_loop() to napi_busy_loop() and make it independent of sk. This enables re-using this function in epoll busy loop implementation. Signed-off-by: Sridhar Samudrala Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/busy_poll.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 72c82f2ea536..8ffd434676b7 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -51,7 +51,11 @@ static inline bool sk_can_busy_loop(const struct sock *sk) return sk->sk_ll_usec && !signal_pending(current); } -void sk_busy_loop(struct sock *sk, int nonblock); +bool sk_busy_loop_end(void *p, unsigned long start_time); + +void napi_busy_loop(unsigned int napi_id, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg); #else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) @@ -64,10 +68,6 @@ static inline bool sk_can_busy_loop(struct sock *sk) return false; } -static inline void sk_busy_loop(struct sock *sk, int nonblock) -{ -} - #endif /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long busy_loop_current_time(void) @@ -111,6 +111,16 @@ static inline bool sk_busy_loop_timeout(struct sock *sk, return true; } +static inline void sk_busy_loop(struct sock *sk, int nonblock) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned int napi_id = READ_ONCE(sk->sk_napi_id); + + if (napi_id >= MIN_NAPI_ID) + napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk); +#endif +} + /* used in the NIC receive handler to mark the skb */ static inline void skb_mark_napi_id(struct sk_buff *skb, struct napi_struct *napi) -- cgit v1.2.3 From 6d4339028b350efbf87c61e6d9e113e5373545c9 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Fri, 24 Mar 2017 10:08:36 -0700 Subject: net: Introduce SO_INCOMING_NAPI_ID This socket option returns the NAPI ID associated with the queue on which the last frame is received. This information can be used by the apps to split the incoming flows among the threads based on the Rx queue on which they are received. 
If the NAPI ID actually represents a sender_cpu then the value is ignored and 0 is returned. Signed-off-by: Sridhar Samudrala Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/asm-generic/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 8313702c1eae..c98a52fb572a 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -96,4 +96,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* __ASM_GENERIC_SOCKET_H */ -- cgit v1.2.3