From 0c0a5ef809f9150e9229e7b13e43183b681b7a39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2021 09:48:16 -0700 Subject: tcp: move inet->rx_dst_ifindex to sk->sk_rx_dst_ifindex Increase cache locality by moving rx_dst_ifindex next to sk->sk_rx_dst This is part of an effort to reduce cache line misses in TCP fast path. This removes one cache line miss in early demux. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski --- include/net/inet_sock.h | 3 +-- include/net/sock.h | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 89163ef8cf4b..9e1111f5915b 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -207,11 +207,10 @@ struct inet_sock { __be32 inet_saddr; __s16 uc_ttl; __u16 cmsg_flags; + struct ip_options_rcu __rcu *inet_opt; __be16 inet_sport; __u16 inet_id; - struct ip_options_rcu __rcu *inet_opt; - int rx_dst_ifindex; __u8 tos; __u8 min_ttl; __u8 mc_ttl; diff --git a/include/net/sock.h b/include/net/sock.h index 596ba85611bc..0bfb3f138bda 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -259,6 +259,7 @@ struct bpf_local_storage; * @sk_rcvbuf: size of receive buffer in bytes * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux + * @sk_rx_dst_ifindex: ifindex for @sk_rx_dst * @sk_dst_cache: destination cache * @sk_dst_pending_confirm: need to confirm neighbour * @sk_policy: flow policy @@ -430,6 +431,8 @@ struct sock { struct xfrm_policy __rcu *sk_policy[2]; #endif struct dst_entry *sk_rx_dst; + int sk_rx_dst_ifindex; + struct dst_entry __rcu *sk_dst_cache; atomic_t sk_omem_alloc; int sk_sndbuf; -- cgit v1.2.3 From ef57c1610dd8fba5031bf71e0db73356190de151 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2021 09:48:17 -0700 Subject: ipv6: move inet6_sk(sk)->rx_dst_cookie to sk->sk_rx_dst_cookie Increase cache locality by moving rx_dst_coookie next to sk->sk_rx_dst This removes one or two cache line misses in IPv6 early demux (TCP/UDP) Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski --- include/linux/ipv6.h | 1 - include/net/sock.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ef4a69865737..c383630d3f06 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -282,7 +282,6 @@ struct ipv6_pinfo { __be32 rcv_flowinfo; __u32 dst_cookie; - __u32 rx_dst_cookie; struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; diff --git a/include/net/sock.h b/include/net/sock.h index 0bfb3f138bda..99c4194cb61a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -260,6 +260,7 @@ struct bpf_local_storage; * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux * @sk_rx_dst_ifindex: ifindex for @sk_rx_dst + * @sk_rx_dst_cookie: cookie for @sk_rx_dst * @sk_dst_cache: destination cache * @sk_dst_pending_confirm: need to confirm neighbour * @sk_policy: flow policy @@ -432,6 +433,7 @@ struct sock { #endif struct dst_entry *sk_rx_dst; int sk_rx_dst_ifindex; + u32 sk_rx_dst_cookie; struct dst_entry __rcu *sk_dst_cache; atomic_t sk_omem_alloc; -- cgit v1.2.3 From 2b13af8ade38ef3afe48d60c518c64010cfa8b0d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2021 09:48:18 -0700 Subject: net: avoid dirtying sk->sk_napi_id sk_napi_id is located in a cache line that can be kept read mostly. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski --- include/net/busy_poll.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 40296ed976a9..4202c609bb0b 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -130,7 +130,8 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL - WRITE_ONCE(sk->sk_napi_id, skb->napi_id); + if (unlikely(READ_ONCE(sk->sk_napi_id) != skb->napi_id)) + WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif sk_rx_queue_set(sk, skb); } -- cgit v1.2.3 From 342159ee394d103db4bea1b01f932c50e66be163 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2021 09:48:19 -0700 Subject: net: avoid dirtying sk->sk_rx_queue_mapping sk_rx_queue_mapping is located in a cache line that should be kept read mostly. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski --- include/net/sock.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 99c4194cb61a..b4d3744b188a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1916,10 +1916,8 @@ static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) if (skb_rx_queue_recorded(skb)) { u16 rx_queue = skb_get_rx_queue(skb); - if (WARN_ON_ONCE(rx_queue == NO_QUEUE_MAPPING)) - return; - - sk->sk_rx_queue_mapping = rx_queue; + if (unlikely(READ_ONCE(sk->sk_rx_queue_mapping) != rx_queue)) + WRITE_ONCE(sk->sk_rx_queue_mapping, rx_queue); } #endif } -- cgit v1.2.3 From 09b898466792b0c387040e6df90a16e7a9f2a5d9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2021 09:48:20 -0700 Subject: net: annotate accesses to sk->sk_rx_queue_mapping sk->sk_rx_queue_mapping can be modified locklessly, add a couple of READ_ONCE()/WRITE_ONCE() to document this fact. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/sock.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index b4d3744b188a..b76be30674ef 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1925,15 +1925,19 @@ static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) static inline void sk_rx_queue_clear(struct sock *sk) { #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING - sk->sk_rx_queue_mapping = NO_QUEUE_MAPPING; + WRITE_ONCE(sk->sk_rx_queue_mapping, NO_QUEUE_MAPPING); #endif } static inline int sk_rx_queue_get(const struct sock *sk) { #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING - if (sk && sk->sk_rx_queue_mapping != NO_QUEUE_MAPPING) - return sk->sk_rx_queue_mapping; + if (sk) { + int res = READ_ONCE(sk->sk_rx_queue_mapping); + + if (res != NO_QUEUE_MAPPING) + return res; + } #endif return -1; -- cgit v1.2.3 From 790eb67374d43f66102a80821d22188b6a0bc3bb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2021 09:48:22 -0700 Subject: ipv6: guard IPV6_MINHOPCOUNT with a static key RFC 5082 IPV6_MINHOPCOUNT is rarely used on hosts. Add a static key to remove from TCP fast path useless code, and potential cache line miss to fetch tcp_inet6_sk(sk)->min_hopcount Note that once ip6_min_hopcount static key has been enabled, it stays enabled until next boot. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f2d0ecc257bb..c19bf51ded1d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1092,6 +1092,7 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, /* * socket options (ipv6_sockglue.c) */ +DECLARE_STATIC_KEY_FALSE(ip6_min_hopcount); int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); -- cgit v1.2.3 From 020e71a3cf7f50c0f2c54cf2444067b76fe6d785 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2021 09:48:24 -0700 Subject: ipv4: guard IP_MINTTL with a static key RFC 5082 IP_MINTTL option is rarely used on hosts. Add a static key to remove from TCP fast path useless code, and potential cache line miss to fetch inet_sk(sk)->min_ttl Note that once ip4_min_ttl static key has been enabled, it stays enabled until next boot. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski --- include/net/ip.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index cf229a531194..b71e88507c4a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -750,6 +751,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); +DECLARE_STATIC_KEY_FALSE(ip4_min_ttl); int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, -- cgit v1.2.3