From 3c82a21f4320c8d54cf6456b27c8d49e5ffb722e Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Wed, 7 Nov 2018 15:36:02 +0000 Subject: net: allow binding socket in a VRF when there's an unbound socket Change the inet socket lookup to avoid packets arriving on a device enslaved to an l3mdev from matching unbound sockets by removing the wildcard for non sk_bound_dev_if and instead relying on check against the secondary device index, which will be 0 when the input device is not enslaved to an l3mdev and so match against an unbound socket and not match when the input device is enslaved. Change the socket binding to take the l3mdev into account to allow an unbound socket to not conflict sockets bound to an l3mdev given the datapath isolation now guaranteed. Signed-off-by: Robert Shearman Signed-off-by: Mike Manning Reviewed-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 5 ++--- include/net/inet_hashtables.h | 13 ++++++------- include/net/inet_sock.h | 13 +++++++++++++ 3 files changed, 21 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 6e91e38a31da..9db98af46985 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -115,9 +115,8 @@ int inet6_hash(struct sock *sk); ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \ ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \ - (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ + (((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #endif /* _INET6_HASHTABLES_H */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 9141e95529e7..4ae060b4bac2 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -79,6 +79,7 @@ struct inet_ehash_bucket { struct inet_bind_bucket { possible_net_t ib_net; + int l3mdev; unsigned short port; signed char fastreuse; signed char fastreuseport; @@ -191,7 +192,7 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) struct inet_bind_bucket * inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, - const unsigned short snum); + const unsigned short snum, int l3mdev); void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb); @@ -282,9 +283,8 @@ static inline struct sock *inet_lookup_listener(struct net *net, #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_addrpair == (__cookie)) && \ - (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ + (((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ @@ -294,9 +294,8 @@ static inline struct sock *inet_lookup_listener(struct net *net, (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_daddr == (__saddr)) && \ ((__sk)->sk_rcv_saddr == (__daddr)) && \ - (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ + (((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #endif /* 64-bit arch */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index a80fd0ac4563..ed3f723af00b 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -130,6 +130,19 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, return sk->sk_bound_dev_if; } +static inline int inet_sk_bound_l3mdev(const struct sock *sk) +{ +#ifdef CONFIG_NET_L3_MASTER_DEV + struct net *net = sock_net(sk); + + if (!net->ipv4.sysctl_tcp_l3mdev_accept) + return l3mdev_master_ifindex_by_index(net, + sk->sk_bound_dev_if); +#endif + + return 0; +} + struct inet_cork { unsigned int flags; __be32 addr; -- cgit v1.2.3 From e78190581aff7c96fbd6324aa633170934650b65 Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Wed, 7 Nov 2018 15:36:03 +0000 Subject: net: ensure unbound stream socket to be chosen when not in a VRF The commit a04a480d4392 ("net: Require exact match for TCP socket lookups if dif is l3mdev") only ensures that the correct socket is selected for packets in a VRF. However, there is no guarantee that the unbound socket will be selected for packets when not in a VRF. By checking for a device match in compute_score() also for the case when there is no bound device and attaching a score to this, the unbound socket is selected. And if a failure is returned when there is no device match, this ensures that bound sockets are never selected, even if there is no unbound socket. Signed-off-by: Mike Manning Reviewed-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 11 +++++++++++ include/net/inet_sock.h | 8 ++++++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 4ae060b4bac2..0ce460e93dc4 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -189,6 +189,17 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) hashinfo->ehash_locks = NULL; } +static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, + int dif, int sdif) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept, + bound_dev_if, dif, sdif); +#else + return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +#endif +} + struct inet_bind_bucket * inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index ed3f723af00b..e8eef85006aa 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -143,6 +143,14 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk) return 0; } +static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if, + int dif, int sdif) +{ + if (!bound_dev_if) + return !sdif || l3mdev_accept; + return bound_dev_if == dif || bound_dev_if == sdif; +} + struct inet_cork { unsigned int flags; __be32 addr; -- cgit v1.2.3 From 6da5b0f027a825df2aebc1927a27bda185dc03d4 Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Wed, 7 Nov 2018 15:36:04 +0000 Subject: net: ensure unbound datagram socket to be chosen when not in a VRF Ensure an unbound datagram skt is chosen when not in a VRF. The check for a device match in compute_score() for UDP must be performed when there is no device match. For this, a failure is returned when there is no device match. This ensures that bound sockets are never selected, even if there is no unbound socket. Allow IPv6 packets to be sent over a datagram skt bound to a VRF. These packets are currently blocked, as flowi6_oif was set to that of the master vrf device, and the ipi6_ifindex is that of the slave device. Allow these packets to be sent by checking the device with ipi6_ifindex has the same L3 scope as that of the bound device of the skt, which is the master vrf device. Note that this check always succeeds if the skt is unbound. Even though the right datagram skt is now selected by compute_score(), a different skt is being returned that is bound to the wrong vrf. The difference between these and stream sockets is the handling of the skt option for SO_REUSEPORT. While the handling when adding a skt for reuse correctly checks that the bound device of the skt is a match, the skts in the hashslot are already incorrect. So for the same hash, a skt for the wrong vrf may be selected for the required port. The root cause is that the skt is immediately placed into a slot when it is created, but when the skt is then bound using SO_BINDTODEVICE, it remains in the same slot. The solution is to move the skt to the correct slot by forcing a rehash. Signed-off-by: Mike Manning Reviewed-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller --- include/net/udp.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index 9e82cb391dea..a496e441645e 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -252,6 +252,17 @@ static inline int udp_rqueue_get(struct sock *sk) return sk_rmem_alloc_get(sk) - READ_ONCE(udp_sk(sk)->forward_deficit); } +static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, + int dif, int sdif) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept, + bound_dev_if, dif, sdif); +#else + return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +#endif +} + /* net/ipv4/udp.c */ void udp_destruct_sock(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); -- cgit v1.2.3 From 6897445fb194c8ad046df4a13e1ee9f080a5a21e Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Wed, 7 Nov 2018 15:36:05 +0000 Subject: net: provide a sysctl raw_l3mdev_accept for raw socket lookup with VRFs Add a sysctl raw_l3mdev_accept to control raw socket lookup in a manner similar to use of tcp_l3mdev_accept for stream and of udp_l3mdev_accept for datagram sockets. Have this default to enabled for reasons of backwards compatibility. This is so as to specify the output device with cmsg and IP_PKTINFO, but using a socket not bound to the corresponding VRF. This allows e.g. older ping implementations to be run with specifying the device but without executing it in the VRF. If the option is disabled, packets received in a VRF context are only handled by a raw socket bound to the VRF, and correspondingly packets in the default VRF are only handled by a socket not bound to any VRF. Signed-off-by: Mike Manning Reviewed-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 3 +++ include/net/raw.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e47503b4e4d1..104a6669e344 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -103,6 +103,9 @@ struct netns_ipv4 { /* Shall we try to damage output packets if routing dev changes? */ int sysctl_ip_dynaddr; int sysctl_ip_early_demux; +#ifdef CONFIG_NET_L3_MASTER_DEV + int sysctl_raw_l3mdev_accept; +#endif int sysctl_tcp_early_demux; int sysctl_udp_early_demux; diff --git a/include/net/raw.h b/include/net/raw.h index 9c9fa98a91a4..20ebf0b3dfa8 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -61,6 +61,7 @@ void raw_seq_stop(struct seq_file *seq, void *v); int raw_hash_sk(struct sock *sk); void raw_unhash_sk(struct sock *sk); +void raw_init(void); struct raw_sock { /* inet_sock has to be the first member */ -- cgit v1.2.3 From 7055420fb6a1cb754a64be99ddcabd45bd902d99 Mon Sep 17 00:00:00 2001 From: Duncan Eastoe Date: Wed, 7 Nov 2018 15:36:06 +0000 Subject: net: fix raw socket lookup device bind matching with VRFs When there exist a pair of raw sockets one unbound and one bound to a VRF but equal in all other respects, when a packet is received in the VRF context, __raw_v4_lookup() matches on both sockets. This results in the packet being delivered over both sockets, instead of only the raw socket bound to the VRF. The bound device checks in __raw_v4_lookup() are replaced with a call to raw_sk_bound_dev_eq() which correctly handles whether the packet should be delivered over the unbound socket in such cases. In __raw_v6_lookup() the match on the device binding of the socket is similarly updated to use raw_sk_bound_dev_eq() which matches the handling in __raw_v4_lookup(). Importantly raw_sk_bound_dev_eq() takes the raw_l3mdev_accept sysctl into account. Signed-off-by: Duncan Eastoe Signed-off-by: Mike Manning Reviewed-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller --- include/net/raw.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/raw.h b/include/net/raw.h index 20ebf0b3dfa8..821ff4887f77 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -17,7 +17,7 @@ #ifndef _RAW_H #define _RAW_H - +#include #include #include @@ -75,4 +75,15 @@ static inline struct raw_sock *raw_sk(const struct sock *sk) return (struct raw_sock *)sk; } +static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, + int dif, int sdif) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept, + bound_dev_if, dif, sdif); +#else + return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +#endif +} + #endif /* _RAW_H */ -- cgit v1.2.3