diff options
| author | David S. Miller <davem@davemloft.net> | 2023-08-16 11:09:18 +0100 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2023-08-16 11:09:18 +0100 |
| commit | 569dce3f8e6406fe220752baf9133d9cdc0b63a8 (patch) | |
| tree | 6f331e6826eefbbc3a1bfc3fc24eac5434fabfad /include/net | |
| parent | 936db833c2dd0a9ae738c8ce24fff816c9c8e381 (diff) | |
| parent | 12af73269fd942c98ff9999a2ce0c04165aae136 (diff) | |
Merge branch 'inet-data-races'
Eric Dumazet says:
====================
inet: socket lock and data-races avoidance
In this series, I converted 20 bits in "struct inet_sock" and made
them truly atomic.
This allows to implement many IP_ socket options in a lockless
fashion (no need to acquire socket lock), and fixes data-races
that were showing up in various KCSAN reports.
I also took care of IP_TTL/IP_MINTTL, but left few other options
for another series.
v4: Rebased after recent mptcp changes.
Added Reviewed-by: tags from Simon (thanks !)
v3: fixed patch 7, feedback from build bot about ipvs set_mcast_loop()
v2: addressed a feedback from a build bot in patch 9 by removing
unused issk variable in mptcp_setsockopt_sol_ip_set_transparent()
Added Acked-by: tags from Soheil (thanks !)
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net')
| -rw-r--r-- | include/net/inet_connection_sock.h | 4 | ||||
| -rw-r--r-- | include/net/inet_sock.h | 92 | ||||
| -rw-r--r-- | include/net/ipv6.h | 3 | ||||
| -rw-r--r-- | include/net/route.h | 2 | ||||
| -rw-r--r-- | include/net/tcp.h | 2 |
5 files changed, 68 insertions, 35 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index be3c858a2ebb..5d2fcc137b88 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -342,9 +342,9 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk) return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; } -static inline bool inet_csk_has_ulp(struct sock *sk) +static inline bool inet_csk_has_ulp(const struct sock *sk) { - return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops; + return inet_test_bit(IS_ICSK, sk) && !!inet_csk(sk)->icsk_ulp_ops; } #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 0bb32bfc6183..acbb93d7607a 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -194,13 +194,13 @@ struct rtable; * @inet_rcv_saddr - Bound local IPv4 addr * @inet_dport - Destination port * @inet_num - Local port + * @inet_flags - various atomic flags * @inet_saddr - Sending source * @uc_ttl - Unicast TTL * @inet_sport - Source port * @inet_id - ID counter for DF pkts * @tos - TOS * @mc_ttl - Multicasting TTL - * @is_icsk - is this an inet_connection_sock? * @uc_index - Unicast outgoing device index * @mc_index - Multicast device index * @mc_list - Group array @@ -218,57 +218,88 @@ struct inet_sock { #define inet_dport sk.__sk_common.skc_dport #define inet_num sk.__sk_common.skc_num + unsigned long inet_flags; __be32 inet_saddr; __s16 uc_ttl; - __u16 cmsg_flags; - struct ip_options_rcu __rcu *inet_opt; __be16 inet_sport; + struct ip_options_rcu __rcu *inet_opt; __u16 inet_id; __u8 tos; __u8 min_ttl; __u8 mc_ttl; __u8 pmtudisc; - __u8 recverr:1, - is_icsk:1, - freebind:1, - hdrincl:1, - mc_loop:1, - transparent:1, - mc_all:1, - nodefrag:1; - __u8 bind_address_no_port:1, - recverr_rfc4884:1, - defer_connect:1; /* Indicates that fastopen_connect is set - * and cookie exists so we defer connect - * until first data frame is written - */ __u8 rcv_tos; __u8 convert_csum; int uc_index; int mc_index; __be32 mc_addr; - struct ip_mc_socklist __rcu *mc_list; - struct inet_cork_full cork; struct { __u16 lo; __u16 hi; } local_port_range; + + struct ip_mc_socklist __rcu *mc_list; + struct inet_cork_full cork; }; #define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ #define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */ +enum { + INET_FLAGS_PKTINFO = 0, + INET_FLAGS_TTL = 1, + INET_FLAGS_TOS = 2, + INET_FLAGS_RECVOPTS = 3, + INET_FLAGS_RETOPTS = 4, + INET_FLAGS_PASSSEC = 5, + INET_FLAGS_ORIGDSTADDR = 6, + INET_FLAGS_CHECKSUM = 7, + INET_FLAGS_RECVFRAGSIZE = 8, + + INET_FLAGS_RECVERR = 9, + INET_FLAGS_RECVERR_RFC4884 = 10, + INET_FLAGS_FREEBIND = 11, + INET_FLAGS_HDRINCL = 12, + INET_FLAGS_MC_LOOP = 13, + INET_FLAGS_MC_ALL = 14, + INET_FLAGS_TRANSPARENT = 15, + INET_FLAGS_IS_ICSK = 16, + INET_FLAGS_NODEFRAG = 17, + INET_FLAGS_BIND_ADDRESS_NO_PORT = 18, + INET_FLAGS_DEFER_CONNECT = 19, +}; + /* cmsg flags for inet */ -#define IP_CMSG_PKTINFO BIT(0) -#define IP_CMSG_TTL BIT(1) -#define IP_CMSG_TOS BIT(2) -#define IP_CMSG_RECVOPTS BIT(3) -#define IP_CMSG_RETOPTS BIT(4) -#define IP_CMSG_PASSSEC BIT(5) -#define IP_CMSG_ORIGDSTADDR BIT(6) -#define IP_CMSG_CHECKSUM BIT(7) -#define IP_CMSG_RECVFRAGSIZE BIT(8) +#define IP_CMSG_PKTINFO BIT(INET_FLAGS_PKTINFO) +#define IP_CMSG_TTL BIT(INET_FLAGS_TTL) +#define IP_CMSG_TOS BIT(INET_FLAGS_TOS) +#define IP_CMSG_RECVOPTS BIT(INET_FLAGS_RECVOPTS) +#define IP_CMSG_RETOPTS BIT(INET_FLAGS_RETOPTS) +#define IP_CMSG_PASSSEC BIT(INET_FLAGS_PASSSEC) +#define IP_CMSG_ORIGDSTADDR BIT(INET_FLAGS_ORIGDSTADDR) +#define IP_CMSG_CHECKSUM BIT(INET_FLAGS_CHECKSUM) +#define IP_CMSG_RECVFRAGSIZE BIT(INET_FLAGS_RECVFRAGSIZE) + +#define IP_CMSG_ALL (IP_CMSG_PKTINFO | IP_CMSG_TTL | \ + IP_CMSG_TOS | IP_CMSG_RECVOPTS | \ + IP_CMSG_RETOPTS | IP_CMSG_PASSSEC | \ + IP_CMSG_ORIGDSTADDR | IP_CMSG_CHECKSUM | \ + IP_CMSG_RECVFRAGSIZE) + +static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet) +{ + return READ_ONCE(inet->inet_flags) & IP_CMSG_ALL; +} + +#define inet_test_bit(nr, sk) \ + test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet_set_bit(nr, sk) \ + set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet_clear_bit(nr, sk) \ + clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet_assign_bit(nr, sk, val) \ + assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val) static inline bool sk_is_inet(struct sock *sk) { @@ -363,7 +394,7 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk) { __u8 flags = 0; - if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl) + if (inet_test_bit(TRANSPARENT, sk) || inet_test_bit(HDRINCL, sk)) flags |= FLOWI_FLAG_ANYSRC; return flags; } @@ -389,7 +420,8 @@ static inline bool inet_can_nonlocal_bind(struct net *net, struct inet_sock *inet) { return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) || - inet->freebind || inet->transparent; + test_bit(INET_FLAGS_FREEBIND, &inet->inet_flags) || + test_bit(INET_FLAGS_TRANSPARENT, &inet->inet_flags); } static inline bool inet_addr_valid_or_nonlocal(struct net *net, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 22643ffc2df8..d40d8238d4c2 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -937,7 +937,8 @@ static inline bool ipv6_can_nonlocal_bind(struct net *net, struct inet_sock *inet) { return net->ipv6.sysctl.ip_nonlocal_bind || - inet->freebind || inet->transparent; + test_bit(INET_FLAGS_FREEBIND, &inet->inet_flags) || + test_bit(INET_FLAGS_TRANSPARENT, &inet->inet_flags); } /* Sysctl settings for net ipv6.auto_flowlabels */ diff --git a/include/net/route.h b/include/net/route.h index d9ca98d2366f..51a45b1887b5 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -298,7 +298,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, { __u8 flow_flags = 0; - if (inet_sk(sk)->transparent) + if (inet_test_bit(TRANSPARENT, sk)) flow_flags |= FLOWI_FLAG_ANYSRC; flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk), diff --git a/include/net/tcp.h b/include/net/tcp.h index 6d77c08d83b7..07b21d9a9620 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2031,7 +2031,7 @@ static inline bool inet_sk_transparent(const struct sock *sk) case TCP_NEW_SYN_RECV: return inet_rsk(inet_reqsk(sk))->no_srccheck; } - return inet_sk(sk)->transparent; + return inet_test_bit(TRANSPARENT, sk); } /* Determines whether this is a thin stream (which may suffer from |
