summary | refs | log | tree | commit | diff
path: root/include/net
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2023-08-16 11:09:18 +0100
committer: David S. Miller <davem@davemloft.net> 2023-08-16 11:09:18 +0100
commit: 569dce3f8e6406fe220752baf9133d9cdc0b63a8 (patch)
tree: 6f331e6826eefbbc3a1bfc3fc24eac5434fabfad /include/net
parent: 936db833c2dd0a9ae738c8ce24fff816c9c8e381 (diff)
parent: 12af73269fd942c98ff9999a2ce0c04165aae136 (diff)
Merge branch 'inet-data-races'
Eric Dumazet says:

====================
inet: socket lock and data-races avoidance

In this series, I converted 20 bits in "struct inet_sock" and made them truly atomic.

This allows many IP_ socket options to be implemented in a lockless fashion (no need to acquire the socket lock), and fixes data-races that were showing up in various KCSAN reports.

I also took care of IP_TTL/IP_MINTTL, but left a few other options for another series.

v4: Rebased after recent mptcp changes. Added Reviewed-by: tags from Simon (thanks !)
v3: fixed patch 7, feedback from build bot about ipvs set_mcast_loop()
v2: addressed feedback from a build bot in patch 9 by removing the unused issk variable in mptcp_setsockopt_sol_ip_set_transparent(). Added Acked-by: tags from Soheil (thanks !)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net')
-rw-r--r-- include/net/inet_connection_sock.h 4
-rw-r--r-- include/net/inet_sock.h 92
-rw-r--r-- include/net/ipv6.h 3
-rw-r--r-- include/net/route.h 2
-rw-r--r-- include/net/tcp.h 2
5 files changed, 68 insertions, 35 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index be3c858a2ebb..5d2fcc137b88 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -342,9 +342,9 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
}
-static inline bool inet_csk_has_ulp(struct sock *sk)
+static inline bool inet_csk_has_ulp(const struct sock *sk)
{
- return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
+ return inet_test_bit(IS_ICSK, sk) && !!inet_csk(sk)->icsk_ulp_ops;
}
#endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 0bb32bfc6183..acbb93d7607a 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -194,13 +194,13 @@ struct rtable;
* @inet_rcv_saddr - Bound local IPv4 addr
* @inet_dport - Destination port
* @inet_num - Local port
+ * @inet_flags - various atomic flags
* @inet_saddr - Sending source
* @uc_ttl - Unicast TTL
* @inet_sport - Source port
* @inet_id - ID counter for DF pkts
* @tos - TOS
* @mc_ttl - Multicasting TTL
- * @is_icsk - is this an inet_connection_sock?
* @uc_index - Unicast outgoing device index
* @mc_index - Multicast device index
* @mc_list - Group array
@@ -218,57 +218,88 @@ struct inet_sock {
#define inet_dport sk.__sk_common.skc_dport
#define inet_num sk.__sk_common.skc_num
+ unsigned long inet_flags;
__be32 inet_saddr;
__s16 uc_ttl;
- __u16 cmsg_flags;
- struct ip_options_rcu __rcu *inet_opt;
__be16 inet_sport;
+ struct ip_options_rcu __rcu *inet_opt;
__u16 inet_id;
__u8 tos;
__u8 min_ttl;
__u8 mc_ttl;
__u8 pmtudisc;
- __u8 recverr:1,
- is_icsk:1,
- freebind:1,
- hdrincl:1,
- mc_loop:1,
- transparent:1,
- mc_all:1,
- nodefrag:1;
- __u8 bind_address_no_port:1,
- recverr_rfc4884:1,
- defer_connect:1; /* Indicates that fastopen_connect is set
- * and cookie exists so we defer connect
- * until first data frame is written
- */
__u8 rcv_tos;
__u8 convert_csum;
int uc_index;
int mc_index;
__be32 mc_addr;
- struct ip_mc_socklist __rcu *mc_list;
- struct inet_cork_full cork;
struct {
__u16 lo;
__u16 hi;
} local_port_range;
+
+ struct ip_mc_socklist __rcu *mc_list;
+ struct inet_cork_full cork;
};
#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */
#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */
+enum {
+ INET_FLAGS_PKTINFO = 0,
+ INET_FLAGS_TTL = 1,
+ INET_FLAGS_TOS = 2,
+ INET_FLAGS_RECVOPTS = 3,
+ INET_FLAGS_RETOPTS = 4,
+ INET_FLAGS_PASSSEC = 5,
+ INET_FLAGS_ORIGDSTADDR = 6,
+ INET_FLAGS_CHECKSUM = 7,
+ INET_FLAGS_RECVFRAGSIZE = 8,
+
+ INET_FLAGS_RECVERR = 9,
+ INET_FLAGS_RECVERR_RFC4884 = 10,
+ INET_FLAGS_FREEBIND = 11,
+ INET_FLAGS_HDRINCL = 12,
+ INET_FLAGS_MC_LOOP = 13,
+ INET_FLAGS_MC_ALL = 14,
+ INET_FLAGS_TRANSPARENT = 15,
+ INET_FLAGS_IS_ICSK = 16,
+ INET_FLAGS_NODEFRAG = 17,
+ INET_FLAGS_BIND_ADDRESS_NO_PORT = 18,
+ INET_FLAGS_DEFER_CONNECT = 19,
+};
+
/* cmsg flags for inet */
-#define IP_CMSG_PKTINFO BIT(0)
-#define IP_CMSG_TTL BIT(1)
-#define IP_CMSG_TOS BIT(2)
-#define IP_CMSG_RECVOPTS BIT(3)
-#define IP_CMSG_RETOPTS BIT(4)
-#define IP_CMSG_PASSSEC BIT(5)
-#define IP_CMSG_ORIGDSTADDR BIT(6)
-#define IP_CMSG_CHECKSUM BIT(7)
-#define IP_CMSG_RECVFRAGSIZE BIT(8)
+#define IP_CMSG_PKTINFO BIT(INET_FLAGS_PKTINFO)
+#define IP_CMSG_TTL BIT(INET_FLAGS_TTL)
+#define IP_CMSG_TOS BIT(INET_FLAGS_TOS)
+#define IP_CMSG_RECVOPTS BIT(INET_FLAGS_RECVOPTS)
+#define IP_CMSG_RETOPTS BIT(INET_FLAGS_RETOPTS)
+#define IP_CMSG_PASSSEC BIT(INET_FLAGS_PASSSEC)
+#define IP_CMSG_ORIGDSTADDR BIT(INET_FLAGS_ORIGDSTADDR)
+#define IP_CMSG_CHECKSUM BIT(INET_FLAGS_CHECKSUM)
+#define IP_CMSG_RECVFRAGSIZE BIT(INET_FLAGS_RECVFRAGSIZE)
+
+#define IP_CMSG_ALL (IP_CMSG_PKTINFO | IP_CMSG_TTL | \
+ IP_CMSG_TOS | IP_CMSG_RECVOPTS | \
+ IP_CMSG_RETOPTS | IP_CMSG_PASSSEC | \
+ IP_CMSG_ORIGDSTADDR | IP_CMSG_CHECKSUM | \
+ IP_CMSG_RECVFRAGSIZE)
+
+static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
+{
+ return READ_ONCE(inet->inet_flags) & IP_CMSG_ALL;
+}
+
+#define inet_test_bit(nr, sk) \
+ test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet_set_bit(nr, sk) \
+ set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet_clear_bit(nr, sk) \
+ clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet_assign_bit(nr, sk, val) \
+ assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val)
static inline bool sk_is_inet(struct sock *sk)
{
@@ -363,7 +394,7 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
{
__u8 flags = 0;
- if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl)
+ if (inet_test_bit(TRANSPARENT, sk) || inet_test_bit(HDRINCL, sk))
flags |= FLOWI_FLAG_ANYSRC;
return flags;
}
@@ -389,7 +420,8 @@ static inline bool inet_can_nonlocal_bind(struct net *net,
struct inet_sock *inet)
{
return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) ||
- inet->freebind || inet->transparent;
+ test_bit(INET_FLAGS_FREEBIND, &inet->inet_flags) ||
+ test_bit(INET_FLAGS_TRANSPARENT, &inet->inet_flags);
}
static inline bool inet_addr_valid_or_nonlocal(struct net *net,
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 22643ffc2df8..d40d8238d4c2 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -937,7 +937,8 @@ static inline bool ipv6_can_nonlocal_bind(struct net *net,
struct inet_sock *inet)
{
return net->ipv6.sysctl.ip_nonlocal_bind ||
- inet->freebind || inet->transparent;
+ test_bit(INET_FLAGS_FREEBIND, &inet->inet_flags) ||
+ test_bit(INET_FLAGS_TRANSPARENT, &inet->inet_flags);
}
/* Sysctl settings for net ipv6.auto_flowlabels */
diff --git a/include/net/route.h b/include/net/route.h
index d9ca98d2366f..51a45b1887b5 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -298,7 +298,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
{
__u8 flow_flags = 0;
- if (inet_sk(sk)->transparent)
+ if (inet_test_bit(TRANSPARENT, sk))
flow_flags |= FLOWI_FLAG_ANYSRC;
flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk),
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6d77c08d83b7..07b21d9a9620 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2031,7 +2031,7 @@ static inline bool inet_sk_transparent(const struct sock *sk)
case TCP_NEW_SYN_RECV:
return inet_rsk(inet_reqsk(sk))->no_srccheck;
}
- return inet_sk(sk)->transparent;
+ return inet_test_bit(TRANSPARENT, sk);
}
/* Determines whether this is a thin stream (which may suffer from