summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2024-10-14 17:39:38 -0700
committerJakub Kicinski <kuba@kernel.org>2024-10-14 17:39:38 -0700
commit42386ae4dedcf065a5097d6cad3d77caa84f910a (patch)
treeb6d28ee2b7994c49ec797f143840e22eabdbd45a /include
parent76d37e4fd638a582cf13bec243ebab0af3658d07 (diff)
parent79636038d37e7bd4d078238f2a3f002cab4423bc (diff)
Merge branch 'tcp-add-skb-sk-to-more-control-packets'
Eric Dumazet says: ==================== tcp: add skb->sk to more control packets Currently, TCP can set skb->sk for a variety of transmit packets. However, packets sent on behalf of a TIME_WAIT sockets do not have an attached socket. Same issue for RST packets. We want to change this, in order to increase eBPF program capabilities. This is slightly risky, because various layers could be confused by TIME_WAIT sockets showing up in skb->sk. v2: audited all sk_to_full_sk() users and addressed Martin feedback. ==================== Link: https://patch.msgid.link/20241010174817.1543642-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/bpf-cgroup.h2
-rw-r--r--include/net/inet_sock.h8
-rw-r--r--include/net/ip.h3
-rw-r--r--include/net/sock.h19
4 files changed, 28 insertions, 4 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index ce91d9b2acb9..f0f219271daf 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -209,7 +209,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
int __ret = 0; \
if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) { \
typeof(sk) __sk = sk_to_full_sk(sk); \
- if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) && \
+ if (__sk && __sk == skb_to_full_sk(skb) && \
cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS)) \
__ret = __cgroup_bpf_run_filter_skb(__sk, skb, \
CGROUP_INET_EGRESS); \
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index f01dd273bea6..56d8bc5593d3 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -321,8 +321,10 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
static inline struct sock *sk_to_full_sk(struct sock *sk)
{
#ifdef CONFIG_INET
- if (sk && sk->sk_state == TCP_NEW_SYN_RECV)
+ if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV)
sk = inet_reqsk(sk)->rsk_listener;
+ if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT)
+ sk = NULL;
#endif
return sk;
}
@@ -331,8 +333,10 @@ static inline struct sock *sk_to_full_sk(struct sock *sk)
static inline const struct sock *sk_const_to_full_sk(const struct sock *sk)
{
#ifdef CONFIG_INET
- if (sk && sk->sk_state == TCP_NEW_SYN_RECV)
+ if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV)
sk = ((const struct request_sock *)sk)->rsk_listener;
+ if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT)
+ sk = NULL;
#endif
return sk;
}
diff --git a/include/net/ip.h b/include/net/ip.h
index bab084df1567..4be0a6a603b2 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -288,7 +288,8 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg)
return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0;
}
-void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
+void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk,
+ struct sk_buff *skb,
const struct ip_options *sopt,
__be32 daddr, __be32 saddr,
const struct ip_reply_arg *arg,
diff --git a/include/net/sock.h b/include/net/sock.h
index 6da420ab1ee1..bf7fa3db10ae 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1760,6 +1760,15 @@ void sock_efree(struct sk_buff *skb);
#ifdef CONFIG_INET
void sock_edemux(struct sk_buff *skb);
void sock_pfree(struct sk_buff *skb);
+
+static inline void skb_set_owner_edemux(struct sk_buff *skb, struct sock *sk)
+{
+ skb_orphan(skb);
+ if (refcount_inc_not_zero(&sk->sk_refcnt)) {
+ skb->sk = sk;
+ skb->destructor = sock_edemux;
+ }
+}
#else
#define sock_edemux sock_efree
#endif
@@ -2802,6 +2811,16 @@ static inline bool sk_listener(const struct sock *sk)
return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
}
+/* This helper checks if a socket is a LISTEN or NEW_SYN_RECV or TIME_WAIT
+ * TCP SYNACK messages can be attached to LISTEN or NEW_SYN_RECV (depending on SYNCOOKIE)
+ * TCP RST and ACK can be attached to TIME_WAIT.
+ */
+static inline bool sk_listener_or_tw(const struct sock *sk)
+{
+ return (1 << READ_ONCE(sk->sk_state)) &
+ (TCPF_LISTEN | TCPF_NEW_SYN_RECV | TCPF_TIME_WAIT);
+}
+
void sock_enable_timestamp(struct sock *sk, enum sock_flags flag);
int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level,
int type);