From b18afb6f42292a9154102fed7265ae747bbfbc57 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 15 Jan 2024 12:55:09 -0800 Subject: tcp: Move tcp_ns_to_ts() to tcp.h We will support arbitrary SYN Cookie with BPF. When BPF prog validates ACK and kfunc allocates a reqsk, we need to call tcp_ns_to_ts() to calculate an offset of TSval for later use: time t0 : Send SYN+ACK -> tsval = Initial TSval (Random Number) t1 : Recv ACK of 3WHS -> tsoff = TSecr - tcp_ns_to_ts(usec_ts_ok, tcp_clock_ns()) = Initial TSval - t1 t2 : Send ACK -> tsval = t2 + tsoff = Initial TSval + (t2 - t1) = Initial TSval + Time Delta (x) (x) Note that the time delta does not include the initial RTT from t0 to t1. Let's move tcp_ns_to_ts() to tcp.h. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240115205514.68364-2-kuniyu@amazon.com Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index dd78a1181031..114000e71a46 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -577,6 +577,15 @@ static inline u32 tcp_cookie_time(void) return val; } +/* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */ +static inline u64 tcp_ns_to_ts(bool usec_ts, u64 val) +{ + if (usec_ts) + return div_u64(val, NSEC_PER_USEC); + + return div_u64(val, NSEC_PER_MSEC); +} + u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, u16 *mssp); __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss); -- cgit v1.2.3 From 95e752b5299fa8c90099f7bc2aa1ee3e2e2c95ab Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 15 Jan 2024 12:55:10 -0800 Subject: tcp: Move skb_steal_sock() to request_sock.h We will support arbitrary SYN Cookie with BPF. If BPF prog validates ACK and kfunc allocates a reqsk, it will be carried to TCP stack as skb->sk with req->syncookie 1. In skb_steal_sock(), we need to check inet_reqsk(sk)->syncookie to see if the reqsk is created by kfunc. However, inet_reqsk() is not available in sock.h. Let's move skb_steal_sock() to request_sock.h. While at it, we refactor skb_steal_sock() so it returns early if skb->sk is NULL to minimise the following patch. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240115205514.68364-3-kuniyu@amazon.com Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/net/request_sock.h | 28 ++++++++++++++++++++++++++++ include/net/sock.h | 25 ------------------------- 2 files changed, 28 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 144c39db9898..26c630c40abb 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -83,6 +83,34 @@ static inline struct sock *req_to_sk(struct request_sock *req) return (struct sock *)req; } +/** + * skb_steal_sock - steal a socket from an sk_buff + * @skb: sk_buff to steal the socket from + * @refcounted: is set to true if the socket is reference-counted + * @prefetched: is set to true if the socket was assigned from bpf + */ +static inline struct sock *skb_steal_sock(struct sk_buff *skb, + bool *refcounted, bool *prefetched) +{ + struct sock *sk = skb->sk; + + if (!sk) { + *prefetched = false; + *refcounted = false; + return NULL; + } + + *prefetched = skb_sk_is_prefetched(skb); + if (*prefetched) + *refcounted = sk_is_refcounted(sk); + else + *refcounted = true; + + skb->destructor = NULL; + skb->sk = NULL; + return sk; +} + static inline struct request_sock * reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, bool attach_listener) diff --git a/include/net/sock.h b/include/net/sock.h index a7f815c7cfdf..32a399fdcbb5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2814,31 +2814,6 @@ sk_is_refcounted(struct sock *sk) return !sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE); } -/** - * skb_steal_sock - steal a socket from an sk_buff - * @skb: sk_buff to steal the socket from - * @refcounted: is set to true if the socket is reference-counted - * @prefetched: is set to true if the socket was assigned from bpf - */ -static inline struct sock * -skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched) -{ - if (skb->sk) { - struct sock *sk = skb->sk; - - *refcounted = true; - *prefetched = skb_sk_is_prefetched(skb); - if (*prefetched) - *refcounted = sk_is_refcounted(sk); - skb->destructor = NULL; - skb->sk = NULL; - return sk; - } - *prefetched = false; - *refcounted = false; - return NULL; -} - /* Checks if this SKB belongs to an HW offloaded socket * and whether any SW fallbacks are required based on dev. * Check decrypted mark in case skb_orphan() cleared socket. -- cgit v1.2.3 From 8b5ac68fb5ee416537c1214cbacf0ddc4293cce9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 15 Jan 2024 12:55:11 -0800 Subject: bpf: tcp: Handle BPF SYN Cookie in skb_steal_sock(). We will support arbitrary SYN Cookie with BPF. If BPF prog validates ACK and kfunc allocates a reqsk, it will be carried to TCP stack as skb->sk with req->syncookie 1. Also, the reqsk has its listener as req->rsk_listener with no refcnt taken. When the TCP stack looks up a socket from the skb, we steal inet_reqsk(skb->sk)->rsk_listener in skb_steal_sock() so that the skb will be processed in cookie_v[46]_check() with the listener. Note that we do not clear skb->sk and skb->destructor so that we can carry the reqsk to cookie_v[46]_check(). Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240115205514.68364-4-kuniyu@amazon.com Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/net/request_sock.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 26c630c40abb..8839133d6f6b 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -101,10 +101,21 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb, } *prefetched = skb_sk_is_prefetched(skb); - if (*prefetched) + if (*prefetched) { +#if IS_ENABLED(CONFIG_SYN_COOKIES) + if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) { + struct request_sock *req = inet_reqsk(sk); + + *refcounted = false; + sk = req->rsk_listener; + req->rsk_listener = NULL; + return sk; + } +#endif *refcounted = sk_is_refcounted(sk); - else + } else { *refcounted = true; + } skb->destructor = NULL; skb->sk = NULL; -- cgit v1.2.3 From 695751e31a63efd2bbe6779873adf1e4deb00cd5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 15 Jan 2024 12:55:12 -0800 Subject: bpf: tcp: Handle BPF SYN Cookie in cookie_v[46]_check(). We will support arbitrary SYN Cookie with BPF in the following patch. If BPF prog validates ACK and kfunc allocates a reqsk, it will be carried to cookie_[46]_check() as skb->sk. If skb->sk is not NULL, we call cookie_bpf_check(). Then, we clear skb->sk and skb->destructor, which are needed not to hold refcnt for reqsk and the listener. See the following patch for details. After that, we finish initialisation for the remaining fields with cookie_tcp_reqsk_init(). Note that the server side WScale is set only for non-BPF SYN Cookie. Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240115205514.68364-5-kuniyu@amazon.com Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 114000e71a46..dfe99a084a71 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -599,6 +599,26 @@ static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry * dst_feature(dst, RTAX_FEATURE_ECN); } +#if IS_ENABLED(CONFIG_BPF) +static inline bool cookie_bpf_ok(struct sk_buff *skb) +{ + return skb->sk; +} + +struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb); +#else +static inline bool cookie_bpf_ok(struct sk_buff *skb) +{ + return false; +} + +static inline struct request_sock *cookie_bpf_check(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + return NULL; +} +#endif + /* From net/ipv6/syncookies.c */ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th); struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); -- cgit v1.2.3 From e472f88891abbc535a5e16a68a104073985f6061 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 15 Jan 2024 12:55:13 -0800 Subject: bpf: tcp: Support arbitrary SYN Cookie. This patch adds a new kfunc available at TC hook to support arbitrary SYN Cookie. The basic usage is as follows: struct bpf_tcp_req_attrs attrs = { .mss = mss, .wscale_ok = wscale_ok, .rcv_wscale = rcv_wscale, /* Server's WScale < 15 */ .snd_wscale = snd_wscale, /* Client's WScale < 15 */ .tstamp_ok = tstamp_ok, .rcv_tsval = tsval, .rcv_tsecr = tsecr, /* Server's Initial TSval */ .usec_ts_ok = usec_ts_ok, .sack_ok = sack_ok, .ecn_ok = ecn_ok, } skc = bpf_skc_lookup_tcp(...); sk = (struct sock *)bpf_skc_to_tcp_sock(skc); bpf_sk_assign_tcp_reqsk(skb, sk, attrs, sizeof(attrs)); bpf_sk_release(skc); bpf_sk_assign_tcp_reqsk() takes skb, a listener sk, and struct bpf_tcp_req_attrs and allocates reqsk and configures it. Then, bpf_sk_assign_tcp_reqsk() links reqsk with skb and the listener. The notable thing here is that we do not hold refcnt for both reqsk and listener. To differentiate that, we mark reqsk->syncookie, which is only used in TX for now. So, if reqsk->syncookie is 1 in RX, it means that the reqsk is allocated by kfunc. When skb is freed, sock_pfree() checks if reqsk->syncookie is 1, and in that case, we set NULL to reqsk->rsk_listener before calling reqsk_free() as reqsk does not hold a refcnt of the listener. When the TCP stack looks up a socket from the skb, we steal the listener from the reqsk in skb_steal_sock() and create a full sk in cookie_v[46]_check(). The refcnt of reqsk will finally be set to 1 in tcp_get_cookie_sock() after creating a full sk. Note that we can extend struct bpf_tcp_req_attrs in the future when we add a new attribute that is determined in 3WHS. Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240115205514.68364-6-kuniyu@amazon.com Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index dfe99a084a71..451dc1373970 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -600,6 +600,20 @@ static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry * } #if IS_ENABLED(CONFIG_BPF) +struct bpf_tcp_req_attrs { + u32 rcv_tsval; + u32 rcv_tsecr; + u16 mss; + u8 rcv_wscale; + u8 snd_wscale; + u8 ecn_ok; + u8 wscale_ok; + u8 sack_ok; + u8 tstamp_ok; + u8 usec_ts_ok; + u8 reserved[3]; +}; + static inline bool cookie_bpf_ok(struct sk_buff *skb) { return skb->sk; -- cgit v1.2.3