summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_minisocks.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2012-08-31 20:43:37 -0400
committerDavid S. Miller <davem@davemloft.net>2012-08-31 20:43:37 -0400
commit1bed966cc3bd4042110129f0fc51aeeb59c5b200 (patch)
tree0d5b9181b840c9b6b08b1452004f0746e8eebab8 /net/ipv4/tcp_minisocks.c
parent2a35cfa591ac63f17815c2d9432b799e37527980 (diff)
parent168a8f58059a22feb9e9a2dcc1b8053dbbbc12ef (diff)
Merge branch 'tcp_fastopen_server'
Jerry Chu says: ==================== This patch series provides the server (passive open) side code for TCP Fast Open. Together with the earlier client side patches it completes the TCP Fast Open implementation. The server side Fast Open code accepts data carried in the SYN packet with a valid Fast Open cookie, and passes it to the application right away, allowing application to send back response data, all before TCP's 3-way handshake finishes. A simple cookie scheme together with capping the number of outstanding TFO requests (still in TCP_SYN_RECV state) to a limit per listener forms the main line of defense against spoofed SYN attacks. For more details about TCP Fast Open see our IETF internet draft at http://www.ietf.org/id/draft-ietf-tcpm-fastopen-01.txt and a research paper at http://conferences.sigcomm.org/co-next/2011/papers/1569470463.pdf A prototype implementation was first developed by Sivasankar Radhakrishnan (sivasankar@cs.ucsd.edu). A patch based on an older version of Linux kernel has been undergoing internal tests at Google for the past few months. Jerry Chu (3): tcp: TCP Fast Open Server - header & support functions tcp: TCP Fast Open Server - support TFO listeners tcp: TCP Fast Open Server - main code path ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_minisocks.c')
-rw-r--r--net/ipv4/tcp_minisocks.c61
1 files changed, 50 insertions, 11 deletions
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6ff7f10dce9d..e965319d610b 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -507,6 +507,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss;
TCP_ECN_openreq_child(newtp, req);
+ newtp->fastopen_rsk = NULL;
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
}
@@ -515,13 +516,18 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
EXPORT_SYMBOL(tcp_create_openreq_child);
/*
- * Process an incoming packet for SYN_RECV sockets represented
- * as a request_sock.
+ * Process an incoming packet for SYN_RECV sockets represented as a
+ * request_sock. Normally sk is the listener socket but for TFO it
+ * points to the child socket.
+ *
+ * XXX (TFO) - The current impl contains a special check for ack
+ * validation and inside tcp_v4_reqsk_send_ack(). Can we do better?
*/
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct request_sock **prev)
+ struct request_sock **prev,
+ bool fastopen)
{
struct tcp_options_received tmp_opt;
const u8 *hash_location;
@@ -530,6 +536,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
bool paws_reject = false;
+ BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN));
+
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(struct tcphdr)>>2)) {
tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
@@ -565,6 +573,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
*
* Enforce "SYN-ACK" according to figure 8, figure 6
* of RFC793, fixed by RFC1122.
+ *
+ * Note that even if there is new data in the SYN packet
+ * they will be thrown away too.
*/
req->rsk_ops->rtx_syn_ack(sk, req, NULL);
return NULL;
@@ -622,9 +633,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
* sent (the segment carries an unacceptable ACK) ...
* a reset is sent."
*
- * Invalid ACK: reset will be sent by listening socket
+ * Invalid ACK: reset will be sent by listening socket.
+ * Note that the ACK validity check for a Fast Open socket is done
+ * elsewhere and is checked directly against the child socket rather
+ * than req because user data may have been sent out.
*/
- if ((flg & TCP_FLAG_ACK) &&
+ if ((flg & TCP_FLAG_ACK) && !fastopen &&
(TCP_SKB_CB(skb)->ack_seq !=
tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk))))
return sk;
@@ -637,7 +651,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* RFC793: "first check sequence number". */
if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
- tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
+ tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rcv_wnd)) {
/* Out of window: send ACK and drop. */
if (!(flg & TCP_FLAG_RST))
req->rsk_ops->send_ack(sk, skb, req);
@@ -648,7 +662,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* In sequence, PAWS is OK. */
- if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
+ if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
req->ts_recent = tmp_opt.rcv_tsval;
if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
@@ -667,10 +681,19 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* ACK sequence verified above, just make sure ACK is
* set. If ACK not set, just silently drop the packet.
+ *
+ * XXX (TFO) - if we ever allow "data after SYN", the
+ * following check needs to be removed.
*/
if (!(flg & TCP_FLAG_ACK))
return NULL;
+ /* For Fast Open no more processing is needed (sk is the
+ * child socket).
+ */
+ if (fastopen)
+ return sk;
+
/* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
@@ -706,11 +729,21 @@ listen_overflow:
}
embryonic_reset:
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
- if (!(flg & TCP_FLAG_RST))
+ if (!(flg & TCP_FLAG_RST)) {
+ /* Received a bad SYN pkt - for TFO We try not to reset
+ * the local connection unless it's really necessary to
+ * avoid becoming vulnerable to outside attack aiming at
+ * resetting legit local connections.
+ */
req->rsk_ops->send_reset(sk, skb);
-
- inet_csk_reqsk_queue_drop(sk, req, prev);
+ } else if (fastopen) { /* received a valid RST pkt */
+ reqsk_fastopen_remove(sk, req, true);
+ tcp_reset(sk);
+ }
+ if (!fastopen) {
+ inet_csk_reqsk_queue_drop(sk, req, prev);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
+ }
return NULL;
}
EXPORT_SYMBOL(tcp_check_req);
@@ -719,6 +752,12 @@ EXPORT_SYMBOL(tcp_check_req);
* Queue segment on the new socket if the new socket is active,
* otherwise we just shortcircuit this and continue with
* the new socket.
+ *
+ * For the vast majority of cases child->sk_state will be TCP_SYN_RECV
+ * when entering. But other states are possible due to a race condition
+ * where after __inet_lookup_established() fails but before the listener
+ * locked is obtained, other packets cause the same connection to
+ * be created.
*/
int tcp_child_process(struct sock *parent, struct sock *child,