diff options
Diffstat (limited to 'net/tcp.c')
-rw-r--r-- | net/tcp.c | 1297 |
1 files changed, 920 insertions, 377 deletions
diff --git a/net/tcp.c b/net/tcp.c index b0cc8a1fe3e..2635727f47d 100644 --- a/net/tcp.c +++ b/net/tcp.c @@ -25,33 +25,29 @@ #include <net/tcp.h> /* - * TCP sliding window control used by us to request re-TX + * The start sequence number increment for the two sequently created + * connections within the same timer tick. This number must be: + * - prime (to increase the time before the same number will be generated) + * - larger than typical MTU (to avoid similar numbers for two sequently + * created connections) */ -static struct tcp_sack_v tcp_lost; +#define TCP_START_SEQ_INC 2153 /* just large prime number */ -/* TCP option timestamp */ -static u32 loc_timestamp; -static u32 rmt_timestamp; +#define TCP_SEND_RETRY 3 +#define TCP_SEND_TIMEOUT 2000UL +#define TCP_RX_INACTIVE_TIMEOUT 30000UL +#if PKTBUFSRX != 0 + #define TCP_RCV_WND_SIZE (PKTBUFSRX * TCP_MSS) +#else + #define TCP_RCV_WND_SIZE (4 * TCP_MSS) +#endif -static u32 tcp_seq_init; -static u32 tcp_ack_edge; +#define TCP_PACKET_OK 0 +#define TCP_PACKET_DROP 1 -static int tcp_activity_count; +static struct tcp_stream tcp_stream; -/* - * Search for TCP_SACK and review the comments before the code section - * TCP_SACK is the number of packets at the front of the stream - */ - -enum pkt_state {PKT, NOPKT}; -struct sack_r { - struct sack_edges se; - enum pkt_state st; -}; - -static struct sack_r edge_a[TCP_SACK]; -static unsigned int sack_idx; -static unsigned int prev_len; +static int (*tcp_stream_on_create)(struct tcp_stream *tcp); /* * TCP lengths are stored as a rounded up number of 32 bit words. @@ -60,52 +56,386 @@ static unsigned int prev_len; */ #define LEN_B_TO_DW(x) ((x) >> 2) #define ROUND_TCPHDR_LEN(x) (LEN_B_TO_DW((x) + 3)) +#define ROUND_TCPHDR_BYTES(x) (((x) + 3) & ~3) #define SHIFT_TO_TCPHDRLEN_FIELD(x) ((x) << 4) #define GET_TCP_HDR_LEN_IN_BYTES(x) ((x) >> 2) -/* TCP connection state */ -static enum tcp_state current_tcp_state; +#define RANDOM_PORT_START 1024 +#define RANDOM_PORT_RANGE 0x4000 + +/** + * random_port() - make port a little random (1024-17407) + * + * Return: random port number from 1024 to 17407 + * + * This keeps the math somewhat trivial to compute, and seems to work with + * all supported protocols/clients/servers + */ +static uint random_port(void) +{ + return RANDOM_PORT_START + (get_timer(0) % RANDOM_PORT_RANGE); +} + +static inline s32 tcp_seq_cmp(u32 a, u32 b) +{ + return (s32)(a - b); +} + +static inline u32 tcp_get_start_seq(void) +{ + static u32 tcp_seq_inc; + u32 tcp_seq; + + tcp_seq = (get_timer(0) & 0xffffffff) + tcp_seq_inc; + tcp_seq_inc += TCP_START_SEQ_INC; -/* Current TCP RX packet handler */ -static rxhand_tcp *tcp_packet_handler; + return tcp_seq; +} + +static inline ulong msec_to_ticks(ulong msec) +{ + return msec * CONFIG_SYS_HZ / 1000; +} /** - * tcp_get_tcp_state() - get current TCP state + * tcp_stream_get_state() - get TCP stream state + * @tcp: tcp stream * - * Return: Current TCP state + * Return: TCP stream state */ -enum tcp_state tcp_get_tcp_state(void) +enum tcp_state tcp_stream_get_state(struct tcp_stream *tcp) { - return current_tcp_state; + return tcp->state; } /** - * tcp_set_tcp_state() - set current TCP state + * tcp_stream_set_state() - set TCP stream state + * @tcp: tcp stream * @new_state: new TCP state */ -void tcp_set_tcp_state(enum tcp_state new_state) +static void tcp_stream_set_state(struct tcp_stream *tcp, + enum tcp_state new_state) { - current_tcp_state = new_state; + tcp->state = new_state; } -static void dummy_handler(uchar *pkt, u16 dport, - struct in_addr sip, u16 sport, - u32 tcp_seq_num, u32 tcp_ack_num, - u8 action, unsigned int len) +/** + * tcp_stream_get_status() - get TCP stream status + * @tcp: tcp stream + * + * Return: TCP stream status + */ +enum tcp_status tcp_stream_get_status(struct tcp_stream *tcp) { + return tcp->status; } /** - * tcp_set_tcp_handler() - set a handler to receive data - * @f: handler + * tcp_stream_set_status() - set TCP stream state + * @tcp: tcp stream + * @new_satus: new TCP stream status */ -void tcp_set_tcp_handler(rxhand_tcp *f) +static void tcp_stream_set_status(struct tcp_stream *tcp, + enum tcp_status new_status) +{ + tcp->status = new_status; +} + +void tcp_stream_restart_rx_timer(struct tcp_stream *tcp) +{ + tcp->time_last_rx = get_timer(0); +} + +static void tcp_stream_init(struct tcp_stream *tcp, + struct in_addr rhost, u16 rport, u16 lport) +{ + memset(tcp, 0, sizeof(struct tcp_stream)); + tcp->rhost.s_addr = rhost.s_addr; + tcp->rport = rport; + tcp->lport = lport; + tcp->state = TCP_CLOSED; + tcp->lost.len = TCP_OPT_LEN_2; + tcp->rcv_wnd = TCP_RCV_WND_SIZE; + tcp->max_retry_count = TCP_SEND_RETRY; + tcp->initial_timeout = TCP_SEND_TIMEOUT; + tcp->rx_inactiv_timeout = TCP_RX_INACTIVE_TIMEOUT; + tcp_stream_restart_rx_timer(tcp); +} + +static void tcp_stream_destroy(struct tcp_stream *tcp) +{ + if (tcp->on_closed) + tcp->on_closed(tcp); + memset(tcp, 0, sizeof(struct tcp_stream)); +} + +void tcp_init(void) +{ + static int initialized; + struct tcp_stream *tcp = &tcp_stream; + + tcp_stream_on_create = NULL; + if (!initialized) { + initialized = 1; + memset(tcp, 0, sizeof(struct tcp_stream)); + } + + tcp_stream_set_state(tcp, TCP_CLOSED); + tcp_stream_set_status(tcp, TCP_ERR_RST); + tcp_stream_destroy(tcp); +} + +void tcp_stream_set_on_create_handler(int (*on_create)(struct tcp_stream *)) +{ + tcp_stream_on_create = on_create; +} + +static struct tcp_stream *tcp_stream_add(struct in_addr rhost, + u16 rport, u16 lport) +{ + struct tcp_stream *tcp = &tcp_stream; + + if (!tcp_stream_on_create || + tcp->state != TCP_CLOSED) + return NULL; + + tcp_stream_init(tcp, rhost, rport, lport); + if (!tcp_stream_on_create(tcp)) + return NULL; + + return tcp; +} + +struct tcp_stream *tcp_stream_get(int is_new, struct in_addr rhost, + u16 rport, u16 lport) +{ + struct tcp_stream *tcp = &tcp_stream; + + if (tcp->rhost.s_addr == rhost.s_addr && + tcp->rport == rport && + tcp->lport == lport) + return tcp; + + return is_new ? tcp_stream_add(rhost, rport, lport) : NULL; +} + +void tcp_stream_put(struct tcp_stream *tcp) +{ + if (tcp->state == TCP_CLOSED) + tcp_stream_destroy(tcp); +} + +u32 tcp_stream_rx_offs(struct tcp_stream *tcp) +{ + u32 ret; + + switch (tcp->state) { + case TCP_CLOSED: + case TCP_SYN_SENT: + case TCP_SYN_RECEIVED: + return 0; + default: + break; + } + + ret = tcp->rcv_nxt - tcp->irs - 1; + if (tcp->fin_rx && tcp->rcv_nxt == tcp->fin_rx_seq) + ret--; + + return ret; +} + +u32 tcp_stream_tx_offs(struct tcp_stream *tcp) +{ + u32 ret; + + switch (tcp->state) { + case TCP_CLOSED: + case TCP_SYN_SENT: + case TCP_SYN_RECEIVED: + return 0; + default: + break; + } + + ret = tcp->snd_una - tcp->iss - 1; + if (tcp->fin_tx && tcp->snd_una == tcp->fin_tx_seq + 1) + ret--; + + return ret; +} + +static void tcp_stream_set_time_handler(struct tcp_stream *tcp, ulong msec, + void (*handler)(struct tcp_stream *)) +{ + if (!msec) { + tcp->time_handler = NULL; + return; + } + + tcp->time_handler = handler; + tcp->time_start = get_timer(0); + tcp->time_delta = msec_to_ticks(msec); +} + +static void tcp_send_packet(struct tcp_stream *tcp, u8 action, + u32 tcp_seq_num, u32 tcp_ack_num, u32 tx_len) +{ + tcp->tx_packets++; + net_send_tcp_packet(tx_len, tcp->rhost, tcp->rport, + tcp->lport, action, tcp_seq_num, + tcp_ack_num); +} + +static void tcp_send_repeat(struct tcp_stream *tcp) { - debug_cond(DEBUG_INT_STATE, "--- net_loop TCP handler set (%p)\n", f); - if (!f) - tcp_packet_handler = dummy_handler; - else - tcp_packet_handler = f; + uchar *ptr; + u32 tcp_opts_size; + int ret; + + if (!tcp->retry_cnt) { + puts("\nTCP: send retry counter exceeded\n"); + tcp_send_packet(tcp, TCP_RST, tcp->retry_seq_num, + tcp->rcv_nxt, 0); + tcp_stream_set_status(tcp, TCP_ERR_TOUT); + tcp_stream_set_state(tcp, TCP_CLOSED); + tcp_stream_destroy(tcp); + return; + } + tcp->retry_cnt--; + tcp->retry_timeout += tcp->initial_timeout; + + if (tcp->retry_tx_len > 0) { + tcp_opts_size = ROUND_TCPHDR_BYTES(TCP_TSOPT_SIZE + + tcp->lost.len); + ptr = net_tx_packet + net_eth_hdr_size() + + IP_TCP_HDR_SIZE + tcp_opts_size; + + if (tcp->retry_tx_len > TCP_MSS - tcp_opts_size) + tcp->retry_tx_len = TCP_MSS - tcp_opts_size; + + /* refill packet data */ + ret = tcp->tx(tcp, tcp->retry_tx_offs, ptr, tcp->retry_tx_len); + if (ret < 0) { + puts("\nTCP: send failure\n"); + tcp_send_packet(tcp, TCP_RST, tcp->retry_seq_num, + tcp->rcv_nxt, 0); + tcp_stream_set_status(tcp, TCP_ERR_IO); + tcp_stream_set_state(tcp, TCP_CLOSED); + tcp_stream_destroy(tcp); + return; + } + } + tcp_send_packet(tcp, tcp->retry_action, tcp->retry_seq_num, + tcp->rcv_nxt, tcp->retry_tx_len); + + tcp_stream_set_time_handler(tcp, tcp->retry_timeout, tcp_send_repeat); +} + +static void tcp_send_packet_with_retry(struct tcp_stream *tcp, u8 action, + u32 tcp_seq_num, u32 tx_len, u32 tx_offs) +{ + tcp->retry_cnt = tcp->max_retry_count; + tcp->retry_timeout = tcp->initial_timeout; + tcp->retry_action = action; + tcp->retry_seq_num = tcp_seq_num; + tcp->retry_tx_len = tx_len; + tcp->retry_tx_offs = tx_offs; + + tcp_send_packet(tcp, action, tcp_seq_num, tcp->rcv_nxt, tx_len); + tcp_stream_set_time_handler(tcp, tcp->retry_timeout, tcp_send_repeat); +} + +static inline u8 tcp_stream_fin_needed(struct tcp_stream *tcp, u32 tcp_seq_num) +{ + return (tcp->fin_tx && (tcp_seq_num == tcp->fin_tx_seq)) ? TCP_FIN : 0; +} + +static void tcp_steam_tx_try(struct tcp_stream *tcp) +{ + uchar *ptr; + int tx_len; + u32 tx_offs, tcp_opts_size; + + if (tcp->state != TCP_ESTABLISHED || + tcp->time_handler || + !tcp->tx) + return; + + tcp_opts_size = ROUND_TCPHDR_BYTES(TCP_TSOPT_SIZE + tcp->lost.len); + tx_len = TCP_MSS - tcp_opts_size; + if (tcp->fin_tx) { + /* do not try to send beyonds FIN packet limits */ + if (tcp_seq_cmp(tcp->snd_una, tcp->fin_tx_seq) >= 0) + return; + + tx_len = tcp->fin_tx_seq - tcp->snd_una; + if (tx_len > TCP_MSS - tcp_opts_size) + tx_len = TCP_MSS - tcp_opts_size; + } + + tx_offs = tcp_stream_tx_offs(tcp); + ptr = net_tx_packet + net_eth_hdr_size() + + IP_TCP_HDR_SIZE + tcp_opts_size; + + /* fill packet data and adjust size */ + tx_len = tcp->tx(tcp, tx_offs, ptr, tx_len); + if (tx_len < 0) { + puts("\nTCP: send failure\n"); + tcp_send_packet(tcp, TCP_RST, tcp->retry_seq_num, + tcp->rcv_nxt, 0); + tcp_stream_set_status(tcp, TCP_ERR_IO); + tcp_stream_set_state(tcp, TCP_CLOSED); + tcp_stream_destroy(tcp); + return; + } + if (!tx_len) + return; + + if (tcp_seq_cmp(tcp->snd_una + tx_len, tcp->snd_nxt) > 0) + tcp->snd_nxt = tcp->snd_una + tx_len; + + tcp_send_packet_with_retry(tcp, TCP_ACK | TCP_PUSH, + tcp->snd_una, tx_len, tx_offs); +} + +static void tcp_stream_poll(struct tcp_stream *tcp, ulong time) +{ + ulong delta; + void (*handler)(struct tcp_stream *tcp); + + if (tcp->state == TCP_CLOSED) + return; + + /* handle rx inactivity timeout */ + delta = msec_to_ticks(tcp->rx_inactiv_timeout); + if (time - tcp->time_last_rx >= delta) { + puts("\nTCP: rx inactivity timeout exceeded\n"); + tcp_stream_reset(tcp); + tcp_stream_set_status(tcp, TCP_ERR_TOUT); + tcp_stream_destroy(tcp); + return; + } + + /* handle retransmit timeout */ + if (tcp->time_handler && + time - tcp->time_start >= tcp->time_delta) { + handler = tcp->time_handler; + tcp->time_handler = NULL; + handler(tcp); + } + + tcp_steam_tx_try(tcp); +} + +void tcp_streams_poll(void) +{ + ulong time; + struct tcp_stream *tcp; + + time = get_timer(0); + tcp = &tcp_stream; + tcp_stream_poll(tcp, time); } /** @@ -148,29 +478,30 @@ u16 tcp_set_pseudo_header(uchar *pkt, struct in_addr src, struct in_addr dest, /** * net_set_ack_options() - set TCP options in acknowledge packets + * @tcp: tcp stream * @b: the packet * * Return: TCP header length */ -int net_set_ack_options(union tcp_build_pkt *b) +int net_set_ack_options(struct tcp_stream *tcp, union tcp_build_pkt *b) { b->sack.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(LEN_B_TO_DW(TCP_HDR_SIZE)); b->sack.t_opt.kind = TCP_O_TS; b->sack.t_opt.len = TCP_OPT_LEN_A; - b->sack.t_opt.t_snd = htons(loc_timestamp); - b->sack.t_opt.t_rcv = rmt_timestamp; + b->sack.t_opt.t_snd = htons(tcp->loc_timestamp); + b->sack.t_opt.t_rcv = tcp->rmt_timestamp; b->sack.sack_v.kind = TCP_1_NOP; b->sack.sack_v.len = 0; if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) { - if (tcp_lost.len > TCP_OPT_LEN_2) { + if (tcp->lost.len > TCP_OPT_LEN_2) { debug_cond(DEBUG_DEV_PKT, "TCP ack opt lost.len %x\n", - tcp_lost.len); - b->sack.sack_v.len = tcp_lost.len; + tcp->lost.len); + b->sack.sack_v.len = tcp->lost.len; b->sack.sack_v.kind = TCP_V_SACK; - b->sack.sack_v.hill[0].l = htonl(tcp_lost.hill[0].l); - b->sack.sack_v.hill[0].r = htonl(tcp_lost.hill[0].r); + b->sack.sack_v.hill[0].l = htonl(tcp->lost.hill[0].l); + b->sack.sack_v.hill[0].r = htonl(tcp->lost.hill[0].r); /* * These SACK structures are initialized with NOPs to @@ -178,17 +509,17 @@ int net_set_ack_options(union tcp_build_pkt *b) * SACK structures used for both header padding and * internally. */ - b->sack.sack_v.hill[1].l = htonl(tcp_lost.hill[1].l); - b->sack.sack_v.hill[1].r = htonl(tcp_lost.hill[1].r); - b->sack.sack_v.hill[2].l = htonl(tcp_lost.hill[2].l); - b->sack.sack_v.hill[2].r = htonl(tcp_lost.hill[2].r); + b->sack.sack_v.hill[1].l = htonl(tcp->lost.hill[1].l); + b->sack.sack_v.hill[1].r = htonl(tcp->lost.hill[1].r); + b->sack.sack_v.hill[2].l = htonl(tcp->lost.hill[2].l); + b->sack.sack_v.hill[2].r = htonl(tcp->lost.hill[2].r); b->sack.sack_v.hill[3].l = TCP_O_NOP; b->sack.sack_v.hill[3].r = TCP_O_NOP; } b->sack.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(ROUND_TCPHDR_LEN(TCP_HDR_SIZE + TCP_TSOPT_SIZE + - tcp_lost.len)); + tcp->lost.len)); } else { b->sack.sack_v.kind = 0; b->sack.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(ROUND_TCPHDR_LEN(TCP_HDR_SIZE + @@ -199,18 +530,18 @@ int net_set_ack_options(union tcp_build_pkt *b) * This returns the actual rounded up length of the * TCP header to add to the total packet length */ - return GET_TCP_HDR_LEN_IN_BYTES(b->sack.hdr.tcp_hlen); } /** - * net_set_ack_options() - set TCP options in SYN packets + * net_set_syn_options() - set TCP options in SYN packets + * @tcp: tcp stream * @b: the packet */ -void net_set_syn_options(union tcp_build_pkt *b) +void net_set_syn_options(struct tcp_stream *tcp, union tcp_build_pkt *b) { if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) - tcp_lost.len = 0; + tcp->lost.len = 0; b->ip.hdr.tcp_hlen = 0xa0; @@ -229,17 +560,40 @@ void net_set_syn_options(union tcp_build_pkt *b) } b->ip.t_opt.kind = TCP_O_TS; b->ip.t_opt.len = TCP_OPT_LEN_A; - loc_timestamp = get_ticks(); - rmt_timestamp = 0; + tcp->loc_timestamp = get_ticks(); + tcp->rmt_timestamp = 0; b->ip.t_opt.t_snd = 0; b->ip.t_opt.t_rcv = 0; b->ip.end = TCP_O_END; } -int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len, +const char *tcpflags_to_str(char tcpflags, char *buf, int size) +{ + int i; + static const struct { + int bit; + const char *name; + } desc[] = {{TCP_RST, "RST"}, {TCP_SYN, "SYN"}, {TCP_PUSH, "PSH"}, + {TCP_FIN, "FIN"}, {TCP_ACK, "ACK"}}; + + *buf = '\0'; + for (i = 0; i < ARRAY_SIZE(desc); i++) { + if (!(tcpflags & desc[i].bit)) + continue; + + if (*buf) + strlcat(buf, ",", size); + strlcat(buf, desc[i].name, size); + } + + return buf; +} + +int tcp_set_tcp_header(struct tcp_stream *tcp, uchar *pkt, int payload_len, u8 action, u32 tcp_seq_num, u32 tcp_ack_num) { union tcp_build_pkt *b = (union tcp_build_pkt *)pkt; + char buf[24]; int pkt_hdr_len; int pkt_len; int tcp_len; @@ -249,79 +603,42 @@ int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len, * 4 bits reserved options */ b->ip.hdr.tcp_flags = action; - pkt_hdr_len = IP_TCP_HDR_SIZE; b->ip.hdr.tcp_hlen = SHIFT_TO_TCPHDRLEN_FIELD(LEN_B_TO_DW(TCP_HDR_SIZE)); switch (action) { case TCP_SYN: debug_cond(DEBUG_DEV_PKT, - "TCP Hdr:SYN (%pI4, %pI4, sq=%u, ak=%u)\n", - &net_server_ip, &net_ip, - tcp_seq_num, tcp_ack_num); - tcp_activity_count = 0; - net_set_syn_options(b); - tcp_seq_num = 0; - tcp_ack_num = 0; + "TCP Hdr:%s (%pI4, %pI4, s=%u, a=%u)\n", + tcpflags_to_str(action, buf, sizeof(buf)), + &tcp->rhost, &net_ip, tcp_seq_num, tcp_ack_num); + net_set_syn_options(tcp, b); pkt_hdr_len = IP_TCP_O_SIZE; - if (current_tcp_state == TCP_SYN_SENT) { /* Too many SYNs */ - action = TCP_FIN; - current_tcp_state = TCP_FIN_WAIT_1; - } else { - current_tcp_state = TCP_SYN_SENT; - } - break; - case TCP_SYN | TCP_ACK: - case TCP_ACK: - pkt_hdr_len = IP_HDR_SIZE + net_set_ack_options(b); - b->ip.hdr.tcp_flags = action; - debug_cond(DEBUG_DEV_PKT, - "TCP Hdr:ACK (%pI4, %pI4, s=%u, a=%u, A=%x)\n", - &net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num, - action); - break; - case TCP_FIN: - debug_cond(DEBUG_DEV_PKT, - "TCP Hdr:FIN (%pI4, %pI4, s=%u, a=%u)\n", - &net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num); - payload_len = 0; - pkt_hdr_len = IP_TCP_HDR_SIZE; - current_tcp_state = TCP_FIN_WAIT_1; break; case TCP_RST | TCP_ACK: case TCP_RST: debug_cond(DEBUG_DEV_PKT, - "TCP Hdr:RST (%pI4, %pI4, s=%u, a=%u)\n", - &net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num); - current_tcp_state = TCP_CLOSED; + "TCP Hdr:%s (%pI4, %pI4, s=%u, a=%u)\n", + tcpflags_to_str(action, buf, sizeof(buf)), + &tcp->rhost, &net_ip, tcp_seq_num, tcp_ack_num); + pkt_hdr_len = IP_TCP_HDR_SIZE; break; - /* Notify connection closing */ - case (TCP_FIN | TCP_ACK): - case (TCP_FIN | TCP_ACK | TCP_PUSH): - if (current_tcp_state == TCP_CLOSE_WAIT) - current_tcp_state = TCP_CLOSING; - - debug_cond(DEBUG_DEV_PKT, - "TCP Hdr:FIN ACK PSH(%pI4, %pI4, s=%u, a=%u, A=%x)\n", - &net_server_ip, &net_ip, - tcp_seq_num, tcp_ack_num, action); - fallthrough; default: - pkt_hdr_len = IP_HDR_SIZE + net_set_ack_options(b); - b->ip.hdr.tcp_flags = action | TCP_PUSH | TCP_ACK; + pkt_hdr_len = IP_HDR_SIZE + net_set_ack_options(tcp, b); debug_cond(DEBUG_DEV_PKT, - "TCP Hdr:dft (%pI4, %pI4, s=%u, a=%u, A=%x)\n", - &net_server_ip, &net_ip, - tcp_seq_num, tcp_ack_num, action); + "TCP Hdr:%s (%pI4, %pI4, s=%u, a=%u)\n", + tcpflags_to_str(action, buf, sizeof(buf)), + &tcp->rhost, &net_ip, tcp_seq_num, tcp_ack_num); + break; } pkt_len = pkt_hdr_len + payload_len; tcp_len = pkt_len - IP_HDR_SIZE; - tcp_ack_edge = tcp_ack_num; + tcp->rcv_nxt = tcp_ack_num; /* TCP Header */ - b->ip.hdr.tcp_ack = htonl(tcp_ack_edge); - b->ip.hdr.tcp_src = htons(sport); - b->ip.hdr.tcp_dst = htons(dport); + b->ip.hdr.tcp_ack = htonl(tcp->rcv_nxt); + b->ip.hdr.tcp_src = htons(tcp->lport); + b->ip.hdr.tcp_dst = htons(tcp->rport); b->ip.hdr.tcp_seq = htonl(tcp_seq_num); /* @@ -339,143 +656,133 @@ int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len, * it is, then the u-boot tftp or nfs kernel netboot should be * considered. */ - b->ip.hdr.tcp_win = htons(PKTBUFSRX * TCP_MSS >> TCP_SCALE); + b->ip.hdr.tcp_win = htons(tcp->rcv_wnd >> TCP_SCALE); b->ip.hdr.tcp_xsum = 0; b->ip.hdr.tcp_ugr = 0; - b->ip.hdr.tcp_xsum = tcp_set_pseudo_header(pkt, net_ip, net_server_ip, + b->ip.hdr.tcp_xsum = tcp_set_pseudo_header(pkt, net_ip, tcp->rhost, tcp_len, pkt_len); - net_set_ip_header((uchar *)&b->ip, net_server_ip, net_ip, + net_set_ip_header((uchar *)&b->ip, tcp->rhost, net_ip, pkt_len, IPPROTO_TCP); return pkt_hdr_len; } +static void tcp_update_rcv_nxt(struct tcp_stream *tcp) +{ + if (tcp_seq_cmp(tcp->rcv_nxt, tcp->lost.hill[0].l) >= 0) { + tcp->rcv_nxt = tcp->lost.hill[0].r; + + memmove(&tcp->lost.hill[0], &tcp->lost.hill[1], + (TCP_SACK_HILLS - 1) * sizeof(struct sack_edges)); + + tcp->lost.len -= TCP_OPT_LEN_8; + tcp->lost.hill[TCP_SACK_HILLS - 1].l = TCP_O_NOP; + tcp->lost.hill[TCP_SACK_HILLS - 1].r = TCP_O_NOP; + } +} + /** * tcp_hole() - Selective Acknowledgment (Essential for fast stream transfer) + * @tcp: tcp stream * @tcp_seq_num: TCP sequence start number * @len: the length of sequence numbers */ -void tcp_hole(u32 tcp_seq_num, u32 len) -{ - u32 idx_sack, sack_in; - u32 sack_end = TCP_SACK - 1; - u32 hill = 0; - enum pkt_state expect = PKT; - u32 seq = tcp_seq_num - tcp_seq_init; - u32 hol_l = tcp_ack_edge - tcp_seq_init; - u32 hol_r = 0; - - /* Place new seq number in correct place in receive array */ - if (prev_len == 0) - prev_len = len; - - idx_sack = sack_idx + ((tcp_seq_num - tcp_ack_edge) / prev_len); - if (idx_sack < TCP_SACK) { - edge_a[idx_sack].se.l = tcp_seq_num; - edge_a[idx_sack].se.r = tcp_seq_num + len; - edge_a[idx_sack].st = PKT; - - /* - * The fin (last) packet is not the same length as data - * packets, and if it's length is recorded and used for - * array index calculation, calculation breaks. - */ - if (prev_len < len) - prev_len = len; - } - - debug_cond(DEBUG_DEV_PKT, - "TCP 1 seq %d, edg %d, len %d, sack_idx %d, sack_end %d\n", - seq, hol_l, len, sack_idx, sack_end); +void tcp_hole(struct tcp_stream *tcp, u32 tcp_seq_num, u32 len) +{ + int i, j, cnt, cnt_move; - /* Right edge of contiguous stream, is the left edge of first hill */ - hol_l = tcp_seq_num - tcp_seq_init; - hol_r = hol_l + len; + cnt = (tcp->lost.len - TCP_OPT_LEN_2) / TCP_OPT_LEN_8; + for (i = 0; i < cnt; i++) { + if (tcp_seq_cmp(tcp->lost.hill[i].r, tcp_seq_num) < 0) + continue; + if (tcp_seq_cmp(tcp->lost.hill[i].l, tcp_seq_num + len) > 0) + break; - if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) - tcp_lost.len = TCP_OPT_LEN_2; + if (tcp_seq_cmp(tcp->lost.hill[i].l, tcp_seq_num) > 0) + tcp->lost.hill[i].l = tcp_seq_num; + if (tcp_seq_cmp(tcp->lost.hill[i].l, tcp_seq_num) < 0) { + len += tcp_seq_num - tcp->lost.hill[i].l; + tcp_seq_num = tcp->lost.hill[i].l; + } + if (tcp_seq_cmp(tcp->lost.hill[i].r, tcp_seq_num + len) >= 0) { + tcp_update_rcv_nxt(tcp); + return; + } - debug_cond(DEBUG_DEV_PKT, - "TCP 1 in %d, seq %d, pkt_l %d, pkt_r %d, sack_idx %d, sack_end %d\n", - idx_sack, seq, hol_l, hol_r, sack_idx, sack_end); - - for (sack_in = sack_idx; sack_in < sack_end && hill < TCP_SACK_HILLS; - sack_in++) { - switch (expect) { - case NOPKT: - switch (edge_a[sack_in].st) { - case NOPKT: - debug_cond(DEBUG_INT_STATE, "N"); - break; - case PKT: - debug_cond(DEBUG_INT_STATE, "n"); - if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) { - tcp_lost.hill[hill].l = - edge_a[sack_in].se.l; - tcp_lost.hill[hill].r = - edge_a[sack_in].se.r; - } - expect = PKT; - break; - } - break; - case PKT: - switch (edge_a[sack_in].st) { - case NOPKT: - debug_cond(DEBUG_INT_STATE, "p"); - if (sack_in > sack_idx && - hill < TCP_SACK_HILLS) { - hill++; - if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) - tcp_lost.len += TCP_OPT_LEN_8; - } - expect = NOPKT; - break; - case PKT: - debug_cond(DEBUG_INT_STATE, "P"); - - if (tcp_ack_edge == edge_a[sack_in].se.l) { - tcp_ack_edge = edge_a[sack_in].se.r; - edge_a[sack_in].st = NOPKT; - sack_idx++; - } else { - if (IS_ENABLED(CONFIG_PROT_TCP_SACK) && - hill < TCP_SACK_HILLS) - tcp_lost.hill[hill].r = - edge_a[sack_in].se.r; - if (IS_ENABLED(CONFIG_PROT_TCP_SACK) && - sack_in == sack_end - 1) - tcp_lost.hill[hill].r = - edge_a[sack_in].se.r; - } + /* check overlapping with next hills */ + cnt_move = 0; + tcp->lost.hill[i].r = tcp_seq_num + len; + for (j = i + 1; j < cnt; j++) { + if (tcp_seq_cmp(tcp->lost.hill[j].l, tcp->lost.hill[i].r) > 0) break; + + tcp->lost.hill[i].r = tcp->lost.hill[j].r; + cnt_move++; + } + + if (cnt_move > 0) { + if (cnt > i + cnt_move + 1) + memmove(&tcp->lost.hill[i + 1], + &tcp->lost.hill[i + cnt_move + 1], + cnt_move * sizeof(struct sack_edges)); + + cnt -= cnt_move; + tcp->lost.len = TCP_OPT_LEN_2 + cnt * TCP_OPT_LEN_8; + for (j = cnt; j < TCP_SACK_HILLS; j++) { + tcp->lost.hill[j].l = TCP_O_NOP; + tcp->lost.hill[j].r = TCP_O_NOP; } - break; } + + tcp_update_rcv_nxt(tcp); + return; } - debug_cond(DEBUG_INT_STATE, "\n"); - if (!IS_ENABLED(CONFIG_PROT_TCP_SACK) || tcp_lost.len <= TCP_OPT_LEN_2) - sack_idx = 0; -} + + if (i == TCP_SACK_HILLS) { + tcp_update_rcv_nxt(tcp); + return; + } + + if (cnt < TCP_SACK_HILLS) { + cnt_move = cnt - i; + cnt++; + } else { + cnt = TCP_SACK_HILLS; + cnt_move = TCP_SACK_HILLS - i; + } + + if (cnt_move > 0) + memmove(&tcp->lost.hill[i + 1], + &tcp->lost.hill[i], + cnt_move * sizeof(struct sack_edges)); + + tcp->lost.hill[i].l = tcp_seq_num; + tcp->lost.hill[i].r = tcp_seq_num + len; + tcp->lost.len = TCP_OPT_LEN_2 + cnt * TCP_OPT_LEN_8; + + tcp_update_rcv_nxt(tcp); +}; /** * tcp_parse_options() - parsing TCP options + * @tcp: tcp stream * @o: pointer to the option field. * @o_len: length of the option field. */ -void tcp_parse_options(uchar *o, int o_len) +void tcp_parse_options(struct tcp_stream *tcp, uchar *o, int o_len) { struct tcp_t_opt *tsopt; + struct tcp_scale *wsopt; uchar *p = o; /* * NOPs are options with a zero length, and thus are special. * All other options have length fields. */ - for (p = o; p < (o + o_len); p = p + p[1]) { + for (p = o; p < (o + o_len); ) { if (!p[1]) return; /* Finished processing options */ @@ -483,150 +790,365 @@ void tcp_parse_options(uchar *o, int o_len) case TCP_O_END: return; case TCP_O_MSS: - case TCP_O_SCL: case TCP_P_SACK: case TCP_V_SACK: break; + case TCP_O_SCL: + wsopt = (struct tcp_scale *)p; + tcp->rmt_win_scale = wsopt->scale; + break; case TCP_O_TS: tsopt = (struct tcp_t_opt *)p; - rmt_timestamp = tsopt->t_snd; - return; + tcp->rmt_timestamp = tsopt->t_snd; + break; } /* Process optional NOPs */ if (p[0] == TCP_O_NOP) p++; + else + p += p[1]; } } -static u8 tcp_state_machine(u8 tcp_flags, u32 tcp_seq_num, int payload_len) +static int tcp_seg_in_wnd(struct tcp_stream *tcp, + u32 tcp_seq_num, int payload_len) { - u8 tcp_fin = tcp_flags & TCP_FIN; - u8 tcp_syn = tcp_flags & TCP_SYN; - u8 tcp_rst = tcp_flags & TCP_RST; - u8 tcp_push = tcp_flags & TCP_PUSH; - u8 tcp_ack = tcp_flags & TCP_ACK; - u8 action = TCP_DATA; - int i; + if (!payload_len && !tcp->rcv_wnd) { + if (tcp_seq_num == tcp->rcv_nxt) + return 1; + } + if (!payload_len && tcp->rcv_wnd > 0) { + if (tcp_seq_cmp(tcp->rcv_nxt, tcp_seq_num) <= 0 && + tcp_seq_cmp(tcp_seq_num, tcp->rcv_nxt + tcp->rcv_wnd) < 0) + return 1; + } + if (payload_len > 0 && tcp->rcv_wnd > 0) { + if (tcp_seq_cmp(tcp->rcv_nxt, tcp_seq_num) <= 0 && + tcp_seq_cmp(tcp_seq_num, tcp->rcv_nxt + tcp->rcv_wnd) < 0) + return 1; + tcp_seq_num += payload_len - 1; + if (tcp_seq_cmp(tcp->rcv_nxt, tcp_seq_num) <= 0 && + tcp_seq_cmp(tcp_seq_num, tcp->rcv_nxt + tcp->rcv_wnd) < 0) + return 1; + } + + return 0; +} + +static int tcp_rx_check_ack_num(struct tcp_stream *tcp, u32 tcp_seq_num, + u32 tcp_ack_num, u32 tcp_win_size) +{ + u32 old_offs, new_offs; + u8 action; + + switch (tcp->state) { + case TCP_SYN_RECEIVED: + if (tcp_seq_cmp(tcp->snd_una, tcp_ack_num) >= 0 || + tcp_seq_cmp(tcp_ack_num, tcp->snd_nxt) > 0) { + // segment acknowledgment is not acceptable + tcp_send_packet(tcp, TCP_RST, tcp_ack_num, 0, 0); + return TCP_PACKET_DROP; + } + + tcp_stream_set_state(tcp, TCP_ESTABLISHED); + tcp->snd_wnd = tcp_win_size; + tcp->snd_wl1 = tcp_seq_num; + tcp->snd_wl2 = tcp_ack_num; + + if (tcp->on_established) + tcp->on_established(tcp); + + fallthrough; + + case TCP_ESTABLISHED: + case TCP_FIN_WAIT_1: + case TCP_FIN_WAIT_2: + case TCP_CLOSE_WAIT: + case TCP_CLOSING: + if (tcp_seq_cmp(tcp_ack_num, tcp->snd_nxt) > 0) { + // ACK acks something not yet sent + action = tcp_stream_fin_needed(tcp, tcp->snd_una) | TCP_ACK; + tcp_send_packet(tcp, action, tcp->snd_una, tcp->rcv_nxt, 0); + return TCP_PACKET_DROP; + } + + if (tcp_seq_cmp(tcp->snd_una, tcp_ack_num) < 0) { + old_offs = tcp_stream_tx_offs(tcp); + tcp->snd_una = tcp_ack_num; + new_offs = tcp_stream_tx_offs(tcp); + if (tcp->time_handler && + tcp_seq_cmp(tcp->snd_una, tcp->retry_seq_num) > 0) { + tcp_stream_set_time_handler(tcp, 0, NULL); + } + if (tcp->on_snd_una_update && + old_offs != new_offs) + tcp->on_snd_una_update(tcp, new_offs); + } + + if (tcp_seq_cmp(tcp->snd_una, tcp_ack_num) <= 0) { + if (tcp_seq_cmp(tcp->snd_wl1, tcp_seq_num) < 0 || + (tcp->snd_wl1 == tcp_seq_num && + tcp_seq_cmp(tcp->snd_wl2, tcp_seq_num) <= 0)) { + tcp->snd_wnd = tcp_win_size; + tcp->snd_wl1 = tcp_seq_num; + tcp->snd_wl2 = tcp_ack_num; + } + } + + if (tcp->state == TCP_FIN_WAIT_1) { + if (tcp->snd_una == tcp->snd_nxt) + tcp_stream_set_state(tcp, TCP_FIN_WAIT_2); + } + + if (tcp->state == TCP_CLOSING) { + if (tcp->snd_una == tcp->snd_nxt) + tcp_stream_set_state(tcp, TCP_CLOSED); + } + return TCP_PACKET_OK; + + case TCP_LAST_ACK: + if (tcp_ack_num == tcp->snd_nxt) + tcp_stream_set_state(tcp, TCP_CLOSED); + return TCP_PACKET_OK; + + default: + return TCP_PACKET_DROP; + } +} +static int tcp_rx_user_data(struct tcp_stream *tcp, u32 tcp_seq_num, + char *buf, int len) +{ + int tmp_len; + u32 buf_offs, old_offs, new_offs; + u8 action; + + if (!len) + return TCP_PACKET_OK; + + switch (tcp->state) { + case TCP_ESTABLISHED: + case TCP_FIN_WAIT_1: + case TCP_FIN_WAIT_2: + break; + default: + return TCP_PACKET_DROP; + } + + tmp_len = len; + old_offs = tcp_stream_rx_offs(tcp); + buf_offs = tcp_seq_num - tcp->irs - 1; + if (tcp->rx) { + tmp_len = tcp->rx(tcp, buf_offs, buf, len); + if (tmp_len < 0) { + puts("\nTCP: receive failure\n"); + tcp_send_packet(tcp, TCP_RST, tcp->snd_una, + tcp->rcv_nxt, 0); + tcp_stream_set_status(tcp, TCP_ERR_IO); + tcp_stream_set_state(tcp, TCP_CLOSED); + tcp_stream_destroy(tcp); + return TCP_PACKET_DROP; + } + } + if (tmp_len) + tcp_hole(tcp, tcp_seq_num, tmp_len); + + new_offs = tcp_stream_rx_offs(tcp); + if (tcp->on_rcv_nxt_update && old_offs != new_offs) + tcp->on_rcv_nxt_update(tcp, new_offs); + + action = tcp_stream_fin_needed(tcp, tcp->snd_una) | TCP_ACK; + tcp_send_packet(tcp, action, tcp->snd_una, tcp->rcv_nxt, 0); + + return TCP_PACKET_OK; +} + +void tcp_rx_state_machine(struct tcp_stream *tcp, + union tcp_build_pkt *b, unsigned int pkt_len) +{ + int tcp_len = pkt_len - IP_HDR_SIZE; + u32 tcp_seq_num, tcp_ack_num, tcp_win_size; + int tcp_hdr_len, payload_len; + u8 tcp_flags, action; + + tcp_hdr_len = GET_TCP_HDR_LEN_IN_BYTES(b->ip.hdr.tcp_hlen); + payload_len = tcp_len - tcp_hdr_len; + + if (tcp_hdr_len > TCP_HDR_SIZE) + tcp_parse_options(tcp, (uchar *)b + IP_TCP_HDR_SIZE, + tcp_hdr_len - TCP_HDR_SIZE); /* - * tcp_flags are examined to determine TX action in a given state - * tcp_push is interpreted to mean "inform the app" - * urg, ece, cer and nonce flags are not supported. - * - * exe and crw are use to signal and confirm knowledge of congestion. - * This TCP only sends a file request and acks. If it generates - * congestion, the network is broken. + * Incoming sequence and ack numbers are server's view of the numbers. + * The app must swap the numbers when responding. */ - debug_cond(DEBUG_INT_STATE, "TCP STATE ENTRY %x\n", action); - if (tcp_rst) { - action = TCP_DATA; - current_tcp_state = TCP_CLOSED; - net_set_state(NETLOOP_FAIL); - debug_cond(DEBUG_INT_STATE, "TCP Reset %x\n", tcp_flags); - return TCP_RST; - } + tcp_seq_num = ntohl(b->ip.hdr.tcp_seq); + tcp_ack_num = ntohl(b->ip.hdr.tcp_ack); + tcp_win_size = ntohs(b->ip.hdr.tcp_win) << tcp->rmt_win_scale; - switch (current_tcp_state) { + tcp_flags = b->ip.hdr.tcp_flags; + +// printf("pkt: seq=%d, ack=%d, flags=%x, len=%d\n", +// tcp_seq_num - tcp->irs, tcp_ack_num - tcp->iss, tcp_flags, pkt_len); +// printf("tcp: rcv_nxt=%d, snd_una=%d, snd_nxt=%d\n\n", +// tcp->rcv_nxt - tcp->irs, tcp->snd_una - tcp->iss, tcp->snd_nxt - tcp->iss); + + switch (tcp->state) { case TCP_CLOSED: - debug_cond(DEBUG_INT_STATE, "TCP CLOSED %x\n", tcp_flags); - if (tcp_syn) { - action = TCP_SYN | TCP_ACK; - tcp_seq_init = tcp_seq_num; - tcp_ack_edge = tcp_seq_num + 1; - current_tcp_state = TCP_SYN_RECEIVED; - } else if (tcp_ack || tcp_fin) { - action = TCP_DATA; + if (tcp_flags & TCP_RST) + return; + + if (tcp_flags & TCP_ACK) { + tcp_send_packet(tcp, TCP_RST, tcp_ack_num, 0, 0); + return; } - break; - case TCP_SYN_RECEIVED: + + if (!(tcp_flags & TCP_SYN)) + return; + + tcp->irs = tcp_seq_num; + tcp->rcv_nxt = tcp->irs + 1; + + tcp->iss = tcp_get_start_seq(); + tcp->snd_una = tcp->iss; + tcp->snd_nxt = tcp->iss + 1; + tcp->snd_wnd = tcp_win_size; + + tcp_stream_restart_rx_timer(tcp); + + tcp_stream_set_state(tcp, TCP_SYN_RECEIVED); + tcp_send_packet_with_retry(tcp, TCP_SYN | TCP_ACK, + tcp->iss, 0, 0); + return; + case TCP_SYN_SENT: - debug_cond(DEBUG_INT_STATE, "TCP_SYN_SENT | TCP_SYN_RECEIVED %x, %u\n", - tcp_flags, tcp_seq_num); - if (tcp_fin) { - action = action | TCP_PUSH; - current_tcp_state = TCP_CLOSE_WAIT; - } else if (tcp_ack || (tcp_syn && tcp_ack)) { - action |= TCP_ACK; - tcp_seq_init = tcp_seq_num; - tcp_ack_edge = tcp_seq_num + 1; - sack_idx = 0; - edge_a[sack_idx].se.l = tcp_ack_edge; - edge_a[sack_idx].se.r = tcp_ack_edge; - prev_len = 0; - current_tcp_state = TCP_ESTABLISHED; - for (i = 0; i < TCP_SACK; i++) - edge_a[i].st = NOPKT; - - if (tcp_syn && tcp_ack) - action |= TCP_PUSH; - } else { - action = TCP_DATA; + if (!(tcp_flags & TCP_ACK)) + return; + + if (tcp_seq_cmp(tcp_ack_num, tcp->iss) <= 0 || + tcp_seq_cmp(tcp_ack_num, tcp->snd_nxt) > 0) { + if (!(tcp_flags & TCP_RST)) + tcp_send_packet(tcp, TCP_RST, tcp_ack_num, 0, 0); + return; } - break; + + if (tcp_flags & TCP_RST) { + tcp_stream_set_status(tcp, TCP_ERR_RST); + tcp_stream_set_state(tcp, TCP_CLOSED); + return; + } + + if (!(tcp_flags & TCP_SYN)) + return; + + /* stop retransmit of SYN */ + tcp_stream_set_time_handler(tcp, 0, NULL); + + tcp->irs = tcp_seq_num; + tcp->rcv_nxt = tcp->irs + 1; + tcp->snd_una = tcp_ack_num; + + tcp_stream_restart_rx_timer(tcp); + + /* our SYN has been ACKed */ + tcp_stream_set_state(tcp, TCP_ESTABLISHED); + + if (tcp->on_established) + tcp->on_established(tcp); + + action = tcp_stream_fin_needed(tcp, tcp->snd_una) | TCP_ACK; + tcp_send_packet(tcp, action, tcp->snd_una, tcp->rcv_nxt, 0); + tcp_rx_user_data(tcp, tcp_seq_num, + ((char *)b) + pkt_len - payload_len, + payload_len); + return; + + case TCP_SYN_RECEIVED: case TCP_ESTABLISHED: - debug_cond(DEBUG_INT_STATE, "TCP_ESTABLISHED %x\n", tcp_flags); - if (payload_len > 0) { - tcp_hole(tcp_seq_num, payload_len); - tcp_fin = TCP_DATA; /* cause standalone FIN */ + case TCP_FIN_WAIT_1: + case TCP_FIN_WAIT_2: + case TCP_CLOSE_WAIT: + case TCP_CLOSING: + case TCP_LAST_ACK: + if (!tcp_seg_in_wnd(tcp, tcp_seq_num, payload_len)) { + if (tcp_flags & TCP_RST) + return; + action = tcp_stream_fin_needed(tcp, tcp->snd_una) | TCP_ACK; + tcp_send_packet(tcp, action, tcp->snd_una, tcp->rcv_nxt, 0); + return; } - if ((tcp_fin) && - (!IS_ENABLED(CONFIG_PROT_TCP_SACK) || - tcp_lost.len <= TCP_OPT_LEN_2)) { - action = action | TCP_FIN | TCP_PUSH | TCP_ACK; - current_tcp_state = TCP_CLOSE_WAIT; - } else if (tcp_ack) { - action = TCP_DATA; + tcp_stream_restart_rx_timer(tcp); + + if (tcp_flags & TCP_RST) { + tcp_stream_set_status(tcp, TCP_ERR_RST); + tcp_stream_set_state(tcp, TCP_CLOSED); + return; } - if (tcp_syn) - action = TCP_ACK + TCP_RST; - else if (tcp_push) - action = action | TCP_PUSH; - break; - case TCP_CLOSE_WAIT: - debug_cond(DEBUG_INT_STATE, "TCP_CLOSE_WAIT (%x)\n", tcp_flags); - action = TCP_DATA; - break; - case TCP_FIN_WAIT_2: - debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_2 (%x)\n", tcp_flags); - if (tcp_ack) { - action = TCP_PUSH | TCP_ACK; - current_tcp_state = TCP_CLOSED; - puts("\n"); - } else if (tcp_syn) { - action = TCP_DATA; - } else if (tcp_fin) { - action = TCP_DATA; + if (tcp_flags & TCP_SYN) { + tcp_send_packet(tcp, TCP_RST, tcp_ack_num, 0, 0); + tcp_stream_set_status(tcp, TCP_ERR_RST); + tcp_stream_set_state(tcp, TCP_CLOSED); + return; } - break; - case TCP_FIN_WAIT_1: - debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_1 (%x)\n", tcp_flags); - if (tcp_fin) { - tcp_ack_edge++; - action = TCP_ACK | TCP_FIN; - current_tcp_state = TCP_FIN_WAIT_2; + + if (!(tcp_flags & TCP_ACK)) + return; + + if (tcp_rx_check_ack_num(tcp, tcp_seq_num, tcp_ack_num, + tcp_win_size) == TCP_PACKET_DROP) { + return; } - if (tcp_syn) - action = TCP_RST; - if (tcp_ack) - current_tcp_state = TCP_CLOSED; - break; - case TCP_CLOSING: - debug_cond(DEBUG_INT_STATE, "TCP_CLOSING (%x)\n", tcp_flags); - if (tcp_ack) { - action = TCP_PUSH; - current_tcp_state = TCP_CLOSED; - puts("\n"); - } else if (tcp_syn) { - action = TCP_RST; - } else if (tcp_fin) { - action = TCP_DATA; + + if (tcp_rx_user_data(tcp, tcp_seq_num, + ((char *)b) + pkt_len - payload_len, + payload_len) == TCP_PACKET_DROP) { + return; + } + + if (tcp_flags & TCP_FIN) { + tcp->fin_rx = 1; + tcp->fin_rx_seq = tcp_seq_num + payload_len + 1; + tcp_hole(tcp, tcp_seq_num + payload_len, 1); + action = tcp_stream_fin_needed(tcp, tcp->snd_una) | TCP_ACK; + tcp_send_packet(tcp, action, tcp->snd_una, tcp->rcv_nxt, 0); + } + + if (tcp->fin_rx && + tcp->fin_rx_seq == tcp->rcv_nxt) { + /* all rx data were processed */ + switch (tcp->state) { + case TCP_ESTABLISHED: + tcp_stream_set_state(tcp, TCP_LAST_ACK); + tcp_send_packet_with_retry(tcp, TCP_ACK | TCP_FIN, + tcp->snd_nxt, 0, 0); + tcp->snd_nxt++; + break; + + case TCP_FIN_WAIT_1: + if (tcp_ack_num == tcp->snd_nxt) + tcp_stream_set_state(tcp, TCP_CLOSED); + else + tcp_stream_set_state(tcp, TCP_CLOSING); + break; + + case TCP_FIN_WAIT_2: + tcp_stream_set_state(tcp, TCP_CLOSED); + break; + + default: + break; + } + } + + if (tcp->state == TCP_FIN_WAIT_1 && + tcp_stream_fin_needed(tcp, tcp->snd_una)) { + /* all tx data were acknowledged */ + tcp_send_packet_with_retry(tcp, TCP_ACK | TCP_FIN, + tcp->snd_una, 0, 0); } - break; } - return action; } /** @@ -638,22 +1160,27 @@ void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int pkt_len) { int tcp_len = pkt_len - IP_HDR_SIZE; u16 tcp_rx_xsum = b->ip.hdr.ip_sum; - u8 tcp_action = TCP_DATA; - u32 tcp_seq_num, tcp_ack_num; - int tcp_hdr_len, payload_len; + struct tcp_stream *tcp; + struct in_addr src; /* Verify IP header */ debug_cond(DEBUG_DEV_PKT, "TCP RX in RX Sum (to=%pI4, from=%pI4, len=%d)\n", &b->ip.hdr.ip_src, &b->ip.hdr.ip_dst, pkt_len); - b->ip.hdr.ip_src = net_server_ip; + /* + * src IP address will be destroyed by TCP checksum verification + * algorithm (see tcp_set_pseudo_header()), so remember it before + * it was garbaged. + */ + src.s_addr = b->ip.hdr.ip_src.s_addr; + b->ip.hdr.ip_dst = net_ip; b->ip.hdr.ip_sum = 0; if (tcp_rx_xsum != compute_ip_checksum(b, IP_HDR_SIZE)) { debug_cond(DEBUG_DEV_PKT, "TCP RX IP xSum Error (%pI4, =%pI4, len=%d)\n", - &net_ip, &net_server_ip, pkt_len); + &net_ip, &src, pkt_len); return; } @@ -665,54 +1192,70 @@ void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int pkt_len) pkt_len)) { debug_cond(DEBUG_DEV_PKT, "TCP RX TCP xSum Error (%pI4, %pI4, len=%d)\n", - &net_ip, &net_server_ip, tcp_len); + &net_ip, &src, tcp_len); return; } - tcp_hdr_len = GET_TCP_HDR_LEN_IN_BYTES(b->ip.hdr.tcp_hlen); - payload_len = tcp_len - tcp_hdr_len; + tcp = tcp_stream_get(b->ip.hdr.tcp_flags & TCP_SYN, + src, + ntohs(b->ip.hdr.tcp_src), + ntohs(b->ip.hdr.tcp_dst)); + if (!tcp) + return; - if (tcp_hdr_len > TCP_HDR_SIZE) - tcp_parse_options((uchar *)b + IP_TCP_HDR_SIZE, - tcp_hdr_len - TCP_HDR_SIZE); - /* - * Incoming sequence and ack numbers are server's view of the numbers. - * The app must swap the numbers when responding. - */ - tcp_seq_num = ntohl(b->ip.hdr.tcp_seq); - tcp_ack_num = ntohl(b->ip.hdr.tcp_ack); + tcp->rx_packets++; + tcp_rx_state_machine(tcp, b, pkt_len); + tcp_stream_put(tcp); +} - /* Packets are not ordered. Send to app as received. */ - tcp_action = tcp_state_machine(b->ip.hdr.tcp_flags, - tcp_seq_num, payload_len); +struct tcp_stream *tcp_stream_connect(struct in_addr rhost, u16 rport) +{ + struct tcp_stream *tcp; - tcp_activity_count++; - if (tcp_activity_count > TCP_ACTIVITY) { - puts("| "); - tcp_activity_count = 0; - } + tcp = tcp_stream_add(rhost, rport, random_port()); + if (!tcp) + return NULL; - if ((tcp_action & TCP_PUSH) || payload_len > 0) { - debug_cond(DEBUG_DEV_PKT, - "TCP Notify (action=%x, Seq=%u,Ack=%u,Pay%d)\n", - tcp_action, tcp_seq_num, tcp_ack_num, payload_len); + tcp->iss = tcp_get_start_seq(); + tcp->snd_una = tcp->iss; + tcp->snd_nxt = tcp->iss + 1; - (*tcp_packet_handler) ((uchar *)b + pkt_len - payload_len, b->ip.hdr.tcp_dst, - b->ip.hdr.ip_src, b->ip.hdr.tcp_src, tcp_seq_num, - tcp_ack_num, tcp_action, payload_len); + tcp_stream_set_state(tcp, TCP_SYN_SENT); + tcp_send_packet_with_retry(tcp, TCP_SYN, tcp->snd_una, 0, 0); - } else if (tcp_action != TCP_DATA) { - debug_cond(DEBUG_DEV_PKT, - "TCP Action (action=%x,Seq=%u,Ack=%u,Pay=%d)\n", - tcp_action, tcp_ack_num, tcp_ack_edge, payload_len); - - /* - * Warning: Incoming Ack & Seq sequence numbers are transposed - * here to outgoing Seq & Ack sequence numbers - */ - net_send_tcp_packet(0, ntohs(b->ip.hdr.tcp_src), - ntohs(b->ip.hdr.tcp_dst), - (tcp_action & (~TCP_PUSH)), - tcp_ack_num, tcp_ack_edge); + return tcp; +} + +void tcp_stream_reset(struct tcp_stream *tcp) +{ + if (tcp->state == TCP_CLOSED) + return; + + tcp_stream_set_time_handler(tcp, 0, NULL); + tcp_send_packet(tcp, TCP_RST, tcp->snd_una, 0, 0); + tcp_stream_set_status(tcp, TCP_ERR_RST); + tcp_stream_set_state(tcp, TCP_CLOSED); +} + +void tcp_stream_close(struct tcp_stream *tcp) +{ + switch (tcp->state) { + case TCP_SYN_SENT: + tcp_stream_reset(tcp); + break; + case TCP_SYN_RECEIVED: + case TCP_ESTABLISHED: + tcp->fin_tx = 1; + tcp->fin_tx_seq = tcp->snd_nxt; + if (tcp_stream_fin_needed(tcp, tcp->snd_una)) { + /* all tx data were acknowledged */ + tcp_send_packet_with_retry(tcp, TCP_ACK | TCP_FIN, + tcp->snd_una, 0, 0); + } + tcp_stream_set_state(tcp, TCP_FIN_WAIT_1); + tcp->snd_nxt++; + break; + default: + break; } } |