From e88c64f0a42575e01c7ace903d0570bc0b7fcf85 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Thu, 19 Aug 2010 06:33:05 +0000 Subject: tcp: allow effective reduction of TCP's rcv-buffer via setsockopt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Via setsockopt it is possible to reduce the socket RX buffer (SO_RCVBUF). TCP method to select the initial window and window scaling option in tcp_select_initial_window() currently misbehaves and do not consider a reduced RX socket buffer via setsockopt. Even though the server's RX buffer is reduced via setsockopt() to 256 byte (Initial Window 384 byte => 256 * 2 - (256 * 2 / 4)) the window scale option is still 7: 192.168.1.38.40676 > 78.47.222.210.5001: Flags [S], seq 2577214362, win 5840, options [mss 1460,sackOK,TS val 338417 ecr 0,nop,wscale 0], length 0 78.47.222.210.5001 > 192.168.1.38.40676: Flags [S.], seq 1570631029, ack 2577214363, win 384, options [mss 1452,sackOK,TS val 2435248895 ecr 338417,nop,wscale 7], length 0 192.168.1.38.40676 > 78.47.222.210.5001: Flags [.], ack 1, win 5840, options [nop,nop,TS val 338421 ecr 2435248895], length 0 Within tcp_select_initial_window() the original space argument - a representation of the rx buffer size - is expanded during tcp_select_initial_window(). Only sysctl_tcp_rmem[2], sysctl_rmem_max and window_clamp are considered to calculate the initial window. This patch adjust the window_clamp argument if the user explicitly reduce the receive buffer. Signed-off-by: Hagen Paul Pfeifer Cc: David S. Miller Cc: Patrick McHardy Cc: Eric Dumazet Cc: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index de3bd8458588..01b94b8d9ec9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2429,6 +2429,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, __u8 rcv_wscale; /* Set this up on the first call only */ req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); + + /* limit the window selection if the user enforce a smaller rx buffer */ + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0)) + req->window_clamp = tcp_full_space(sk); + /* tcp_full_space because it is guaranteed to be the first packet */ tcp_select_initial_window(tcp_full_space(sk), mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), @@ -2555,6 +2561,11 @@ static void tcp_connect_init(struct sock *sk) tcp_initialize_rcv_mss(sk); + /* limit the window selection if the user enforce a smaller rx buffer */ + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && + (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0)) + tp->window_clamp = tcp_full_space(sk); + tcp_select_initial_window(tcp_full_space(sk), tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, -- cgit v1.2.3 From 0705c6f0e2d39333645bf77cf1efb94526ff1f82 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 1 Sep 2010 00:28:35 +0000 Subject: tcp: update also tcp_output with regard to RFC 5681 Thanks to Ilpo Jarvinen, this updates also the initial window setting for tcp_output with regard to RFC 5681. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 01b94b8d9ec9..ea09d2fd50c7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -224,16 +224,10 @@ void tcp_select_initial_window(int __space, __u32 mss, } } - /* Set initial window to value enough for senders, - * following RFC2414. Senders, not following this RFC, - * will be satisfied with 2. - */ + /* Set initial window to value enough for senders, following RFC5681. */ if (mss > (1 << *rcv_wscale)) { - int init_cwnd = 4; - if (mss > 1460 * 3) - init_cwnd = 2; - else if (mss > 1460) - init_cwnd = 3; + int init_cwnd = rfc3390_bytes_to_packets(mss); + /* when initializing use the value from init_rcv_wnd * rather than the default from above */ -- cgit v1.2.3 From a02cec2155fbea457eca8881870fd2de1a4c4c76 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Sep 2010 20:43:57 +0000 Subject: net: return operator cleanup Change "return (EXPR);" to "return EXPR;" return is not a function, parentheses are not required. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ea09d2fd50c7..05b1ecf36763 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1370,9 +1370,9 @@ static inline int tcp_nagle_check(const struct tcp_sock *tp, const struct sk_buff *skb, unsigned mss_now, int nonagle) { - return (skb->len < mss_now && + return skb->len < mss_now && ((nonagle & TCP_NAGLE_CORK) || - (!nonagle && tp->packets_out && tcp_minshall_check(tp)))); + (!nonagle && tp->packets_out && tcp_minshall_check(tp))); } /* Return non-zero if the Nagle test allows this packet to be @@ -1443,10 +1443,10 @@ int tcp_may_send_now(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = tcp_send_head(sk); - return (skb && + return skb && tcp_snd_test(sk, skb, tcp_current_mss(sk), (tcp_skb_is_last(sk, skb) ? - tp->nonagle : TCP_NAGLE_PUSH))); + tp->nonagle : TCP_NAGLE_PUSH)); } /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet -- cgit v1.2.3