diff options
Diffstat (limited to 'net/ipv4')
| -rw-r--r-- | net/ipv4/af_inet.c | 17 | ||||
| -rw-r--r-- | net/ipv4/esp4_offload.c | 6 | ||||
| -rw-r--r-- | net/ipv4/fou_nl.c | 1 | ||||
| -rw-r--r-- | net/ipv4/fou_nl.h | 1 | ||||
| -rw-r--r-- | net/ipv4/inet_connection_sock.c | 12 | ||||
| -rw-r--r-- | net/ipv4/inet_diag.c | 8 | ||||
| -rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 9 | ||||
| -rw-r--r-- | net/ipv4/tcp.c | 2 | ||||
| -rw-r--r-- | net/ipv4/tcp_input.c | 18 | ||||
| -rw-r--r-- | net/ipv4/tcp_ipv4.c | 11 | ||||
| -rw-r--r-- | net/ipv4/tcp_minisocks.c | 1 | ||||
| -rw-r--r-- | net/ipv4/tcp_offload.c | 27 | ||||
| -rw-r--r-- | net/ipv4/tcp_timer.c | 23 |
13 files changed, 58 insertions, 78 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a31b94ce8968..08d811f11896 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -756,23 +756,8 @@ EXPORT_SYMBOL(inet_stream_connect); void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *newsk) { if (mem_cgroup_sockets_enabled) { - gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL; - mem_cgroup_sk_alloc(newsk); - - if (mem_cgroup_from_sk(newsk)) { - int amt; - - /* The socket has not been accepted yet, no need - * to look at newsk->sk_wmem_queued. - */ - amt = sk_mem_pages(newsk->sk_forward_alloc + - atomic_read(&newsk->sk_rmem_alloc)); - if (amt) - mem_cgroup_sk_charge(newsk, amt, gfp); - } - - kmem_cache_charge(newsk, gfp); + __sk_charge(newsk, GFP_KERNEL); } sock_rps_record_flow(newsk); diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index e0d94270da28..05828d4cb6cd 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -122,8 +122,10 @@ static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { - __be16 type = x->inner_mode.family == AF_INET6 ? htons(ETH_P_IPV6) - : htons(ETH_P_IP); + const struct xfrm_mode *inner_mode = xfrm_ip2inner_mode(x, + XFRM_MODE_SKB_CB(skb)->protocol); + __be16 type = inner_mode->family == AF_INET6 ? htons(ETH_P_IPV6) + : htons(ETH_P_IP); return skb_eth_gso_segment(skb, features, type); } diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c index 506260b4a4dc..7a99639204b1 100644 --- a/net/ipv4/fou_nl.c +++ b/net/ipv4/fou_nl.c @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN kernel source */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #include <net/netlink.h> #include <net/genetlink.h> diff --git a/net/ipv4/fou_nl.h b/net/ipv4/fou_nl.h index 63a6c4ed803d..438342dc8507 100644 --- a/net/ipv4/fou_nl.h +++ b/net/ipv4/fou_nl.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN kernel header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _LINUX_FOU_GEN_H #define _LINUX_FOU_GEN_H diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b4eae731c9ba..97d57c52b9ad 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -737,9 +737,9 @@ void inet_csk_init_xmit_timers(struct sock *sk, { struct inet_connection_sock *icsk = inet_csk(sk); - timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0); + timer_setup(&sk->tcp_retransmit_timer, retransmit_handler, 0); timer_setup(&icsk->icsk_delack_timer, delack_handler, 0); - timer_setup(&sk->sk_timer, keepalive_handler, 0); + timer_setup(&icsk->icsk_keepalive_timer, keepalive_handler, 0); icsk->icsk_pending = icsk->icsk_ack.pending = 0; } @@ -750,9 +750,9 @@ void inet_csk_clear_xmit_timers(struct sock *sk) smp_store_release(&icsk->icsk_pending, 0); smp_store_release(&icsk->icsk_ack.pending, 0); - sk_stop_timer(sk, &icsk->icsk_retransmit_timer); + sk_stop_timer(sk, &sk->tcp_retransmit_timer); sk_stop_timer(sk, &icsk->icsk_delack_timer); - sk_stop_timer(sk, &sk->sk_timer); + sk_stop_timer(sk, &icsk->icsk_keepalive_timer); } void inet_csk_clear_xmit_timers_sync(struct sock *sk) @@ -765,9 +765,9 @@ void inet_csk_clear_xmit_timers_sync(struct sock *sk) smp_store_release(&icsk->icsk_pending, 0); smp_store_release(&icsk->icsk_ack.pending, 0); - sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); + sk_stop_timer_sync(sk, &sk->tcp_retransmit_timer); sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); - sk_stop_timer_sync(sk, &sk->sk_timer); + sk_stop_timer_sync(sk, &icsk->icsk_keepalive_timer); } struct dst_entry *inet_csk_route_req(const struct sock *sk, diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index f0b6c5a411a2..3f5b1418a610 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -287,17 +287,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_timer = 1; r->idiag_retrans = READ_ONCE(icsk->icsk_retransmits); r->idiag_expires = - jiffies_delta_to_msecs(icsk_timeout(icsk) - jiffies); + jiffies_delta_to_msecs(tcp_timeout_expires(sk) - jiffies); } else if (icsk_pending == ICSK_TIME_PROBE0) { r->idiag_timer = 4; r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out); r->idiag_expires = - jiffies_delta_to_msecs(icsk_timeout(icsk) - jiffies); - } else if (timer_pending(&sk->sk_timer)) { + jiffies_delta_to_msecs(tcp_timeout_expires(sk) - jiffies); + } else if (timer_pending(&icsk->icsk_keepalive_timer)) { r->idiag_timer = 2; r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out); r->idiag_expires = - jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies); + jiffies_delta_to_msecs(icsk->icsk_keepalive_timer.expires - jiffies); } if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 35367f8e2da3..a1a50a5c80dc 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1343,6 +1343,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dou8vec_minmax, }, { + .procname = "tcp_rcvbuf_low_rtt", + .data = &init_net.ipv4.sysctl_tcp_rcvbuf_low_rtt, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, + }, + { .procname = "tcp_tso_win_divisor", .data = &init_net.ipv4.sysctl_tcp_tso_win_divisor, .maxlen = sizeof(u8), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dee578aad690..f035440c475a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2587,7 +2587,7 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, if (err) goto out; - atomic_long_inc(&niov->pp_ref_count); + atomic_long_inc(&niov->desc.pp_ref_count); tcp_xa_pool.netmems[tcp_xa_pool.idx++] = skb_frag_netmem(frag); sent += copy; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9df5d7515605..198f8a0d37be 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -896,6 +896,7 @@ void tcp_rcvbuf_grow(struct sock *sk, u32 newval) const struct net *net = sock_net(sk); struct tcp_sock *tp = tcp_sk(sk); u32 rcvwin, rcvbuf, cap, oldval; + u32 rtt_threshold, rtt_us; u64 grow; oldval = tp->rcvq_space.space; @@ -908,10 +909,19 @@ void tcp_rcvbuf_grow(struct sock *sk, u32 newval) /* DRS is always one RTT late. */ rcvwin = newval << 1; - /* slow start: allow the sender to double its rate. */ - grow = (u64)rcvwin * (newval - oldval); - do_div(grow, oldval); - rcvwin += grow << 1; + rtt_us = tp->rcv_rtt_est.rtt_us >> 3; + rtt_threshold = READ_ONCE(net->ipv4.sysctl_tcp_rcvbuf_low_rtt); + if (rtt_us < rtt_threshold) { + /* For small RTT, we set @grow to rcvwin * rtt_us/rtt_threshold. + * It might take few additional ms to reach 'line rate', + * but will avoid sk_rcvbuf inflation and poor cache use. + */ + grow = div_u64((u64)rcvwin * rtt_us, rtt_threshold); + } else { + /* slow start: allow the sender to double its rate. */ + grow = div_u64(((u64)rcvwin << 1) * (newval - oldval), oldval); + } + rcvwin += grow; if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) rcvwin += TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a7d9fec2950b..f8a9596e8f4d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2869,13 +2869,13 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; - timer_expires = icsk_timeout(icsk); + timer_expires = tcp_timeout_expires(sk); } else if (icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = icsk_timeout(icsk); - } else if (timer_pending(&sk->sk_timer)) { + timer_expires = tcp_timeout_expires(sk); + } else if (timer_pending(&icsk->icsk_keepalive_timer)) { timer_active = 2; - timer_expires = sk->sk_timer.expires; + timer_expires = icsk->icsk_keepalive_timer.expires; } else { timer_active = 0; timer_expires = jiffies; @@ -3566,6 +3566,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_adv_win_scale = 1; net->ipv4.sysctl_tcp_frto = 2; net->ipv4.sysctl_tcp_moderate_rcvbuf = 1; + net->ipv4.sysctl_tcp_rcvbuf_low_rtt = USEC_PER_MSEC; /* This limits the percentage of the congestion window which we * will allow a single TSO frame to consume. Building TSO frames * which are too large can cause TCP streams to be bursty. @@ -3593,7 +3594,7 @@ static int __net_init tcp_sk_init(struct net *net) sizeof(init_net.ipv4.sysctl_tcp_wmem)); } net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC; - net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC; + net->ipv4.sysctl_tcp_comp_sack_slack_ns = 10 * NSEC_PER_USEC; net->ipv4.sysctl_tcp_comp_sack_nr = 44; net->ipv4.sysctl_tcp_comp_sack_rtt_percent = 33; net->ipv4.sysctl_tcp_backlog_ack_defer = 1; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d8f4d813e8dd..bd5462154f97 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -337,7 +337,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); - tw->tw_transparent = inet_test_bit(TRANSPARENT, sk); tw->tw_mark = sk->sk_mark; tw->tw_priority = READ_ONCE(sk->sk_priority); tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 2cb93da93abc..fdda18b1abda 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -282,33 +282,6 @@ struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th) return NULL; } -struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb) -{ - unsigned int thlen, hlen, off; - struct tcphdr *th; - - off = skb_gro_offset(skb); - hlen = off + sizeof(*th); - th = skb_gro_header(skb, hlen, off); - if (unlikely(!th)) - return NULL; - - thlen = th->doff * 4; - if (thlen < sizeof(*th)) - return NULL; - - hlen = off + thlen; - if (!skb_gro_may_pull(skb, hlen)) { - th = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!th)) - return NULL; - } - - skb_gro_pull(skb, thlen); - - return th; -} - struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb, struct tcphdr *th) { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0672c3d8f4f1..160080c9021d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -510,7 +510,7 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk, * and tp->rcv_tstamp might very well have been written recently. * rcv_delta can thus be negative. */ - rcv_delta = icsk_timeout(icsk) - tp->rcv_tstamp; + rcv_delta = tcp_timeout_expires(sk) - tp->rcv_tstamp; if (rcv_delta <= timeout) return false; @@ -697,9 +697,9 @@ void tcp_write_timer_handler(struct sock *sk) !icsk->icsk_pending) return; - if (time_after(icsk_timeout(icsk), jiffies)) { - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, - icsk_timeout(icsk)); + if (time_after(tcp_timeout_expires(sk), jiffies)) { + sk_reset_timer(sk, &sk->tcp_retransmit_timer, + tcp_timeout_expires(sk)); return; } tcp_mstamp_refresh(tcp_sk(sk)); @@ -725,12 +725,10 @@ void tcp_write_timer_handler(struct sock *sk) static void tcp_write_timer(struct timer_list *t) { - struct inet_connection_sock *icsk = - timer_container_of(icsk, t, icsk_retransmit_timer); - struct sock *sk = &icsk->icsk_inet.sk; + struct sock *sk = timer_container_of(sk, t, tcp_retransmit_timer); /* Avoid locking the socket when there is no pending event. */ - if (!smp_load_acquire(&icsk->icsk_pending)) + if (!smp_load_acquire(&inet_csk(sk)->icsk_pending)) goto out; bh_lock_sock(sk); @@ -755,12 +753,12 @@ void tcp_syn_ack_timeout(const struct request_sock *req) void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len) { - sk_reset_timer(sk, &sk->sk_timer, jiffies + len); + sk_reset_timer(sk, &inet_csk(sk)->icsk_keepalive_timer, jiffies + len); } static void tcp_delete_keepalive_timer(struct sock *sk) { - sk_stop_timer(sk, &sk->sk_timer); + sk_stop_timer(sk, &inet_csk(sk)->icsk_keepalive_timer); } void tcp_set_keepalive(struct sock *sk, int val) @@ -777,8 +775,9 @@ EXPORT_IPV6_MOD_GPL(tcp_set_keepalive); static void tcp_keepalive_timer(struct timer_list *t) { - struct sock *sk = timer_container_of(sk, t, sk_timer); - struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *icsk = + timer_container_of(icsk, t, icsk_keepalive_timer); + struct sock *sk = &icsk->icsk_inet.sk; struct tcp_sock *tp = tcp_sk(sk); u32 elapsed; |
