Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/af_inet.c              | 17
-rw-r--r--  net/ipv4/esp4_offload.c         |  6
-rw-r--r--  net/ipv4/fou_nl.c               |  1
-rw-r--r--  net/ipv4/fou_nl.h               |  1
-rw-r--r--  net/ipv4/inet_connection_sock.c | 12
-rw-r--r--  net/ipv4/inet_diag.c            |  8
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c      |  9
-rw-r--r--  net/ipv4/tcp.c                  |  2
-rw-r--r--  net/ipv4/tcp_input.c            | 18
-rw-r--r--  net/ipv4/tcp_ipv4.c             | 11
-rw-r--r--  net/ipv4/tcp_minisocks.c        |  1
-rw-r--r--  net/ipv4/tcp_offload.c          | 27
-rw-r--r--  net/ipv4/tcp_timer.c            | 23
13 files changed, 58 insertions(+), 78 deletions(-)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a31b94ce8968..08d811f11896 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -756,23 +756,8 @@ EXPORT_SYMBOL(inet_stream_connect);
void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *newsk)
{
if (mem_cgroup_sockets_enabled) {
- gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL;
-
mem_cgroup_sk_alloc(newsk);
-
- if (mem_cgroup_from_sk(newsk)) {
- int amt;
-
- /* The socket has not been accepted yet, no need
- * to look at newsk->sk_wmem_queued.
- */
- amt = sk_mem_pages(newsk->sk_forward_alloc +
- atomic_read(&newsk->sk_rmem_alloc));
- if (amt)
- mem_cgroup_sk_charge(newsk, amt, gfp);
- }
-
- kmem_cache_charge(newsk, gfp);
+ __sk_charge(newsk, GFP_KERNEL);
}
sock_rps_record_flow(newsk);
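Note on the hunk above: the open-coded memcg accounting in __inet_accept() is folded
into a single helper. A minimal sketch of what __sk_charge() plausibly does,
reconstructed from the lines removed here; the helper body, and whether __GFP_NOFAIL
is applied inside it rather than by the caller, are assumptions, not the actual
implementation:

/* Hypothetical sketch, pieced together from the removed lines above;
 * the real __sk_charge() may differ.
 */
static inline void __sk_charge(struct sock *sk, gfp_t gfp)
{
	gfp |= __GFP_NOFAIL;

	mem_cgroup_sk_alloc(sk);

	if (mem_cgroup_from_sk(sk)) {
		/* Not accepted yet, so sk_wmem_queued can be ignored. */
		int amt = sk_mem_pages(sk->sk_forward_alloc +
				       atomic_read(&sk->sk_rmem_alloc));

		if (amt)
			mem_cgroup_sk_charge(sk, amt, gfp);
	}

	kmem_cache_charge(sk, gfp);
}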
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index e0d94270da28..05828d4cb6cd 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -122,8 +122,10 @@ static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x,
struct sk_buff *skb,
netdev_features_t features)
{
- __be16 type = x->inner_mode.family == AF_INET6 ? htons(ETH_P_IPV6)
- : htons(ETH_P_IP);
+ const struct xfrm_mode *inner_mode = xfrm_ip2inner_mode(x,
+ XFRM_MODE_SKB_CB(skb)->protocol);
+ __be16 type = inner_mode->family == AF_INET6 ? htons(ETH_P_IPV6)
+ : htons(ETH_P_IP);
return skb_eth_gso_segment(skb, features, type);
}
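The change above stops trusting the inner mode cached in the xfrm_state and instead
looks it up per packet with xfrm_ip2inner_mode(), which matters when one state can
carry inner packets of either family. The ethertype selection itself is unchanged;
a self-contained user-space replay of that family-to-ethertype mapping, using only
standard Linux headers:

#include <stdio.h>
#include <arpa/inet.h>		/* htons(), ntohs() */
#include <sys/socket.h>		/* AF_INET, AF_INET6 */
#include <linux/if_ether.h>	/* ETH_P_IP, ETH_P_IPV6 */

/* Mirrors the selection in xfrm4_tunnel_gso_segment(): derive the GSO
 * ethertype from the inner packet's family.
 */
static unsigned short inner_family_to_ethertype(int family)
{
	return htons(family == AF_INET6 ? ETH_P_IPV6 : ETH_P_IP);
}

int main(void)
{
	printf("AF_INET  -> 0x%04x\n", ntohs(inner_family_to_ethertype(AF_INET)));
	printf("AF_INET6 -> 0x%04x\n", ntohs(inner_family_to_ethertype(AF_INET6)));
	return 0;
}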
diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c
index 506260b4a4dc..7a99639204b1 100644
--- a/net/ipv4/fou_nl.c
+++ b/net/ipv4/fou_nl.c
@@ -2,6 +2,7 @@
/* Do not edit directly, auto-generated from: */
/* Documentation/netlink/specs/fou.yaml */
/* YNL-GEN kernel source */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
#include <net/netlink.h>
#include <net/genetlink.h>
diff --git a/net/ipv4/fou_nl.h b/net/ipv4/fou_nl.h
index 63a6c4ed803d..438342dc8507 100644
--- a/net/ipv4/fou_nl.h
+++ b/net/ipv4/fou_nl.h
@@ -2,6 +2,7 @@
/* Do not edit directly, auto-generated from: */
/* Documentation/netlink/specs/fou.yaml */
/* YNL-GEN kernel header */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
#ifndef _LINUX_FOU_GEN_H
#define _LINUX_FOU_GEN_H
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b4eae731c9ba..97d57c52b9ad 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -737,9 +737,9 @@ void inet_csk_init_xmit_timers(struct sock *sk,
{
struct inet_connection_sock *icsk = inet_csk(sk);
- timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
+ timer_setup(&sk->tcp_retransmit_timer, retransmit_handler, 0);
timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
- timer_setup(&sk->sk_timer, keepalive_handler, 0);
+ timer_setup(&icsk->icsk_keepalive_timer, keepalive_handler, 0);
icsk->icsk_pending = icsk->icsk_ack.pending = 0;
}
@@ -750,9 +750,9 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
smp_store_release(&icsk->icsk_pending, 0);
smp_store_release(&icsk->icsk_ack.pending, 0);
- sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
+ sk_stop_timer(sk, &sk->tcp_retransmit_timer);
sk_stop_timer(sk, &icsk->icsk_delack_timer);
- sk_stop_timer(sk, &sk->sk_timer);
+ sk_stop_timer(sk, &icsk->icsk_keepalive_timer);
}
void inet_csk_clear_xmit_timers_sync(struct sock *sk)
@@ -765,9 +765,9 @@ void inet_csk_clear_xmit_timers_sync(struct sock *sk)
smp_store_release(&icsk->icsk_pending, 0);
smp_store_release(&icsk->icsk_ack.pending, 0);
- sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
+ sk_stop_timer_sync(sk, &sk->tcp_retransmit_timer);
sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
- sk_stop_timer_sync(sk, &sk->sk_timer);
+ sk_stop_timer_sync(sk, &icsk->icsk_keepalive_timer);
}
struct dst_entry *inet_csk_route_req(const struct sock *sk,
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index f0b6c5a411a2..3f5b1418a610 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -287,17 +287,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
r->idiag_timer = 1;
r->idiag_retrans = READ_ONCE(icsk->icsk_retransmits);
r->idiag_expires =
- jiffies_delta_to_msecs(icsk_timeout(icsk) - jiffies);
+ jiffies_delta_to_msecs(tcp_timeout_expires(sk) - jiffies);
} else if (icsk_pending == ICSK_TIME_PROBE0) {
r->idiag_timer = 4;
r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out);
r->idiag_expires =
- jiffies_delta_to_msecs(icsk_timeout(icsk) - jiffies);
- } else if (timer_pending(&sk->sk_timer)) {
+ jiffies_delta_to_msecs(tcp_timeout_expires(sk) - jiffies);
+ } else if (timer_pending(&icsk->icsk_keepalive_timer)) {
r->idiag_timer = 2;
r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out);
r->idiag_expires =
- jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies);
+ jiffies_delta_to_msecs(icsk->icsk_keepalive_timer.expires - jiffies);
}
if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
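Both branches above swap icsk_timeout(icsk) for a tcp_timeout_expires(sk) helper.
Judging only from its call sites in this diff (compared against jiffies here,
passed to sk_reset_timer() in tcp_timer.c below), it plausibly just reads the
pending expiry of the relocated retransmit timer; a hedged guess at its shape:

/* Assumed shape, inferred from the call sites in this diff;
 * the real helper may well differ.
 */
static inline unsigned long tcp_timeout_expires(const struct sock *sk)
{
	return READ_ONCE(sk->tcp_retransmit_timer.expires);
}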
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 35367f8e2da3..a1a50a5c80dc 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1343,6 +1343,15 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dou8vec_minmax,
},
{
+ .procname = "tcp_rcvbuf_low_rtt",
+ .data = &init_net.ipv4.sysctl_tcp_rcvbuf_low_rtt,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
+ },
+ {
.procname = "tcp_tso_win_divisor",
.data = &init_net.ipv4.sysctl_tcp_tso_win_divisor,
.maxlen = sizeof(u8),
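Per the .procname above, the new knob surfaces as
/proc/sys/net/ipv4/tcp_rcvbuf_low_rtt, and per the tcp_sk_init() hunk later in
this diff it defaults to USEC_PER_MSEC, i.e. 1000 microseconds. A small
self-contained reader for the new file (the path is derived from the usual
/proc/sys/net/ipv4/ prefix for ipv4_net_table entries):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *path = "/proc/sys/net/ipv4/tcp_rcvbuf_low_rtt";
	char buf[32];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("tcp_rcvbuf_low_rtt = %ld us\n", strtol(buf, NULL, 10));
	fclose(f);
	return 0;
}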
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dee578aad690..f035440c475a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2587,7 +2587,7 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
if (err)
goto out;
- atomic_long_inc(&niov->pp_ref_count);
+ atomic_long_inc(&niov->desc.pp_ref_count);
tcp_xa_pool.netmems[tcp_xa_pool.idx++] = skb_frag_netmem(frag);
sent += copy;
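The one-liner above chases pp_ref_count through a new desc member, suggesting the
page_pool fields of struct net_iov were regrouped into an embedded descriptor.
A sketch of the nesting that would make the new expression compile; everything
beyond the pp_ref_count placement is an assumption:

/* Assumed layout, inferred from niov->desc.pp_ref_count above;
 * the real struct net_iov may carry more fields.
 */
struct netmem_desc {
	atomic_long_t pp_ref_count;	/* page_pool reference count */
	/* ... other page_pool/DMA bookkeeping presumably here ... */
};

struct net_iov {
	struct netmem_desc desc;
	/* ... */
};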
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9df5d7515605..198f8a0d37be 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -896,6 +896,7 @@ void tcp_rcvbuf_grow(struct sock *sk, u32 newval)
const struct net *net = sock_net(sk);
struct tcp_sock *tp = tcp_sk(sk);
u32 rcvwin, rcvbuf, cap, oldval;
+ u32 rtt_threshold, rtt_us;
u64 grow;
oldval = tp->rcvq_space.space;
@@ -908,10 +909,19 @@ void tcp_rcvbuf_grow(struct sock *sk, u32 newval)
/* DRS is always one RTT late. */
rcvwin = newval << 1;
- /* slow start: allow the sender to double its rate. */
- grow = (u64)rcvwin * (newval - oldval);
- do_div(grow, oldval);
- rcvwin += grow << 1;
+ rtt_us = tp->rcv_rtt_est.rtt_us >> 3;
+ rtt_threshold = READ_ONCE(net->ipv4.sysctl_tcp_rcvbuf_low_rtt);
+ if (rtt_us < rtt_threshold) {
+ /* For small RTT, we set @grow to rcvwin * rtt_us/rtt_threshold.
+ * It might take a few additional ms to reach 'line rate',
+ * but will avoid sk_rcvbuf inflation and poor cache use.
+ */
+ grow = div_u64((u64)rcvwin * rtt_us, rtt_threshold);
+ } else {
+ /* slow start: allow the sender to double its rate. */
+ grow = div_u64(((u64)rcvwin << 1) * (newval - oldval), oldval);
+ }
+ rcvwin += grow;
if (!RB_EMPTY_ROOT(&tp->out_of_order_queue))
rcvwin += TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt;
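With the default threshold of 1000 us, a flow whose smoothed receive RTT is 200 us
now grows rcvwin by only 20% per pass instead of the doubling-plus growth of the
old slow-start branch. A self-contained check of the two branches, replaying the
same arithmetic with user-space types:

#include <stdio.h>
#include <stdint.h>

/* Replays the two branches of the new tcp_rcvbuf_grow() arithmetic. */
static uint32_t grown_rcvwin(uint32_t rcvwin, uint32_t newval, uint32_t oldval,
			     uint32_t rtt_us, uint32_t rtt_threshold)
{
	uint64_t grow;

	if (rtt_us < rtt_threshold)
		/* small RTT: scale growth by rtt_us / rtt_threshold */
		grow = (uint64_t)rcvwin * rtt_us / rtt_threshold;
	else
		/* slow start: allow the sender to double its rate */
		grow = ((uint64_t)rcvwin << 1) * (newval - oldval) / oldval;

	return rcvwin + (uint32_t)grow;
}

int main(void)
{
	uint32_t newval = 200000, oldval = 100000;
	uint32_t rcvwin = newval << 1;	/* DRS is always one RTT late */

	/* 200 us RTT: 400000 + 400000*200/1000  = 480000 (+20%) */
	printf("rtt=200us:  %u\n", grown_rcvwin(rcvwin, newval, oldval, 200, 1000));
	/* 5 ms RTT:   400000 + 800000*100000/100000 = 1200000 (3x) */
	printf("rtt=5000us: %u\n", grown_rcvwin(rcvwin, newval, oldval, 5000, 1000));
	return 0;
}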
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a7d9fec2950b..f8a9596e8f4d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2869,13 +2869,13 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
- timer_expires = icsk_timeout(icsk);
+ timer_expires = tcp_timeout_expires(sk);
} else if (icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
- timer_expires = icsk_timeout(icsk);
- } else if (timer_pending(&sk->sk_timer)) {
+ timer_expires = tcp_timeout_expires(sk);
+ } else if (timer_pending(&icsk->icsk_keepalive_timer)) {
timer_active = 2;
- timer_expires = sk->sk_timer.expires;
+ timer_expires = icsk->icsk_keepalive_timer.expires;
} else {
timer_active = 0;
timer_expires = jiffies;
@@ -3566,6 +3566,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_adv_win_scale = 1;
net->ipv4.sysctl_tcp_frto = 2;
net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
+ net->ipv4.sysctl_tcp_rcvbuf_low_rtt = USEC_PER_MSEC;
/* This limits the percentage of the congestion window which we
* will allow a single TSO frame to consume. Building TSO frames
* which are too large can cause TCP streams to be bursty.
@@ -3593,7 +3594,7 @@ static int __net_init tcp_sk_init(struct net *net)
sizeof(init_net.ipv4.sysctl_tcp_wmem));
}
net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
- net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
+ net->ipv4.sysctl_tcp_comp_sack_slack_ns = 10 * NSEC_PER_USEC;
net->ipv4.sysctl_tcp_comp_sack_nr = 44;
net->ipv4.sysctl_tcp_comp_sack_rtt_percent = 33;
net->ipv4.sysctl_tcp_backlog_ack_defer = 1;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d8f4d813e8dd..bd5462154f97 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -337,7 +337,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
- tw->tw_transparent = inet_test_bit(TRANSPARENT, sk);
tw->tw_mark = sk->sk_mark;
tw->tw_priority = READ_ONCE(sk->sk_priority);
tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 2cb93da93abc..fdda18b1abda 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -282,33 +282,6 @@ struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th)
return NULL;
}
-struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb)
-{
- unsigned int thlen, hlen, off;
- struct tcphdr *th;
-
- off = skb_gro_offset(skb);
- hlen = off + sizeof(*th);
- th = skb_gro_header(skb, hlen, off);
- if (unlikely(!th))
- return NULL;
-
- thlen = th->doff * 4;
- if (thlen < sizeof(*th))
- return NULL;
-
- hlen = off + thlen;
- if (!skb_gro_may_pull(skb, hlen)) {
- th = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!th))
- return NULL;
- }
-
- skb_gro_pull(skb, thlen);
-
- return th;
-}
-
struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
struct tcphdr *th)
{
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 0672c3d8f4f1..160080c9021d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -510,7 +510,7 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
* and tp->rcv_tstamp might very well have been written recently.
* rcv_delta can thus be negative.
*/
- rcv_delta = icsk_timeout(icsk) - tp->rcv_tstamp;
+ rcv_delta = tcp_timeout_expires(sk) - tp->rcv_tstamp;
if (rcv_delta <= timeout)
return false;
@@ -697,9 +697,9 @@ void tcp_write_timer_handler(struct sock *sk)
!icsk->icsk_pending)
return;
- if (time_after(icsk_timeout(icsk), jiffies)) {
- sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
- icsk_timeout(icsk));
+ if (time_after(tcp_timeout_expires(sk), jiffies)) {
+ sk_reset_timer(sk, &sk->tcp_retransmit_timer,
+ tcp_timeout_expires(sk));
return;
}
tcp_mstamp_refresh(tcp_sk(sk));
@@ -725,12 +725,10 @@ void tcp_write_timer_handler(struct sock *sk)
static void tcp_write_timer(struct timer_list *t)
{
- struct inet_connection_sock *icsk =
- timer_container_of(icsk, t, icsk_retransmit_timer);
- struct sock *sk = &icsk->icsk_inet.sk;
+ struct sock *sk = timer_container_of(sk, t, tcp_retransmit_timer);
/* Avoid locking the socket when there is no pending event. */
- if (!smp_load_acquire(&icsk->icsk_pending))
+ if (!smp_load_acquire(&inet_csk(sk)->icsk_pending))
goto out;
bh_lock_sock(sk);
@@ -755,12 +753,12 @@ void tcp_syn_ack_timeout(const struct request_sock *req)
void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len)
{
- sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
+ sk_reset_timer(sk, &inet_csk(sk)->icsk_keepalive_timer, jiffies + len);
}
static void tcp_delete_keepalive_timer(struct sock *sk)
{
- sk_stop_timer(sk, &sk->sk_timer);
+ sk_stop_timer(sk, &inet_csk(sk)->icsk_keepalive_timer);
}
void tcp_set_keepalive(struct sock *sk, int val)
@@ -777,8 +775,9 @@ EXPORT_IPV6_MOD_GPL(tcp_set_keepalive);
static void tcp_keepalive_timer(struct timer_list *t)
{
- struct sock *sk = timer_container_of(sk, t, sk_timer);
- struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_connection_sock *icsk =
+ timer_container_of(icsk, t, icsk_keepalive_timer);
+ struct sock *sk = &icsk->icsk_inet.sk;
struct tcp_sock *tp = tcp_sk(sk);
u32 elapsed;
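The two handlers above swap which struct each timer is recovered from, but both
rely on the same timer_container_of() pattern: the timer_list is embedded in a
containing struct, and the handler derives the container from the member pointer.
A self-contained user-space replay of that pointer arithmetic, with minimal
stand-ins for the kernel types:

#include <stdio.h>
#include <stddef.h>

struct timer_list { unsigned long expires; };

struct mini_sock {
	int id;
	struct timer_list tcp_retransmit_timer;
};

/* Same derivation timer_container_of() performs: subtract the member
 * offset from the member pointer to recover the containing struct.
 */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static void write_timer(struct timer_list *t)
{
	struct mini_sock *sk = container_of(t, struct mini_sock,
					    tcp_retransmit_timer);

	printf("timer fired for sock %d\n", sk->id);
}

int main(void)
{
	struct mini_sock sk = { .id = 42 };

	write_timer(&sk.tcp_retransmit_timer);
	return 0;
}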