summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c13
-rw-r--r--net/ipv4/ah4.c3
-rw-r--r--net/ipv4/arp.c12
-rw-r--r--net/ipv4/cipso_ipv4.c24
-rw-r--r--net/ipv4/fib_frontend.c27
-rw-r--r--net/ipv4/fib_semantics.c29
-rw-r--r--net/ipv4/fib_trie.c26
-rw-r--r--net/ipv4/gre_offload.c2
-rw-r--r--net/ipv4/igmp.c28
-rw-r--r--net/ipv4/inet_connection_sock.c11
-rw-r--r--net/ipv4/inet_fragment.c4
-rw-r--r--net/ipv4/inet_hashtables.c5
-rw-r--r--net/ipv4/ip_fragment.c12
-rw-r--r--net/ipv4/ip_output.c7
-rw-r--r--net/ipv4/ip_sockglue.c24
-rw-r--r--net/ipv4/ip_tunnel.c4
-rw-r--r--net/ipv4/ip_vti.c3
-rw-r--r--net/ipv4/ipip.c59
-rw-r--r--net/ipv4/netfilter.c7
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c1
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c2
-rw-r--r--net/ipv4/ping.c5
-rw-r--r--net/ipv4/raw.c3
-rw-r--r--net/ipv4/route.c15
-rw-r--r--net/ipv4/syncookies.c3
-rw-r--r--net/ipv4/sysctl_net_ipv4.c2
-rw-r--r--net/ipv4/tcp.c32
-rw-r--r--net/ipv4/tcp_bbr.c49
-rw-r--r--net/ipv4/tcp_cong.c12
-rw-r--r--net/ipv4/tcp_input.c32
-rw-r--r--net/ipv4/tcp_ipv4.c31
-rw-r--r--net/ipv4/tcp_lp.c6
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_nv.c2
-rw-r--r--net/ipv4/tcp_output.c67
-rw-r--r--net/ipv4/tcp_probe.c4
-rw-r--r--net/ipv4/tcp_timer.c9
-rw-r--r--net/ipv4/udp.c7
-rw-r--r--net/ipv4/udp_offload.c7
39 files changed, 361 insertions, 230 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 215143246e4b..b5116ec31757 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1015,7 +1015,7 @@ static struct inet_protosw inetsw_array[] =
.type = SOCK_DGRAM,
.protocol = IPPROTO_ICMP,
.prot = &ping_prot,
- .ops = &inet_dgram_ops,
+ .ops = &inet_sockraw_ops,
.flags = INET_PROTOSW_REUSE,
},
@@ -1460,8 +1460,10 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
int proto = iph->protocol;
int err = -ENOSYS;
- if (skb->encapsulation)
+ if (skb->encapsulation) {
+ skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP));
skb_set_inner_network_header(skb, nhoff);
+ }
csum_replace2(&iph->check, iph->tot_len, newlen);
iph->tot_len = newlen;
@@ -1691,6 +1693,13 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_ip_dynaddr = 0;
net->ipv4.sysctl_ip_early_demux = 1;
+ /* Some igmp sysctl, whose values are always used */
+ net->ipv4.sysctl_igmp_max_memberships = 20;
+ net->ipv4.sysctl_igmp_max_msf = 10;
+ /* IGMP reports for link-local multicast groups are enabled by default */
+ net->ipv4.sysctl_igmp_llm_reports = 1;
+ net->ipv4.sysctl_igmp_qrv = 2;
+
return 0;
}
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index f2a71025a770..22377c8ff14b 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -270,6 +270,9 @@ static void ah_input_done(struct crypto_async_request *base, int err)
int ihl = ip_hdrlen(skb);
int ah_hlen = (ah->hdrlen + 2) << 2;
+ if (err)
+ goto out;
+
work_iph = AH_SKB_CB(skb)->tmp;
auth_data = ah_tmp_auth(work_iph, ihl);
icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 89a8cac4726a..51b27ae09fbd 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1263,7 +1263,7 @@ void __init arp_init(void)
/*
* ax25 -> ASCII conversion
*/
-static char *ax2asc2(ax25_address *a, char *buf)
+static void ax2asc2(ax25_address *a, char *buf)
{
char c, *s;
int n;
@@ -1285,10 +1285,10 @@ static char *ax2asc2(ax25_address *a, char *buf)
*s++ = n + '0';
*s++ = '\0';
- if (*buf == '\0' || *buf == '-')
- return "*";
-
- return buf;
+ if (*buf == '\0' || *buf == '-') {
+ buf[0] = '*';
+ buf[1] = '\0';
+ }
}
#endif /* CONFIG_AX25 */
@@ -1322,7 +1322,7 @@ static void arp_format_neigh_entry(struct seq_file *seq,
}
#endif
sprintf(tbuf, "%pI4", n->primary_key);
- seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
+ seq_printf(seq, "%-16s 0x%-10x0x%-10x%-17s * %s\n",
tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name);
read_unlock(&n->lock);
}
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index ae206163c273..972353cd1778 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1943,7 +1943,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
buf = NULL;
req_inet = inet_rsk(req);
- opt = xchg(&req_inet->opt, opt);
+ opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt);
if (opt)
kfree_rcu(opt, rcu);
@@ -1965,11 +1965,13 @@ req_setattr_failure:
* values on failure.
*
*/
-static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
+static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
{
+ struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1);
int hdr_delta = 0;
- struct ip_options_rcu *opt = *opt_ptr;
+ if (!opt || opt->opt.cipso == 0)
+ return 0;
if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
u8 cipso_len;
u8 cipso_off;
@@ -2031,14 +2033,10 @@ static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
*/
void cipso_v4_sock_delattr(struct sock *sk)
{
- int hdr_delta;
- struct ip_options_rcu *opt;
struct inet_sock *sk_inet;
+ int hdr_delta;
sk_inet = inet_sk(sk);
- opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
- if (!opt || opt->opt.cipso == 0)
- return;
hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
if (sk_inet->is_icsk && hdr_delta > 0) {
@@ -2058,15 +2056,7 @@ void cipso_v4_sock_delattr(struct sock *sk)
*/
void cipso_v4_req_delattr(struct request_sock *req)
{
- struct ip_options_rcu *opt;
- struct inet_request_sock *req_inet;
-
- req_inet = inet_rsk(req);
- opt = req_inet->opt;
- if (!opt || opt->opt.cipso == 0)
- return;
-
- cipso_v4_delopt(&req_inet->opt);
+ cipso_v4_delopt(&inet_rsk(req)->ireq_opt);
}
/**
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 5b03d7f3b255..968d8e165e3d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -758,7 +758,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
unsigned int e = 0, s_e;
struct fib_table *tb;
struct hlist_head *head;
- int dumped = 0;
+ int dumped = 0, err;
if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
@@ -778,20 +778,27 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (dumped)
memset(&cb->args[2], 0, sizeof(cb->args) -
2 * sizeof(cb->args[0]));
- if (fib_table_dump(tb, skb, cb) < 0)
- goto out;
+ err = fib_table_dump(tb, skb, cb);
+ if (err < 0) {
+ if (likely(skb->len))
+ goto out;
+
+ goto out_err;
+ }
dumped = 1;
next:
e++;
}
}
out:
+ err = skb->len;
+out_err:
rcu_read_unlock();
cb->args[1] = e;
cb->args[0] = h;
- return skb->len;
+ return err;
}
/* Prepare and feed intra-kernel routing request.
@@ -1081,7 +1088,8 @@ static void nl_fib_input(struct sk_buff *skb)
net = sock_net(skb->sk);
nlh = nlmsg_hdr(skb);
- if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len ||
+ if (skb->len < nlmsg_total_size(sizeof(*frn)) ||
+ skb->len < nlh->nlmsg_len ||
nlmsg_len(nlh) < sizeof(*frn))
return;
@@ -1311,13 +1319,14 @@ static struct pernet_operations fib_net_ops = {
void __init ip_fib_init(void)
{
- rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
- rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
+ fib_trie_init();
register_pernet_subsys(&fib_net_ops);
+
register_netdevice_notifier(&fib_netdev_notifier);
register_inetaddr_notifier(&fib_inetaddr_notifier);
- fib_trie_init();
+ rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
+ rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
+ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 6a4068031aaa..38c1c979ecb1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -204,6 +204,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
static void free_fib_info_rcu(struct rcu_head *head)
{
struct fib_info *fi = container_of(head, struct fib_info, rcu);
+ struct dst_metrics *m;
change_nexthops(fi) {
if (nexthop_nh->nh_dev)
@@ -214,8 +215,9 @@ static void free_fib_info_rcu(struct rcu_head *head)
rt_fibinfo_free(&nexthop_nh->nh_rth_input);
} endfor_nexthops(fi);
- if (fi->fib_metrics != (u32 *) dst_default_metrics)
- kfree(fi->fib_metrics);
+ m = fi->fib_metrics;
+ if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt))
+ kfree(m);
kfree(fi);
}
@@ -982,11 +984,11 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
val = 255;
if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
return -EINVAL;
- fi->fib_metrics[type - 1] = val;
+ fi->fib_metrics->metrics[type - 1] = val;
}
if (ecn_ca)
- fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
+ fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
return 0;
}
@@ -1042,14 +1044,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
if (!fi)
goto failure;
- fib_info_cnt++;
if (cfg->fc_mx) {
- fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
- if (!fi->fib_metrics)
- goto failure;
- } else
- fi->fib_metrics = (u32 *) dst_default_metrics;
-
+ fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
+ if (unlikely(!fi->fib_metrics)) {
+ kfree(fi);
+ return ERR_PTR(err);
+ }
+ atomic_set(&fi->fib_metrics->refcnt, 1);
+ } else {
+ fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
+ }
+ fib_info_cnt++;
fi->fib_net = net;
fi->fib_protocol = cfg->fc_protocol;
fi->fib_scope = cfg->fc_scope;
@@ -1252,7 +1257,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
if (fi->fib_priority &&
nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
goto nla_put_failure;
- if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
+ if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
goto nla_put_failure;
if (fi->fib_prefsrc &&
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e3665bf7a7f3..ef40bb659a7a 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1932,6 +1932,8 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
/* rcu_read_lock is hold by caller */
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+ int err;
+
if (i < s_i) {
i++;
continue;
@@ -1942,17 +1944,14 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
continue;
}
- if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWROUTE,
- tb->tb_id,
- fa->fa_type,
- xkey,
- KEYLENGTH - fa->fa_slen,
- fa->fa_tos,
- fa->fa_info, NLM_F_MULTI) < 0) {
+ err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_NEWROUTE,
+ tb->tb_id, fa->fa_type,
+ xkey, KEYLENGTH - fa->fa_slen,
+ fa->fa_tos, fa->fa_info, NLM_F_MULTI);
+ if (err < 0) {
cb->args[4] = i;
- return -1;
+ return err;
}
i++;
}
@@ -1974,10 +1973,13 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
t_key key = cb->args[3];
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
- if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
+ int err;
+
+ err = fn_trie_dump_leaf(l, tb, skb, cb);
+ if (err < 0) {
cb->args[3] = key;
cb->args[2] = count;
- return -1;
+ return err;
}
++count;
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index d5cac99170b1..8c72034df28e 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -98,7 +98,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
greh = (struct gre_base_hdr *)skb_transport_header(skb);
pcsum = (__sum16 *)(greh + 1);
- if (gso_partial) {
+ if (gso_partial && skb_is_gso(skb)) {
unsigned int partial_adj;
/* Adjust checksum to account for the fact that
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 1bc623d7f754..08575e3bd135 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1112,6 +1112,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
if (!pmc)
return;
+ spin_lock_init(&pmc->lock);
spin_lock_bh(&im->lock);
pmc->interface = im->interface;
in_dev_hold(in_dev);
@@ -2071,21 +2072,26 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
static void ip_mc_clear_src(struct ip_mc_list *pmc)
{
- struct ip_sf_list *psf, *nextpsf;
+ struct ip_sf_list *psf, *nextpsf, *tomb, *sources;
- for (psf = pmc->tomb; psf; psf = nextpsf) {
+ spin_lock_bh(&pmc->lock);
+ tomb = pmc->tomb;
+ pmc->tomb = NULL;
+ sources = pmc->sources;
+ pmc->sources = NULL;
+ pmc->sfmode = MCAST_EXCLUDE;
+ pmc->sfcount[MCAST_INCLUDE] = 0;
+ pmc->sfcount[MCAST_EXCLUDE] = 1;
+ spin_unlock_bh(&pmc->lock);
+
+ for (psf = tomb; psf; psf = nextpsf) {
nextpsf = psf->sf_next;
kfree(psf);
}
- pmc->tomb = NULL;
- for (psf = pmc->sources; psf; psf = nextpsf) {
+ for (psf = sources; psf; psf = nextpsf) {
nextpsf = psf->sf_next;
kfree(psf);
}
- pmc->sources = NULL;
- pmc->sfmode = MCAST_EXCLUDE;
- pmc->sfcount[MCAST_INCLUDE] = 0;
- pmc->sfcount[MCAST_EXCLUDE] = 1;
}
/* Join a multicast group
@@ -2968,12 +2974,6 @@ static int __net_init igmp_net_init(struct net *net)
goto out_sock;
}
- /* Sysctl initialization */
- net->ipv4.sysctl_igmp_max_memberships = 20;
- net->ipv4.sysctl_igmp_max_msf = 10;
- /* IGMP reports for link-local multicast groups are enabled by default */
- net->ipv4.sysctl_igmp_llm_reports = 1;
- net->ipv4.sysctl_igmp_qrv = 2;
return 0;
out_sock:
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 61a9deec2993..d1cab49393e2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -407,9 +407,11 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct net *net = read_pnet(&ireq->ireq_net);
- struct ip_options_rcu *opt = ireq->opt;
+ struct ip_options_rcu *opt;
struct rtable *rt;
+ opt = ireq_opt_deref(ireq);
+
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -443,10 +445,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
struct flowi4 *fl4;
struct rtable *rt;
+ opt = rcu_dereference(ireq->ireq_opt);
fl4 = &newinet->cork.fl.u.ip4;
- rcu_read_lock();
- opt = rcu_dereference(newinet->inet_opt);
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -459,13 +460,11 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
goto no_route;
if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
goto route_err;
- rcu_read_unlock();
return &rt->dst;
route_err:
ip_rt_put(rt);
no_route:
- rcu_read_unlock();
__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
@@ -665,6 +664,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
/* listeners have SOCK_RCU_FREE, not the children */
sock_reset_flag(newsk, SOCK_RCU_FREE);
+ inet_sk(newsk)->mc_list = NULL;
+
newsk->sk_mark = inet_rsk(req)->ir_mark;
atomic64_set(&newsk->sk_cookie,
atomic64_read(&inet_rsk(req)->ir_cookie));
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index b5e9317eaf9e..631c0d0d7cf8 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -234,10 +234,8 @@ evict_again:
cond_resched();
if (read_seqretry(&f->rnd_seqlock, seq) ||
- percpu_counter_sum(&nf->mem))
+ sum_frag_mem_limit(nf))
goto evict_again;
-
- percpu_counter_destroy(&nf->mem);
}
EXPORT_SYMBOL(inet_frags_exit_net);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ca97835bfec4..b9bcf3db3af9 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -455,10 +455,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
return reuseport_add_sock(sk, sk2);
}
- /* Initial allocation may have already happened via setsockopt */
- if (!rcu_access_pointer(sk->sk_reuseport_cb))
- return reuseport_alloc(sk);
- return 0;
+ return reuseport_alloc(sk);
}
int __inet_hash(struct sock *sk, struct sock *osk,
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index bbe7f72db9c1..453db950dc9f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -835,8 +835,6 @@ static void __init ip4_frags_ctl_register(void)
static int __net_init ipv4_frags_init_net(struct net *net)
{
- int res;
-
/* Fragment cache limits.
*
* The fragment memory accounting code, (tries to) account for
@@ -862,13 +860,9 @@ static int __net_init ipv4_frags_init_net(struct net *net)
net->ipv4.frags.max_dist = 64;
- res = inet_frags_init_net(&net->ipv4.frags);
- if (res)
- return res;
- res = ip4_frags_ns_ctl_register(net);
- if (res)
- inet_frags_uninit_net(&net->ipv4.frags);
- return res;
+ inet_frags_init_net(&net->ipv4.frags);
+
+ return ip4_frags_ns_ctl_register(net);
}
static void __net_exit ipv4_frags_exit_net(struct net *net)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e5c1dbef3626..2c3c1a223df4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -936,10 +936,12 @@ static int __ip_append_data(struct sock *sk,
csummode = CHECKSUM_PARTIAL;
cork->length += length;
- if (((length > mtu) || (skb && skb_is_gso(skb))) &&
+ if ((skb && skb_is_gso(skb)) ||
+ ((length > mtu) &&
+ (skb_queue_len(queue) <= 1) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
- (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
+ (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, transhdrlen,
maxfraglen, flags);
@@ -1255,6 +1257,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
return -EINVAL;
if ((size + skb->len > mtu) &&
+ (skb_queue_len(&sk->sk_write_queue) == 1) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO)) {
if (skb->ip_summed != CHECKSUM_PARTIAL)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 65336f38a5d8..551dd393ceec 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -105,10 +105,10 @@ static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
if (skb->ip_summed != CHECKSUM_COMPLETE)
return;
- if (offset != 0)
- csum = csum_sub(csum,
- csum_partial(skb_transport_header(skb) + tlen,
- offset, 0));
+ if (offset != 0) {
+ int tend_off = skb_transport_offset(skb) + tlen;
+ csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0));
+ }
put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
}
@@ -474,16 +474,15 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
return false;
/* Support IP_PKTINFO on tstamp packets if requested, to correlate
- * timestamp with egress dev. Not possible for packets without dev
+ * timestamp with egress dev. Not possible for packets without iif
* or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
*/
- if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
- (!skb->dev))
+ info = PKTINFO_SKB_CB(skb);
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
+ !info->ipi_ifindex)
return false;
- info = PKTINFO_SKB_CB(skb);
info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
- info->ipi_ifindex = skb->dev->ifindex;
return true;
}
@@ -820,6 +819,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
{
struct ip_mreqn mreq;
struct net_device *dev = NULL;
+ int midx;
if (sk->sk_type == SOCK_STREAM)
goto e_inval;
@@ -864,11 +864,15 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -EADDRNOTAVAIL;
if (!dev)
break;
+
+ midx = l3mdev_master_ifindex(dev);
+
dev_put(dev);
err = -EINVAL;
if (sk->sk_bound_dev_if &&
- mreq.imr_ifindex != sk->sk_bound_dev_if)
+ mreq.imr_ifindex != sk->sk_bound_dev_if &&
+ (!midx || midx != sk->sk_bound_dev_if))
break;
inet->mc_index = mreq.imr_ifindex;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 5719d6ba0824..bd7f1836bb70 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -609,8 +609,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
ip_rt_put(rt);
goto tx_dropped;
}
- iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos,
- key->ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+ iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
+ df, !net_eq(tunnel->net, dev_net(dev)));
return;
tx_error:
dev->stats.tx_errors++;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 5d7944f394d9..b120b9b11402 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -168,6 +168,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
struct ip_tunnel_parm *parms = &tunnel->parms;
struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev; /* Device to other host */
+ int pkt_len = skb->len;
int err;
int mtu;
@@ -229,7 +230,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
err = dst_output(tunnel->net, skb->sk, skb);
if (net_xmit_eval(err) == 0)
- err = skb->len;
+ err = pkt_len;
iptunnel_xmit_stats(dev, err);
return NETDEV_TX_OK;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c9392589c415..56d71a004dce 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly;
static int ipip_err(struct sk_buff *skb, u32 info)
{
-
-/* All the routers (except for Linux) return only
- 8 bytes of packet payload. It means, that precise relaying of
- ICMP in the real Internet is absolutely infeasible.
- */
+ /* All the routers (except for Linux) return only
+ * 8 bytes of packet payload. It means, that precise relaying of
+ * ICMP in the real Internet is absolutely infeasible.
+ */
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
const struct iphdr *iph = (const struct iphdr *)skb->data;
- struct ip_tunnel *t;
- int err;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
+ struct ip_tunnel *t;
+ int err = 0;
+
+ switch (type) {
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_SR_FAILED:
+ /* Impossible event. */
+ goto out;
+ default:
+ /* All others are translated to HOST_UNREACH.
+ * rfc2003 contains "deep thoughts" about NET_UNREACH,
+ * I believe they are just ether pollution. --ANK
+ */
+ break;
+ }
+ break;
+
+ case ICMP_TIME_EXCEEDED:
+ if (code != ICMP_EXC_TTL)
+ goto out;
+ break;
+
+ case ICMP_REDIRECT:
+ break;
+
+ default:
+ goto out;
+ }
- err = -ENOENT;
t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->daddr, iph->saddr, 0);
- if (!t)
+ if (!t) {
+ err = -ENOENT;
goto out;
+ }
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, iph->protocol, 0);
- err = 0;
+ ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
+ iph->protocol, 0);
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
- iph->protocol, 0);
- err = 0;
+ ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
goto out;
}
- if (t->parms.iph.daddr == 0)
+ if (t->parms.iph.daddr == 0) {
+ err = -ENOENT;
goto out;
+ }
- err = 0;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
goto out;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index b3cc1335adbc..c0cc6aa8cfaa 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -23,7 +23,8 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
struct rtable *rt;
struct flowi4 fl4 = {};
__be32 saddr = iph->saddr;
- __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
+ const struct sock *sk = skb_to_full_sk(skb);
+ __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0;
struct net_device *dev = skb_dst(skb)->dev;
unsigned int hh_len;
@@ -40,7 +41,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
fl4.daddr = iph->daddr;
fl4.saddr = saddr;
fl4.flowi4_tos = RT_TOS(iph->tos);
- fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
+ fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
if (!fl4.flowi4_oif)
fl4.flowi4_oif = l3mdev_master_ifindex(dev);
fl4.flowi4_mark = skb->mark;
@@ -61,7 +62,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
struct dst_entry *dst = skb_dst(skb);
skb_dst_set(skb, NULL);
- dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0);
+ dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0);
if (IS_ERR(dst))
return PTR_ERR(dst);
skb_dst_set(skb, dst);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index c9b52c361da2..5a8f7c360887 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1304,6 +1304,7 @@ static int __init nf_nat_snmp_basic_init(void)
static void __exit nf_nat_snmp_basic_fini(void)
{
RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
+ synchronize_rcu();
nf_conntrack_helper_unregister(&snmp_trap_helper);
}
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index fd8220213afc..146d86105183 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -126,6 +126,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
/* ip_route_me_harder expects skb->dst to be set */
skb_dst_set_noref(nskb, skb_dst(oldskb));
+ nskb->mark = IP4_REPLY_MARK(net, oldskb->mark);
+
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
ip4_dst_hoplimit(skb_dst(nskb)));
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 105c0748c52f..e612991c9185 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -156,17 +156,18 @@ int ping_hash(struct sock *sk)
void ping_unhash(struct sock *sk)
{
struct inet_sock *isk = inet_sk(sk);
+
pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
+ write_lock_bh(&ping_table.lock);
if (sk_hashed(sk)) {
- write_lock_bh(&ping_table.lock);
hlist_nulls_del(&sk->sk_nulls_node);
sk_nulls_node_init(&sk->sk_nulls_node);
sock_put(sk);
isk->inet_num = 0;
isk->inet_sport = 0;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
- write_unlock_bh(&ping_table.lock);
}
+ write_unlock_bh(&ping_table.lock);
}
EXPORT_SYMBOL_GPL(ping_unhash);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ecbe5a7c2d6d..9879b73d5565 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -356,6 +356,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
rt->dst.dev->mtu);
return -EMSGSIZE;
}
+ if (length < sizeof(struct iphdr))
+ return -EINVAL;
+
if (flags&MSG_PROBE)
goto out;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d851cae27dac..6a5b7783932e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1247,7 +1247,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
if (mtu)
return mtu;
- mtu = dst->dev->mtu;
+ mtu = READ_ONCE(dst->dev->mtu);
if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
if (rt->rt_uses_gateway && mtu > 576)
@@ -1364,8 +1364,12 @@ static void rt_add_uncached_list(struct rtable *rt)
static void ipv4_dst_destroy(struct dst_entry *dst)
{
+ struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
struct rtable *rt = (struct rtable *) dst;
+ if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt))
+ kfree(p);
+
if (!list_empty(&rt->rt_uncached)) {
struct uncached_list *ul = rt->rt_uncached_list;
@@ -1417,7 +1421,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
rt->rt_gateway = nh->nh_gw;
rt->rt_uses_gateway = 1;
}
- dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+ dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
+ if (fi->fib_metrics != &dst_default_metrics) {
+ rt->dst._metrics |= DST_METRICS_REFCOUNTED;
+ atomic_inc(&fi->fib_metrics->refcnt);
+ }
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
@@ -1968,6 +1976,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
{
int res;
+ tos &= IPTOS_RT_MASK;
rcu_read_lock();
/* Multicast recognition logic is moved from route cache to here.
@@ -2568,7 +2577,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
skb_reset_network_header(skb);
/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
- ip_hdr(skb)->protocol = IPPROTO_ICMP;
+ ip_hdr(skb)->protocol = IPPROTO_UDP;
skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e3c4043c27de..0597ad73a1fa 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -334,6 +334,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
treq = tcp_rsk(req);
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
+ treq->txhash = net_tx_rndhash();
req->mss = mss;
ireq->ir_num = ntohs(th->dest);
ireq->ir_rmt_port = th->source;
@@ -353,7 +354,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
*/
- ireq->opt = tcp_v4_save_options(skb);
+ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb));
if (security_inet_conn_request(sk, skb, req)) {
reqsk_free(req);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 80bc36b25de2..566cfc50f7cf 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -958,7 +958,7 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_tcp_notsent_lowat,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_douintvec,
},
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6a90a0e130dc..dd33c785ce16 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -431,7 +431,7 @@ EXPORT_SYMBOL(tcp_init_sock);
static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
{
- if (tsflags) {
+ if (tsflags && skb) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -966,10 +966,8 @@ new_segment:
copied += copy;
offset += copy;
size -= copy;
- if (!size) {
- tcp_tx_timestamp(sk, sk->sk_tsflags, skb);
+ if (!size)
goto out;
- }
if (skb->len < size_goal || (flags & MSG_OOB))
continue;
@@ -995,8 +993,11 @@ wait_for_memory:
}
out:
- if (copied && !(flags & MSG_SENDPAGE_NOTLAST))
- tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
+ if (copied) {
+ tcp_tx_timestamp(sk, sk->sk_tsflags, tcp_write_queue_tail(sk));
+ if (!(flags & MSG_SENDPAGE_NOTLAST))
+ tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
+ }
return copied;
do_error:
@@ -1078,9 +1079,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
int *copied, size_t size)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct sockaddr *uaddr = msg->msg_name;
int err, flags;
- if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
+ if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+ (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
+ uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
if (tp->fastopen_req)
return -EALREADY; /* Another Fast Open is in progress */
@@ -1093,7 +1097,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
tp->fastopen_req->size = size;
flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
- err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
+ err = __inet_stream_connect(sk->sk_socket, uaddr,
msg->msg_namelen, flags);
*copied = tp->fastopen_req->copied;
tcp_free_fastopen_req(tp);
@@ -1286,7 +1290,6 @@ new_segment:
copied += copy;
if (!msg_data_left(msg)) {
- tcp_tx_timestamp(sk, sockc.tsflags, skb);
if (unlikely(flags & MSG_EOR))
TCP_SKB_CB(skb)->eor = 1;
goto out;
@@ -1317,8 +1320,10 @@ wait_for_memory:
}
out:
- if (copied)
+ if (copied) {
+ tcp_tx_timestamp(sk, sockc.tsflags, tcp_write_queue_tail(sk));
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
+ }
out_nopush:
release_sock(sk);
return copied + copied_syn;
@@ -2294,9 +2299,16 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
+ /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
+ * issue in __tcp_select_window()
+ */
+ icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
tcp_init_send_head(sk);
memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
+ dst_release(sk->sk_rx_dst);
+ sk->sk_rx_dst = NULL;
+ tcp_saved_syn_free(tp);
WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 0ea66c2c9344..cb8db347c680 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -83,7 +83,8 @@ struct bbr {
cwnd_gain:10, /* current gain for setting cwnd */
full_bw_cnt:3, /* number of rounds without large bw gains */
cycle_idx:3, /* current index in pacing_gain cycle array */
- unused_b:6;
+ has_seen_rtt:1, /* have we seen an RTT sample yet? */
+ unused_b:5;
u32 prior_cwnd; /* prior cwnd upon entering loss recovery */
u32 full_bw; /* recent bw, to estimate if pipe is full */
};
@@ -182,6 +183,35 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
return rate >> BW_SCALE;
}
+/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
+static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
+{
+ u64 rate = bw;
+
+ rate = bbr_rate_bytes_per_sec(sk, rate, gain);
+ rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+ return rate;
+}
+
+/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
+static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ u64 bw;
+ u32 rtt_us;
+
+ if (tp->srtt_us) { /* any RTT sample yet? */
+ rtt_us = max(tp->srtt_us >> 3, 1U);
+ bbr->has_seen_rtt = 1;
+ } else { /* no RTT sample yet */
+ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
+ }
+ bw = (u64)tp->snd_cwnd * BW_UNIT;
+ do_div(bw, rtt_us);
+ sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
+}
+
/* Pace using current bw estimate and a gain factor. In order to help drive the
* network toward lower queues while maintaining high utilization and low
* latency, the average pacing rate aims to be slightly (~1%) lower than the
@@ -191,12 +221,13 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
*/
static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u64 rate = bw;
+ u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
- rate = bbr_rate_bytes_per_sec(sk, rate, gain);
- rate = min_t(u64, rate, sk->sk_max_pacing_rate);
- if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate)
+ if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
+ bbr_init_pacing_rate_from_rtt(sk);
+ if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
sk->sk_pacing_rate = rate;
}
@@ -769,7 +800,6 @@ static void bbr_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u64 bw;
bbr->prior_cwnd = 0;
bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
@@ -785,11 +815,8 @@ static void bbr_init(struct sock *sk)
minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
- /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
- bw = (u64)tp->snd_cwnd * BW_UNIT;
- do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC);
- sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */
- bbr_set_pacing_rate(sk, bw, bbr_high_gain);
+ bbr->has_seen_rtt = 0;
+ bbr_init_pacing_rate_from_rtt(sk);
bbr->restore_cwnd = 0;
bbr->round_start = 0;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index f9038d6b109e..0cdbea9b9288 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -167,12 +167,8 @@ void tcp_assign_congestion_control(struct sock *sk)
}
out:
rcu_read_unlock();
+ memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
- /* Clear out private data before diag gets it and
- * the ca has not been initialized.
- */
- if (ca->get_info)
- memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
if (ca->flags & TCP_CONG_NEEDS_ECN)
INET_ECN_xmit(sk);
else
@@ -183,6 +179,7 @@ void tcp_init_congestion_control(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
+ tcp_sk(sk)->prior_ssthresh = 0;
if (icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
if (tcp_ca_needs_ecn(sk))
@@ -199,11 +196,10 @@ static void tcp_reinit_congestion_control(struct sock *sk,
tcp_cleanup_congestion_control(sk);
icsk->icsk_ca_ops = ca;
icsk->icsk_ca_setsockopt = 1;
+ memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
- if (sk->sk_state != TCP_CLOSE) {
- memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+ if (sk->sk_state != TCP_CLOSE)
tcp_init_congestion_control(sk);
- }
}
/* Manage refcounts on socket close. */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c71d49ce0c93..8fcd0c642742 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1177,13 +1177,14 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
*/
if (pkt_len > mss) {
unsigned int new_len = (pkt_len / mss) * mss;
- if (!in_sack && new_len < pkt_len) {
+ if (!in_sack && new_len < pkt_len)
new_len += mss;
- if (new_len >= skb->len)
- return 0;
- }
pkt_len = new_len;
}
+
+ if (pkt_len >= skb->len && !in_sack)
+ return 0;
+
err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
if (err < 0)
return err;
@@ -2559,8 +2560,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
return;
/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
- if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
- (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+ if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
+ (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
tp->snd_cwnd = tp->snd_ssthresh;
tp->snd_cwnd_stamp = tcp_time_stamp;
}
@@ -3035,8 +3036,7 @@ void tcp_rearm_rto(struct sock *sk)
/* delta may not be positive if the socket is locked
* when the retrans timer fires and is rescheduled.
*/
- if (delta > 0)
- rto = delta;
+ rto = max(delta, 1);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
TCP_RTO_MAX);
@@ -3232,7 +3232,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
int delta;
/* Non-retransmitted hole got filled? That's reordering */
- if (reord < prior_fackets)
+ if (reord < prior_fackets && reord <= tp->fackets_out)
tcp_update_reordering(sk, tp->fackets_out - reord, 0);
delta = tcp_is_fack(tp) ? pkts_acked :
@@ -5571,6 +5571,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_set_state(sk, TCP_ESTABLISHED);
+ icsk->icsk_ack.lrcvtime = tcp_time_stamp;
if (skb) {
icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
@@ -5789,7 +5790,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* to stand against the temptation 8) --ANK
*/
inet_csk_schedule_ack(sk);
- icsk->icsk_ack.lrcvtime = tcp_time_stamp;
tcp_enter_quickack_mode(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
TCP_DELACK_MAX, TCP_RTO_MAX);
@@ -5916,9 +5916,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (th->syn) {
if (th->fin)
goto discard;
- if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
- return 1;
+ /* It is possible that we process SYN packets from backlog,
+ * so we need to make sure to disable BH right there.
+ */
+ local_bh_disable();
+ acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
+ local_bh_enable();
+ if (!acceptable)
+ return 1;
consume_skb(skb);
return 0;
}
@@ -6231,7 +6237,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
struct inet_request_sock *ireq = inet_rsk(req);
kmemcheck_annotate_bitfield(ireq, flags);
- ireq->opt = NULL;
+ ireq->ireq_opt = NULL;
#if IS_ENABLED(CONFIG_IPV6)
ireq->pktopts = NULL;
#endif
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2259114c7242..d577ec07a0d8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -269,10 +269,13 @@ EXPORT_SYMBOL(tcp_v4_connect);
*/
void tcp_v4_mtu_reduced(struct sock *sk)
{
- struct dst_entry *dst;
struct inet_sock *inet = inet_sk(sk);
- u32 mtu = tcp_sk(sk)->mtu_info;
+ struct dst_entry *dst;
+ u32 mtu;
+ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ return;
+ mtu = tcp_sk(sk)->mtu_info;
dst = inet_csk_update_pmtu(sk, mtu);
if (!dst)
return;
@@ -418,7 +421,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
switch (type) {
case ICMP_REDIRECT:
- do_redirect(icmp_skb, sk);
+ if (!sock_owned_by_user(sk))
+ do_redirect(icmp_skb, sk);
goto out;
case ICMP_SOURCE_QUENCH:
/* Just silently ignore these. */
@@ -857,7 +861,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- ireq->opt);
+ ireq_opt_deref(ireq));
err = net_xmit_eval(err);
}
@@ -869,7 +873,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
*/
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
- kfree(inet_rsk(req)->opt);
+ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
#ifdef CONFIG_TCP_MD5SIG
@@ -1195,7 +1199,7 @@ static void tcp_v4_init_req(struct request_sock *req,
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
- ireq->opt = tcp_v4_save_options(skb);
+ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb));
}
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
@@ -1291,10 +1295,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
sk_daddr_set(newsk, ireq->ir_rmt_addr);
sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
newsk->sk_bound_dev_if = ireq->ir_iif;
- newinet->inet_saddr = ireq->ir_loc_addr;
- inet_opt = ireq->opt;
- rcu_assign_pointer(newinet->inet_opt, inet_opt);
- ireq->opt = NULL;
+ newinet->inet_saddr = ireq->ir_loc_addr;
+ inet_opt = rcu_dereference(ireq->ireq_opt);
+ RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->rcv_tos = ip_hdr(skb)->tos;
@@ -1342,9 +1345,12 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
- if (*own_req)
+ if (likely(*own_req)) {
tcp_move_syn(newtp, req);
-
+ ireq->ireq_opt = NULL;
+ } else {
+ newinet->inet_opt = NULL;
+ }
return newsk;
exit_overflow:
@@ -1355,6 +1361,7 @@ exit:
tcp_listendrop(sk);
return NULL;
put_and_exit:
+ newinet->inet_opt = NULL;
inet_csk_prepare_forced_close(newsk);
tcp_done(newsk);
goto exit;
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index c67ece1390c2..7d86fc505397 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -264,13 +264,15 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
{
struct tcp_sock *tp = tcp_sk(sk);
struct lp *lp = inet_csk_ca(sk);
+ u32 delta;
if (sample->rtt_us > 0)
tcp_lp_rtt_sample(sk, sample->rtt_us);
/* calc inference */
- if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
- lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr);
+ delta = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+ if ((s32)delta > 0)
+ lp->inference = 3 * delta;
/* test if within inference */
if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference))
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6234ebaa7db1..64e1ba49c3e2 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -466,6 +466,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U);
newicsk->icsk_rto = TCP_TIMEOUT_INIT;
+ newicsk->icsk_ack.lrcvtime = tcp_time_stamp;
newtp->packets_out = 0;
newtp->retrans_out = 0;
@@ -542,6 +543,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss;
tcp_ecn_openreq_child(newtp, req);
+ newtp->fastopen_req = NULL;
newtp->fastopen_rsk = NULL;
newtp->syn_data_acked = 0;
newtp->rack.mstamp.v64 = 0;
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 5de82a8d4d87..e45e2c41c7bd 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -263,7 +263,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
/* rate in 100's bits per second */
rate64 = ((u64)sample->in_flight) * 8000000;
- rate = (u32)div64_u64(rate64, (u64)(avg_rtt * 100));
+ rate = (u32)div64_u64(rate64, (u64)(avg_rtt ?: 1) * 100);
/* Remember the maximum rate seen during this RTT
* Note: It may be more than one RTT. This function should be
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 65d6189140bc..3d7b59ecc76c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -914,6 +914,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
struct tcp_skb_cb *tcb;
struct tcp_out_options opts;
unsigned int tcp_options_size, tcp_header_size;
+ struct sk_buff *oskb = NULL;
struct tcp_md5sig_key *md5;
struct tcphdr *th;
int err;
@@ -922,11 +923,9 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tp = tcp_sk(sk);
if (clone_it) {
- skb_mstamp_get(&skb->skb_mstamp);
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una;
- tcp_rate_skb_sent(sk, skb);
-
+ oskb = skb;
if (unlikely(skb_cloned(skb)))
skb = pskb_copy(skb, gfp_mask);
else
@@ -934,6 +933,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (unlikely(!skb))
return -ENOBUFS;
}
+ skb_mstamp_get(&skb->skb_mstamp);
inet = inet_sk(sk);
tcb = TCP_SKB_CB(skb);
@@ -1035,12 +1035,15 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
- if (likely(err <= 0))
- return err;
-
- tcp_enter_cwr(sk);
-
- return net_xmit_eval(err);
+ if (unlikely(err > 0)) {
+ tcp_enter_cwr(sk);
+ err = net_xmit_eval(err);
+ }
+ if (!err && oskb) {
+ skb_mstamp_get(&oskb->skb_mstamp);
+ tcp_rate_skb_sent(sk, oskb);
+ }
+ return err;
}
/* This routine just queues the buffer for sending.
@@ -1246,7 +1249,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
* eventually). The difference is that pulled data not copied, but
* immediately discarded.
*/
-static void __pskb_trim_head(struct sk_buff *skb, int len)
+static int __pskb_trim_head(struct sk_buff *skb, int len)
{
struct skb_shared_info *shinfo;
int i, k, eat;
@@ -1256,7 +1259,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
__skb_pull(skb, eat);
len -= eat;
if (!len)
- return;
+ return 0;
}
eat = len;
k = 0;
@@ -1282,23 +1285,28 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
skb_reset_tail_pointer(skb);
skb->data_len -= len;
skb->len = skb->data_len;
+ return len;
}
/* Remove acked data from a packet in the transmit queue. */
int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
{
+ u32 delta_truesize;
+
if (skb_unclone(skb, GFP_ATOMIC))
return -ENOMEM;
- __pskb_trim_head(skb, len);
+ delta_truesize = __pskb_trim_head(skb, len);
TCP_SKB_CB(skb)->seq += len;
skb->ip_summed = CHECKSUM_PARTIAL;
- skb->truesize -= len;
- sk->sk_wmem_queued -= len;
- sk_mem_uncharge(sk, len);
- sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
+ if (delta_truesize) {
+ skb->truesize -= delta_truesize;
+ sk->sk_wmem_queued -= delta_truesize;
+ sk_mem_uncharge(sk, delta_truesize);
+ sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
+ }
/* Any change of skb->len requires recalculation of tso factor. */
if (tcp_skb_pcount(skb) > 1)
@@ -1988,6 +1996,7 @@ static int tcp_mtu_probe(struct sock *sk)
nskb->ip_summed = skb->ip_summed;
tcp_insert_write_queue_before(nskb, skb, sk);
+ tcp_highest_sack_replace(sk, skb, nskb);
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
@@ -2527,7 +2536,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
- tcp_highest_sack_combine(sk, next_skb, skb);
+ tcp_highest_sack_replace(sk, next_skb, skb);
tcp_unlink_write_queue(next_skb, sk);
@@ -2704,10 +2713,11 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
skb_headroom(skb) >= 0xFFFF)) {
struct sk_buff *nskb;
- skb_mstamp_get(&skb->skb_mstamp);
nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-ENOBUFS;
+ if (!err)
+ skb_mstamp_get(&skb->skb_mstamp);
} else {
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
@@ -3100,13 +3110,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
tcp_ecn_make_synack(req, th);
th->source = htons(ireq->ir_num);
th->dest = ireq->ir_rmt_port;
- /* Setting of flags are superfluous here for callers (and ECE is
- * not even correctly set)
- */
- tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
- TCPHDR_SYN | TCPHDR_ACK);
-
- th->seq = htonl(TCP_SKB_CB(skb)->seq);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ th->seq = htonl(tcp_rsk(req)->snt_isn);
/* XXX data is queued and acked as is. No buffer/window check */
th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
@@ -3320,6 +3325,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
goto done;
}
+ /* data was not sent, this is our new send_head */
+ sk->sk_send_head = syn_data;
+ tp->packets_out -= tcp_skb_pcount(syn_data);
+
fallback:
/* Send a regular SYN with Fast Open cookie request option */
if (fo->cookie.len > 0)
@@ -3339,6 +3348,9 @@ int tcp_connect(struct sock *sk)
struct sk_buff *buff;
int err;
+ if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
+ return -EHOSTUNREACH; /* Routing failure or similar. */
+
tcp_connect_init(sk);
if (unlikely(tp->repair)) {
@@ -3366,6 +3378,11 @@ int tcp_connect(struct sock *sk)
*/
tp->snd_nxt = tp->write_seq;
tp->pushed_seq = tp->write_seq;
+ buff = tcp_send_head(sk);
+ if (unlikely(buff)) {
+ tp->snd_nxt = TCP_SKB_CB(buff)->seq;
+ tp->pushed_seq = TCP_SKB_CB(buff)->seq;
+ }
TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
/* Timer for repeating the SYN until an answer. */
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index f6c50af24a64..3d063eb37848 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -117,7 +117,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
(fwmark > 0 && skb->mark == fwmark)) &&
(full || tp->snd_cwnd != tcp_probe.lastcwnd)) {
- spin_lock(&tcp_probe.lock);
+ spin_lock_bh(&tcp_probe.lock);
/* If log fills, just silently drop */
if (tcp_probe_avail() > 1) {
struct tcp_log *p = tcp_probe.log + tcp_probe.head;
@@ -157,7 +157,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1);
}
tcp_probe.lastcwnd = tp->snd_cwnd;
- spin_unlock(&tcp_probe.lock);
+ spin_unlock_bh(&tcp_probe.lock);
wake_up(&tcp_probe.wait);
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 3ea1cf804748..74db43b47917 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -249,7 +249,8 @@ void tcp_delack_timer_handler(struct sock *sk)
sk_mem_reclaim_partial(sk);
- if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
+ if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+ !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
goto out;
if (time_after(icsk->icsk_ack.timeout, jiffies)) {
@@ -552,7 +553,8 @@ void tcp_write_timer_handler(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
int event;
- if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending)
+ if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+ !icsk->icsk_pending)
goto out;
if (time_after(icsk->icsk_timeout, jiffies)) {
@@ -652,7 +654,8 @@ static void tcp_keepalive_timer (unsigned long data)
goto death;
}
- if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
+ if (!sock_flag(sk, SOCK_KEEPOPEN) ||
+ ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
goto out;
elapsed = keepalive_time_when(tp);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5bab6c3f7a2f..bef4a94ce1a0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -222,10 +222,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
}
}
- /* Initial allocation may have already happened via setsockopt */
- if (!rcu_access_pointer(sk->sk_reuseport_cb))
- return reuseport_alloc(sk);
- return 0;
+ return reuseport_alloc(sk);
}
/**
@@ -813,7 +810,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
if (is_udplite) /* UDP-Lite */
csum = udplite_csum(skb);
- else if (sk->sk_no_check_tx) { /* UDP csum disabled */
+ else if (sk->sk_no_check_tx && !skb_is_gso(skb)) { /* UDP csum off */
skb->ip_summed = CHECKSUM_NONE;
goto send;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index b2be1d9757ef..6401574cd638 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -29,6 +29,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
u16 mac_len = skb->mac_len;
int udp_offset, outer_hlen;
__wsum partial;
+ bool need_ipsec;
if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
goto out;
@@ -62,8 +63,10 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
+ need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb));
/* Try to offload checksum if possible */
offload_csum = !!(need_csum &&
+ !need_ipsec &&
(skb->dev->features &
(is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) :
(NETIF_F_HW_CSUM | NETIF_F_IP_CSUM))));
@@ -119,7 +122,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
* will be using a length value equal to only one MSS sized
* segment instead of the entire frame.
*/
- if (gso_partial) {
+ if (gso_partial && skb_is_gso(skb)) {
uh->len = htons(skb_shinfo(skb)->gso_size +
SKB_GSO_CB(skb)->data_offset +
skb->head - (unsigned char *)uh);
@@ -232,7 +235,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
- skb->ip_summed = CHECKSUM_NONE;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
/* If there is no outer header we can fake a checksum offload
* due to the fact that we have already done the checksum in