diff options
Diffstat (limited to 'net/ipv4/af_inet.c')
-rw-r--r-- | net/ipv4/af_inet.c | 156 |
1 files changed, 90 insertions, 66 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7a1874b7b8fd..19ab78aca547 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -126,9 +126,6 @@ static struct list_head inetsw[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw_lock); -struct ipv4_config ipv4_config; -EXPORT_SYMBOL(ipv4_config); - /* New destruction routine */ void inet_sock_destruct(struct sock *sk) @@ -245,31 +242,6 @@ out: } EXPORT_SYMBOL(inet_listen); -u32 inet_ehash_secret __read_mostly; -EXPORT_SYMBOL(inet_ehash_secret); - -u32 ipv6_hash_secret __read_mostly; -EXPORT_SYMBOL(ipv6_hash_secret); - -/* - * inet_ehash_secret must be set exactly once, and to a non nul value - * ipv6_hash_secret must be set exactly once. - */ -void build_ehash_secret(void) -{ - u32 rnd; - - do { - get_random_bytes(&rnd, sizeof(rnd)); - } while (rnd == 0); - - if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) { - get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); - net_secret_init(); - } -} -EXPORT_SYMBOL(build_ehash_secret); - /* * Create an inet socket. */ @@ -286,10 +258,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, int try_loading_module = 0; int err; - if (unlikely(!inet_ehash_secret)) - if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) - build_ehash_secret(); - sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ @@ -371,7 +339,7 @@ lookup_protocol: inet->hdrincl = 1; } - if (ipv4_config.no_pmtu_disc) + if (net->ipv4.sysctl_ip_no_pmtu_disc) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -1162,7 +1130,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) fl4 = &inet->cork.fl.u.ip4; rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, sk->sk_protocol, - inet->inet_sport, inet->inet_dport, sk, false); + inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -1256,36 +1224,36 @@ static int inet_gso_send_check(struct sk_buff *skb) if (ihl < sizeof(*iph)) goto out; + proto = iph->protocol; + + /* Warning: after this point, iph might be no longer valid */ if (unlikely(!pskb_may_pull(skb, ihl))) goto out; - __skb_pull(skb, ihl); + skb_reset_transport_header(skb); - iph = ip_hdr(skb); - proto = iph->protocol; err = -EPROTONOSUPPORT; - rcu_read_lock(); ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_send_check)) err = ops->callbacks.gso_send_check(skb); - rcu_read_unlock(); out: return err; } static struct sk_buff *inet_gso_segment(struct sk_buff *skb, - netdev_features_t features) + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; + unsigned int offset = 0; + bool udpfrag, encap; struct iphdr *iph; int proto; + int nhoff; int ihl; int id; - unsigned int offset = 0; - bool tunnel; if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | @@ -1293,12 +1261,16 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_IPIP | + SKB_GSO_SIT | SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | SKB_GSO_MPLS | 0))) goto out; + skb_reset_network_header(skb); + nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) goto out; @@ -1307,42 +1279,53 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (ihl < sizeof(*iph)) goto out; + id = ntohs(iph->id); + proto = iph->protocol; + + /* Warning: after this point, iph might be no longer valid */ if (unlikely(!pskb_may_pull(skb, ihl))) goto out; + __skb_pull(skb, ihl); - tunnel = !!skb->encapsulation; + encap = SKB_GSO_CB(skb)->encap_level > 0; + if (encap) + features = skb->dev->hw_enc_features & netif_skb_features(skb); + SKB_GSO_CB(skb)->encap_level += ihl; - __skb_pull(skb, ihl); skb_reset_transport_header(skb); - iph = ip_hdr(skb); - id = ntohs(iph->id); - proto = iph->protocol; + segs = ERR_PTR(-EPROTONOSUPPORT); - rcu_read_lock(); + if (skb->encapsulation && + skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) + udpfrag = proto == IPPROTO_UDP && encap; + else + udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; + ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) segs = ops->callbacks.gso_segment(skb, features); - rcu_read_unlock(); if (IS_ERR_OR_NULL(segs)) goto out; skb = segs; do { - iph = ip_hdr(skb); - if (!tunnel && proto == IPPROTO_UDP) { + iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); + if (udpfrag) { iph->id = htons(id); iph->frag_off = htons(offset >> 3); if (skb->next != NULL) iph->frag_off |= htons(IP_MF); - offset += (skb->len - skb->mac_len - iph->ihl * 4); - } else { + offset += skb->len - nhoff - ihl; + } else { iph->id = htons(id++); } - iph->tot_len = htons(skb->len - skb->mac_len); - iph->check = 0; - iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); + iph->tot_len = htons(skb->len - nhoff); + ip_send_check(iph); + if (encap) + skb_reset_inner_headers(skb); + skb->network_header = (u8 *)iph - skb->head; } while ((skb = skb->next)); out: @@ -1394,8 +1377,12 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if (!NAPI_GRO_CB(p)->same_flow) continue; - iph2 = ip_hdr(p); - + iph2 = (struct iphdr *)(p->data + off); + /* The above works because, with the exception of the top + * (inner most) layer, we only aggregate pkts with the same + * hdr length so all the hdrs we'll need to verify will start + * at the same offset. + */ if ((iph->protocol ^ iph2->protocol) | ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { @@ -1407,13 +1394,24 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | - (__force int)((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) | - ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); + ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)); + /* Save the IP ID check to be included later when we get to + * the transport layer so only the inner most IP ID is checked. + * This is because some GSO/TSO implementations do not + * correctly increment the IP ID for the outer hdrs. + */ + NAPI_GRO_CB(p)->flush_id = + ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; } NAPI_GRO_CB(skb)->flush |= flush; + skb_set_network_header(skb, off); + /* The above will be needed by the transport layer if there is one + * immediately following this IP hdr. + */ + skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); @@ -1428,10 +1426,10 @@ out: return pp; } -static int inet_gro_complete(struct sk_buff *skb) +static int inet_gro_complete(struct sk_buff *skb, int nhoff) { - __be16 newlen = htons(skb->len - skb_network_offset(skb)); - struct iphdr *iph = ip_hdr(skb); + __be16 newlen = htons(skb->len - nhoff); + struct iphdr *iph = (struct iphdr *)(skb->data + nhoff); const struct net_offload *ops; int proto = iph->protocol; int err = -ENOSYS; @@ -1444,7 +1442,11 @@ static int inet_gro_complete(struct sk_buff *skb) if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out_unlock; - err = ops->callbacks.gro_complete(skb); + /* Only need to add sizeof(*iph) to get to the next hdr below + * because any hdr with option will have been flushed in + * inet_gro_receive(). + */ + err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph)); out_unlock: rcu_read_unlock(); @@ -1520,6 +1522,7 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) ptr[0] = __alloc_percpu(mibsize, align); if (!ptr[0]) return -ENOMEM; + #if SNMP_ARRAY_SZ == 2 ptr[1] = __alloc_percpu(mibsize, align); if (!ptr[1]) { @@ -1545,9 +1548,11 @@ static const struct net_protocol tcp_protocol = { .err_handler = tcp_v4_err, .no_policy = 1, .netns_ok = 1, + .icmp_strict_tag_validation = 1, }; static const struct net_protocol udp_protocol = { + .early_demux = udp_v4_early_demux, .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, @@ -1563,6 +1568,8 @@ static const struct net_protocol icmp_protocol = { static __net_init int ipv4_mib_init_net(struct net *net) { + int i; + if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, sizeof(struct tcp_mib), __alignof__(struct tcp_mib)) < 0) @@ -1571,6 +1578,17 @@ static __net_init int ipv4_mib_init_net(struct net *net) sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip_mib; + + for_each_possible_cpu(i) { + struct ipstats_mib *af_inet_stats; + af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i); + u64_stats_init(&af_inet_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i); + u64_stats_init(&af_inet_stats->syncp); +#endif + } + if (snmp_mib_init((void __percpu **)net->mib.net_statistics, sizeof(struct linux_mib), __alignof__(struct linux_mib)) < 0) @@ -1648,6 +1666,13 @@ static struct packet_offload ip_packet_offload __read_mostly = { }, }; +static const struct net_offload ipip_offload = { + .callbacks = { + .gso_send_check = inet_gso_send_check, + .gso_segment = inet_gso_segment, + }, +}; + static int __init ipv4_offload_init(void) { /* @@ -1659,6 +1684,7 @@ static int __init ipv4_offload_init(void) pr_crit("%s: Cannot add TCP protocol offload\n", __func__); dev_add_offload(&ip_packet_offload); + inet_add_offload(&ipip_offload, IPPROTO_IPIP); return 0; } @@ -1707,8 +1733,6 @@ static int __init inet_init(void) ip_static_sysctl_init(); #endif - tcp_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem; - /* * Add all the base protocols. */ |