diff options
| author | Ingo Molnar <mingo@elte.hu> | 2008-06-16 11:23:36 +0200 | 
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2008-06-16 11:23:36 +0200 | 
| commit | 766d02786ecd22932beeb9ca8bad6d8c5a552ef9 (patch) | |
| tree | f6f2df0e35bbea914d1f4d12be6d02f128c73575 /net/ipv4 | |
| parent | 906d882cacecd37ad2fdd03ed2a9b232bcb9507e (diff) | |
| parent | 066519068ad2fbe98c7f45552b1f592903a9c8c8 (diff) | |
Merge branch 'linus' into core/rcu [tip-core-rcu-2008-06-16_09.23_Mon]
Diffstat (limited to 'net/ipv4')
| -rw-r--r-- | net/ipv4/arp.c | 5 | ||||
| -rw-r--r-- | net/ipv4/devinet.c | 9 | ||||
| -rw-r--r-- | net/ipv4/fib_frontend.c | 1 | ||||
| -rw-r--r-- | net/ipv4/fib_semantics.c | 5 | ||||
| -rw-r--r-- | net/ipv4/inet_connection_sock.c | 11 | ||||
| -rw-r--r-- | net/ipv4/ip_gre.c | 146 | ||||
| -rw-r--r-- | net/ipv4/ipip.c | 130 | ||||
| -rw-r--r-- | net/ipv4/netfilter/nf_nat_snmp_basic.c | 14 | ||||
| -rw-r--r-- | net/ipv4/raw.c | 9 | ||||
| -rw-r--r-- | net/ipv4/route.c | 4 | ||||
| -rw-r--r-- | net/ipv4/syncookies.c | 3 | ||||
| -rw-r--r-- | net/ipv4/tcp.c | 27 | ||||
| -rw-r--r-- | net/ipv4/tcp_input.c | 80 | ||||
| -rw-r--r-- | net/ipv4/tcp_ipv4.c | 10 | ||||
| -rw-r--r-- | net/ipv4/tcp_minisocks.c | 32 | ||||
| -rw-r--r-- | net/ipv4/tcp_output.c | 12 | ||||
| -rw-r--r-- | net/ipv4/tcp_timer.c | 5 | ||||
| -rw-r--r-- | net/ipv4/tunnel4.c | 2 | ||||
| -rw-r--r-- | net/ipv4/udp.c | 3 | 
19 files changed, 107 insertions, 401 deletions
| diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 418862f1bf22..9b539fa9fe18 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1288,7 +1288,6 @@ static void arp_format_neigh_entry(struct seq_file *seq,  				   struct neighbour *n)  {  	char hbuffer[HBUFFERLEN]; -	const char hexbuf[] = "0123456789ABCDEF";  	int k, j;  	char tbuf[16];  	struct net_device *dev = n->dev; @@ -1302,8 +1301,8 @@ static void arp_format_neigh_entry(struct seq_file *seq,  	else {  #endif  	for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) { -		hbuffer[k++] = hexbuf[(n->ha[j] >> 4) & 15]; -		hbuffer[k++] = hexbuf[n->ha[j] & 15]; +		hbuffer[k++] = hex_asc_hi(n->ha[j]); +		hbuffer[k++] = hex_asc_lo(n->ha[j]);  		hbuffer[k++] = ':';  	}  	hbuffer[--k] = 0; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6848e4760f34..79a7ef6209ff 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -90,7 +90,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {  	[IFA_LOCAL]     	= { .type = NLA_U32 },  	[IFA_ADDRESS]   	= { .type = NLA_U32 },  	[IFA_BROADCAST] 	= { .type = NLA_U32 }, -	[IFA_ANYCAST]   	= { .type = NLA_U32 },  	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },  }; @@ -536,9 +535,6 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)  	if (tb[IFA_BROADCAST])  		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]); -	if (tb[IFA_ANYCAST]) -		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]); -  	if (tb[IFA_LABEL])  		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);  	else @@ -745,7 +741,6 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)  				break;  			inet_del_ifa(in_dev, ifap, 0);  			ifa->ifa_broadcast = 0; -			ifa->ifa_anycast = 0;  			ifa->ifa_scope = 0;  		} @@ -1113,7 +1108,6 @@ static inline size_t inet_nlmsg_size(void)  	       + nla_total_size(4) /* IFA_ADDRESS */  	       + nla_total_size(4) /* IFA_LOCAL */  	       + nla_total_size(4) /* IFA_BROADCAST */ -	       + 
nla_total_size(4) /* IFA_ANYCAST */  	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */  } @@ -1143,9 +1137,6 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,  	if (ifa->ifa_broadcast)  		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast); -	if (ifa->ifa_anycast) -		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast); -  	if (ifa->ifa_label[0])  		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 0f1557a4ac7a..0b2ac6a3d903 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -506,7 +506,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {  	[RTA_PREFSRC]		= { .type = NLA_U32 },  	[RTA_METRICS]		= { .type = NLA_NESTED },  	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) }, -	[RTA_PROTOINFO]		= { .type = NLA_U32 },  	[RTA_FLOW]		= { .type = NLA_U32 },  }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3b83c34019fc..0d4d72827e4b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -960,7 +960,10 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,  	rtm->rtm_dst_len = dst_len;  	rtm->rtm_src_len = 0;  	rtm->rtm_tos = tos; -	rtm->rtm_table = tb_id; +	if (tb_id < 256) +		rtm->rtm_table = tb_id; +	else +		rtm->rtm_table = RT_TABLE_COMPAT;  	NLA_PUT_U32(skb, RTA_TABLE, tb_id);  	rtm->rtm_type = type;  	rtm->rtm_flags = fi->fib_flags; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 828ea211ff21..045e799d3e1d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -419,7 +419,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,  	struct inet_connection_sock *icsk = inet_csk(parent);  	struct request_sock_queue *queue = &icsk->icsk_accept_queue;  	struct listen_sock *lopt = queue->listen_opt; -	int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; +	int max_retries = icsk->icsk_syn_retries ? 
: sysctl_tcp_synack_retries; +	int thresh = max_retries;  	unsigned long now = jiffies;  	struct request_sock **reqp, *req;  	int i, budget; @@ -455,6 +456,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,  		}  	} +	if (queue->rskq_defer_accept) +		max_retries = queue->rskq_defer_accept; +  	budget = 2 * (lopt->nr_table_entries / (timeout / interval));  	i = lopt->clock_hand; @@ -462,8 +466,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,  		reqp=&lopt->syn_table[i];  		while ((req = *reqp) != NULL) {  			if (time_after_eq(now, req->expires)) { -				if (req->retrans < thresh && -				    !req->rsk_ops->rtx_syn_ack(parent, req)) { +				if ((req->retrans < (inet_rsk(req)->acked ? max_retries : thresh)) && +				    (inet_rsk(req)->acked || +				     !req->rsk_ops->rtx_syn_ack(parent, req))) {  					unsigned long timeo;  					if (req->retrans++ == 0) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2ada033406de..4342cba4ff82 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -313,9 +313,8 @@ static void ipgre_tunnel_uninit(struct net_device *dev)  static void ipgre_err(struct sk_buff *skb, u32 info)  { -#ifndef I_WISH_WORLD_WERE_PERFECT -/* It is not :-( All the routers (except for Linux) return only +/* All the routers (except for Linux) return only     8 bytes of packet payload. It means, that precise relaying of     ICMP in the real Internet is absolutely infeasible. 
@@ -398,149 +397,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info)  out:  	read_unlock(&ipgre_lock);  	return; -#else -	struct iphdr *iph = (struct iphdr*)dp; -	struct iphdr *eiph; -	__be16	     *p = (__be16*)(dp+(iph->ihl<<2)); -	const int type = icmp_hdr(skb)->type; -	const int code = icmp_hdr(skb)->code; -	int rel_type = 0; -	int rel_code = 0; -	__be32 rel_info = 0; -	__u32 n = 0; -	__be16 flags; -	int grehlen = (iph->ihl<<2) + 4; -	struct sk_buff *skb2; -	struct flowi fl; -	struct rtable *rt; - -	if (p[1] != htons(ETH_P_IP)) -		return; - -	flags = p[0]; -	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { -		if (flags&(GRE_VERSION|GRE_ROUTING)) -			return; -		if (flags&GRE_CSUM) -			grehlen += 4; -		if (flags&GRE_KEY) -			grehlen += 4; -		if (flags&GRE_SEQ) -			grehlen += 4; -	} -	if (len < grehlen + sizeof(struct iphdr)) -		return; -	eiph = (struct iphdr*)(dp + grehlen); - -	switch (type) { -	default: -		return; -	case ICMP_PARAMETERPROB: -		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24; -		if (n < (iph->ihl<<2)) -			return; - -		/* So... This guy found something strange INSIDE encapsulated -		   packet. Well, he is fool, but what can we do ? -		 */ -		rel_type = ICMP_PARAMETERPROB; -		n -= grehlen; -		rel_info = htonl(n << 24); -		break; - -	case ICMP_DEST_UNREACH: -		switch (code) { -		case ICMP_SR_FAILED: -		case ICMP_PORT_UNREACH: -			/* Impossible event. */ -			return; -		case ICMP_FRAG_NEEDED: -			/* And it is the only really necessary thing :-) */ -			n = ntohs(icmp_hdr(skb)->un.frag.mtu); -			if (n < grehlen+68) -				return; -			n -= grehlen; -			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ -			if (n > ntohs(eiph->tot_len)) -				return; -			rel_info = htonl(n); -			break; -		default: -			/* All others are translated to HOST_UNREACH. -			   rfc2003 contains "deep thoughts" about NET_UNREACH, -			   I believe, it is just ether pollution. 
--ANK -			 */ -			rel_type = ICMP_DEST_UNREACH; -			rel_code = ICMP_HOST_UNREACH; -			break; -		} -		break; -	case ICMP_TIME_EXCEEDED: -		if (code != ICMP_EXC_TTL) -			return; -		break; -	} - -	/* Prepare fake skb to feed it to icmp_send */ -	skb2 = skb_clone(skb, GFP_ATOMIC); -	if (skb2 == NULL) -		return; -	dst_release(skb2->dst); -	skb2->dst = NULL; -	skb_pull(skb2, skb->data - (u8*)eiph); -	skb_reset_network_header(skb2); - -	/* Try to guess incoming interface */ -	memset(&fl, 0, sizeof(fl)); -	fl.fl4_dst = eiph->saddr; -	fl.fl4_tos = RT_TOS(eiph->tos); -	fl.proto = IPPROTO_GRE; -	if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) { -		kfree_skb(skb2); -		return; -	} -	skb2->dev = rt->u.dst.dev; - -	/* route "incoming" packet */ -	if (rt->rt_flags&RTCF_LOCAL) { -		ip_rt_put(rt); -		rt = NULL; -		fl.fl4_dst = eiph->daddr; -		fl.fl4_src = eiph->saddr; -		fl.fl4_tos = eiph->tos; -		if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || -		    rt->u.dst.dev->type != ARPHRD_IPGRE) { -			ip_rt_put(rt); -			kfree_skb(skb2); -			return; -		} -	} else { -		ip_rt_put(rt); -		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || -		    skb2->dst->dev->type != ARPHRD_IPGRE) { -			kfree_skb(skb2); -			return; -		} -	} - -	/* change mtu on this route */ -	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { -		if (n > dst_mtu(skb2->dst)) { -			kfree_skb(skb2); -			return; -		} -		skb2->dst->ops->update_pmtu(skb2->dst, n); -	} else if (type == ICMP_TIME_EXCEEDED) { -		struct ip_tunnel *t = netdev_priv(skb2->dev); -		if (t->parms.iph.ttl) { -			rel_type = ICMP_DEST_UNREACH; -			rel_code = ICMP_HOST_UNREACH; -		} -	} - -	icmp_send(skb2, rel_type, rel_code, rel_info); -	kfree_skb(skb2); -#endif  }  static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 149111f08e8d..af5cb53da5cc 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -278,9 +278,8 @@ static void 
ipip_tunnel_uninit(struct net_device *dev)  static int ipip_err(struct sk_buff *skb, u32 info)  { -#ifndef I_WISH_WORLD_WERE_PERFECT -/* It is not :-( All the routers (except for Linux) return only +/* All the routers (except for Linux) return only     8 bytes of packet payload. It means, that precise relaying of     ICMP in the real Internet is absolutely infeasible.   */ @@ -337,133 +336,6 @@ static int ipip_err(struct sk_buff *skb, u32 info)  out:  	read_unlock(&ipip_lock);  	return err; -#else -	struct iphdr *iph = (struct iphdr*)dp; -	int hlen = iph->ihl<<2; -	struct iphdr *eiph; -	const int type = icmp_hdr(skb)->type; -	const int code = icmp_hdr(skb)->code; -	int rel_type = 0; -	int rel_code = 0; -	__be32 rel_info = 0; -	__u32 n = 0; -	struct sk_buff *skb2; -	struct flowi fl; -	struct rtable *rt; - -	if (len < hlen + sizeof(struct iphdr)) -		return 0; -	eiph = (struct iphdr*)(dp + hlen); - -	switch (type) { -	default: -		return 0; -	case ICMP_PARAMETERPROB: -		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24; -		if (n < hlen) -			return 0; - -		/* So... This guy found something strange INSIDE encapsulated -		   packet. Well, he is fool, but what can we do ? -		 */ -		rel_type = ICMP_PARAMETERPROB; -		rel_info = htonl((n - hlen) << 24); -		break; - -	case ICMP_DEST_UNREACH: -		switch (code) { -		case ICMP_SR_FAILED: -		case ICMP_PORT_UNREACH: -			/* Impossible event. */ -			return 0; -		case ICMP_FRAG_NEEDED: -			/* And it is the only really necessary thing :-) */ -			n = ntohs(icmp_hdr(skb)->un.frag.mtu); -			if (n < hlen+68) -				return 0; -			n -= hlen; -			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ -			if (n > ntohs(eiph->tot_len)) -				return 0; -			rel_info = htonl(n); -			break; -		default: -			/* All others are translated to HOST_UNREACH. -			   rfc2003 contains "deep thoughts" about NET_UNREACH, -			   I believe, it is just ether pollution. 
--ANK -			 */ -			rel_type = ICMP_DEST_UNREACH; -			rel_code = ICMP_HOST_UNREACH; -			break; -		} -		break; -	case ICMP_TIME_EXCEEDED: -		if (code != ICMP_EXC_TTL) -			return 0; -		break; -	} - -	/* Prepare fake skb to feed it to icmp_send */ -	skb2 = skb_clone(skb, GFP_ATOMIC); -	if (skb2 == NULL) -		return 0; -	dst_release(skb2->dst); -	skb2->dst = NULL; -	skb_pull(skb2, skb->data - (u8*)eiph); -	skb_reset_network_header(skb2); - -	/* Try to guess incoming interface */ -	memset(&fl, 0, sizeof(fl)); -	fl.fl4_daddr = eiph->saddr; -	fl.fl4_tos = RT_TOS(eiph->tos); -	fl.proto = IPPROTO_IPIP; -	if (ip_route_output_key(dev_net(skb->dev), &rt, &key)) { -		kfree_skb(skb2); -		return 0; -	} -	skb2->dev = rt->u.dst.dev; - -	/* route "incoming" packet */ -	if (rt->rt_flags&RTCF_LOCAL) { -		ip_rt_put(rt); -		rt = NULL; -		fl.fl4_daddr = eiph->daddr; -		fl.fl4_src = eiph->saddr; -		fl.fl4_tos = eiph->tos; -		if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || -		    rt->u.dst.dev->type != ARPHRD_TUNNEL) { -			ip_rt_put(rt); -			kfree_skb(skb2); -			return 0; -		} -	} else { -		ip_rt_put(rt); -		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || -		    skb2->dst->dev->type != ARPHRD_TUNNEL) { -			kfree_skb(skb2); -			return 0; -		} -	} - -	/* change mtu on this route */ -	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { -		if (n > dst_mtu(skb2->dst)) { -			kfree_skb(skb2); -			return 0; -		} -		skb2->dst->ops->update_pmtu(skb2->dst, n); -	} else if (type == ICMP_TIME_EXCEEDED) { -		struct ip_tunnel *t = netdev_priv(skb2->dev); -		if (t->parms.iph.ttl) { -			rel_type = ICMP_DEST_UNREACH; -			rel_code = ICMP_HOST_UNREACH; -		} -	} - -	icmp_send(skb2, rel_type, rel_code, rel_info); -	kfree_skb(skb2); -	return 0; -#endif  }  static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 5daefad3d193..7750c97fde7b 100644 --- 
a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -232,6 +232,11 @@ static unsigned char asn1_length_decode(struct asn1_ctx *ctx,  			}  		}  	} + +	/* don't trust len bigger than ctx buffer */ +	if (*len > ctx->end - ctx->pointer) +		return 0; +  	return 1;  } @@ -250,6 +255,10 @@ static unsigned char asn1_header_decode(struct asn1_ctx *ctx,  	if (!asn1_length_decode(ctx, &def, &len))  		return 0; +	/* primitive shall be definite, indefinite shall be constructed */ +	if (*con == ASN1_PRI && !def) +		return 0; +  	if (def)  		*eoc = ctx->pointer + len;  	else @@ -434,6 +443,11 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,  	unsigned long *optr;  	size = eoc - ctx->pointer + 1; + +	/* first subid actually encodes first two subids */ +	if (size < 2 || size > ULONG_MAX/sizeof(unsigned long)) +		return 0; +  	*oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);  	if (*oid == NULL) {  		if (net_ratelimit()) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index fead049daf43..e7e091d365ff 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -608,6 +608,14 @@ static void raw_close(struct sock *sk, long timeout)  	sk_common_release(sk);  } +static int raw_destroy(struct sock *sk) +{ +	lock_sock(sk); +	ip_flush_pending_frames(sk); +	release_sock(sk); +	return 0; +} +  /* This gets rid of all the nasties in af_inet. 
-DaveM */  static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)  { @@ -820,6 +828,7 @@ struct proto raw_prot = {  	.name		   = "RAW",  	.owner		   = THIS_MODULE,  	.close		   = raw_close, +	.destroy	   = raw_destroy,  	.connect	   = ip4_datagram_connect,  	.disconnect	   = udp_disconnect,  	.ioctl		   = raw_ioctl, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 92f90ae46f4a..96be336064fb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -160,7 +160,7 @@ static struct dst_ops ipv4_dst_ops = {  	.negative_advice =	ipv4_negative_advice,  	.link_failure =		ipv4_link_failure,  	.update_pmtu =		ip_rt_update_pmtu, -	.local_out =		ip_local_out, +	.local_out =		__ip_local_out,  	.entry_size =		sizeof(struct rtable),  	.entries =		ATOMIC_INIT(0),  }; @@ -1792,7 +1792,7 @@ static int __mkroute_input(struct sk_buff *skb,  	if (err)  		flags |= RTCF_DIRECTSRC; -	if (out_dev == in_dev && err && !(flags & RTCF_MASQ) && +	if (out_dev == in_dev && err &&  	    (IN_DEV_SHARED_MEDIA(out_dev) ||  	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))  		flags |= RTCF_DOREDIRECT; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 73ba98921d64..d182a2a26291 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -285,7 +285,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,  		cookie_check_timestamp(&tcp_opt);  	ret = NULL; -	req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */ +	req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */  	if (!req)  		goto out; @@ -301,7 +301,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,  	ireq->rmt_port		= th->source;  	ireq->loc_addr		= ip_hdr(skb)->daddr;  	ireq->rmt_addr		= ip_hdr(skb)->saddr; -	ireq->opt		= NULL;  	ireq->snd_wscale	= tcp_opt.snd_wscale;  	ireq->rcv_wscale	= tcp_opt.rcv_wscale;  	ireq->sack_ok		= tcp_opt.sack_ok; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f88653138621..fc54a48fde1e 100644 --- 
a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1227,7 +1227,14 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,  				copied += used;  				offset += used;  			} -			if (offset != skb->len) +			/* +			 * If recv_actor drops the lock (e.g. TCP splice +			 * receive) the skb pointer might be invalid when +			 * getting here: tcp_collapse might have deleted it +			 * while aggregating skbs from the socket queue. +			 */ +			skb = tcp_recv_skb(sk, seq-1, &offset); +			if (!skb || (offset+1 != skb->len))  				break;  		}  		if (tcp_hdr(skb)->fin) { @@ -2105,12 +2112,15 @@ static int do_tcp_setsockopt(struct sock *sk, int level,  		break;  	case TCP_DEFER_ACCEPT: -		if (val < 0) { -			err = -EINVAL; -		} else { -			if (val > MAX_TCP_ACCEPT_DEFERRED) -				val = MAX_TCP_ACCEPT_DEFERRED; -			icsk->icsk_accept_queue.rskq_defer_accept = val; +		icsk->icsk_accept_queue.rskq_defer_accept = 0; +		if (val > 0) { +			/* Translate value in seconds to number of +			 * retransmits */ +			while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && +			       val > ((TCP_TIMEOUT_INIT / HZ) << +				       icsk->icsk_accept_queue.rskq_defer_accept)) +				icsk->icsk_accept_queue.rskq_defer_accept++; +			icsk->icsk_accept_queue.rskq_defer_accept++;  		}  		break; @@ -2292,7 +2302,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,  			val = (val ? : sysctl_tcp_fin_timeout) / HZ;  		break;  	case TCP_DEFER_ACCEPT: -		val = icsk->icsk_accept_queue.rskq_defer_accept; +		val = !icsk->icsk_accept_queue.rskq_defer_accept ? 
0 : +			((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));  		break;  	case TCP_WINDOW_CLAMP:  		val = tp->window_clamp; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b54d9d37b636..cad73b7dfef0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1392,9 +1392,9 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,  	if (before(next_dup->start_seq, skip_to_seq)) {  		skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count); -		tcp_sacktag_walk(skb, sk, NULL, -				 next_dup->start_seq, next_dup->end_seq, -				 1, fack_count, reord, flag); +		skb = tcp_sacktag_walk(skb, sk, NULL, +				     next_dup->start_seq, next_dup->end_seq, +				     1, fack_count, reord, flag);  	}  	return skb; @@ -2483,6 +2483,20 @@ static inline void tcp_complete_cwr(struct sock *sk)  	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);  } +static void tcp_try_keep_open(struct sock *sk) +{ +	struct tcp_sock *tp = tcp_sk(sk); +	int state = TCP_CA_Open; + +	if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) +		state = TCP_CA_Disorder; + +	if (inet_csk(sk)->icsk_ca_state != state) { +		tcp_set_ca_state(sk, state); +		tp->high_seq = tp->snd_nxt; +	} +} +  static void tcp_try_to_open(struct sock *sk, int flag)  {  	struct tcp_sock *tp = tcp_sk(sk); @@ -2496,15 +2510,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)  		tcp_enter_cwr(sk, 1);  	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { -		int state = TCP_CA_Open; - -		if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) -			state = TCP_CA_Disorder; - -		if (inet_csk(sk)->icsk_ca_state != state) { -			tcp_set_ca_state(sk, state); -			tp->high_seq = tp->snd_nxt; -		} +		tcp_try_keep_open(sk);  		tcp_moderate_cwnd(tp);  	} else {  		tcp_cwnd_down(sk, flag); @@ -3310,8 +3316,11 @@ no_queue:  	return 1;  old_ack: -	if (TCP_SKB_CB(skb)->sacked) +	if (TCP_SKB_CB(skb)->sacked) {  		tcp_sacktag_write_queue(sk, skb, prior_snd_una); +		if (icsk->icsk_ca_state 
== TCP_CA_Open) +			tcp_try_keep_open(sk); +	}  uninteresting_ack:  	SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); @@ -4532,49 +4541,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)  	}  } -static int tcp_defer_accept_check(struct sock *sk) -{ -	struct tcp_sock *tp = tcp_sk(sk); - -	if (tp->defer_tcp_accept.request) { -		int queued_data =  tp->rcv_nxt - tp->copied_seq; -		int hasfin =  !skb_queue_empty(&sk->sk_receive_queue) ? -			tcp_hdr((struct sk_buff *) -				sk->sk_receive_queue.prev)->fin : 0; - -		if (queued_data && hasfin) -			queued_data--; - -		if (queued_data && -		    tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) { -			if (sock_flag(sk, SOCK_KEEPOPEN)) { -				inet_csk_reset_keepalive_timer(sk, -							       keepalive_time_when(tp)); -			} else { -				inet_csk_delete_keepalive_timer(sk); -			} - -			inet_csk_reqsk_queue_add( -				tp->defer_tcp_accept.listen_sk, -				tp->defer_tcp_accept.request, -				sk); - -			tp->defer_tcp_accept.listen_sk->sk_data_ready( -				tp->defer_tcp_accept.listen_sk, 0); - -			sock_put(tp->defer_tcp_accept.listen_sk); -			sock_put(sk); -			tp->defer_tcp_accept.listen_sk = NULL; -			tp->defer_tcp_accept.request = NULL; -		} else if (hasfin || -			   tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) { -			tcp_reset(sk); -			return -1; -		} -	} -	return 0; -} -  static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)  {  	struct tcp_sock *tp = tcp_sk(sk); @@ -4935,8 +4901,6 @@ step5:  	tcp_data_snd_check(sk);  	tcp_ack_snd_check(sk); - -	tcp_defer_accept_check(sk);  	return 0;  csum_error: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cd601a866c2f..97a230026e13 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1285,7 +1285,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)  	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)  		goto drop; -	req = 
reqsk_alloc(&tcp_request_sock_ops); +	req = inet_reqsk_alloc(&tcp_request_sock_ops);  	if (!req)  		goto drop; @@ -1918,14 +1918,6 @@ int tcp_v4_destroy_sock(struct sock *sk)  		sk->sk_sndmsg_page = NULL;  	} -	if (tp->defer_tcp_accept.request) { -		reqsk_free(tp->defer_tcp_accept.request); -		sock_put(tp->defer_tcp_accept.listen_sk); -		sock_put(sk); -		tp->defer_tcp_accept.listen_sk = NULL; -		tp->defer_tcp_accept.request = NULL; -	} -  	atomic_dec(&tcp_sockets_allocated);  	return 0; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 019c8c16e5cc..8245247a6ceb 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -571,8 +571,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,  	   does sequence test, SYN is truncated, and thus we consider  	   it a bare ACK. -	   Both ends (listening sockets) accept the new incoming -	   connection and try to talk to each other. 8-) +	   If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this +	   bare ACK.  Otherwise, we create an established connection.  Both +	   ends (listening sockets) accept the new incoming connection and try +	   to talk to each other. 8-)  	   Note: This case is both harmless, and rare.  Possibility is about the  	   same as us discovering intelligent life on another plant tomorrow. @@ -640,6 +642,13 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,  		if (!(flg & TCP_FLAG_ACK))  			return NULL; +		/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ +		if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && +		    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { +			inet_rsk(req)->acked = 1; +			return NULL; +		} +  		/* OK, ACK is valid, create big socket and  		 * feed this segment to it. It will repeat all  		 * the tests. 
THIS SEGMENT MUST MOVE SOCKET TO @@ -678,24 +687,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,  		inet_csk_reqsk_queue_unlink(sk, req, prev);  		inet_csk_reqsk_queue_removed(sk, req); -		if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && -		    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { - -			/* the accept queue handling is done is est recv slow -			 * path so lets make sure to start there -			 */ -			tcp_sk(child)->pred_flags = 0; -			sock_hold(sk); -			sock_hold(child); -			tcp_sk(child)->defer_tcp_accept.listen_sk = sk; -			tcp_sk(child)->defer_tcp_accept.request = req; - -			inet_csk_reset_keepalive_timer(child, -						       inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ); -		} else { -			inet_csk_reqsk_queue_add(sk, req, child); -		} - +		inet_csk_reqsk_queue_add(sk, req, child);  		return child;  	listen_overflow: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index debf23581606..ad993ecb4810 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1836,7 +1836,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)  {  	struct tcp_sock *tp = tcp_sk(sk);  	struct inet_connection_sock *icsk = inet_csk(sk); -	unsigned int cur_mss = tcp_current_mss(sk, 0); +	unsigned int cur_mss;  	int err;  	/* Inconslusive MTU probe */ @@ -1858,6 +1858,11 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)  			return -ENOMEM;  	} +	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) +		return -EHOSTUNREACH; /* Routing failure or similar. */ + +	cur_mss = tcp_current_mss(sk, 0); +  	/* If receiver has shrunk his window, and skb is out of  	 * new window, do not retransmit it. The exception is the  	 * case, when window is shrunk to zero. 
In this case @@ -1884,9 +1889,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)  	    (sysctl_tcp_retrans_collapse != 0))  		tcp_retrans_try_collapse(sk, skb, cur_mss); -	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) -		return -EHOSTUNREACH; /* Routing failure or similar. */ -  	/* Some Solaris stacks overoptimize and ignore the FIN on a  	 * retransmit when old data is attached.  So strip it off  	 * since it is cheap to do so and saves bytes on the network. @@ -2129,6 +2131,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)  	TCP_SKB_CB(skb)->when = tcp_time_stamp;  	if (tcp_transmit_skb(sk, skb, 0, priority))  		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); + +	TCP_INC_STATS(TCP_MIB_OUTRSTS);  }  /* WARNING: This routine must only be called when we have already sent diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 4de68cf5f2aa..63ed9d6830e7 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -489,11 +489,6 @@ static void tcp_keepalive_timer (unsigned long data)  		goto death;  	} -	if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) { -		tcp_send_active_reset(sk, GFP_ATOMIC); -		goto death; -	} -  	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)  		goto out; diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index d3b709a6f264..cb1f0e83830b 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -97,7 +97,7 @@ static int tunnel64_rcv(struct sk_buff *skb)  {  	struct xfrm_tunnel *handler; -	if (!pskb_may_pull(skb, sizeof(struct iphdr))) +	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))  		goto drop;  	for (handler = tunnel64_handlers; handler; handler = handler->next) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index db1cb7c96d63..56fcda3694ba 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -420,7 +420,7 @@ void udp_err(struct sk_buff *skb, u32 info)  /*   * Throw away all pending data and cancel the corking. Socket is locked.   
*/ -static void udp_flush_pending_frames(struct sock *sk) +void udp_flush_pending_frames(struct sock *sk)  {  	struct udp_sock *up = udp_sk(sk); @@ -430,6 +430,7 @@ static void udp_flush_pending_frames(struct sock *sk)  		ip_flush_pending_frames(sk);  	}  } +EXPORT_SYMBOL(udp_flush_pending_frames);  /**   * 	udp4_hwcsum_outgoing  -  handle outgoing HW checksumming | 
