From a1889c0d2039a53ae04abb9f20c62500bd312bf3 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Thu, 28 Jul 2011 02:46:01 +0000
Subject: net: adjust array index

Convert array index from the loop bound to the loop index.

A simplified version of the semantic patch that fixes this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@@
expression e1,e2,ar;
@@

for(e1 = 0; e1 < e2; e1++) { <...
  ar[
- e2
+ e1
  ]
  ...> }
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f1d27f6c9351..283c0a26e03f 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1718,7 +1718,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 
 		pmc->sfcount[sfmode]--;
 		for (j=0; j<i; j++)
-			(void) ip_mc_del1_src(pmc, sfmode, &psfsrc[i]);
+			(void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]);
 	} else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) {
 #ifdef CONFIG_IP_MULTICAST
 		struct ip_sf_list *psf;
-- 
cgit v1.2.3


From f2c31e32b378a6653f8de606149d963baf11d7d3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 29 Jul 2011 19:00:53 +0000
Subject: net: fix NULL dereferences in check_peer_redir()

Gergely Kalman reported crashes in check_peer_redir().

It appears commit f39925dbde778 (ipv4: Cache learned redirect
information in inetpeer.) added a race, leading to possible NULL ptr
dereference.

Since we can now change dst neighbour, we should make sure a reader can
safely use a neighbour.

Add RCU protection to dst neighbour, and make sure check_peer_redir()
can be called safely by different cpus in parallel.

As neighbours are already freed after one RCU grace period, this patch
should not add typical RCU penalty (cache cold effects)

Many thanks to Gergely for providing a pretty report pointing to the
bug.

Reported-by: Gergely Kalman <synapse@hippy.csoma.elte.hu>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 10 ++++++++--
 net/ipv4/route.c     | 14 ++++++++------
 2 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ccaaa851ab42..77d3eded665a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -204,9 +204,15 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 		skb = skb2;
 	}
 
+	rcu_read_lock();
 	neigh = dst_get_neighbour(dst);
-	if (neigh)
-		return neigh_output(neigh, skb);
+	if (neigh) {
+		int res = neigh_output(neigh, skb);
+
+		rcu_read_unlock();
+		return res;
+	}
+	rcu_read_unlock();
 
 	if (net_ratelimit())
 		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1730689f560e..6afc4eb50591 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1628,16 +1628,18 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
 {
 	struct rtable *rt = (struct rtable *) dst;
 	__be32 orig_gw = rt->rt_gateway;
-	struct neighbour *n;
+	struct neighbour *n, *old_n;
 
 	dst_confirm(&rt->dst);
 
-	neigh_release(dst_get_neighbour(&rt->dst));
-	dst_set_neighbour(&rt->dst, NULL);
-
 	rt->rt_gateway = peer->redirect_learned.a4;
-	rt_bind_neighbour(rt);
-	n = dst_get_neighbour(&rt->dst);
+
+	n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
+	if (IS_ERR(n))
+		return PTR_ERR(n);
+	old_n = xchg(&rt->dst._neighbour, n);
+	if (old_n)
+		neigh_release(old_n);
 	if (!n || !(n->nud_state & NUD_VALID)) {
 		if (n)
 			neigh_event_send(n, NULL);
-- 
cgit v1.2.3


From 6e5714eaf77d79ae1c8b47e3e040ff5411b717ec Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 3 Aug 2011 20:50:44 -0700
Subject: net: Compute protocol sequence numbers and fragment IDs using MD5.

Computers have become a lot faster since we compromised on the
partial MD4 hash which we use currently for performance reasons.

MD5 is a much safer choice, and is inline with both RFC1948 and
other ISS generators (OpenBSD, Solaris, etc.)

Furthermore, only having 24-bits of the sequence number be truly
unpredictable is a very serious limitation.  So the periodic
regeneration and 8-bit counter have been removed.  We compute and
use a full 32-bit sequence number.

For ipv6, DCCP was found to use a 32-bit truncated initial sequence
number (it needs 43-bits) and that is fixed here as well.

Reported-by: Dan Kaminsky <dan@doxpara.com>
Tested-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_hashtables.c               | 1 +
 net/ipv4/inetpeer.c                      | 1 +
 net/ipv4/netfilter/nf_nat_proto_common.c | 1 +
 net/ipv4/route.c                         | 1 +
 net/ipv4/tcp_ipv4.c                      | 1 +
 5 files changed, 5 insertions(+)

(limited to 'net/ipv4')

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 3c0369a3a663..984ec656b03b 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -21,6 +21,7 @@
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_hashtables.h>
+#include <net/secure_seq.h>
 #include <net/ip.h>
 
 /*
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index e38213817d0a..86f13c67ea85 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -19,6 +19,7 @@
 #include <linux/net.h>
 #include <net/ip.h>
 #include <net/inetpeer.h>
+#include <net/secure_seq.h>
 
 /*
  *  Theory of operations.
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
index 3e61faf23a9a..f52d41ea0690 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -12,6 +12,7 @@
 #include <linux/ip.h>
 
 #include <linux/netfilter.h>
+#include <net/secure_seq.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_rule.h>
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6afc4eb50591..e3dec1c9f09d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -109,6 +109,7 @@
 #include <linux/sysctl.h>
 #endif
 #include <net/atmclip.h>
+#include <net/secure_seq.h>
 
 #define RT_FL_TOS(oldflp4) \
     ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 955b8e65b69e..1c12b8ec849d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -72,6 +72,7 @@
 #include <net/timewait_sock.h>
 #include <net/xfrm.h>
 #include <net/netdma.h>
+#include <net/secure_seq.h>
 
 #include <linux/inet.h>
 #include <linux/ipv6.h>
-- 
cgit v1.2.3


From d547f727df86059104af2234804fdd538e112015 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sun, 7 Aug 2011 22:20:20 -0700
Subject: ipv4: fix the reusing of routing cache entries

	compare_keys and ip_route_input_common rely on
rt_oif for distinguishing of input and output routes
with same keys values. But sometimes the input route has
also same hash chain (keyed by iif != 0) with the output
routes (keyed by orig_oif=0). Problem visible if running
with small number of rhash_entries.

	Fix them to use rt_route_iif instead. By this way
input route can not be returned to users that request
output route.

	The patch fixes the ip_rt_bug errors that were
reported in ip_local_out context, mostly for 255.255.255.255
destinations.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e3dec1c9f09d..cb7efe0567f0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -731,6 +731,7 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
 		((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
 		(rt1->rt_mark ^ rt2->rt_mark) |
 		(rt1->rt_key_tos ^ rt2->rt_key_tos) |
+		(rt1->rt_route_iif ^ rt2->rt_route_iif) |
 		(rt1->rt_oif ^ rt2->rt_oif) |
 		(rt1->rt_iif ^ rt2->rt_iif)) == 0;
 }
@@ -2321,8 +2322,8 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
 		     ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
 		     (rth->rt_iif ^ iif) |
-		     rth->rt_oif |
 		     (rth->rt_key_tos ^ tos)) == 0 &&
+		    rt_is_input_route(rth) &&
 		    rth->rt_mark == skb->mark &&
 		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
-- 
cgit v1.2.3


From dd23198e58cd35259dd09e8892bbdb90f1d57748 Mon Sep 17 00:00:00 2001
From: Daniel Baluta <dbaluta@ixiacom.com>
Date: Sun, 7 Aug 2011 22:31:07 -0700
Subject: ipv4: Fix ip_getsockopt for IP_PKTOPTIONS

IP_PKTOPTIONS is broken for 32-bit applications running
in COMPAT mode on 64-bit kernels.

This happens because msghdr's msg_flags field is always
set to zero. When running in COMPAT mode this should be
set to MSG_CMSG_COMPAT instead.

Signed-off-by: Tiberiu Szocs-Mihai <tszocs@ixiacom.com>
Signed-off-by: Daniel Baluta <dbaluta@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ab0c9efd1efa..8905e92f896a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1067,7 +1067,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
  */
 
 static int do_ip_getsockopt(struct sock *sk, int level, int optname,
-			    char __user *optval, int __user *optlen)
+			    char __user *optval, int __user *optlen, unsigned flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	int val;
@@ -1240,7 +1240,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 
 		msg.msg_control = optval;
 		msg.msg_controllen = len;
-		msg.msg_flags = 0;
+		msg.msg_flags = flags;
 
 		if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
 			struct in_pktinfo info;
@@ -1294,7 +1294,7 @@ int ip_getsockopt(struct sock *sk, int level,
 {
 	int err;
 
-	err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+	err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
 #ifdef CONFIG_NETFILTER
 	/* we need to exclude all possible ENOPROTOOPTs except default case */
 	if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
@@ -1327,7 +1327,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
 		return compat_mc_getsockopt(sk, level, optname, optval, optlen,
 			ip_getsockopt);
 
-	err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+	err = do_ip_getsockopt(sk, level, optname, optval, optlen,
+		MSG_CMSG_COMPAT);
 
 #ifdef CONFIG_NETFILTER
 	/* we need to exclude all possible ENOPROTOOPTs except default case */
-- 
cgit v1.2.3


From 797fd3913abf2f7036003ab8d3d019cbea41affd Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sun, 7 Aug 2011 09:11:00 +0000
Subject: netfilter: TCP and raw fix for ip_route_me_harder

TCP in some cases uses different global (raw) socket
to send RST and ACK. The transparent flag is not set there.
Currently, it is a problem for rerouting after the previous
change.

	Fix it by simplifying the checks in ip_route_me_harder
and use FLOWI_FLAG_ANYSRC even for sockets. It looks safe
because the initial routing allowed this source address to
be used and now we just have to make sure the packet is rerouted.

	As a side effect this also allows rerouting for normal
raw sockets that use spoofed source addresses which was not possible
even before we eliminated the ip_route_input call.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 2e97e3ec1eb7..929b27bdeb79 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -18,17 +18,15 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 	struct rtable *rt;
 	struct flowi4 fl4 = {};
 	__be32 saddr = iph->saddr;
-	__u8 flags = 0;
+	__u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
 	unsigned int hh_len;
 
-	if (!skb->sk && addr_type != RTN_LOCAL) {
-		if (addr_type == RTN_UNSPEC)
-			addr_type = inet_addr_type(net, saddr);
-		if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
-			flags |= FLOWI_FLAG_ANYSRC;
-		else
-			saddr = 0;
-	}
+	if (addr_type == RTN_UNSPEC)
+		addr_type = inet_addr_type(net, saddr);
+	if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
+		flags |= FLOWI_FLAG_ANYSRC;
+	else
+		saddr = 0;
 
 	/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
 	 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
@@ -38,7 +36,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 	fl4.flowi4_tos = RT_TOS(iph->tos);
 	fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
 	fl4.flowi4_mark = skb->mark;
-	fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags;
+	fl4.flowi4_flags = flags;
 	rt = ip_route_output_key(net, &fl4);
 	if (IS_ERR(rt))
 		return -1;
-- 
cgit v1.2.3


From 47670b767b1593433b516df7798df03f858278be Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sun, 7 Aug 2011 09:16:09 +0000
Subject: ipv4: route non-local sources for raw socket

The raw sockets can provide source address for
routing but their privileges are not considered. We
can provide non-local source address, make sure the
FLOWI_FLAG_ANYSRC flag is set if socket has privileges
for this, i.e. based on hdrincl (IP_HDRINCL) and
transparent flags.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/raw.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 1457acb39cec..61714bd52925 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -563,7 +563,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
 			   RT_SCOPE_UNIVERSE,
 			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
-			   FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0);
+			   inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP,
+			   daddr, saddr, 0, 0);
 
 	if (!inet->hdrincl) {
 		err = raw_probe_proto_opt(&fl4, msg);
-- 
cgit v1.2.3


From d52fbfc9e5c7bb0b0dbc256edf17dee170ce839d Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sun, 7 Aug 2011 10:17:22 +0000
Subject: ipv4: use dst with ref during bcast/mcast loopback

Make sure skb dst has reference when moving to
another context. Currently, I don't see protocols that can
hit it when sending broadcasts/multicasts to loopback using
noref dsts, so it is just a precaution.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 77d3eded665a..8c6563361ab5 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -122,6 +122,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb)
 	newskb->pkt_type = PACKET_LOOPBACK;
 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
 	WARN_ON(!skb_dst(newskb));
+	skb_dst_force(newskb);
 	netif_rx_ni(newskb);
 	return 0;
 }
-- 
cgit v1.2.3