From d97106ea52aa57e63ff40d04479016836bbb5a4e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 9 Aug 2008 00:35:05 -0700
Subject: udp: Drop socket lock for encapsulated packets

The socket lock is there to protect the normal UDP receive path.
Encapsulation UDP sockets don't need that protection.  In fact
the locking is deadly for them as they may contain another UDP
packet within, possibly with the same addresses.

Also the nested bit was copied from TCP.  TCP needs it because
of accept(2) spawning sockets.  This simply doesn't apply to UDP
so I've removed it.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/ipv6')
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d1477b350f76..a6aecf76a71b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -379,7 +379,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 					uh->source, saddr, dif))) {
 		struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
 		if (buff) {
-			bh_lock_sock_nested(sk2);
+			bh_lock_sock(sk2);
 			if (!sock_owned_by_user(sk2))
 				udpv6_queue_rcv_skb(sk2, buff);
 			else
@@ -387,7 +387,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 			bh_unlock_sock(sk2);
 		}
 	}
-	bh_lock_sock_nested(sk);
+	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk))
 		udpv6_queue_rcv_skb(sk, skb);
 	else
@@ -508,7 +508,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 
 	/* deliver */
 
-	bh_lock_sock_nested(sk);
+	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk))
 		udpv6_queue_rcv_skb(sk, skb);
 	else
-- 
cgit v1.2.3


From 5e0115e500fe9dd2ca11e6f92db9123204f1327a Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Wed, 13 Aug 2008 01:58:57 -0700
Subject: ipv6: Fix OOPS, ip -f inet6 route get fec0::1, linux-2.6.26,
 ip6_route_output, rt6_fill_node+0x175

Alexey Dobriyan wrote:
> On Thu, Aug 07, 2008 at 07:00:56PM +0200, John Gumb wrote:
>> Scenario: no ipv6 default route set.
>
>> # ip -f inet6 route get fec0::1
>>
>> BUG: unable to handle kernel NULL pointer dereference at 00000000
>> IP: [<c0369b85>] rt6_fill_node+0x175/0x3b0
>> EIP is at rt6_fill_node+0x175/0x3b0
>
> 0xffffffff80424dd3 is in rt6_fill_node (net/ipv6/route.c:2191).
> 2186                    } else
> 2187    #endif
> 2188                            NLA_PUT_U32(skb, RTA_IIF, iif);
> 2189            } else if (dst) {
> 2190                    struct in6_addr saddr_buf;
> 2191      ====>         if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
>					       ^^^^^^^^^^^^^^^^^^^^^^^^
>											NULL
>
> 2192                                           dst, 0, &saddr_buf) == 0)
> 2193                            NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
> 2194            }

The commit that changed this can't be reverted easily, but the patch
below works for me.

Fix NULL de-reference in rt6_fill_node() when there's no IPv6 input
device present in the dst entry.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5a3e87e4b18f..41b165ffb369 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2187,8 +2187,9 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 #endif
 			NLA_PUT_U32(skb, RTA_IIF, iif);
 	} else if (dst) {
+		struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
 		struct in6_addr saddr_buf;
-		if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
+		if (ipv6_dev_get_saddr(idev ? idev->dev : NULL,
 				       dst, 0, &saddr_buf) == 0)
 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}
-- 
cgit v1.2.3


From 191cd582500f49b32a63040fedeebb0168c720af Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Thu, 14 Aug 2008 15:33:21 -0700
Subject: netns: Add network namespace argument to rt6_fill_node() and
 ipv6_dev_get_saddr()

ipv6_dev_get_saddr() blindly de-references dst_dev to get the network
namespace, but some callers might pass NULL.  Change callers to pass a
namespace pointer instead.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c     |  3 +--
 net/ipv6/fib6_rules.c   |  3 ++-
 net/ipv6/ip6_fib.c      |  1 +
 net/ipv6/ip6_output.c   |  2 +-
 net/ipv6/ndisc.c        |  2 +-
 net/ipv6/route.c        | 12 +++++++-----
 net/ipv6/xfrm6_policy.c |  4 +++-
 7 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a7842c54f58a..e2d3b7580b76 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1106,13 +1106,12 @@ out:
 	return ret;
 }
 
-int ipv6_dev_get_saddr(struct net_device *dst_dev,
+int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
 		       const struct in6_addr *daddr, unsigned int prefs,
 		       struct in6_addr *saddr)
 {
 	struct ipv6_saddr_score scores[2],
 				*score = &scores[0], *hiscore = &scores[1];
-	struct net *net = dev_net(dst_dev);
 	struct ipv6_saddr_dst dst;
 	struct net_device *dev;
 	int dst_type;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 8d05527524e3..f5de3f9dc692 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -93,7 +93,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 			if (flags & RT6_LOOKUP_F_SRCPREF_COA)
 				srcprefs |= IPV6_PREFER_SRC_COA;
 
-			if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
+			if (ipv6_dev_get_saddr(net,
+					       ip6_dst_idev(&rt->u.dst)->dev,
 					       &flp->fl6_dst, srcprefs,
 					       &saddr))
 				goto again;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 52dddc25d3e6..29c7c99e69f7 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -378,6 +378,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 
 	arg.skb = skb;
 	arg.cb = cb;
+	arg.net = net;
 	w->args = &arg;
 
 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a4402de425d9..0e844c2736a7 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -934,7 +934,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 		goto out_err_release;
 
 	if (ipv6_addr_any(&fl->fl6_src)) {
-		err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev,
+		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
 					 &fl->fl6_dst,
 					 sk ? inet6_sk(sk)->srcprefs : 0,
 					 &fl->fl6_src);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index beb48e3f038a..f1c62ba0f56b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -549,7 +549,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 			override = 0;
 		in6_ifa_put(ifp);
 	} else {
-		if (ipv6_dev_get_saddr(dev, daddr,
+		if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
 				       inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
 				       &tmpaddr))
 			return;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 41b165ffb369..9af6115f0f50 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2106,7 +2106,8 @@ static inline size_t rt6_nlmsg_size(void)
 	       + nla_total_size(sizeof(struct rta_cacheinfo));
 }
 
-static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
+static int rt6_fill_node(struct net *net,
+			 struct sk_buff *skb, struct rt6_info *rt,
 			 struct in6_addr *dst, struct in6_addr *src,
 			 int iif, int type, u32 pid, u32 seq,
 			 int prefix, int nowait, unsigned int flags)
@@ -2189,7 +2190,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 	} else if (dst) {
 		struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
 		struct in6_addr saddr_buf;
-		if (ipv6_dev_get_saddr(idev ? idev->dev : NULL,
+		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
 				       dst, 0, &saddr_buf) == 0)
 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}
@@ -2234,7 +2235,8 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 	} else
 		prefix = 0;
 
-	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
+	return rt6_fill_node(arg->net,
+		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
 		     prefix, 0, NLM_F_MULTI);
 }
@@ -2300,7 +2302,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
 	skb->dst = &rt->u.dst;
 
-	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
+	err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
 			    nlh->nlmsg_seq, 0, 0, 0);
 	if (err < 0) {
@@ -2327,7 +2329,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 	if (skb == NULL)
 		goto errout;
 
-	err = rt6_fill_node(skb, rt, NULL, NULL, 0,
+	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
 				event, info->pid, seq, 0, 0, 0);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 8f1e0543b3c4..08e4cbbe3f04 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -52,12 +52,14 @@ static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr,
 static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
 {
 	struct dst_entry *dst;
+	struct net_device *dev;
 
 	dst = xfrm6_dst_lookup(0, NULL, daddr);
 	if (IS_ERR(dst))
 		return -EHOSTUNREACH;
 
-	ipv6_dev_get_saddr(ip6_dst_idev(dst)->dev,
+	dev = ip6_dst_idev(dst)->dev;
+	ipv6_dev_get_saddr(dev_net(dev), dev,
 			   (struct in6_addr *)&daddr->a6, 0,
 			   (struct in6_addr *)&saddr->a6);
 	dst_release(dst);
-- 
cgit v1.2.3


From 13601cd8e44aab332cedff1d6dc10786ec890b7b Mon Sep 17 00:00:00 2001
From: Yang Hongyang <yanghy@cn.fujitsu.com>
Date: Sun, 17 Aug 2008 23:21:52 -0700
Subject: ipv6: Fix the return interface index when get it while no message is
 received.

When get receiving interface index while no message is received,
the bounded device's index of the socket should be returned.

RFC 3542:
   Issuing getsockopt() for the above options will return the sticky
   option value i.e., the value set with setsockopt().  If no sticky
   option value has been set getsockopt() will return the following
   values:

   -  For the IPV6_PKTINFO option, it will return an in6_pktinfo
      structure with ipi6_addr being in6addr_any and ipi6_ifindex being
      zero.

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ipv6_sockglue.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 741cfcd96f88..4e5eac301f91 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -911,7 +911,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		} else {
 			if (np->rxopt.bits.rxinfo) {
 				struct in6_pktinfo src_info;
-				src_info.ipi6_ifindex = np->mcast_oif;
+				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if;
 				ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
 				put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 			}
@@ -921,7 +921,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 			}
 			if (np->rxopt.bits.rxoinfo) {
 				struct in6_pktinfo src_info;
-				src_info.ipi6_ifindex = np->mcast_oif;
+				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if;
 				ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
 				put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
 			}
-- 
cgit v1.2.3


From fdc0bde90a689b9145f2b6f271c03f4c99d09667 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Sat, 23 Aug 2008 04:43:33 -0700
Subject: icmp: icmp_sk() should not use smp_processor_id() in preemptible code

Pass namespace into icmp_xmit_lock, obtain socket inside and return
it as a result for caller.

Thanks Alexey Dobryan for this report:

Steps to reproduce:

	CONFIG_PREEMPT=y
	CONFIG_DEBUG_PREEMPT=y
	tracepath <something>

BUG: using smp_processor_id() in preemptible [00000000] code: tracepath/3205
caller is icmp_sk+0x15/0x30
Pid: 3205, comm: tracepath Not tainted 2.6.27-rc4 #1

Call Trace:
 [<ffffffff8031af14>] debug_smp_processor_id+0xe4/0xf0
 [<ffffffff80409405>] icmp_sk+0x15/0x30
 [<ffffffff8040a17b>] icmp_send+0x4b/0x3f0
 [<ffffffff8025a415>] ? trace_hardirqs_on_caller+0xd5/0x160
 [<ffffffff8025a4ad>] ? trace_hardirqs_on+0xd/0x10
 [<ffffffff8023a475>] ? local_bh_enable_ip+0x95/0x110
 [<ffffffff804285b9>] ? _spin_unlock_bh+0x39/0x40
 [<ffffffff8025a26c>] ? mark_held_locks+0x4c/0x90
 [<ffffffff8025a4ad>] ? trace_hardirqs_on+0xd/0x10
 [<ffffffff8025a415>] ? trace_hardirqs_on_caller+0xd5/0x160
 [<ffffffff803e91b4>] ip_fragment+0x8d4/0x900
 [<ffffffff803e7030>] ? ip_finish_output2+0x0/0x290
 [<ffffffff803e91e0>] ? ip_finish_output+0x0/0x60
 [<ffffffff803e6650>] ? dst_output+0x0/0x10
 [<ffffffff803e922c>] ip_finish_output+0x4c/0x60
 [<ffffffff803e92e3>] ip_output+0xa3/0xf0
 [<ffffffff803e68d0>] ip_local_out+0x20/0x30
 [<ffffffff803e753f>] ip_push_pending_frames+0x27f/0x400
 [<ffffffff80406313>] udp_push_pending_frames+0x233/0x3d0
 [<ffffffff804067d1>] udp_sendmsg+0x321/0x6f0
 [<ffffffff8040d155>] inet_sendmsg+0x45/0x80
 [<ffffffff803b967f>] sock_sendmsg+0xdf/0x110
 [<ffffffff8024a100>] ? autoremove_wake_function+0x0/0x40
 [<ffffffff80257ce5>] ? validate_chain+0x415/0x1010
 [<ffffffff8027dc10>] ? __do_fault+0x140/0x450
 [<ffffffff802597d0>] ? __lock_acquire+0x260/0x590
 [<ffffffff803b9e55>] ? sockfd_lookup_light+0x45/0x80
 [<ffffffff803ba50a>] sys_sendto+0xea/0x120
 [<ffffffff80428e42>] ? _spin_unlock_irqrestore+0x42/0x80
 [<ffffffff803134bc>] ? __up_read+0x4c/0xb0
 [<ffffffff8024e0c6>] ? up_read+0x26/0x30
 [<ffffffff8020b8bb>] system_call_fastpath+0x16/0x1b

icmp6_sk() is similar.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/icmp.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index abedf95fdf2d..b3157a0cc15d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -91,19 +91,22 @@ static struct inet6_protocol icmpv6_protocol = {
 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 };
 
-static __inline__ int icmpv6_xmit_lock(struct sock *sk)
+static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
 {
+	struct sock *sk;
+
 	local_bh_disable();
 
+	sk = icmpv6_sk(net);
 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 		/* This can happen if the output path (f.e. SIT or
 		 * ip6ip6 tunnel) signals dst_link_failure() for an
 		 * outgoing ICMP6 packet.
 		 */
 		local_bh_enable();
-		return 1;
+		return NULL;
 	}
-	return 0;
+	return sk;
 }
 
 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
@@ -392,11 +395,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	fl.fl_icmp_code = code;
 	security_skb_classify_flow(skb, &fl);
 
-	sk = icmpv6_sk(net);
-	np = inet6_sk(sk);
-
-	if (icmpv6_xmit_lock(sk))
+	sk = icmpv6_xmit_lock(net);
+	if (sk == NULL)
 		return;
+	np = inet6_sk(sk);
 
 	if (!icmpv6_xrlim_allow(sk, type, &fl))
 		goto out;
@@ -539,11 +541,10 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
 	security_skb_classify_flow(skb, &fl);
 
-	sk = icmpv6_sk(net);
-	np = inet6_sk(sk);
-
-	if (icmpv6_xmit_lock(sk))
+	sk = icmpv6_xmit_lock(net);
+	if (sk == NULL)
 		return;
+	np = inet6_sk(sk);
 
 	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
 		fl.oif = np->mcast_oif;
-- 
cgit v1.2.3


From f410a1fba7afa79d2992620e874a343fdba28332 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Sat, 23 Aug 2008 05:16:46 -0700
Subject: ipv6: protocol for address routes

This fixes a problem spotted with zebra, but not sure if it is
necessary a kernel problem.  With IPV6 when an address is added to an
interface, Zebra creates a duplicate RIB entry, one as a connected
route, and other as a kernel route.

When an address is added to an interface the RTN_NEWADDR message
causes Zebra to create a connected route. In IPV4 when an address is
added to an interface a RTN_NEWROUTE message is set to user space with
the protocol RTPROT_KERNEL. Zebra ignores these messages, because it
already has the connected route.

The problem is that route created in IPV6 has route protocol ==
RTPROT_BOOT.  Was this a design decision or a bug? This fixes it. Same
patch applies to both net-2.6 and stable.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/ipv6')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e2d3b7580b76..7b6a584b62dd 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1688,6 +1688,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 		.fc_dst_len = plen,
 		.fc_flags = RTF_UP | flags,
 		.fc_nlinfo.nl_net = dev_net(dev),
+		.fc_protocol = RTPROT_KERNEL,
 	};
 
 	ipv6_addr_copy(&cfg.fc_dst, pfx);
-- 
cgit v1.2.3


From ce3113ec57abcd41cc5a2fed02474aee3f63d12c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 25 Aug 2008 15:18:15 -0700
Subject: ipv6: sysctl fixes

Braino: net.ipv6 in ipv6 skeleton has no business in rotable
class

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sysctl_net_ipv6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index e6dfaeac6be3..587f8f60c489 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -156,7 +156,7 @@ static struct ctl_table_header *ip6_base;
 int ipv6_static_sysctl_register(void)
 {
 	static struct ctl_table empty[1];
-	ip6_base = register_net_sysctl_rotable(net_ipv6_ctl_path, empty);
+	ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty);
 	if (ip6_base == NULL)
 		return -ENOMEM;
 	return 0;
-- 
cgit v1.2.3


From 3cc76caa98b092a8fb3e7b4303c70f847db0651f Mon Sep 17 00:00:00 2001
From: Yang Hongyang <yanghy@cn.fujitsu.com>
Date: Fri, 29 Aug 2008 14:06:51 -0700
Subject: ipv6: When we droped a packet, we should return NET_RX_DROP instead
 of 0

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/raw.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 01d47674f7e5..e53e493606c5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -377,14 +377,14 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
 	    skb_checksum_complete(skb)) {
 		atomic_inc(&sk->sk_drops);
 		kfree_skb(skb);
-		return 0;
+		return NET_RX_DROP;
 	}
 
 	/* Charge it to the socket. */
 	if (sock_queue_rcv_skb(sk,skb)<0) {
 		atomic_inc(&sk->sk_drops);
 		kfree_skb(skb);
-		return 0;
+		return NET_RX_DROP;
 	}
 
 	return 0;
@@ -429,7 +429,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 		if (skb_checksum_complete(skb)) {
 			atomic_inc(&sk->sk_drops);
 			kfree_skb(skb);
-			return 0;
+			return NET_RX_DROP;
 		}
 	}
 
-- 
cgit v1.2.3


From d315492b1a6ba29da0fa2860759505ae1b2db857 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Mon, 8 Sep 2008 13:17:27 -0700
Subject: netns : fix kernel panic in timewait socket destruction

How to reproduce ?
 - create a network namespace
 - use tcp protocol and get timewait socket
 - exit the network namespace
 - after a moment (when the timewait socket is destroyed), the kernel
   panics.

# BUG: unable to handle kernel NULL pointer dereference at
0000000000000007
IP: [<ffffffff821e394d>] inet_twdr_do_twkill_work+0x6e/0xb8
PGD 119985067 PUD 11c5c0067 PMD 0
Oops: 0000 [1] SMP
CPU 1
Modules linked in: ipv6 button battery ac loop dm_mod tg3 libphy ext3 jbd
edd fan thermal processor thermal_sys sg sata_svw libata dock serverworks
sd_mod scsi_mod ide_disk ide_core [last unloaded: freq_table]
Pid: 0, comm: swapper Not tainted 2.6.27-rc2 #3
RIP: 0010:[<ffffffff821e394d>] [<ffffffff821e394d>]
inet_twdr_do_twkill_work+0x6e/0xb8
RSP: 0018:ffff88011ff7fed0 EFLAGS: 00010246
RAX: ffffffffffffffff RBX: ffffffff82339420 RCX: ffff88011ff7ff30
RDX: 0000000000000001 RSI: ffff88011a4d03c0 RDI: ffff88011ac2fc00
RBP: ffffffff823392e0 R08: 0000000000000000 R09: ffff88002802a200
R10: ffff8800a5c4b000 R11: ffffffff823e4080 R12: ffff88011ac2fc00
R13: 0000000000000001 R14: 0000000000000001 R15: 0000000000000000
FS: 0000000041cbd940(0000) GS:ffff8800bff839c0(0000)
knlGS:0000000000000000
CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000000000000007 CR3: 00000000bd87c000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process swapper (pid: 0, threadinfo ffff8800bff9e000, task
ffff88011ff76690)
Stack: ffffffff823392e0 0000000000000100 ffffffff821e3a3a
0000000000000008
0000000000000000 ffffffff821e3a61 ffff8800bff7c000 ffffffff8203c7e7
ffff88011ff7ff10 ffff88011ff7ff10 0000000000000021 ffffffff82351108
Call Trace:
<IRQ> [<ffffffff821e3a3a>] ? inet_twdr_hangman+0x0/0x9e
[<ffffffff821e3a61>] ? inet_twdr_hangman+0x27/0x9e
[<ffffffff8203c7e7>] ? run_timer_softirq+0x12c/0x193
[<ffffffff820390d1>] ? __do_softirq+0x5e/0xcd
[<ffffffff8200d08c>] ? call_softirq+0x1c/0x28
[<ffffffff8200e611>] ? do_softirq+0x2c/0x68
[<ffffffff8201a055>] ? smp_apic_timer_interrupt+0x8e/0xa9
[<ffffffff8200cad6>] ? apic_timer_interrupt+0x66/0x70
<EOI> [<ffffffff82011f4c>] ? default_idle+0x27/0x3b
[<ffffffff8200abbd>] ? cpu_idle+0x5f/0x7d


Code: e8 01 00 00 4c 89 e7 41 ff c5 e8 8d fd ff ff 49 8b 44 24 38 4c 89 e7
65 8b 14 25 24 00 00 00 89 d2 48 8b 80 e8 00 00 00 48 f7 d0 <48> 8b 04 d0
48 ff 40 58 e8 fc fc ff ff 48 89 df e8 c0 5f 04 00
RIP [<ffffffff821e394d>] inet_twdr_do_twkill_work+0x6e/0xb8
RSP <ffff88011ff7fed0>
CR2: 0000000000000007

This patch provides a function to purge all timewait sockets related
to a network namespace. The timewait sockets life cycle is not tied with
the network namespace, that means the timewait sockets stay alive while
the network namespace dies. The timewait sockets are for avoiding to
receive a duplicate packet from the network, if the network namespace is
freed, the network stack is removed, so no chance to receive any packets
from the outside world. Furthermore, having a pending destruction timer
on these sockets with a network namespace freed is not safe and will lead
to an oops if the timer callback which try to access data belonging to
the namespace like for example in:
	inet_twdr_do_twkill_work
		-> NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED);

Purging the timewait sockets at the network namespace destruction will:
 1) speed up memory freeing for the namespace
 2) fix kernel panic on asynchronous timewait destruction

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Acked-by: Denis V. Lunev <den@openvz.org>
Acked-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/ipv6')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5b90b369ccb2..b585c850a89a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2148,6 +2148,7 @@ static int tcpv6_net_init(struct net *net)
 static void tcpv6_net_exit(struct net *net)
 {
 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
+	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6);
 }
 
 static struct pernet_operations tcpv6_net_ops = {
-- 
cgit v1.2.3


From e550dfb0c2c31b6363aa463a035fc9f8dcaa3c9b Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 9 Sep 2008 13:51:35 -0700
Subject: ipv6: Fix OOPS in ip6_dst_lookup_tail().

This fixes kernel bugzilla 11469: "TUN with 1024 neighbours:
ip6_dst_lookup_tail NULL crash"

dst->neighbour is not necessarily hooked up at this point
in the processing path, so blindly dereferencing it is
the wrong thing to do.  This NULL check exists in other
similar paths and this case was just an oversight.

Also fix the completely wrong and confusing indentation
here while we're at it.

Based upon a patch by Evgeniy Polyakov.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 64 +++++++++++++++++++++++++--------------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0e844c2736a7..3df2c442d90b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -943,39 +943,39 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 	}
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-		/*
-		 * Here if the dst entry we've looked up
-		 * has a neighbour entry that is in the INCOMPLETE
-		 * state and the src address from the flow is
-		 * marked as OPTIMISTIC, we release the found
-		 * dst entry and replace it instead with the
-		 * dst entry of the nexthop router
-		 */
-		if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
-			struct inet6_ifaddr *ifp;
-			struct flowi fl_gw;
-			int redirect;
-
-			ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
-					      (*dst)->dev, 1);
-
-			redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
-			if (ifp)
-				in6_ifa_put(ifp);
-
-			if (redirect) {
-				/*
-				 * We need to get the dst entry for the
-				 * default router instead
-				 */
-				dst_release(*dst);
-				memcpy(&fl_gw, fl, sizeof(struct flowi));
-				memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
-				*dst = ip6_route_output(net, sk, &fl_gw);
-				if ((err = (*dst)->error))
-					goto out_err_release;
-			}
+	/*
+	 * Here if the dst entry we've looked up
+	 * has a neighbour entry that is in the INCOMPLETE
+	 * state and the src address from the flow is
+	 * marked as OPTIMISTIC, we release the found
+	 * dst entry and replace it instead with the
+	 * dst entry of the nexthop router
+	 */
+	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
+		struct inet6_ifaddr *ifp;
+		struct flowi fl_gw;
+		int redirect;
+
+		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
+				      (*dst)->dev, 1);
+
+		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
+		if (ifp)
+			in6_ifa_put(ifp);
+
+		if (redirect) {
+			/*
+			 * We need to get the dst entry for the
+			 * default router instead
+			 */
+			dst_release(*dst);
+			memcpy(&fl_gw, fl, sizeof(struct flowi));
+			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
+			*dst = ip6_route_output(net, sk, &fl_gw);
+			if ((err = (*dst)->error))
+				goto out_err_release;
 		}
+	}
 #endif
 
 	return 0;
-- 
cgit v1.2.3