From d97106ea52aa57e63ff40d04479016836bbb5a4e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 9 Aug 2008 00:35:05 -0700 Subject: udp: Drop socket lock for encapsulated packets The socket lock is there to protect the normal UDP receive path. Encapsulation UDP sockets don't need that protection. In fact the locking is deadly for them as they may contain another UDP packet within, possibly with the same addresses. Also the nested bit was copied from TCP. TCP needs it because of accept(2) spawning sockets. This simply doesn't apply to UDP so I've removed it. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/udp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d1477b350f76..a6aecf76a71b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -379,7 +379,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, uh->source, saddr, dif))) { struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); if (buff) { - bh_lock_sock_nested(sk2); + bh_lock_sock(sk2); if (!sock_owned_by_user(sk2)) udpv6_queue_rcv_skb(sk2, buff); else @@ -387,7 +387,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, bh_unlock_sock(sk2); } } - bh_lock_sock_nested(sk); + bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); else @@ -508,7 +508,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], /* deliver */ - bh_lock_sock_nested(sk); + bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); else -- cgit v1.2.3 From 5e0115e500fe9dd2ca11e6f92db9123204f1327a Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Wed, 13 Aug 2008 01:58:57 -0700 Subject: ipv6: Fix OOPS, ip -f inet6 route get fec0::1, linux-2.6.26, ip6_route_output, rt6_fill_node+0x175 Alexey Dobriyan wrote: > On Thu, Aug 07, 2008 at 07:00:56PM +0200, John Gumb wrote: >> Scenario: no ipv6 default route set. > >> # ip -f inet6 route get fec0::1 >> >> BUG: unable to handle kernel NULL pointer dereference at 00000000 >> IP: [] rt6_fill_node+0x175/0x3b0 >> EIP is at rt6_fill_node+0x175/0x3b0 > > 0xffffffff80424dd3 is in rt6_fill_node (net/ipv6/route.c:2191). > 2186 } else > 2187 #endif > 2188 NLA_PUT_U32(skb, RTA_IIF, iif); > 2189 } else if (dst) { > 2190 struct in6_addr saddr_buf; > 2191 ====> if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, > ^^^^^^^^^^^^^^^^^^^^^^^^ > NULL > > 2192 dst, 0, &saddr_buf) == 0) > 2193 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); > 2194 } The commit that changed this can't be reverted easily, but the patch below works for me. Fix NULL de-reference in rt6_fill_node() when there's no IPv6 input device present in the dst entry. Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5a3e87e4b18f..41b165ffb369 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2187,8 +2187,9 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, #endif NLA_PUT_U32(skb, RTA_IIF, iif); } else if (dst) { + struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst); struct in6_addr saddr_buf; - if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, + if (ipv6_dev_get_saddr(idev ? idev->dev : NULL, dst, 0, &saddr_buf) == 0) NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } -- cgit v1.2.3 From 191cd582500f49b32a63040fedeebb0168c720af Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Thu, 14 Aug 2008 15:33:21 -0700 Subject: netns: Add network namespace argument to rt6_fill_node() and ipv6_dev_get_saddr() ipv6_dev_get_saddr() blindly de-references dst_dev to get the network namespace, but some callers might pass NULL. Change callers to pass a namespace pointer instead. Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 +-- net/ipv6/fib6_rules.c | 3 ++- net/ipv6/ip6_fib.c | 1 + net/ipv6/ip6_output.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/route.c | 12 +++++++----- net/ipv6/xfrm6_policy.c | 4 +++- 7 files changed, 16 insertions(+), 11 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a7842c54f58a..e2d3b7580b76 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1106,13 +1106,12 @@ out: return ret; } -int ipv6_dev_get_saddr(struct net_device *dst_dev, +int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { struct ipv6_saddr_score scores[2], *score = &scores[0], *hiscore = &scores[1]; - struct net *net = dev_net(dst_dev); struct ipv6_saddr_dst dst; struct net_device *dev; int dst_type; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 8d05527524e3..f5de3f9dc692 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -93,7 +93,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if (flags & RT6_LOOKUP_F_SRCPREF_COA) srcprefs |= IPV6_PREFER_SRC_COA; - if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, + if (ipv6_dev_get_saddr(net, + ip6_dst_idev(&rt->u.dst)->dev, &flp->fl6_dst, srcprefs, &saddr)) goto again; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 52dddc25d3e6..29c7c99e69f7 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -378,6 +378,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) arg.skb = skb; arg.cb = cb; + arg.net = net; w->args = &arg; for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a4402de425d9..0e844c2736a7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -934,7 +934,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, goto out_err_release; if (ipv6_addr_any(&fl->fl6_src)) { - err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev, + err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev, &fl->fl6_dst, sk ? inet6_sk(sk)->srcprefs : 0, &fl->fl6_src); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index beb48e3f038a..f1c62ba0f56b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -549,7 +549,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, override = 0; in6_ifa_put(ifp); } else { - if (ipv6_dev_get_saddr(dev, daddr, + if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs, &tmpaddr)) return; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 41b165ffb369..9af6115f0f50 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2106,7 +2106,8 @@ static inline size_t rt6_nlmsg_size(void) + nla_total_size(sizeof(struct rta_cacheinfo)); } -static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, +static int rt6_fill_node(struct net *net, + struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, int iif, int type, u32 pid, u32 seq, int prefix, int nowait, unsigned int flags) @@ -2189,7 +2190,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, } else if (dst) { struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst); struct in6_addr saddr_buf; - if (ipv6_dev_get_saddr(idev ? idev->dev : NULL, + if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, dst, 0, &saddr_buf) == 0) NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } @@ -2234,7 +2235,8 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) } else prefix = 0; - return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, + return rt6_fill_node(arg->net, + arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, prefix, 0, NLM_F_MULTI); } @@ -2300,7 +2302,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); skb->dst = &rt->u.dst; - err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, + err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, 0, 0); if (err < 0) { @@ -2327,7 +2329,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) if (skb == NULL) goto errout; - err = rt6_fill_node(skb, rt, NULL, NULL, 0, + err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, event, info->pid, seq, 0, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8f1e0543b3c4..08e4cbbe3f04 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -52,12 +52,14 @@ static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) { struct dst_entry *dst; + struct net_device *dev; dst = xfrm6_dst_lookup(0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; - ipv6_dev_get_saddr(ip6_dst_idev(dst)->dev, + dev = ip6_dst_idev(dst)->dev; + ipv6_dev_get_saddr(dev_net(dev), dev, (struct in6_addr *)&daddr->a6, 0, (struct in6_addr *)&saddr->a6); dst_release(dst); -- cgit v1.2.3 From 13601cd8e44aab332cedff1d6dc10786ec890b7b Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Sun, 17 Aug 2008 23:21:52 -0700 Subject: ipv6: Fix the return interface index when get it while no message is received. When get receiving interface index while no message is received, the bounded device's index of the socket should be returned. RFC 3542: Issuing getsockopt() for the above options will return the sticky option value i.e., the value set with setsockopt(). If no sticky option value has been set getsockopt() will return the following values: - For the IPV6_PKTINFO option, it will return an in6_pktinfo structure with ipi6_addr being in6addr_any and ipi6_ifindex being zero. Signed-off-by: Yang Hongyang Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 741cfcd96f88..4e5eac301f91 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -911,7 +911,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } else { if (np->rxopt.bits.rxinfo) { struct in6_pktinfo src_info; - src_info.ipi6_ifindex = np->mcast_oif; + src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if; ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr); put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } @@ -921,7 +921,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } if (np->rxopt.bits.rxoinfo) { struct in6_pktinfo src_info; - src_info.ipi6_ifindex = np->mcast_oif; + src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if; ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr); put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } -- cgit v1.2.3 From fdc0bde90a689b9145f2b6f271c03f4c99d09667 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Sat, 23 Aug 2008 04:43:33 -0700 Subject: icmp: icmp_sk() should not use smp_processor_id() in preemptible code Pass namespace into icmp_xmit_lock, obtain socket inside and return it as a result for caller. Thanks Alexey Dobryan for this report: Steps to reproduce: CONFIG_PREEMPT=y CONFIG_DEBUG_PREEMPT=y tracepath BUG: using smp_processor_id() in preemptible [00000000] code: tracepath/3205 caller is icmp_sk+0x15/0x30 Pid: 3205, comm: tracepath Not tainted 2.6.27-rc4 #1 Call Trace: [] debug_smp_processor_id+0xe4/0xf0 [] icmp_sk+0x15/0x30 [] icmp_send+0x4b/0x3f0 [] ? trace_hardirqs_on_caller+0xd5/0x160 [] ? trace_hardirqs_on+0xd/0x10 [] ? local_bh_enable_ip+0x95/0x110 [] ? _spin_unlock_bh+0x39/0x40 [] ? mark_held_locks+0x4c/0x90 [] ? trace_hardirqs_on+0xd/0x10 [] ? trace_hardirqs_on_caller+0xd5/0x160 [] ip_fragment+0x8d4/0x900 [] ? ip_finish_output2+0x0/0x290 [] ? ip_finish_output+0x0/0x60 [] ? dst_output+0x0/0x10 [] ip_finish_output+0x4c/0x60 [] ip_output+0xa3/0xf0 [] ip_local_out+0x20/0x30 [] ip_push_pending_frames+0x27f/0x400 [] udp_push_pending_frames+0x233/0x3d0 [] udp_sendmsg+0x321/0x6f0 [] inet_sendmsg+0x45/0x80 [] sock_sendmsg+0xdf/0x110 [] ? autoremove_wake_function+0x0/0x40 [] ? validate_chain+0x415/0x1010 [] ? __do_fault+0x140/0x450 [] ? __lock_acquire+0x260/0x590 [] ? sockfd_lookup_light+0x45/0x80 [] sys_sendto+0xea/0x120 [] ? _spin_unlock_irqrestore+0x42/0x80 [] ? __up_read+0x4c/0xb0 [] ? up_read+0x26/0x30 [] system_call_fastpath+0x16/0x1b icmp6_sk() is similar. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index abedf95fdf2d..b3157a0cc15d 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -91,19 +91,22 @@ static struct inet6_protocol icmpv6_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -static __inline__ int icmpv6_xmit_lock(struct sock *sk) +static __inline__ struct sock *icmpv6_xmit_lock(struct net *net) { + struct sock *sk; + local_bh_disable(); + sk = icmpv6_sk(net); if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path (f.e. SIT or * ip6ip6 tunnel) signals dst_link_failure() for an * outgoing ICMP6 packet. */ local_bh_enable(); - return 1; + return NULL; } - return 0; + return sk; } static __inline__ void icmpv6_xmit_unlock(struct sock *sk) @@ -392,11 +395,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, fl.fl_icmp_code = code; security_skb_classify_flow(skb, &fl); - sk = icmpv6_sk(net); - np = inet6_sk(sk); - - if (icmpv6_xmit_lock(sk)) + sk = icmpv6_xmit_lock(net); + if (sk == NULL) return; + np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl)) goto out; @@ -539,11 +541,10 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl.fl_icmp_type = ICMPV6_ECHO_REPLY; security_skb_classify_flow(skb, &fl); - sk = icmpv6_sk(net); - np = inet6_sk(sk); - - if (icmpv6_xmit_lock(sk)) + sk = icmpv6_xmit_lock(net); + if (sk == NULL) return; + np = inet6_sk(sk); if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; -- cgit v1.2.3 From f410a1fba7afa79d2992620e874a343fdba28332 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 23 Aug 2008 05:16:46 -0700 Subject: ipv6: protocol for address routes This fixes a problem spotted with zebra, but not sure if it is necessary a kernel problem. With IPV6 when an address is added to an interface, Zebra creates a duplicate RIB entry, one as a connected route, and other as a kernel route. When an address is added to an interface the RTN_NEWADDR message causes Zebra to create a connected route. In IPV4 when an address is added to an interface a RTN_NEWROUTE message is set to user space with the protocol RTPROT_KERNEL. Zebra ignores these messages, because it already has the connected route. The problem is that route created in IPV6 has route protocol == RTPROT_BOOT. Was this a design decision or a bug? This fixes it. Same patch applies to both net-2.6 and stable. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e2d3b7580b76..7b6a584b62dd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1688,6 +1688,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, .fc_dst_len = plen, .fc_flags = RTF_UP | flags, .fc_nlinfo.nl_net = dev_net(dev), + .fc_protocol = RTPROT_KERNEL, }; ipv6_addr_copy(&cfg.fc_dst, pfx); -- cgit v1.2.3 From ce3113ec57abcd41cc5a2fed02474aee3f63d12c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 25 Aug 2008 15:18:15 -0700 Subject: ipv6: sysctl fixes Braino: net.ipv6 in ipv6 skeleton has no business in rotable class Signed-off-by: Al Viro Signed-off-by: David S. Miller --- net/ipv6/sysctl_net_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index e6dfaeac6be3..587f8f60c489 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -156,7 +156,7 @@ static struct ctl_table_header *ip6_base; int ipv6_static_sysctl_register(void) { static struct ctl_table empty[1]; - ip6_base = register_net_sysctl_rotable(net_ipv6_ctl_path, empty); + ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty); if (ip6_base == NULL) return -ENOMEM; return 0; -- cgit v1.2.3 From 3cc76caa98b092a8fb3e7b4303c70f847db0651f Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Fri, 29 Aug 2008 14:06:51 -0700 Subject: ipv6: When we droped a packet, we should return NET_RX_DROP instead of 0 Signed-off-by: Yang Hongyang Signed-off-by: David S. Miller --- net/ipv6/raw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 01d47674f7e5..e53e493606c5 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -377,14 +377,14 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) skb_checksum_complete(skb)) { atomic_inc(&sk->sk_drops); kfree_skb(skb); - return 0; + return NET_RX_DROP; } /* Charge it to the socket. */ if (sock_queue_rcv_skb(sk,skb)<0) { atomic_inc(&sk->sk_drops); kfree_skb(skb); - return 0; + return NET_RX_DROP; } return 0; @@ -429,7 +429,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (skb_checksum_complete(skb)) { atomic_inc(&sk->sk_drops); kfree_skb(skb); - return 0; + return NET_RX_DROP; } } -- cgit v1.2.3 From d315492b1a6ba29da0fa2860759505ae1b2db857 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Sep 2008 13:17:27 -0700 Subject: netns : fix kernel panic in timewait socket destruction How to reproduce ? - create a network namespace - use tcp protocol and get timewait socket - exit the network namespace - after a moment (when the timewait socket is destroyed), the kernel panics. # BUG: unable to handle kernel NULL pointer dereference at 0000000000000007 IP: [] inet_twdr_do_twkill_work+0x6e/0xb8 PGD 119985067 PUD 11c5c0067 PMD 0 Oops: 0000 [1] SMP CPU 1 Modules linked in: ipv6 button battery ac loop dm_mod tg3 libphy ext3 jbd edd fan thermal processor thermal_sys sg sata_svw libata dock serverworks sd_mod scsi_mod ide_disk ide_core [last unloaded: freq_table] Pid: 0, comm: swapper Not tainted 2.6.27-rc2 #3 RIP: 0010:[] [] inet_twdr_do_twkill_work+0x6e/0xb8 RSP: 0018:ffff88011ff7fed0 EFLAGS: 00010246 RAX: ffffffffffffffff RBX: ffffffff82339420 RCX: ffff88011ff7ff30 RDX: 0000000000000001 RSI: ffff88011a4d03c0 RDI: ffff88011ac2fc00 RBP: ffffffff823392e0 R08: 0000000000000000 R09: ffff88002802a200 R10: ffff8800a5c4b000 R11: ffffffff823e4080 R12: ffff88011ac2fc00 R13: 0000000000000001 R14: 0000000000000001 R15: 0000000000000000 FS: 0000000041cbd940(0000) GS:ffff8800bff839c0(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 0000000000000007 CR3: 00000000bd87c000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 0, threadinfo ffff8800bff9e000, task ffff88011ff76690) Stack: ffffffff823392e0 0000000000000100 ffffffff821e3a3a 0000000000000008 0000000000000000 ffffffff821e3a61 ffff8800bff7c000 ffffffff8203c7e7 ffff88011ff7ff10 ffff88011ff7ff10 0000000000000021 ffffffff82351108 Call Trace: [] ? inet_twdr_hangman+0x0/0x9e [] ? inet_twdr_hangman+0x27/0x9e [] ? run_timer_softirq+0x12c/0x193 [] ? __do_softirq+0x5e/0xcd [] ? call_softirq+0x1c/0x28 [] ? do_softirq+0x2c/0x68 [] ? smp_apic_timer_interrupt+0x8e/0xa9 [] ? apic_timer_interrupt+0x66/0x70 [] ? default_idle+0x27/0x3b [] ? cpu_idle+0x5f/0x7d Code: e8 01 00 00 4c 89 e7 41 ff c5 e8 8d fd ff ff 49 8b 44 24 38 4c 89 e7 65 8b 14 25 24 00 00 00 89 d2 48 8b 80 e8 00 00 00 48 f7 d0 <48> 8b 04 d0 48 ff 40 58 e8 fc fc ff ff 48 89 df e8 c0 5f 04 00 RIP [] inet_twdr_do_twkill_work+0x6e/0xb8 RSP CR2: 0000000000000007 This patch provides a function to purge all timewait sockets related to a network namespace. The timewait sockets life cycle is not tied with the network namespace, that means the timewait sockets stay alive while the network namespace dies. The timewait sockets are for avoiding to receive a duplicate packet from the network, if the network namespace is freed, the network stack is removed, so no chance to receive any packets from the outside world. Furthermore, having a pending destruction timer on these sockets with a network namespace freed is not safe and will lead to an oops if the timer callback which try to access data belonging to the namespace like for example in: inet_twdr_do_twkill_work -> NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); Purging the timewait sockets at the network namespace destruction will: 1) speed up memory freeing for the namespace 2) fix kernel panic on asynchronous timewait destruction Signed-off-by: Daniel Lezcano Acked-by: Denis V. Lunev Acked-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5b90b369ccb2..b585c850a89a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2148,6 +2148,7 @@ static int tcpv6_net_init(struct net *net) static void tcpv6_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.tcp_sk); + inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6); } static struct pernet_operations tcpv6_net_ops = { -- cgit v1.2.3 From e550dfb0c2c31b6363aa463a035fc9f8dcaa3c9b Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 9 Sep 2008 13:51:35 -0700 Subject: ipv6: Fix OOPS in ip6_dst_lookup_tail(). This fixes kernel bugzilla 11469: "TUN with 1024 neighbours: ip6_dst_lookup_tail NULL crash" dst->neighbour is not necessarily hooked up at this point in the processing path, so blindly dereferencing it is the wrong thing to do. This NULL check exists in other similar paths and this case was just an oversight. Also fix the completely wrong and confusing indentation here while we're at it. Based upon a patch by Evgeniy Polyakov. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 64 +++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0e844c2736a7..3df2c442d90b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -943,39 +943,39 @@ static int ip6_dst_lookup_tail(struct sock *sk, } #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - /* - * Here if the dst entry we've looked up - * has a neighbour entry that is in the INCOMPLETE - * state and the src address from the flow is - * marked as OPTIMISTIC, we release the found - * dst entry and replace it instead with the - * dst entry of the nexthop router - */ - if (!((*dst)->neighbour->nud_state & NUD_VALID)) { - struct inet6_ifaddr *ifp; - struct flowi fl_gw; - int redirect; - - ifp = ipv6_get_ifaddr(net, &fl->fl6_src, - (*dst)->dev, 1); - - redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); - if (ifp) - in6_ifa_put(ifp); - - if (redirect) { - /* - * We need to get the dst entry for the - * default router instead - */ - dst_release(*dst); - memcpy(&fl_gw, fl, sizeof(struct flowi)); - memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); - *dst = ip6_route_output(net, sk, &fl_gw); - if ((err = (*dst)->error)) - goto out_err_release; - } + /* + * Here if the dst entry we've looked up + * has a neighbour entry that is in the INCOMPLETE + * state and the src address from the flow is + * marked as OPTIMISTIC, we release the found + * dst entry and replace it instead with the + * dst entry of the nexthop router + */ + if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { + struct inet6_ifaddr *ifp; + struct flowi fl_gw; + int redirect; + + ifp = ipv6_get_ifaddr(net, &fl->fl6_src, + (*dst)->dev, 1); + + redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); + if (ifp) + in6_ifa_put(ifp); + + if (redirect) { + /* + * We need to get the dst entry for the + * default router instead + */ + dst_release(*dst); + memcpy(&fl_gw, fl, sizeof(struct flowi)); + memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); + *dst = ip6_route_output(net, sk, &fl_gw); + if ((err = (*dst)->error)) + goto out_err_release; } + } #endif return 0; -- cgit v1.2.3