[NETFILTER]: Add nf_conntrack subsystem.

The existing connection tracking subsystem in netfilter can only handle ipv4. There were basically two choices present to add connection tracking support for ipv6. We could either duplicate all of the ipv4 connection tracking code into an ipv6 counterpart, or (the choice taken by these patches) we could design a generic layer that could handle both ipv4 and ipv6 and thus requiring only one sub-protocol (TCP, UDP, etc.) connection tracking helper module to be written. In fact nf_conntrack is capable of working with any layer 3 protocol. The existing ipv4 specific conntrack code could also not deal with the pecularities of doing connection tracking on ipv6, which is also cured here. For example, these issues include: 1) ICMPv6 handling, which is used for neighbour discovery in ipv6 thus some messages such as these should not participate in connection tracking since effectively they are like ARP messages 2) fragmentation must be handled differently in ipv6, because the simplistic "defrag, connection track and NAT, refrag" (which the existing ipv4 connection tracking does) approach simply isn't feasible in ipv6 3) ipv6 extension header parsing must occur at the correct spots before and after connection tracking decisions, and there were no provisions for this in the existing connection tracking design 4) ipv6 has no need for stateful NAT The ipv4 specific conntrack layer is kept around, until all of the ipv4 specific conntrack helpers are ported over to nf_conntrack and it is feature complete. Once that occurs, the old conntrack stuff will get placed into the feature-removal-schedule and we will fully kill it off 6 months later. Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp> Signed-off-by: Harald Welte <laforge@netfilter.org> Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
author: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp> 2005-11-09 16:38:16 -0800
committer: David S. Miller <davem@davemloft.net> 2005-11-09 16:38:16 -0800
commit: 9fb9cbb1082d6b31fb45aa1a14432449a0df6cf1 (patch)
tree: c964a62bdd766eca436c30f51a9e33e2b798b0a6 /net/ipv6
parent: 6730c3c14421b7c924d06e31bb66e0adad225547 (diff)
9 files changed, 1750 insertions, 4 deletions
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 6e3480426939..a6026d2787d2 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -176,6 +176,11 @@ resubmit:
 		if (ipprot->flags & INET6_PROTO_FINAL) {
 			struct ipv6hdr *hdr;	
 
+			/* Free reference early: we don't need it any more,
+			   and it may hold ip_conntrack module loaded
+			   indefinitely. */
+			nf_reset(skb);
+
 			skb_postpull_rcsum(skb, skb->nh.raw,
 					   skb->h.raw - skb->nh.raw);
 			hdr = skb->nh.ipv6h;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index dbd9767b32e4..c1fa693511a1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -441,9 +441,15 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 #ifdef CONFIG_NETFILTER
 	to->nfmark = from->nfmark;
 	/* Connection association is same as pre-frag packet */
+	nf_conntrack_put(to->nfct);
 	to->nfct = from->nfct;
 	nf_conntrack_get(to->nfct);
 	to->nfctinfo = from->nfctinfo;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	nf_conntrack_put_reasm(to->nfct_reasm);
+	to->nfct_reasm = from->nfct_reasm;
+	nf_conntrack_get_reasm(to->nfct_reasm);
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	nf_bridge_put(to->nf_bridge);
 	to->nf_bridge = from->nf_bridge;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index bb7ccfe33f23..971ba60bf6e9 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -278,5 +278,19 @@ config IP6_NF_RAW
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/modules.txt>.  If unsure, say `N'.
 
+config NF_CONNTRACK_IPV6
+	tristate "IPv6 support for new connection tracking (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && NF_CONNTRACK
+	---help---
+	  Connection tracking keeps a record of what packets have passed
+	  through your machine, in order to figure out how they are related
+	  into connections.
+
+	  This is IPv6 support on Layer 3 independent connection tracking.
+	  Layer 3 independent connection tracking is experimental scheme
+	  which generalize ip_conntrack to support other layer 3 protocols.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 endmenu
 
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2b2c370e8b1c..9ab5b2ca1f59 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -27,3 +27,9 @@ obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
 obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
 obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
 obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
+
+# objects for l3 independent conntrack
+nf_conntrack_ipv6-objs  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o
+
+# l3 independent conntrack
+obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c
index 0c7584f92172..eab8fb864ee0 100644
--- a/net/ipv6/netfilter/ip6t_MARK.c
+++ b/net/ipv6/netfilter/ip6t_MARK.c
@@ -56,9 +56,9 @@ checkentry(const char *tablename,
 	return 1;
 }
 
-static struct ip6t_target ip6t_mark_reg = {
-	.name 		= "MARK",
-	.target 	= target,
+static struct ip6t_target ip6t_mark_reg = { 
+	.name		= "MARK",
+	.target		= target,
 	.checkentry	= checkentry,
 	.me		= THIS_MODULE
 };
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
new file mode 100644
index 000000000000..e2c90b3a8074
--- /dev/null
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -0,0 +1,556 @@
+/*
+ * Copyright (C)2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *	- support Layer 3 protocol independent connection tracking.
+ *	  Based on the original ip_conntrack code which	had the following
+ *	  copyright information:
+ *		(C) 1999-2001 Paul `Rusty' Russell
+ *		(C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *	- add get_features() to support various size of conntrack
+ *	  structures.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/ipv6.h>
+
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
+
+static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
+			     struct nf_conntrack_tuple *tuple)
+{
+	u_int32_t _addrs[8], *ap;
+
+	ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr),
+				sizeof(_addrs), _addrs);
+	if (ap == NULL)
+		return 0;
+
+	memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
+	memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
+
+	return 1;
+}
+
+static int ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
+			     const struct nf_conntrack_tuple *orig)
+{
+	memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6));
+	memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6));
+
+	return 1;
+}
+
+static int ipv6_print_tuple(struct seq_file *s,
+			    const struct nf_conntrack_tuple *tuple)
+{
+	return seq_printf(s, "src=%x:%x:%x:%x:%x:%x:%x:%x dst=%x:%x:%x:%x:%x:%x:%x:%x ",
+			  NIP6(*((struct in6_addr *)tuple->src.u3.ip6)),
+			  NIP6(*((struct in6_addr *)tuple->dst.u3.ip6)));
+}
+
+static int ipv6_print_conntrack(struct seq_file *s,
+				const struct nf_conn *conntrack)
+{
+	return 0;
+}
+
+/*
+ * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c
+ *
+ * This function parses (probably truncated) exthdr set "hdr"
+ * of length "len". "nexthdrp" initially points to some place,
+ * where type of the first header can be found.
+ *
+ * It skips all well-known exthdrs, and returns pointer to the start
+ * of unparsable area i.e. the first header with unknown type.
+ * if success, *nexthdr is updated by type/protocol of this header.
+ *
+ * NOTES: - it may return pointer pointing beyond end of packet,
+ *          if the last recognized header is truncated in the middle.
+ *        - if packet is truncated, so that all parsed headers are skipped,
+ *          it returns -1.
+ *        - if packet is fragmented, return pointer of the fragment header.
+ *        - ESP is unparsable for now and considered like
+ *          normal payload protocol.
+ *        - Note also special handling of AUTH header. Thanks to IPsec wizards.
+ */
+
+int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp,
+			   int len)
+{
+	u8 nexthdr = *nexthdrp;
+
+	while (ipv6_ext_hdr(nexthdr)) {
+		struct ipv6_opt_hdr hdr;
+		int hdrlen;
+
+		if (len < (int)sizeof(struct ipv6_opt_hdr))
+			return -1;
+		if (nexthdr == NEXTHDR_NONE)
+			break;
+		if (nexthdr == NEXTHDR_FRAGMENT)
+			break;
+		if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
+			BUG();
+		if (nexthdr == NEXTHDR_AUTH)
+			hdrlen = (hdr.hdrlen+2)<<2;
+		else
+			hdrlen = ipv6_optlen(&hdr);
+
+		nexthdr = hdr.nexthdr;
+		len -= hdrlen;
+		start += hdrlen;
+	}
+
+	*nexthdrp = nexthdr;
+	return start;
+}
+
+static int
+ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
+	     u_int8_t *protonum)
+{
+	unsigned int extoff;
+	unsigned char pnum;
+	int protoff;
+
+	extoff = (u8*)((*pskb)->nh.ipv6h + 1) - (*pskb)->data;
+	pnum = (*pskb)->nh.ipv6h->nexthdr;
+
+	protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
+					 (*pskb)->len - extoff);
+
+	/*
+	 * (protoff == (*pskb)->len) mean that the packet doesn't have no data
+	 * except of IPv6 & ext headers. but it's tracked anyway. - YK
+	 */
+	if ((protoff < 0) || (protoff > (*pskb)->len)) {
+		DEBUGP("ip6_conntrack_core: can't find proto in pkt\n");
+		NF_CT_STAT_INC(error);
+		NF_CT_STAT_INC(invalid);
+		return -NF_ACCEPT;
+	}
+
+	*dataoff = protoff;
+	*protonum = pnum;
+	return NF_ACCEPT;
+}
+
+static u_int32_t ipv6_get_features(const struct nf_conntrack_tuple *tuple)
+{
+	return NF_CT_F_BASIC;
+}
+
+static unsigned int ipv6_confirm(unsigned int hooknum,
+				 struct sk_buff **pskb,
+				 const struct net_device *in,
+				 const struct net_device *out,
+				 int (*okfn)(struct sk_buff *))
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+
+	/* This is where we call the helper: as the packet goes out. */
+	ct = nf_ct_get(*pskb, &ctinfo);
+	if (ct && ct->helper) {
+		unsigned int ret, protoff;
+		unsigned int extoff = (u8*)((*pskb)->nh.ipv6h + 1)
+				      - (*pskb)->data;
+		unsigned char pnum = (*pskb)->nh.ipv6h->nexthdr;
+
+		protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
+						 (*pskb)->len - extoff);
+		if (protoff < 0 || protoff > (*pskb)->len ||
+		    pnum == NEXTHDR_FRAGMENT) {
+			DEBUGP("proto header not found\n");
+			return NF_ACCEPT;
+		}
+
+		ret = ct->helper->help(pskb, protoff, ct, ctinfo);
+		if (ret != NF_ACCEPT)
+			return ret;
+	}
+
+	/* We've seen it coming out the other side: confirm it */
+
+	return nf_conntrack_confirm(pskb);
+}
+
+extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb);
+extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
+			       struct net_device *in,
+			       struct net_device *out,
+			       int (*okfn)(struct sk_buff *));
+static unsigned int ipv6_defrag(unsigned int hooknum,
+				struct sk_buff **pskb,
+				const struct net_device *in,
+				const struct net_device *out,
+				int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff *reasm;
+
+	/* Previously seen (loopback)?  */
+	if ((*pskb)->nfct)
+		return NF_ACCEPT;
+
+	reasm = nf_ct_frag6_gather(*pskb);
+
+	/* queued */
+	if (reasm == NULL)
+		return NF_STOLEN;
+
+	/* error occured or not fragmented */
+	if (reasm == *pskb)
+		return NF_ACCEPT;
+
+	nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
+			   (struct net_device *)out, okfn);
+
+	return NF_STOLEN;
+}
+
+static unsigned int ipv6_conntrack_in(unsigned int hooknum,
+				      struct sk_buff **pskb,
+				      const struct net_device *in,
+				      const struct net_device *out,
+				      int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff *reasm = (*pskb)->nfct_reasm;
+
+	/* This packet is fragmented and has reassembled packet. */
+	if (reasm) {
+		/* Reassembled packet isn't parsed yet ? */
+		if (!reasm->nfct) {
+			unsigned int ret;
+
+			ret = nf_conntrack_in(PF_INET6, hooknum, &reasm);
+			if (ret != NF_ACCEPT)
+				return ret;
+		}
+		nf_conntrack_get(reasm->nfct);
+		(*pskb)->nfct = reasm->nfct;
+		return NF_ACCEPT;
+	}
+
+	return nf_conntrack_in(PF_INET6, hooknum, pskb);
+}
+
+static unsigned int ipv6_conntrack_local(unsigned int hooknum,
+					 struct sk_buff **pskb,
+					 const struct net_device *in,
+					 const struct net_device *out,
+					 int (*okfn)(struct sk_buff *))
+{
+	/* root is playing with raw sockets. */
+	if ((*pskb)->len < sizeof(struct ipv6hdr)) {
+		if (net_ratelimit())
+			printk("ipv6_conntrack_local: packet too short\n");
+		return NF_ACCEPT;
+	}
+	return ipv6_conntrack_in(hooknum, pskb, in, out, okfn);
+}
+
+/* Connection tracking may drop packets, but never alters them, so
+   make it the first hook. */
+static struct nf_hook_ops ipv6_conntrack_defrag_ops = {
+	.hook		= ipv6_defrag,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET6,
+	.hooknum	= NF_IP6_PRE_ROUTING,
+	.priority	= NF_IP6_PRI_CONNTRACK_DEFRAG,
+};
+
+static struct nf_hook_ops ipv6_conntrack_in_ops = {
+	.hook		= ipv6_conntrack_in,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET6,
+	.hooknum	= NF_IP6_PRE_ROUTING,
+	.priority	= NF_IP6_PRI_CONNTRACK,
+};
+
+static struct nf_hook_ops ipv6_conntrack_local_out_ops = {
+	.hook		= ipv6_conntrack_local,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET6,
+	.hooknum	= NF_IP6_LOCAL_OUT,
+	.priority	= NF_IP6_PRI_CONNTRACK,
+};
+
+static struct nf_hook_ops ipv6_conntrack_defrag_local_out_ops = {
+	.hook		= ipv6_defrag,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET6,
+	.hooknum	= NF_IP6_LOCAL_OUT,
+	.priority	= NF_IP6_PRI_CONNTRACK_DEFRAG,
+};
+
+/* Refragmenter; last chance. */
+static struct nf_hook_ops ipv6_conntrack_out_ops = {
+	.hook		= ipv6_confirm,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET6,
+	.hooknum	= NF_IP6_POST_ROUTING,
+	.priority	= NF_IP6_PRI_LAST,
+};
+
+static struct nf_hook_ops ipv6_conntrack_local_in_ops = {
+	.hook		= ipv6_confirm,
+	.owner		= THIS_MODULE,
+	.pf		= PF_INET6,
+	.hooknum	= NF_IP6_LOCAL_IN,
+	.priority	= NF_IP6_PRI_LAST-1,
+};
+
+#ifdef CONFIG_SYSCTL
+
+/* From nf_conntrack_proto_icmpv6.c */
+extern unsigned long nf_ct_icmpv6_timeout;
+
+/* From nf_conntrack_frag6.c */
+extern unsigned long nf_ct_frag6_timeout;
+extern unsigned long nf_ct_frag6_low_thresh;
+extern unsigned long nf_ct_frag6_high_thresh;
+
+static struct ctl_table_header *nf_ct_ipv6_sysctl_header;
+
+static ctl_table nf_ct_sysctl_table[] = {
+	{
+		.ctl_name	= NET_NF_CONNTRACK_ICMPV6_TIMEOUT,
+		.procname	= "nf_conntrack_icmpv6_timeout",
+		.data		= &nf_ct_icmpv6_timeout,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_NF_CONNTRACK_FRAG6_TIMEOUT,
+		.procname	= "nf_conntrack_frag6_timeout",
+		.data		= &nf_ct_frag6_timeout,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
+		.procname	= "nf_conntrack_frag6_low_thresh",
+		.data		= &nf_ct_frag6_low_thresh,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
+		.procname	= "nf_conntrack_frag6_high_thresh",
+		.data		= &nf_ct_frag6_high_thresh,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+        { .ctl_name = 0 }
+};
+
+static ctl_table nf_ct_netfilter_table[] = {
+	{
+		.ctl_name	= NET_NETFILTER,
+		.procname	= "netfilter",
+		.mode		= 0555,
+		.child		= nf_ct_sysctl_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table nf_ct_net_table[] = {
+	{
+		.ctl_name	= CTL_NET,
+		.procname	= "net",
+		.mode		= 0555,
+		.child		= nf_ct_netfilter_table,
+	},
+	{ .ctl_name = 0 }
+};
+#endif
+
+struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
+	.l3proto		= PF_INET6,
+	.name			= "ipv6",
+	.pkt_to_tuple		= ipv6_pkt_to_tuple,
+	.invert_tuple		= ipv6_invert_tuple,
+	.print_tuple		= ipv6_print_tuple,
+	.print_conntrack	= ipv6_print_conntrack,
+	.prepare		= ipv6_prepare,
+	.get_features		= ipv6_get_features,
+	.me			= THIS_MODULE,
+};
+
+extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp6;
+extern struct nf_conntrack_protocol nf_conntrack_protocol_udp6;
+extern struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6;
+extern int nf_ct_frag6_init(void);
+extern void nf_ct_frag6_cleanup(void);
+static int init_or_cleanup(int init)
+{
+	int ret = 0;
+
+	if (!init) goto cleanup;
+
+	ret = nf_ct_frag6_init();
+	if (ret < 0) {
+		printk("nf_conntrack_ipv6: can't initialize frag6.\n");
+		goto cleanup_nothing;
+	}
+	ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp6);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv6: can't register tcp.\n");
+		goto cleanup_frag6;
+	}
+
+	ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp6);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv6: can't register udp.\n");
+		goto cleanup_tcp;
+	}
+
+	ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmpv6);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv6: can't register icmpv6.\n");
+		goto cleanup_udp;
+	}
+
+	ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv6: can't register ipv6\n");
+		goto cleanup_icmpv6;
+	}
+
+	ret = nf_register_hook(&ipv6_conntrack_defrag_ops);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv6: can't register pre-routing defrag "
+		       "hook.\n");
+		goto cleanup_ipv6;
+	}
+
+	ret = nf_register_hook(&ipv6_conntrack_defrag_local_out_ops);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv6: can't register local_out defrag "
+		       "hook.\n");
+		goto cleanup_defragops;
+	}
+
+	ret = nf_register_hook(&ipv6_conntrack_in_ops);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv6: can't register pre-routing hook.\n");
+		goto cleanup_defraglocalops;
+	}
+
+	ret = nf_register_hook(&ipv6_conntrack_local_out_ops);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv6: can't register local out hook.\n");
+		goto cleanup_inops;
+	}
+
+	ret = nf_register_hook(&ipv6_conntrack_out_ops);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv6: can't register post-routing hook.\n");
+		goto cleanup_inandlocalops;
+	}
+
+	ret = nf_register_hook(&ipv6_conntrack_local_in_ops);
+	if (ret < 0) {
+		printk("nf_conntrack_ipv6: can't register local in hook.\n");
+		goto cleanup_inoutandlocalops;
+	}
+
+#ifdef CONFIG_SYSCTL
+	nf_ct_ipv6_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
+	if (nf_ct_ipv6_sysctl_header == NULL) {
+		printk("nf_conntrack: can't register to sysctl.\n");
+		ret = -ENOMEM;
+		goto cleanup_localinops;
+	}
+#endif
+	return ret;
+
+ cleanup:
+	synchronize_net();
+#ifdef CONFIG_SYSCTL
+ 	unregister_sysctl_table(nf_ct_ipv6_sysctl_header);
+ cleanup_localinops:
+#endif
+	nf_unregister_hook(&ipv6_conntrack_local_in_ops);
+ cleanup_inoutandlocalops:
+	nf_unregister_hook(&ipv6_conntrack_out_ops);
+ cleanup_inandlocalops:
+	nf_unregister_hook(&ipv6_conntrack_local_out_ops);
+ cleanup_inops:
+	nf_unregister_hook(&ipv6_conntrack_in_ops);
+ cleanup_defraglocalops:
+	nf_unregister_hook(&ipv6_conntrack_defrag_local_out_ops);
+ cleanup_defragops:
+	nf_unregister_hook(&ipv6_conntrack_defrag_ops);
+ cleanup_ipv6:
+	nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
+ cleanup_icmpv6:
+	nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmpv6);
+ cleanup_udp:
+	nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp6);
+ cleanup_tcp:
+	nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp6);
+ cleanup_frag6:
+	nf_ct_frag6_cleanup();
+ cleanup_nothing:
+	return ret;
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
+
+static int __init init(void)
+{
+	need_nf_conntrack();
+	return init_or_cleanup(1);
+}
+
+static void __exit fini(void)
+{
+	init_or_cleanup(0);
+}
+
+module_init(init);
+module_exit(fini);
+
+void need_ip6_conntrack(void)
+{
+}
+
+EXPORT_SYMBOL(need_ip6_conntrack);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
new file mode 100644
index 000000000000..c0f1da5497a9
--- /dev/null
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C)2003,2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *	- ICMPv6 tracking support. Derived from the original ip_conntrack code
+ *	  net/ipv4/netfilter/ip_conntrack_proto_icmp.c which had the following
+ *	  copyright information:
+ *		(C) 1999-2001 Paul `Rusty' Russell
+ *		(C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <linux/seq_file.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
+
+unsigned long nf_ct_icmpv6_timeout = 30*HZ;
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static int icmpv6_pkt_to_tuple(const struct sk_buff *skb,
+			       unsigned int dataoff,
+			       struct nf_conntrack_tuple *tuple)
+{
+	struct icmp6hdr _hdr, *hp;
+
+	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+	if (hp == NULL)
+		return 0;
+	tuple->dst.u.icmp.type = hp->icmp6_type;
+	tuple->src.u.icmp.id = hp->icmp6_identifier;
+	tuple->dst.u.icmp.code = hp->icmp6_code;
+
+	return 1;
+}
+
+static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
+			       const struct nf_conntrack_tuple *orig)
+{
+	/* Add 1; spaces filled with 0. */
+	static u_int8_t invmap[] = {
+		[ICMPV6_ECHO_REQUEST - 128]	= ICMPV6_ECHO_REPLY + 1,
+		[ICMPV6_ECHO_REPLY - 128]	= ICMPV6_ECHO_REQUEST + 1,
+		[ICMPV6_NI_QUERY - 128]		= ICMPV6_NI_QUERY + 1,
+		[ICMPV6_NI_REPLY - 128]		= ICMPV6_NI_REPLY +1
+	};
+
+	__u8 type = orig->dst.u.icmp.type - 128;
+	if (type >= sizeof(invmap) || !invmap[type])
+		return 0;
+
+	tuple->src.u.icmp.id   = orig->src.u.icmp.id;
+	tuple->dst.u.icmp.type = invmap[type] - 1;
+	tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+	return 1;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static int icmpv6_print_tuple(struct seq_file *s,
+			      const struct nf_conntrack_tuple *tuple)
+{
+	return seq_printf(s, "type=%u code=%u id=%u ",
+			  tuple->dst.u.icmp.type,
+			  tuple->dst.u.icmp.code,
+			  ntohs(tuple->src.u.icmp.id));
+}
+
+/* Print out the private part of the conntrack. */
+static int icmpv6_print_conntrack(struct seq_file *s,
+				  const struct nf_conn *conntrack)
+{
+	return 0;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int icmpv6_packet(struct nf_conn *ct,
+		       const struct sk_buff *skb,
+		       unsigned int dataoff,
+		       enum ip_conntrack_info ctinfo,
+		       int pf,
+		       unsigned int hooknum)
+{
+	/* Try to delete connection immediately after all replies:
+           won't actually vanish as we still have skb, and del_timer
+           means this will only run once even if count hits zero twice
+           (theoretically possible with SMP) */
+	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
+		if (atomic_dec_and_test(&ct->proto.icmp.count)
+		    && del_timer(&ct->timeout))
+			ct->timeout.function((unsigned long)ct);
+	} else {
+		atomic_inc(&ct->proto.icmp.count);
+		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
+	}
+
+	return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static int icmpv6_new(struct nf_conn *conntrack,
+		      const struct sk_buff *skb,
+		      unsigned int dataoff)
+{
+	static u_int8_t valid_new[] = {
+		[ICMPV6_ECHO_REQUEST - 128] = 1,
+		[ICMPV6_NI_QUERY - 128] = 1
+	};
+
+	if (conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128 >= sizeof(valid_new)
+	    || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128]) {
+		/* Can't create a new ICMPv6 `conn' with this. */
+		DEBUGP("icmp: can't create new conn with type %u\n",
+		       conntrack->tuplehash[0].tuple.dst.u.icmp.type);
+		NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
+		return 0;
+	}
+	atomic_set(&conntrack->proto.icmp.count, 0);
+	return 1;
+}
+
+extern int
+nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, int len);
+extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
+static int
+icmpv6_error_message(struct sk_buff *skb,
+		     unsigned int icmp6off,
+		     enum ip_conntrack_info *ctinfo,
+		     unsigned int hooknum)
+{
+	struct nf_conntrack_tuple intuple, origtuple;
+	struct nf_conntrack_tuple_hash *h;
+	struct icmp6hdr _hdr, *hp;
+	unsigned int inip6off;
+	struct nf_conntrack_protocol *inproto;
+	u_int8_t inprotonum;
+	unsigned int inprotoff;
+
+	NF_CT_ASSERT(skb->nfct == NULL);
+
+	hp = skb_header_pointer(skb, icmp6off, sizeof(_hdr), &_hdr);
+	if (hp == NULL) {
+		DEBUGP("icmpv6_error: Can't get ICMPv6 hdr.\n");
+		return -NF_ACCEPT;
+	}
+
+	inip6off = icmp6off + sizeof(_hdr);
+	if (skb_copy_bits(skb, inip6off+offsetof(struct ipv6hdr, nexthdr),
+			  &inprotonum, sizeof(inprotonum)) != 0) {
+		DEBUGP("icmpv6_error: Can't get nexthdr in inner IPv6 header.\n");
+		return -NF_ACCEPT;
+	}
+	inprotoff = nf_ct_ipv6_skip_exthdr(skb,
+					   inip6off + sizeof(struct ipv6hdr),
+					   &inprotonum,
+					   skb->len - inip6off
+						    - sizeof(struct ipv6hdr));
+
+	if ((inprotoff < 0) || (inprotoff > skb->len) ||
+	    (inprotonum == NEXTHDR_FRAGMENT)) {
+		DEBUGP("icmpv6_error: Can't get protocol header in ICMPv6 payload.\n");
+		return -NF_ACCEPT;
+	}
+
+	inproto = nf_ct_find_proto(PF_INET6, inprotonum);
+
+	/* Are they talking about one of our connections? */
+	if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum,
+			     &origtuple, &nf_conntrack_l3proto_ipv6, inproto)) {
+		DEBUGP("icmpv6_error: Can't get tuple\n");
+		return -NF_ACCEPT;
+	}
+
+	/* Ordinarily, we'd expect the inverted tupleproto, but it's
+	   been preserved inside the ICMP. */
+	if (!nf_ct_invert_tuple(&intuple, &origtuple,
+				&nf_conntrack_l3proto_ipv6, inproto)) {
+		DEBUGP("icmpv6_error: Can't invert tuple\n");
+		return -NF_ACCEPT;
+	}
+
+	*ctinfo = IP_CT_RELATED;
+
+	h = nf_conntrack_find_get(&intuple, NULL);
+	if (!h) {
+		DEBUGP("icmpv6_error: no match\n");
+		return -NF_ACCEPT;
+	} else {
+		if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+			*ctinfo += IP_CT_IS_REPLY;
+	}
+
+	/* Update skb to refer to this connection */
+	skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
+	skb->nfctinfo = *ctinfo;
+	return -NF_ACCEPT;
+}
+
+static int
+icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
+	     enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
+{
+	struct icmp6hdr _ih, *icmp6h;
+
+	icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
+	if (icmp6h == NULL) {
+		if (LOG_INVALID(IPPROTO_ICMPV6))
+		nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+			      "nf_ct_icmpv6: short packet ");
+		return -NF_ACCEPT;
+	}
+
+	if (hooknum != NF_IP6_PRE_ROUTING)
+		goto skipped;
+
+	/* Ignore it if the checksum's bogus. */
+	if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
+			    skb->len - dataoff, IPPROTO_ICMPV6,
+			    skb_checksum(skb, dataoff,
+					 skb->len - dataoff, 0))) {
+		nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+			      "nf_ct_icmpv6: ICMPv6 checksum failed\n");
+		return -NF_ACCEPT;
+	}
+
+skipped:
+
+	/* is not error message ? */
+	if (icmp6h->icmp6_type >= 128)
+		return NF_ACCEPT;
+
+	return icmpv6_error_message(skb, dataoff, ctinfo, hooknum);
+}
+
+struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
+{
+	.l3proto		= PF_INET6,
+	.proto			= IPPROTO_ICMPV6,
+	.name			= "icmpv6",
+	.pkt_to_tuple		= icmpv6_pkt_to_tuple,
+	.invert_tuple		= icmpv6_invert_tuple,
+	.print_tuple		= icmpv6_print_tuple,
+	.print_conntrack	= icmpv6_print_conntrack,
+	.packet			= icmpv6_packet,
+	.new			= icmpv6_new,
+	.error			= icmpv6_error,
+};
+
+EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
new file mode 100644
index 000000000000..7640b9bb7694
--- /dev/null
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -0,0 +1,885 @@
+/*
+ * IPv6 fragment reassembly for connection tracking
+ *
+ * Copyright (C)2004 USAGI/WIDE Project
+ *
+ * Author:
+ *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ *
+ * Based on: net/ipv6/reassembly.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/jiffies.h>
+#include <linux/net.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <linux/sysctl.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */
+#define NF_CT_FRAG6_LOW_THRESH 196608  /* == 192*1024 */
+#define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT
+
+int nf_ct_frag6_high_thresh = 256*1024;
+int nf_ct_frag6_low_thresh = 192*1024;
+int nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT;
+
+struct nf_ct_frag6_skb_cb
+{
+	struct inet6_skb_parm	h;
+	int			offset;
+	struct sk_buff		*orig;
+};
+
+#define NFCT_FRAG6_CB(skb)	((struct nf_ct_frag6_skb_cb*)((skb)->cb))
+
+struct nf_ct_frag6_queue
+{
+	struct nf_ct_frag6_queue	*next;
+	struct list_head lru_list;		/* lru list member	*/
+
+	__u32			id;		/* fragment id		*/
+	struct in6_addr		saddr;
+	struct in6_addr		daddr;
+
+	spinlock_t		lock;
+	atomic_t		refcnt;
+	struct timer_list	timer;		/* expire timer		*/
+	struct sk_buff		*fragments;
+	int			len;
+	int			meat;
+	struct timeval		stamp;
+	unsigned int		csum;
+	__u8			last_in;	/* has first/last segment arrived? */
+#define COMPLETE		4
+#define FIRST_IN		2
+#define LAST_IN			1
+	__u16			nhoffset;
+	struct nf_ct_frag6_queue	**pprev;
+};
+
+/* Hash table. */
+
+#define FRAG6Q_HASHSZ	64
+
+static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ];
+static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED;
+static u32 nf_ct_frag6_hash_rnd;
+static LIST_HEAD(nf_ct_frag6_lru_list);
+int nf_ct_frag6_nqueues = 0;
+
+static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq)
+{
+	if (fq->next)
+		fq->next->pprev = fq->pprev;
+	*fq->pprev = fq->next;
+	list_del(&fq->lru_list);
+	nf_ct_frag6_nqueues--;
+}
+
+static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq)
+{
+	write_lock(&nf_ct_frag6_lock);
+	__fq_unlink(fq);
+	write_unlock(&nf_ct_frag6_lock);
+}
+
+static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
+			       struct in6_addr *daddr)
+{
+	u32 a, b, c;
+
+	a = saddr->s6_addr32[0];
+	b = saddr->s6_addr32[1];
+	c = saddr->s6_addr32[2];
+
+	a += JHASH_GOLDEN_RATIO;
+	b += JHASH_GOLDEN_RATIO;
+	c += nf_ct_frag6_hash_rnd;
+	__jhash_mix(a, b, c);
+
+	a += saddr->s6_addr32[3];
+	b += daddr->s6_addr32[0];
+	c += daddr->s6_addr32[1];
+	__jhash_mix(a, b, c);
+
+	a += daddr->s6_addr32[2];
+	b += daddr->s6_addr32[3];
+	c += id;
+	__jhash_mix(a, b, c);
+
+	return c & (FRAG6Q_HASHSZ - 1);
+}
+
+static struct timer_list nf_ct_frag6_secret_timer;
+int nf_ct_frag6_secret_interval = 10 * 60 * HZ;
+
+static void nf_ct_frag6_secret_rebuild(unsigned long dummy)
+{
+	unsigned long now = jiffies;
+	int i;
+
+	write_lock(&nf_ct_frag6_lock);
+	get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32));
+	for (i = 0; i < FRAG6Q_HASHSZ; i++) {
+		struct nf_ct_frag6_queue *q;
+
+		q = nf_ct_frag6_hash[i];
+		while (q) {
+			struct nf_ct_frag6_queue *next = q->next;
+			unsigned int hval = ip6qhashfn(q->id,
+						       &q->saddr,
+						       &q->daddr);
+
+			if (hval != i) {
+				/* Unlink. */
+				if (q->next)
+					q->next->pprev = q->pprev;
+				*q->pprev = q->next;
+
+				/* Relink to new hash chain. */
+				if ((q->next = nf_ct_frag6_hash[hval]) != NULL)
+					q->next->pprev = &q->next;
+				nf_ct_frag6_hash[hval] = q;
+				q->pprev = &nf_ct_frag6_hash[hval];
+			}
+
+			q = next;
+		}
+	}
+	write_unlock(&nf_ct_frag6_lock);
+
+	mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval);
+}
+
+atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0);
+
+/* Memory Tracking Functions. */
+static inline void frag_kfree_skb(struct sk_buff *skb)
+{
+	atomic_sub(skb->truesize, &nf_ct_frag6_mem);
+	if (NFCT_FRAG6_CB(skb)->orig)
+		kfree_skb(NFCT_FRAG6_CB(skb)->orig);
+
+	kfree_skb(skb);
+}
+
+static inline void frag_free_queue(struct nf_ct_frag6_queue *fq)
+{
+	atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
+	kfree(fq);
+}
+
+static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
+{
+	struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);
+
+	if (!fq)
+		return NULL;
+	atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
+	return fq;
+}
+
+/* Destruction primitives. */
+
+/* Complete destruction of fq. */
+static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq)
+{
+	struct sk_buff *fp;
+
+	BUG_TRAP(fq->last_in&COMPLETE);
+	BUG_TRAP(del_timer(&fq->timer) == 0);
+
+	/* Release all fragment data. */
+	fp = fq->fragments;
+	while (fp) {
+		struct sk_buff *xp = fp->next;
+
+		frag_kfree_skb(fp);
+		fp = xp;
+	}
+
+	frag_free_queue(fq);
+}
+
+static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
+{
+	if (atomic_dec_and_test(&fq->refcnt))
+		nf_ct_frag6_destroy(fq);
+}
+
+/* Kill fq entry. It is not destroyed immediately,
+ * because caller (and someone more) holds reference count.
+ */
+static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
+{
+	if (del_timer(&fq->timer))
+		atomic_dec(&fq->refcnt);
+
+	if (!(fq->last_in & COMPLETE)) {
+		fq_unlink(fq);
+		atomic_dec(&fq->refcnt);
+		fq->last_in |= COMPLETE;
+	}
+}
+
+static void nf_ct_frag6_evictor(void)
+{
+	struct nf_ct_frag6_queue *fq;
+	struct list_head *tmp;
+
+	for (;;) {
+		if (atomic_read(&nf_ct_frag6_mem) <= nf_ct_frag6_low_thresh)
+			return;
+		read_lock(&nf_ct_frag6_lock);
+		if (list_empty(&nf_ct_frag6_lru_list)) {
+			read_unlock(&nf_ct_frag6_lock);
+			return;
+		}
+		tmp = nf_ct_frag6_lru_list.next;
+		fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list);
+		atomic_inc(&fq->refcnt);
+		read_unlock(&nf_ct_frag6_lock);
+
+		spin_lock(&fq->lock);
+		if (!(fq->last_in&COMPLETE))
+			fq_kill(fq);
+		spin_unlock(&fq->lock);
+
+		fq_put(fq);
+	}
+}
+
+static void nf_ct_frag6_expire(unsigned long data)
+{
+	struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data;
+
+	spin_lock(&fq->lock);
+
+	if (fq->last_in & COMPLETE)
+		goto out;
+
+	fq_kill(fq);
+
+out:
+	spin_unlock(&fq->lock);
+	fq_put(fq);
+}
+
+/* Creation primitives. */
+
+
+static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
+					  struct nf_ct_frag6_queue *fq_in)
+{
+	struct nf_ct_frag6_queue *fq;
+
+	write_lock(&nf_ct_frag6_lock);
+#ifdef CONFIG_SMP
+	for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
+		if (fq->id == fq_in->id && 
+		    !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) &&
+		    !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) {
+			atomic_inc(&fq->refcnt);
+			write_unlock(&nf_ct_frag6_lock);
+			fq_in->last_in |= COMPLETE;
+			fq_put(fq_in);
+			return fq;
+		}
+	}
+#endif
+	fq = fq_in;
+
+	if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout))
+		atomic_inc(&fq->refcnt);
+
+	atomic_inc(&fq->refcnt);
+	if ((fq->next = nf_ct_frag6_hash[hash]) != NULL)
+		fq->next->pprev = &fq->next;
+	nf_ct_frag6_hash[hash] = fq;
+	fq->pprev = &nf_ct_frag6_hash[hash];
+	INIT_LIST_HEAD(&fq->lru_list);
+	list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
+	nf_ct_frag6_nqueues++;
+	write_unlock(&nf_ct_frag6_lock);
+	return fq;
+}
+
+
+static struct nf_ct_frag6_queue *
+nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src,				   struct in6_addr *dst)
+{
+	struct nf_ct_frag6_queue *fq;
+
+	if ((fq = frag_alloc_queue()) == NULL) {
+		DEBUGP("Can't alloc new queue\n");
+		goto oom;
+	}
+
+	memset(fq, 0, sizeof(struct nf_ct_frag6_queue));
+
+	fq->id = id;
+	ipv6_addr_copy(&fq->saddr, src);
+	ipv6_addr_copy(&fq->daddr, dst);
+
+	init_timer(&fq->timer);
+	fq->timer.function = nf_ct_frag6_expire;
+	fq->timer.data = (long) fq;
+	fq->lock = SPIN_LOCK_UNLOCKED;
+	atomic_set(&fq->refcnt, 1);
+
+	return nf_ct_frag6_intern(hash, fq);
+
+oom:
+	return NULL;
+}
+
+static __inline__ struct nf_ct_frag6_queue *
+fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
+{
+	struct nf_ct_frag6_queue *fq;
+	unsigned int hash = ip6qhashfn(id, src, dst);
+
+	read_lock(&nf_ct_frag6_lock);
+	for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
+		if (fq->id == id && 
+		    !ipv6_addr_cmp(src, &fq->saddr) &&
+		    !ipv6_addr_cmp(dst, &fq->daddr)) {
+			atomic_inc(&fq->refcnt);
+			read_unlock(&nf_ct_frag6_lock);
+			return fq;
+		}
+	}
+	read_unlock(&nf_ct_frag6_lock);
+
+	return nf_ct_frag6_create(hash, id, src, dst);
+}
+
+
+static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, 
+			     struct frag_hdr *fhdr, int nhoff)
+{
+	struct sk_buff *prev, *next;
+	int offset, end;
+
+	if (fq->last_in & COMPLETE) {
+		DEBUGP("Allready completed\n");
+		goto err;
+	}
+
+	offset = ntohs(fhdr->frag_off) & ~0x7;
+	end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
+			((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
+
+	if ((unsigned int)end > IPV6_MAXPLEN) {
+		DEBUGP("offset is too large.\n");
+ 		return -1;
+	}
+
+ 	if (skb->ip_summed == CHECKSUM_HW)
+ 		skb->csum = csum_sub(skb->csum,
+ 				     csum_partial(skb->nh.raw,
+						  (u8*)(fhdr + 1) - skb->nh.raw,
+						  0));
+
+	/* Is this the final fragment? */
+	if (!(fhdr->frag_off & htons(IP6_MF))) {
+		/* If we already have some bits beyond end
+		 * or have different end, the segment is corrupted.
+		 */
+		if (end < fq->len ||
+		    ((fq->last_in & LAST_IN) && end != fq->len)) {
+			DEBUGP("already received last fragment\n");
+			goto err;
+		}
+		fq->last_in |= LAST_IN;
+		fq->len = end;
+	} else {
+		/* Check if the fragment is rounded to 8 bytes.
+		 * Required by the RFC.
+		 */
+		if (end & 0x7) {
+			/* RFC2460 says always send parameter problem in
+			 * this case. -DaveM
+			 */
+			DEBUGP("the end of this fragment is not rounded to 8 bytes.\n");
+			return -1;
+		}
+		if (end > fq->len) {
+			/* Some bits beyond end -> corruption. */
+			if (fq->last_in & LAST_IN) {
+				DEBUGP("last packet already reached.\n");
+				goto err;
+			}
+			fq->len = end;
+		}
+	}
+
+	if (end == offset)
+		goto err;
+
+	/* Point into the IP datagram 'data' part. */
+	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) {
+		DEBUGP("queue: message is too short.\n");
+		goto err;
+	}
+	if (end-offset < skb->len) {
+		if (pskb_trim(skb, end - offset)) {
+			DEBUGP("Can't trim\n");
+			goto err;
+		}
+		if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+			skb->ip_summed = CHECKSUM_NONE;
+	}
+
+	/* Find out which fragments are in front and at the back of us
+	 * in the chain of fragments so far.  We must know where to put
+	 * this fragment, right?
+	 */
+	prev = NULL;
+	for (next = fq->fragments; next != NULL; next = next->next) {
+		if (NFCT_FRAG6_CB(next)->offset >= offset)
+			break;	/* bingo! */
+		prev = next;
+	}
+
+	/* We found where to put this one.  Check for overlap with
+	 * preceding fragment, and, if needed, align things so that
+	 * any overlaps are eliminated.
+	 */
+	if (prev) {
+		int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset;
+
+		if (i > 0) {
+			offset += i;
+			if (end <= offset) {
+				DEBUGP("overlap\n");
+				goto err;
+			}
+			if (!pskb_pull(skb, i)) {
+				DEBUGP("Can't pull\n");
+				goto err;
+			}
+			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+				skb->ip_summed = CHECKSUM_NONE;
+		}
+	}
+
+	/* Look for overlap with succeeding segments.
+	 * If we can merge fragments, do it.
+	 */
+	while (next && NFCT_FRAG6_CB(next)->offset < end) {
+		/* overlap is 'i' bytes */
+		int i = end - NFCT_FRAG6_CB(next)->offset;
+
+		if (i < next->len) {
+			/* Eat head of the next overlapped fragment
+			 * and leave the loop. The next ones cannot overlap.
+			 */
+			DEBUGP("Eat head of the overlapped parts.: %d", i);
+			if (!pskb_pull(next, i))
+				goto err;
+
+			/* next fragment */
+			NFCT_FRAG6_CB(next)->offset += i;
+			fq->meat -= i;
+			if (next->ip_summed != CHECKSUM_UNNECESSARY)
+				next->ip_summed = CHECKSUM_NONE;
+			break;
+		} else {
+			struct sk_buff *free_it = next;
+
+			/* Old fragmnet is completely overridden with
+			 * new one drop it.
+			 */
+			next = next->next;
+
+			if (prev)
+				prev->next = next;
+			else
+				fq->fragments = next;
+
+			fq->meat -= free_it->len;
+			frag_kfree_skb(free_it);
+		}
+	}
+
+	NFCT_FRAG6_CB(skb)->offset = offset;
+
+	/* Insert this fragment in the chain of fragments. */
+	skb->next = next;
+	if (prev)
+		prev->next = skb;
+	else
+		fq->fragments = skb;
+
+	skb->dev = NULL;
+	skb_get_timestamp(skb, &fq->stamp);
+	fq->meat += skb->len;
+	atomic_add(skb->truesize, &nf_ct_frag6_mem);
+
+	/* The first fragment.
+	 * nhoffset is obtained from the first fragment, of course.
+	 */
+	if (offset == 0) {
+		fq->nhoffset = nhoff;
+		fq->last_in |= FIRST_IN;
+	}
+	write_lock(&nf_ct_frag6_lock);
+	list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
+	write_unlock(&nf_ct_frag6_lock);
+	return 0;
+
+err:
+	return -1;
+}
+
+/*
+ *	Check if this packet is complete.
+ *	Returns NULL on failure by any reason, and pointer
+ *	to current nexthdr field in reassembled frame.
+ *
+ *	It is called with locked fq, and caller must check that
+ *	queue is eligible for reassembly i.e. it is not COMPLETE,
+ *	the last and the first frames arrived and all the bits are here.
+ */
+static struct sk_buff *
+nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
+{
+	struct sk_buff *fp, *op, *head = fq->fragments;
+	int    payload_len;
+
+	fq_kill(fq);
+
+	BUG_TRAP(head != NULL);
+	BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0);
+
+	/* Unfragmented part is taken from the first segment. */
+	payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+	if (payload_len > IPV6_MAXPLEN) {
+		DEBUGP("payload len is too large.\n");
+		goto out_oversize;
+	}
+
+	/* Head of list must not be cloned. */
+	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
+		DEBUGP("skb is cloned but can't expand head");
+		goto out_oom;
+	}
+
+	/* If the first fragment is fragmented itself, we split
+	 * it to two chunks: the first with data and paged part
+	 * and the second, holding only fragments. */
+	if (skb_shinfo(head)->frag_list) {
+		struct sk_buff *clone;
+		int i, plen = 0;
+
+		if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) {
+			DEBUGP("Can't alloc skb\n");
+			goto out_oom;
+		}
+		clone->next = head->next;
+		head->next = clone;
+		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+		skb_shinfo(head)->frag_list = NULL;
+		for (i=0; i<skb_shinfo(head)->nr_frags; i++)
+			plen += skb_shinfo(head)->frags[i].size;
+		clone->len = clone->data_len = head->data_len - plen;
+		head->data_len -= clone->len;
+		head->len -= clone->len;
+		clone->csum = 0;
+		clone->ip_summed = head->ip_summed;
+
+		NFCT_FRAG6_CB(clone)->orig = NULL;
+		atomic_add(clone->truesize, &nf_ct_frag6_mem);
+	}
+
+	/* We have to remove fragment header from datagram and to relocate
+	 * header in order to calculate ICV correctly. */
+	head->nh.raw[fq->nhoffset] = head->h.raw[0];
+	memmove(head->head + sizeof(struct frag_hdr), head->head, 
+		(head->data - head->head) - sizeof(struct frag_hdr));
+	head->mac.raw += sizeof(struct frag_hdr);
+	head->nh.raw += sizeof(struct frag_hdr);
+
+	skb_shinfo(head)->frag_list = head->next;
+	head->h.raw = head->data;
+	skb_push(head, head->data - head->nh.raw);
+	atomic_sub(head->truesize, &nf_ct_frag6_mem);
+
+	for (fp=head->next; fp; fp = fp->next) {
+		head->data_len += fp->len;
+		head->len += fp->len;
+		if (head->ip_summed != fp->ip_summed)
+			head->ip_summed = CHECKSUM_NONE;
+		else if (head->ip_summed == CHECKSUM_HW)
+			head->csum = csum_add(head->csum, fp->csum);
+		head->truesize += fp->truesize;
+		atomic_sub(fp->truesize, &nf_ct_frag6_mem);
+	}
+
+	head->next = NULL;
+	head->dev = dev;
+	skb_set_timestamp(head, &fq->stamp);
+	head->nh.ipv6h->payload_len = htons(payload_len);
+
+	/* Yes, and fold redundant checksum back. 8) */
+	if (head->ip_summed == CHECKSUM_HW)
+		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
+
+	fq->fragments = NULL;
+
+	/* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
+	fp = skb_shinfo(head)->frag_list;
+	if (NFCT_FRAG6_CB(fp)->orig == NULL)
+		/* at above code, head skb is divided into two skbs. */
+		fp = fp->next;
+
+	op = NFCT_FRAG6_CB(head)->orig;
+	for (; fp; fp = fp->next) {
+		struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig;
+
+		op->next = orig;
+		op = orig;
+		NFCT_FRAG6_CB(fp)->orig = NULL;
+	}
+
+	return head;
+
+out_oversize:
+	if (net_ratelimit())
+		printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len);
+	goto out_fail;
+out_oom:
+	if (net_ratelimit())
+		printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n");
+out_fail:
+	return NULL;
+}
+
+/*
+ * find the header just before Fragment Header.
+ *
+ * if success return 0 and set ...
+ * (*prevhdrp): the value of "Next Header Field" in the header
+ *		just before Fragment Header.
+ * (*prevhoff): the offset of "Next Header Field" in the header
+ *		just before Fragment Header.
+ * (*fhoff)   : the offset of Fragment Header.
+ *
+ * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c
+ *
+ */
+static int
+find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
+{
+        u8 nexthdr = skb->nh.ipv6h->nexthdr;
+	u8 prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data;
+	int start = (u8 *)(skb->nh.ipv6h+1) - skb->data;
+	int len = skb->len - start;
+	u8 prevhdr = NEXTHDR_IPV6;
+
+        while (nexthdr != NEXTHDR_FRAGMENT) {
+                struct ipv6_opt_hdr hdr;
+                int hdrlen;
+
+		if (!ipv6_ext_hdr(nexthdr)) {
+			return -1;
+		}
+                if (len < (int)sizeof(struct ipv6_opt_hdr)) {
+			DEBUGP("too short\n");
+			return -1;
+		}
+                if (nexthdr == NEXTHDR_NONE) {
+			DEBUGP("next header is none\n");
+			return -1;
+		}
+                if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
+                        BUG();
+                if (nexthdr == NEXTHDR_AUTH)
+                        hdrlen = (hdr.hdrlen+2)<<2;
+                else
+                        hdrlen = ipv6_optlen(&hdr);
+
+		prevhdr = nexthdr;
+		prev_nhoff = start;
+
+                nexthdr = hdr.nexthdr;
+                len -= hdrlen;
+                start += hdrlen;
+        }
+
+	if (len < 0)
+		return -1;
+
+	*prevhdrp = prevhdr;
+	*prevhoff = prev_nhoff;
+	*fhoff = start;
+
+	return 0;
+}
+
+struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
+{
+	struct sk_buff *clone; 
+	struct net_device *dev = skb->dev;
+	struct frag_hdr *fhdr;
+	struct nf_ct_frag6_queue *fq;
+	struct ipv6hdr *hdr;
+	int fhoff, nhoff;
+	u8 prevhdr;
+	struct sk_buff *ret_skb = NULL;
+
+	/* Jumbo payload inhibits frag. header */
+	if (skb->nh.ipv6h->payload_len == 0) {
+		DEBUGP("payload len = 0\n");
+		return skb;
+	}
+
+	if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
+		return skb;
+
+	clone = skb_clone(skb, GFP_ATOMIC);
+	if (clone == NULL) {
+		DEBUGP("Can't clone skb\n");
+		return skb;
+	}
+
+	NFCT_FRAG6_CB(clone)->orig = skb;
+
+	if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) {
+		DEBUGP("message is too short.\n");
+		goto ret_orig;
+	}
+
+	clone->h.raw = clone->data + fhoff;
+	hdr = clone->nh.ipv6h;
+	fhdr = (struct frag_hdr *)clone->h.raw;
+
+	if (!(fhdr->frag_off & htons(0xFFF9))) {
+		DEBUGP("Invalid fragment offset\n");
+		/* It is not a fragmented frame */
+		goto ret_orig;
+	}
+
+	if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh)
+		nf_ct_frag6_evictor();
+
+	fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
+	if (fq == NULL) {
+		DEBUGP("Can't find and can't create new queue\n");
+		goto ret_orig;
+	}
+
+	spin_lock(&fq->lock);
+
+	if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
+		spin_unlock(&fq->lock);
+		DEBUGP("Can't insert skb to queue\n");
+		fq_put(fq);
+		goto ret_orig;
+	}
+
+	if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) {
+		ret_skb = nf_ct_frag6_reasm(fq, dev);
+		if (ret_skb == NULL)
+			DEBUGP("Can't reassemble fragmented packets\n");
+	}
+	spin_unlock(&fq->lock);
+
+	fq_put(fq);
+	return ret_skb;
+
+ret_orig:
+	kfree_skb(clone);
+	return skb;
+}
+
+void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
+			struct net_device *in, struct net_device *out,
+			int (*okfn)(struct sk_buff *))
+{
+	struct sk_buff *s, *s2;
+
+	for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
+		nf_conntrack_put_reasm(s->nfct_reasm);
+		nf_conntrack_get_reasm(skb);
+		s->nfct_reasm = skb;
+
+		s2 = s->next;
+		NF_HOOK_THRESH(PF_INET6, hooknum, s, in, out, okfn,
+			       NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
+		s = s2;
+	}
+	nf_conntrack_put_reasm(skb);
+}
+
+int nf_ct_frag6_kfree_frags(struct sk_buff *skb)
+{
+	struct sk_buff *s, *s2;
+
+	for (s = NFCT_FRAG6_CB(skb)->orig; s; s = s2) {
+
+		s2 = s->next;
+		kfree_skb(s);
+	}
+
+	kfree_skb(skb);
+
+	return 0;
+}
+
+int nf_ct_frag6_init(void)
+{
+	nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
+				   (jiffies ^ (jiffies >> 6)));
+
+	init_timer(&nf_ct_frag6_secret_timer);
+	nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild;
+	nf_ct_frag6_secret_timer.expires = jiffies
+					   + nf_ct_frag6_secret_interval;
+	add_timer(&nf_ct_frag6_secret_timer);
+
+	return 0;
+}
+
+void nf_ct_frag6_cleanup(void)
+{
+	del_timer(&nf_ct_frag6_secret_timer);
+	nf_ct_frag6_evictor();
+}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a1265a320b11..651c79b41eeb 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -174,8 +174,10 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
 
 			/* Not releasing hash table! */
-			if (clone)
+			if (clone) {
+				nf_reset(clone);
 				rawv6_rcv(sk, clone);
+			}
 		}
 		sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr,
 				     IP6CB(skb)->iif);
author	Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>	2005-11-09 16:38:16 -0800
committer	David S. Miller <davem@davemloft.net>	2005-11-09 16:38:16 -0800
commit	9fb9cbb1082d6b31fb45aa1a14432449a0df6cf1 (patch)
tree	c964a62bdd766eca436c30f51a9e33e2b798b0a6 /net/ipv6
parent	6730c3c14421b7c924d06e31bb66e0adad225547 (diff)