summaryrefslogtreecommitdiff
path: root/net/ipv6/netfilter
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 13:38:27 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 13:38:27 -0700
commitaecdc33e111b2c447b622e287c6003726daa1426 (patch)
tree3e7657eae4b785e1a1fb5dfb225dbae0b2f0cfc6 /net/ipv6/netfilter
parenta20acf99f75e49271381d65db097c9763060a1e8 (diff)
parenta3a6cab5ea10cca64d036851fe0d932448f2fe4f (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David Miller: 1) GRE now works over ipv6, from Dmitry Kozlov. 2) Make SCTP more network namespace aware, from Eric Biederman. 3) TEAM driver now works with non-ethernet devices, from Jiri Pirko. 4) Make openvswitch network namespace aware, from Pravin B Shelar. 5) IPV6 NAT implementation, from Patrick McHardy. 6) Server side support for TCP Fast Open, from Jerry Chu and others. 7) Packet BPF filter supports MOD and XOR, from Eric Dumazet and Daniel Borkmann. 8) Increase the loopback default MTU to 64K, from Eric Dumazet. 9) Use a per-task rather than per-socket page fragment allocator for outgoing networking traffic. This benefits processes that have very many mostly idle sockets, which is quite common. From Eric Dumazet. 10) Use up to 32K for page fragment allocations, with fallbacks to smaller sizes when higher order page allocations fail. Benefits are a) fewer segments for driver to process b) fewer calls to page allocator c) less waste of space. From Eric Dumazet. 11) Allow GRO to be used on GRE tunnels, from Eric Dumazet. 12) VXLAN device driver, one way to handle VLAN issues such as the limitation of 4096 VLAN IDs yet still have some level of isolation. From Stephen Hemminger. 13) As usual there is a large boatload of driver changes, with the scale perhaps tilted towards the wireless side this time around. Fix up various fairly trivial conflicts, mostly caused by the user namespace changes. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1012 commits) hyperv: Add buffer for extended info after the RNDIS response message. 
hyperv: Report actual status in receive completion packet hyperv: Remove extra allocated space for recv_pkt_list elements hyperv: Fix page buffer handling in rndis_filter_send_request() hyperv: Fix the missing return value in rndis_filter_set_packet_filter() hyperv: Fix the max_xfer_size in RNDIS initialization vxlan: put UDP socket in correct namespace vxlan: Depend on CONFIG_INET sfc: Fix the reported priorities of different filter types sfc: Remove EFX_FILTER_FLAG_RX_OVERRIDE_IP sfc: Fix loopback self-test with separate_tx_channels=1 sfc: Fix MCDI structure field lookup sfc: Add parentheses around use of bitfield macro arguments sfc: Fix null function pointer in efx_sriov_channel_type vxlan: virtual extensible lan igmp: export symbol ip_mc_leave_group netlink: add attributes to fdb interface tg3: unconditionally select HWMON support when tg3 is enabled. Revert "net: ti cpsw ethernet: allow reading phy interface mode from DT" gre: fix sparse warning ...
Diffstat (limited to 'net/ipv6/netfilter')
-rw-r--r--net/ipv6/netfilter/Kconfig37
-rw-r--r--net/ipv6/netfilter/Makefile6
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c135
-rw-r--r--net/ipv6/netfilter/ip6t_NPT.c165
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c4
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c4
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c321
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c4
-rw-r--r--net/ipv6/netfilter/ip6table_security.c5
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c137
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c218
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c288
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c90
13 files changed, 1239 insertions, 175 deletions
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 10135342799e..c72532a60d88 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -181,9 +181,44 @@ config IP6_NF_SECURITY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
-
+
If unsure, say N.
+config NF_NAT_IPV6
+ tristate "IPv6 NAT"
+ depends on NF_CONNTRACK_IPV6
+ depends on NETFILTER_ADVANCED
+ select NF_NAT
+ help
+ The IPv6 NAT option allows masquerading, port forwarding and other
+ forms of full Network Address Port Translation. It is controlled by
+ the `nat' table in ip6tables, see the man page for ip6tables(8).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+if NF_NAT_IPV6
+
+config IP6_NF_TARGET_MASQUERADE
+ tristate "MASQUERADE target support"
+ help
+ Masquerading is a special case of NAT: all outgoing connections are
+ changed to seem to come from a particular interface's address, and
+ if the interface goes down, those connections are lost. This is
+ only useful for dialup accounts with dynamic IP address (ie. your IP
+ address will be different on next dialup).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_TARGET_NPT
+ tristate "NPT (Network Prefix translation) target support"
+ help
+ This option adds the `SNPT' and `DNPT' target, which perform
+ stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+endif # NF_NAT_IPV6
+
endif # IP6_NF_IPTABLES
endmenu
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 534d3f216f7b..2d11fcc2cf3c 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
+obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
# objects for l3 independent conntrack
nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
@@ -15,6 +16,9 @@ nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
# l3 independent conntrack
obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
+nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
+obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
+
# defrag
nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
@@ -30,4 +34,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o
obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
# targets
+obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
+obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
new file mode 100644
index 000000000000..60e9053bab05
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv4 MASQUERADE target. Development of IPv6
+ * NAT funded by Astaro.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+
+/*
+ * MASQUERADE target: source-NAT the connection to an address chosen for
+ * the outgoing interface.
+ *
+ * Asks ipv6_dev_get_saddr() for a source address on par->out towards the
+ * packet's destination, records the outgoing ifindex in the conntrack's
+ * NAT extension (device_cmp() compares against it later), and requests a
+ * source manipulation whose address range is exactly that one address.
+ * The per-protocol range and any additional flags come from the rule's
+ * targinfo.
+ *
+ * Returns NF_DROP when no source address can be obtained, otherwise the
+ * verdict of nf_nat_setup_info().
+ */
+static unsigned int
+masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_range *range = par->targinfo;
+ enum ip_conntrack_info ctinfo;
+ struct in6_addr src;
+ struct nf_conn *ct;
+ struct nf_nat_range newrange;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY));
+
+ if (ipv6_dev_get_saddr(dev_net(par->out), par->out,
+ &ipv6_hdr(skb)->daddr, 0, &src) < 0)
+ return NF_DROP;
+
+ /* Remember which interface this connection was masqueraded on. */
+ nfct_nat(ct)->masq_index = par->out->ifindex;
+
+ /* Single-address range: min == max == the selected source address. */
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.in6 = src;
+ newrange.max_addr.in6 = src;
+ newrange.min_proto = range->min_proto;
+ newrange.max_proto = range->max_proto;
+
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+
+/*
+ * Rule validation: a MASQUERADE rule must not set NF_NAT_RANGE_MAP_IPS,
+ * because the target fills in the address range itself.
+ * Returns 0 if the rule is acceptable, -EINVAL otherwise.
+ */
+static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_range *info = par->targinfo;
+
+	return (info->flags & NF_NAT_RANGE_MAP_IPS) ? -EINVAL : 0;
+}
+
+/*
+ * Selector passed to nf_ct_iterate_cleanup(): returns non-zero for IPv6
+ * conntracks whose NAT extension recorded the interface index carried in
+ * @ifindex (an int passed through the void pointer), zero otherwise.
+ */
+static int device_cmp(struct nf_conn *ct, void *ifindex)
+{
+	const struct nf_conn_nat *nat = nfct_nat(ct);
+
+	if (nat == NULL || nf_ct_l3num(ct) != NFPROTO_IPV6)
+		return 0;
+
+	return nat->masq_index == (int)(long)ifindex;
+}
+
+/*
+ * Netdevice notifier callback.
+ *
+ * On NETDEV_DOWN, runs nf_ct_iterate_cleanup() over the device's network
+ * namespace with device_cmp() as the selector and the device's ifindex as
+ * its argument (presumably removing every conntrack that was masqueraded
+ * on this interface).  All other events are ignored.
+ * Always returns NOTIFY_DONE.
+ */
+static int masq_device_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ const struct net_device *dev = ptr;
+ struct net *net = dev_net(dev);
+
+ if (event == NETDEV_DOWN)
+ nf_ct_iterate_cleanup(net, device_cmp,
+ (void *)(long)dev->ifindex);
+
+ return NOTIFY_DONE;
+}
+
+/* Notifier block that routes netdevice events to masq_device_event(). */
+static struct notifier_block masq_dev_notifier = {
+ .notifier_call = masq_device_event,
+};
+
+/*
+ * inet6addr notifier callback: @ptr is the struct inet6_ifaddr the event
+ * refers to.  The event code is forwarded unchanged to
+ * masq_device_event() together with the device that owns the address.
+ */
+static int masq_inet_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *ifa = ptr;
+
+ return masq_device_event(this, event, ifa->idev->dev);
+}
+
+/* Notifier block that routes IPv6 address events to masq_inet_event(). */
+static struct notifier_block masq_inet_notifier = {
+ .notifier_call = masq_inet_event,
+};
+
+/*
+ * Xtables registration for the IPv6 "MASQUERADE" target: restricted to
+ * the "nat" table and to the POST_ROUTING hook; the rule payload is a
+ * struct nf_nat_range.
+ */
+static struct xt_target masquerade_tg6_reg __read_mostly = {
+ .name = "MASQUERADE",
+ .family = NFPROTO_IPV6,
+ .checkentry = masquerade_tg6_checkentry,
+ .target = masquerade_tg6,
+ .targetsize = sizeof(struct nf_nat_range),
+ .table = "nat",
+ .hooks = 1 << NF_INET_POST_ROUTING,
+ .me = THIS_MODULE,
+};
+
+/*
+ * Module init: register the MASQUERADE target, then the netdevice and
+ * inet6addr notifiers.
+ *
+ * Every registration is checked.  On failure, whatever was registered so
+ * far is unwound in reverse order and the error is returned, so the
+ * module never loads with the target active but a notifier missing.
+ */
+static int __init masquerade_tg6_init(void)
+{
+	int err;
+
+	err = xt_register_target(&masquerade_tg6_reg);
+	if (err)
+		goto err_out;
+
+	err = register_netdevice_notifier(&masq_dev_notifier);
+	if (err)
+		goto err_target;
+
+	err = register_inet6addr_notifier(&masq_inet_notifier);
+	if (err)
+		goto err_netdev;
+
+	return 0;
+
+err_netdev:
+	unregister_netdevice_notifier(&masq_dev_notifier);
+err_target:
+	xt_unregister_target(&masquerade_tg6_reg);
+err_out:
+	return err;
+}
+/*
+ * Module exit: unregister both notifiers and then the target, i.e. the
+ * reverse of the order used by masquerade_tg6_init().
+ */
+static void __exit masquerade_tg6_exit(void)
+{
+ unregister_inet6addr_notifier(&masq_inet_notifier);
+ unregister_netdevice_notifier(&masq_dev_notifier);
+ xt_unregister_target(&masquerade_tg6_reg);
+}
+
+module_init(masquerade_tg6_init);
+module_exit(masquerade_tg6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: automatic address SNAT");
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
new file mode 100644
index 000000000000..e9486915eff6
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2011, 2012 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6t_NPT.h>
+#include <linux/netfilter/x_tables.h>
+
+/* One's-complement negation of a 16-bit checksum word (0xffff - a). */
+static __sum16 csum16_complement(__sum16 a)
+{
+	u16 word = (__force u16)a;
+
+	return (__force __sum16)(u16)~word;
+}
+
+/*
+ * One's-complement addition of two 16-bit checksum words.
+ *
+ * The words are added modulo 2^16 and the carry out of bit 15 is folded
+ * back into bit 0 (end-around carry).  A carry happened exactly when the
+ * truncated sum is smaller than an operand, so the test has to be made
+ * against the sum.  Comparing the operands with each other (a < b) adds
+ * a spurious 1 for inputs such as 1 + 2 and misses the real carry for
+ * inputs such as 0xffff + 1.
+ */
+static __sum16 csum16_add(__sum16 a, __sum16 b)
+{
+	u16 sum;
+
+	sum = (__force u16)a + (__force u16)b;
+	sum += sum < (__force u16)b;	/* end-around carry */
+	return (__force __sum16)sum;
+}
+
+/* One's-complement subtraction: a - b, computed as a + complement(b). */
+static __sum16 csum16_sub(__sum16 a, __sum16 b)
+{
+	__sum16 neg_b = csum16_complement(b);
+
+	return csum16_add(a, neg_b);
+}
+
+/*
+ * Rule validation and setup for SNPT/DNPT.
+ *
+ * Rejects rules whose source or destination prefix length exceeds 64
+ * bits.  Otherwise sums the eight 16-bit words of each prefix with
+ * one's-complement arithmetic and stores the difference (source sum
+ * minus destination sum) in npt->adjustment, which ip6t_npt_map_pfx()
+ * later applies to one word of each translated address.
+ *
+ * Note that this writes into the rule's targinfo.
+ * Returns 0 on success, -EINVAL for an over-long prefix.
+ */
+static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
+{
+ struct ip6t_npt_tginfo *npt = par->targinfo;
+ __sum16 src_sum = 0, dst_sum = 0;
+ unsigned int i;
+
+ if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) {
+ src_sum = csum16_add(src_sum,
+ (__force __sum16)npt->src_pfx.in6.s6_addr16[i]);
+ dst_sum = csum16_add(dst_sum,
+ (__force __sum16)npt->dst_pfx.in6.s6_addr16[i]);
+ }
+
+ npt->adjustment = csum16_sub(src_sum, dst_sum);
+ return 0;
+}
+
+/*
+ * Replace the prefix of @addr with npt->dst_pfx and fold npt->adjustment
+ * into one 16-bit word of the address.
+ *
+ * The prefix length used is the longer of the rule's source and
+ * destination prefix lengths.  The prefix is rewritten one 32-bit word at
+ * a time: bits inside the prefix are cleared and replaced by the
+ * corresponding bits of the destination prefix, bits outside the prefix
+ * are left untouched.
+ *
+ * The adjustment goes into word 3 for prefixes of at most 48 bits;
+ * for longer prefixes it goes into the first of words 4..7 that is not
+ * CSUM_MANGLED_0.  A result of CSUM_MANGLED_0 is stored as 0.
+ *
+ * Returns false when the prefix is longer than 48 bits and all of words
+ * 4..7 are CSUM_MANGLED_0, so no word is available for the adjustment;
+ * true otherwise.  @addr may already have had its prefix rewritten when
+ * false is returned.
+ */
+static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
+			     struct in6_addr *addr)
+{
+	unsigned int pfx_len;
+	unsigned int i, idx;
+	__be32 mask;
+	__sum16 sum;
+
+	pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len);
+	for (i = 0; i < pfx_len; i += 32) {
+		/*
+		 * mask covers the bits of this word that lie OUTSIDE the
+		 * prefix and therefore must survive.  The prefix occupies
+		 * the most significant (pfx_len - i) bits in network
+		 * order, so the kept bits are the low 32 - (pfx_len - i)
+		 * bits of the host-order value.
+		 */
+		if (pfx_len - i >= 32)
+			mask = 0;
+		else
+			mask = htonl((1U << (32 - (pfx_len - i))) - 1);
+
+		idx = i / 32;
+		addr->s6_addr32[idx] &= mask;
+		/* Only take prefix bits from dst_pfx, never host bits. */
+		addr->s6_addr32[idx] |= ~mask &
+					npt->dst_pfx.in6.s6_addr32[idx];
+	}
+
+	if (pfx_len <= 48)
+		idx = 3;
+	else {
+		for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) {
+			if ((__force __sum16)addr->s6_addr16[idx] !=
+			    CSUM_MANGLED_0)
+				break;
+		}
+		if (idx == ARRAY_SIZE(addr->s6_addr16))
+			return false;
+	}
+
+	sum = csum16_add((__force __sum16)addr->s6_addr16[idx],
+			 npt->adjustment);
+	if (sum == CSUM_MANGLED_0)
+		sum = 0;
+	*(__force __sum16 *)&addr->s6_addr16[idx] = sum;
+
+	return true;
+}
+
+/*
+ * SNPT target: translate the prefix of the packet's source address in
+ * place.  If ip6t_npt_map_pfx() reports failure, an ICMPv6 parameter
+ * problem pointing at the saddr header field is sent and the packet is
+ * dropped; otherwise rule traversal continues (XT_CONTINUE).
+ */
+static unsigned int
+ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct ip6t_npt_tginfo *npt = par->targinfo;
+
+ if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
+ offsetof(struct ipv6hdr, saddr));
+ return NF_DROP;
+ }
+ return XT_CONTINUE;
+}
+
+/*
+ * DNPT target: translate the prefix of the packet's destination address
+ * in place.  If ip6t_npt_map_pfx() reports failure, an ICMPv6 parameter
+ * problem pointing at the daddr header field is sent and the packet is
+ * dropped; otherwise rule traversal continues (XT_CONTINUE).
+ */
+static unsigned int
+ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct ip6t_npt_tginfo *npt = par->targinfo;
+
+ if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
+ offsetof(struct ipv6hdr, daddr));
+ return NF_DROP;
+ }
+ return XT_CONTINUE;
+}
+
+/*
+ * Xtables registrations for the two NPT targets.  Both share the same
+ * rule payload (struct ip6t_npt_tginfo) and checkentry; SNPT is allowed
+ * in the LOCAL_IN and POST_ROUTING hooks, DNPT in PRE_ROUTING and
+ * LOCAL_OUT.  No .table restriction is set here.
+ */
+static struct xt_target ip6t_npt_target_reg[] __read_mostly = {
+ {
+ .name = "SNPT",
+ .target = ip6t_snpt_tg,
+ .targetsize = sizeof(struct ip6t_npt_tginfo),
+ .checkentry = ip6t_npt_checkentry,
+ .family = NFPROTO_IPV6,
+ .hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_POST_ROUTING),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "DNPT",
+ .target = ip6t_dnpt_tg,
+ .targetsize = sizeof(struct ip6t_npt_tginfo),
+ .checkentry = ip6t_npt_checkentry,
+ .family = NFPROTO_IPV6,
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT),
+ .me = THIS_MODULE,
+ },
+};
+
+/* Module init: register the SNPT and DNPT targets; returns the result. */
+static int __init ip6t_npt_init(void)
+{
+ return xt_register_targets(ip6t_npt_target_reg,
+ ARRAY_SIZE(ip6t_npt_target_reg));
+}
+
+/* Module exit: unregister the SNPT and DNPT targets. */
+static void __exit ip6t_npt_exit(void)
+{
+ xt_unregister_targets(ip6t_npt_target_reg,
+ ARRAY_SIZE(ip6t_npt_target_reg));
+}
+
+module_init(ip6t_npt_init);
+module_exit(ip6t_npt_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS("ip6t_SNPT");
+MODULE_ALIAS("ip6t_DNPT");
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 325e59a0224f..beb5777d2043 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -61,9 +61,7 @@ static int __net_init ip6table_filter_net_init(struct net *net)
net->ipv6.ip6table_filter =
ip6t_register_table(net, &packet_filter, repl);
kfree(repl);
- if (IS_ERR(net->ipv6.ip6table_filter))
- return PTR_ERR(net->ipv6.ip6table_filter);
- return 0;
+ return PTR_RET(net->ipv6.ip6table_filter);
}
static void __net_exit ip6table_filter_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 4d782405f125..7431121b87de 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -97,9 +97,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net)
net->ipv6.ip6table_mangle =
ip6t_register_table(net, &packet_mangler, repl);
kfree(repl);
- if (IS_ERR(net->ipv6.ip6table_mangle))
- return PTR_ERR(net->ipv6.ip6table_mangle);
- return 0;
+ return PTR_RET(net->ipv6.ip6table_mangle);
}
static void __net_exit ip6table_mangle_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
new file mode 100644
index 000000000000..e418bd6350a4
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT
+ * funded by Astaro.
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+
+/*
+ * Description of the IPv6 "nat" table: valid in the PRE_ROUTING,
+ * POST_ROUTING, LOCAL_OUT and LOCAL_IN hooks.
+ */
+static const struct xt_table nf_nat_ipv6_table = {
+ .name = "nat",
+ .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV6,
+};
+
+/*
+ * Set up a NAT binding that requests no mapping at all (range.flags == 0)
+ * for the manipulation type belonging to @hooknum, and return the
+ * verdict of nf_nat_setup_info().
+ *
+ * NOTE(review): only range.flags is initialised; presumably
+ * nf_nat_setup_info() does not read the address/proto fields when no
+ * flag is set -- confirm.
+ */
+static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+{
+ /* Force range to this IP; let proto decide mapping for
+ * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+ */
+ struct nf_nat_range range;
+
+ range.flags = 0;
+ pr_debug("Allocating NULL binding for %p (%pI6)\n", ct,
+ HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 :
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6);
+
+ return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
+}
+
+/*
+ * Run the packet through this namespace's ip6tables nat table.
+ *
+ * If the table verdict is NF_ACCEPT but no rule initialised a binding
+ * for this hook's manipulation type, a null binding is allocated and
+ * its verdict is returned instead.  Any other table verdict is returned
+ * unchanged.
+ */
+static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+ unsigned int ret;
+
+ ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat);
+ if (ret == NF_ACCEPT) {
+ if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
+ ret = alloc_null_binding(ct, hooknum);
+ }
+ return ret;
+}
+
+/*
+ * Core IPv6 NAT hook, shared by all four hook entry points.
+ *
+ * Packets without a conntrack, with the untracked conntrack, or whose
+ * conntrack cannot get a NAT extension are accepted untouched.  For
+ * RELATED packets carrying ICMPv6 the translation is delegated to
+ * nf_nat_icmpv6_reply_translation().  For NEW connections (and RELATED
+ * ones that are not ICMPv6) lacking a binding for this hook's
+ * manipulation type, the nat table is consulted through
+ * nf_nat_rule_find(); a non-ACCEPT verdict from it is returned as is.
+ * In all remaining cases the packet is mangled by nf_nat_packet() and
+ * its verdict returned.
+ */
+static unsigned int
+nf_nat_ipv6_fn(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_nat *nat;
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+ __be16 frag_off;
+ int hdrlen;
+ u8 nexthdr;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ /* Can't track? It's not due to stress, or conntrack would
+ * have dropped it. Hence it's the user's responsibility to
+ * packet filter it out, or implement conntrack/NAT for that
+ * protocol. 8) --RR
+ */
+ if (!ct)
+ return NF_ACCEPT;
+
+ /* Don't try to NAT if this packet is not conntracked */
+ if (nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ nat = nfct_nat(ct);
+ if (!nat) {
+ /* NAT module was loaded late. */
+ if (nf_ct_is_confirmed(ct))
+ return NF_ACCEPT;
+ nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+ if (nat == NULL) {
+ pr_debug("failed to add NAT extension\n");
+ return NF_ACCEPT;
+ }
+ }
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+ /* Skip extension headers to find the upper-layer protocol. */
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
+
+ if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+ if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+ hooknum, hdrlen))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ case IP_CT_NEW:
+ /* Seen it before? This can happen for loopback, retrans,
+ * or local packets.
+ */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ unsigned int ret;
+
+ ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
+ if (ret != NF_ACCEPT)
+ return ret;
+ } else
+ pr_debug("Already setup manip %s for ct %p\n",
+ maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
+ ct);
+ break;
+
+ default:
+ /* ESTABLISHED */
+ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
+ ctinfo == IP_CT_ESTABLISHED_REPLY);
+ }
+
+ return nf_nat_packet(ct, ctinfo, hooknum, skb);
+}
+
+/*
+ * PRE_ROUTING hook: run the core NAT function and, unless the packet was
+ * dropped or stolen, release the skb's cached dst when ipv6_addr_cmp()
+ * reports that the destination address differs from the one saved
+ * before NAT (presumably so the packet gets routed again for its new
+ * destination).
+ */
+static unsigned int
+nf_nat_ipv6_in(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int ret;
+ struct in6_addr daddr = ipv6_hdr(skb)->daddr;
+
+ ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
+ skb_dst_drop(skb);
+
+ return ret;
+}
+
+/*
+ * POST_ROUTING hook: packets shorter than an IPv6 header are accepted
+ * untouched, everything else goes through the core NAT function.
+ *
+ * With CONFIG_XFRM, packets that were not dropped or stolen and have not
+ * already been transformed by xfrm are checked against their conntrack:
+ * the source side of the tuple in the packet's direction is compared
+ * with the destination side of the opposite direction (address and
+ * u.all), and nf_xfrm_me_harder() is called when that test indicates
+ * the source was rewritten.  A negative result from it turns the verdict
+ * into NF_DROP.
+ */
+static unsigned int
+nf_nat_ipv6_out(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+#ifdef CONFIG_XFRM
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+#endif
+ unsigned int ret;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct ipv6hdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3) ||
+ (ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all))
+ if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
+ ret = NF_DROP;
+ }
+#endif
+ return ret;
+}
+
+/*
+ * LOCAL_OUT hook: packets shorter than an IPv6 header are accepted
+ * untouched, everything else goes through the core NAT function.
+ *
+ * Afterwards, for packets that were not dropped or stolen and have a
+ * conntrack, the destination side of the tuple in the packet's direction
+ * is compared with the source side of the opposite direction.  When the
+ * address test indicates the destination was rewritten, the packet is
+ * re-routed with ip6_route_me_harder().  Otherwise, with CONFIG_XFRM
+ * and no xfrm transform applied yet, a difference in u.all triggers
+ * nf_xfrm_me_harder().  A non-zero result from either call turns the
+ * verdict into NF_DROP.
+ */
+static unsigned int
+nf_nat_ipv6_local_fn(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct ipv6hdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
+ &ct->tuplehash[!dir].tuple.src.u3)) {
+ if (ip6_route_me_harder(skb))
+ ret = NF_DROP;
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all)
+ if (nf_xfrm_me_harder(skb, AF_INET6))
+ ret = NF_DROP;
+#endif
+ }
+ return ret;
+}
+
+/*
+ * Netfilter hook registrations for IPv6 NAT: destination NAT runs at
+ * NF_IP6_PRI_NAT_DST in PRE_ROUTING and LOCAL_OUT, source NAT at
+ * NF_IP6_PRI_NAT_SRC in POST_ROUTING and LOCAL_IN.
+ */
+static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv6_in,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP6_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv6_out,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_NAT_SRC,
+ },
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv6_local_fn,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP6_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv6_fn,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_NAT_SRC,
+ },
+};
+
+/*
+ * Per-namespace init: allocate the initial ruleset for the nat table,
+ * register the table and store the result in net->ipv6.ip6table_nat.
+ * The initial ruleset is freed again once registration has been
+ * attempted.
+ *
+ * Returns 0 on success, -ENOMEM if the initial ruleset cannot be
+ * allocated, or the error encoded in the pointer returned by
+ * ip6t_register_table().
+ */
+static int __net_init ip6table_nat_net_init(struct net *net)
+{
+	struct ip6t_replace *initial;
+
+	initial = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
+	if (!initial)
+		return -ENOMEM;
+
+	net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table,
+						     initial);
+	kfree(initial);
+
+	return IS_ERR(net->ipv6.ip6table_nat) ?
+	       PTR_ERR(net->ipv6.ip6table_nat) : 0;
+}
+
+/* Per-namespace exit: unregister this namespace's nat table. */
+static void __net_exit ip6table_nat_net_exit(struct net *net)
+{
+ ip6t_unregister_table(net, net->ipv6.ip6table_nat);
+}
+
+/* Per-network-namespace init/exit callbacks for the nat table. */
+static struct pernet_operations ip6table_nat_net_ops = {
+ .init = ip6table_nat_net_init,
+ .exit = ip6table_nat_net_exit,
+};
+
+/*
+ * Module init: register the per-namespace table operations first, then
+ * the netfilter hooks.  If hook registration fails, the pernet
+ * registration is undone before the error is returned.
+ */
+static int __init ip6table_nat_init(void)
+{
+ int err;
+
+ err = register_pernet_subsys(&ip6table_nat_net_ops);
+ if (err < 0)
+ goto err1;
+
+ err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+ if (err < 0)
+ goto err2;
+ return 0;
+
+err2:
+ unregister_pernet_subsys(&ip6table_nat_net_ops);
+err1:
+ return err;
+}
+
+/*
+ * Module exit: remove the hooks, then the per-namespace table
+ * operations (reverse of ip6table_nat_init()).
+ */
+static void __exit ip6table_nat_exit(void)
+{
+ nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+ unregister_pernet_subsys(&ip6table_nat_net_ops);
+}
+
+module_init(ip6table_nat_init);
+module_exit(ip6table_nat_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 5b9926a011bd..60d1bddff7a0 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -40,9 +40,7 @@ static int __net_init ip6table_raw_net_init(struct net *net)
net->ipv6.ip6table_raw =
ip6t_register_table(net, &packet_raw, repl);
kfree(repl);
- if (IS_ERR(net->ipv6.ip6table_raw))
- return PTR_ERR(net->ipv6.ip6table_raw);
- return 0;
+ return PTR_RET(net->ipv6.ip6table_raw);
}
static void __net_exit ip6table_raw_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 91aa2b4d83c9..db155351339c 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -58,10 +58,7 @@ static int __net_init ip6table_security_net_init(struct net *net)
net->ipv6.ip6table_security =
ip6t_register_table(net, &security_table, repl);
kfree(repl);
- if (IS_ERR(net->ipv6.ip6table_security))
- return PTR_ERR(net->ipv6.ip6table_security);
-
- return 0;
+ return PTR_RET(net->ipv6.ip6table_security);
}
static void __net_exit ip6table_security_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 4794f96cf2e0..8860d23e61cf 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -28,6 +28,7 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_log.h>
@@ -64,82 +65,31 @@ static int ipv6_print_tuple(struct seq_file *s,
tuple->src.u3.ip6, tuple->dst.u3.ip6);
}
-/*
- * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c
- *
- * This function parses (probably truncated) exthdr set "hdr"
- * of length "len". "nexthdrp" initially points to some place,
- * where type of the first header can be found.
- *
- * It skips all well-known exthdrs, and returns pointer to the start
- * of unparsable area i.e. the first header with unknown type.
- * if success, *nexthdr is updated by type/protocol of this header.
- *
- * NOTES: - it may return pointer pointing beyond end of packet,
- * if the last recognized header is truncated in the middle.
- * - if packet is truncated, so that all parsed headers are skipped,
- * it returns -1.
- * - if packet is fragmented, return pointer of the fragment header.
- * - ESP is unparsable for now and considered like
- * normal payload protocol.
- * - Note also special handling of AUTH header. Thanks to IPsec wizards.
- */
-
-static int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start,
- u8 *nexthdrp, int len)
-{
- u8 nexthdr = *nexthdrp;
-
- while (ipv6_ext_hdr(nexthdr)) {
- struct ipv6_opt_hdr hdr;
- int hdrlen;
-
- if (len < (int)sizeof(struct ipv6_opt_hdr))
- return -1;
- if (nexthdr == NEXTHDR_NONE)
- break;
- if (nexthdr == NEXTHDR_FRAGMENT)
- break;
- if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
- BUG();
- if (nexthdr == NEXTHDR_AUTH)
- hdrlen = (hdr.hdrlen+2)<<2;
- else
- hdrlen = ipv6_optlen(&hdr);
-
- nexthdr = hdr.nexthdr;
- len -= hdrlen;
- start += hdrlen;
- }
-
- *nexthdrp = nexthdr;
- return start;
-}
-
static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
- unsigned char pnum;
+ __be16 frag_off;
int protoff;
+ u8 nexthdr;
if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
- &pnum, sizeof(pnum)) != 0) {
+ &nexthdr, sizeof(nexthdr)) != 0) {
pr_debug("ip6_conntrack_core: can't get nexthdr\n");
return -NF_ACCEPT;
}
- protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff);
+ protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
/*
* (protoff == skb->len) mean that the packet doesn't have no data
* except of IPv6 & ext headers. but it's tracked anyway. - YK
*/
- if ((protoff < 0) || (protoff > skb->len)) {
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
return -NF_ACCEPT;
}
*dataoff = protoff;
- *protonum = pnum;
+ *protonum = nexthdr;
return NF_ACCEPT;
}
@@ -153,10 +103,10 @@ static unsigned int ipv6_helper(unsigned int hooknum,
const struct nf_conn_help *help;
const struct nf_conntrack_helper *helper;
enum ip_conntrack_info ctinfo;
- unsigned int ret, protoff;
- unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
- unsigned char pnum = ipv6_hdr(skb)->nexthdr;
-
+ unsigned int ret;
+ __be16 frag_off;
+ int protoff;
+ u8 nexthdr;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
@@ -171,9 +121,10 @@ static unsigned int ipv6_helper(unsigned int hooknum,
if (!helper)
return NF_ACCEPT;
- protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum,
- skb->len - extoff);
- if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) {
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+ &frag_off);
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("proto header not found\n");
return NF_ACCEPT;
}
@@ -192,6 +143,36 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+ int protoff;
+ __be16 frag_off;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ goto out;
+
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
+ &frag_off);
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+ pr_debug("proto header not found\n");
+ goto out;
+ }
+
+ /* adjust seqs for loopback traffic only in outgoing direction */
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+ !nf_is_loopback_packet(skb)) {
+ typeof(nf_nat_seq_adjust_hook) seq_adjust;
+
+ seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
+ if (!seq_adjust ||
+ !seq_adjust(skb, ct, ctinfo, protoff)) {
+ NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+ return NF_DROP;
+ }
+ }
+out:
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(skb);
}
@@ -199,9 +180,14 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
static unsigned int __ipv6_conntrack_in(struct net *net,
unsigned int hooknum,
struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct sk_buff *reasm = skb->nfct_reasm;
+ const struct nf_conn_help *help;
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
/* This packet is fragmented and has reassembled packet. */
if (reasm) {
@@ -213,6 +199,25 @@ static unsigned int __ipv6_conntrack_in(struct net *net,
if (ret != NF_ACCEPT)
return ret;
}
+
+ /* Conntrack helpers need the entire reassembled packet in the
+ * POST_ROUTING hook. In case of unconfirmed connections NAT
+ * might reassign a helper, so the entire packet is also
+ * required.
+ */
+ ct = nf_ct_get(reasm, &ctinfo);
+ if (ct != NULL && !nf_ct_is_untracked(ct)) {
+ help = nfct_help(ct);
+ if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
+ nf_conntrack_get_reasm(skb);
+ NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
+ (struct net_device *)in,
+ (struct net_device *)out,
+ okfn, NF_IP6_PRI_CONNTRACK + 1);
+ return NF_DROP_ERR(-ECANCELED);
+ }
+ }
+
nf_conntrack_get(reasm->nfct);
skb->nfct = reasm->nfct;
skb->nfctinfo = reasm->nfctinfo;
@@ -228,7 +233,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn);
+ return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn);
}
static unsigned int ipv6_conntrack_local(unsigned int hooknum,
@@ -242,7 +247,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
return NF_ACCEPT;
}
- return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn);
+ return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn);
}
static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c9c78c2e666b..18bd9bbbd1c6 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -57,41 +57,27 @@ struct nf_ct_frag6_skb_cb
#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
-struct nf_ct_frag6_queue
-{
- struct inet_frag_queue q;
-
- __be32 id; /* fragment id */
- u32 user;
- struct in6_addr saddr;
- struct in6_addr daddr;
-
- unsigned int csum;
- __u16 nhoffset;
-};
-
static struct inet_frags nf_frags;
-static struct netns_frags nf_init_frags;
#ifdef CONFIG_SYSCTL
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_timeout",
- .data = &nf_init_frags.timeout,
+ .data = &init_net.nf_frag.frags.timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "nf_conntrack_frag6_low_thresh",
- .data = &nf_init_frags.low_thresh,
+ .data = &init_net.nf_frag.frags.low_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "nf_conntrack_frag6_high_thresh",
- .data = &nf_init_frags.high_thresh,
+ .data = &init_net.nf_frag.frags.high_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
@@ -99,68 +85,86 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{ }
};
-static struct ctl_table_header *nf_ct_frag6_sysctl_header;
-#endif
-
-static unsigned int nf_hashfn(struct inet_frag_queue *q)
+/*
+ * Register the nf_conntrack_frag6_* sysctls for @net under net/netfilter.
+ * init_net uses nf_ct_frag6_sysctl_table directly; any other namespace gets
+ * a kmemdup()ed copy whose .data pointers are redirected to that namespace's
+ * own net->nf_frag.frags. Returns 0 on success, -ENOMEM on failure.
+ */
+static int __net_init nf_ct_frag6_sysctl_register(struct net *net)
{
- const struct nf_ct_frag6_queue *nq;
+ struct ctl_table *table;
+ struct ctl_table_header *hdr;
+
+ table = nf_ct_frag6_sysctl_table;
+ if (!net_eq(net, &init_net)) {
+ table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table),
+ GFP_KERNEL);
+ if (table == NULL)
+ goto err_alloc;
+
+ /* Keep the entry order of nf_ct_frag6_sysctl_table:
+ * [0] timeout, [1] low_thresh, [2] high_thresh.
+ */
+ table[0].data = &net->nf_frag.frags.timeout;
+ table[1].data = &net->nf_frag.frags.low_thresh;
+ table[2].data = &net->nf_frag.frags.high_thresh;
+ }
- nq = container_of(q, struct nf_ct_frag6_queue, q);
- return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd);
+ hdr = register_net_sysctl(net, "net/netfilter", table);
+ if (hdr == NULL)
+ goto err_reg;
+
+ net->nf_frag.sysctl.frags_hdr = hdr;
+ return 0;
+
+err_reg:
+ /* Only the duplicated table is heap-allocated; never free the static one. */
+ if (!net_eq(net, &init_net))
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
}
-static void nf_skb_free(struct sk_buff *skb)
+static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
{
- if (NFCT_FRAG6_CB(skb)->orig)
- kfree_skb(NFCT_FRAG6_CB(skb)->orig);
-}
+ struct ctl_table *table;
-/* Destruction primitives. */
+ table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr);
+ if (!net_eq(net, &init_net))
+ kfree(table);
+}
-static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
+#else
+static int __net_init nf_ct_frag6_sysctl_register(struct net *net)
{
- inet_frag_put(&fq->q, &nf_frags);
+ return 0;
}
+static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
+{
+}
+#endif
-/* Kill fq entry. It is not destroyed immediately,
- * because caller (and someone more) holds reference count.
- */
-static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
+static unsigned int nf_hashfn(struct inet_frag_queue *q)
{
- inet_frag_kill(&fq->q, &nf_frags);
+ const struct frag_queue *nq;
+
+ nq = container_of(q, struct frag_queue, q);
+ return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd);
}
-static void nf_ct_frag6_evictor(void)
+static void nf_skb_free(struct sk_buff *skb)
{
- local_bh_disable();
- inet_frag_evictor(&nf_init_frags, &nf_frags);
- local_bh_enable();
+ if (NFCT_FRAG6_CB(skb)->orig)
+ kfree_skb(NFCT_FRAG6_CB(skb)->orig);
}
static void nf_ct_frag6_expire(unsigned long data)
{
- struct nf_ct_frag6_queue *fq;
-
- fq = container_of((struct inet_frag_queue *)data,
- struct nf_ct_frag6_queue, q);
-
- spin_lock(&fq->q.lock);
+ struct frag_queue *fq;
+ struct net *net;
- if (fq->q.last_in & INET_FRAG_COMPLETE)
- goto out;
+ fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ net = container_of(fq->q.net, struct net, nf_frag.frags);
- fq_kill(fq);
-
-out:
- spin_unlock(&fq->q.lock);
- fq_put(fq);
+ ip6_expire_frag_queue(net, fq, &nf_frags);
}
/* Creation primitives. */
-
-static __inline__ struct nf_ct_frag6_queue *
-fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst)
+static inline struct frag_queue *fq_find(struct net *net, __be32 id,
+ u32 user, struct in6_addr *src,
+ struct in6_addr *dst)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -174,22 +178,23 @@ fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst)
read_lock_bh(&nf_frags.lock);
hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
- q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
+ q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
local_bh_enable();
if (q == NULL)
goto oom;
- return container_of(q, struct nf_ct_frag6_queue, q);
+ return container_of(q, struct frag_queue, q);
oom:
return NULL;
}
-static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
+static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
const struct frag_hdr *fhdr, int nhoff)
{
struct sk_buff *prev, *next;
+ unsigned int payload_len;
int offset, end;
if (fq->q.last_in & INET_FRAG_COMPLETE) {
@@ -197,8 +202,10 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
goto err;
}
+ payload_len = ntohs(ipv6_hdr(skb)->payload_len);
+
offset = ntohs(fhdr->frag_off) & ~0x7;
- end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+ end = offset + (payload_len -
((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
@@ -307,7 +314,9 @@ found:
skb->dev = NULL;
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
- atomic_add(skb->truesize, &nf_init_frags.mem);
+ if (payload_len > fq->q.max_size)
+ fq->q.max_size = payload_len;
+ atomic_add(skb->truesize, &fq->q.net->mem);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -317,12 +326,12 @@ found:
fq->q.last_in |= INET_FRAG_FIRST_IN;
}
write_lock(&nf_frags.lock);
- list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list);
+ list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
write_unlock(&nf_frags.lock);
return 0;
discard_fq:
- fq_kill(fq);
+ inet_frag_kill(&fq->q, &nf_frags);
err:
return -1;
}
@@ -337,12 +346,12 @@ err:
* the last and the first frames arrived and all the bits are here.
*/
static struct sk_buff *
-nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
+nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
{
struct sk_buff *fp, *op, *head = fq->q.fragments;
int payload_len;
- fq_kill(fq);
+ inet_frag_kill(&fq->q, &nf_frags);
WARN_ON(head == NULL);
WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
@@ -386,7 +395,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
clone->ip_summed = head->ip_summed;
NFCT_FRAG6_CB(clone)->orig = NULL;
- atomic_add(clone->truesize, &nf_init_frags.mem);
+ atomic_add(clone->truesize, &fq->q.net->mem);
}
/* We have to remove fragment header from datagram and to relocate
@@ -410,12 +419,14 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
}
- atomic_sub(head->truesize, &nf_init_frags.mem);
+ atomic_sub(head->truesize, &fq->q.net->mem);
+ head->local_df = 1;
head->next = NULL;
head->dev = dev;
head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
+ IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -520,8 +531,10 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
{
struct sk_buff *clone;
struct net_device *dev = skb->dev;
+ struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev)
+ : dev_net(skb->dev);
struct frag_hdr *fhdr;
- struct nf_ct_frag6_queue *fq;
+ struct frag_queue *fq;
struct ipv6hdr *hdr;
int fhoff, nhoff;
u8 prevhdr;
@@ -553,10 +566,11 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(clone);
fhdr = (struct frag_hdr *)skb_transport_header(clone);
- if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh)
- nf_ct_frag6_evictor();
+ local_bh_disable();
+ inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
+ local_bh_enable();
- fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr);
+ fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr);
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
goto ret_orig;
@@ -567,7 +581,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
spin_unlock_bh(&fq->q.lock);
pr_debug("Can't insert skb to queue\n");
- fq_put(fq);
+ inet_frag_put(&fq->q, &nf_frags);
goto ret_orig;
}
@@ -579,7 +593,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
}
spin_unlock_bh(&fq->q.lock);
- fq_put(fq);
+ inet_frag_put(&fq->q, &nf_frags);
return ret_skb;
ret_orig:
@@ -592,6 +606,7 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
int (*okfn)(struct sk_buff *))
{
struct sk_buff *s, *s2;
+ unsigned int ret = 0;
for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
nf_conntrack_put_reasm(s->nfct_reasm);
@@ -601,49 +616,62 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
s2 = s->next;
s->next = NULL;
- NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn,
- NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
+ if (ret != -ECANCELED)
+ ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s,
+ in, out, okfn,
+ NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
+ else
+ kfree_skb(s);
+
s = s2;
}
nf_conntrack_put_reasm(skb);
}
+/*
+ * Per-namespace setup: seed the fragment limits of @net with the IPv6
+ * defaults, initialise its netns_frags state and register its sysctls.
+ * Returns whatever nf_ct_frag6_sysctl_register() returns.
+ */
+static int nf_ct_net_init(struct net *net)
+{
+	struct netns_frags *nf = &net->nf_frag.frags;
+
+	nf->high_thresh = IPV6_FRAG_HIGH_THRESH;
+	nf->low_thresh = IPV6_FRAG_LOW_THRESH;
+	nf->timeout = IPV6_FRAG_TIMEOUT;
+	inet_frags_init_net(nf);
+
+	return nf_ct_frag6_sysctl_register(net);
+}
+
+/* Per-namespace teardown: drop this namespace's sysctl registration, then
+ * release its fragment state via inet_frags_exit_net(). */
+static void nf_ct_net_exit(struct net *net)
+{
+ nf_ct_frags6_sysctl_unregister(net);
+ inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+}
+
+/* Per-namespace init/exit callbacks; registered from nf_ct_frag6_init()
+ * with register_pernet_subsys(). */
+static struct pernet_operations nf_ct_net_ops = {
+ .init = nf_ct_net_init,
+ .exit = nf_ct_net_exit,
+};
+
int nf_ct_frag6_init(void)
{
+ int ret = 0;
+
nf_frags.hashfn = nf_hashfn;
nf_frags.constructor = ip6_frag_init;
nf_frags.destructor = NULL;
nf_frags.skb_free = nf_skb_free;
- nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
+ nf_frags.qsize = sizeof(struct frag_queue);
nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
nf_frags.secret_interval = 10 * 60 * HZ;
- nf_init_frags.timeout = IPV6_FRAG_TIMEOUT;
- nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
- nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH;
- inet_frags_init_net(&nf_init_frags);
inet_frags_init(&nf_frags);
-#ifdef CONFIG_SYSCTL
- nf_ct_frag6_sysctl_header = register_net_sysctl(&init_net, "net/netfilter",
- nf_ct_frag6_sysctl_table);
- if (!nf_ct_frag6_sysctl_header) {
+ ret = register_pernet_subsys(&nf_ct_net_ops);
+ if (ret)
inet_frags_fini(&nf_frags);
- return -ENOMEM;
- }
-#endif
- return 0;
+ return ret;
}
void nf_ct_frag6_cleanup(void)
{
-#ifdef CONFIG_SYSCTL
- unregister_net_sysctl_table(nf_ct_frag6_sysctl_header);
- nf_ct_frag6_sysctl_header = NULL;
-#endif
+ unregister_pernet_subsys(&nf_ct_net_ops);
inet_frags_fini(&nf_frags);
-
- nf_init_frags.low_thresh = 0;
- nf_ct_frag6_evictor();
}
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
new file mode 100644
index 000000000000..abfe75a2e316
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of IPv6 NAT funded by Astaro.
+ */
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/secure_seq.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_route.h>
+#include <net/ipv6.h>
+
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
+
+#ifdef CONFIG_XFRM
+/*
+ * Copy NAT-relevant addressing from the conntrack tuple of direction @dir
+ * into the IPv6 flow key of @fl.  If @statusbit is set in ct->status the
+ * destination address (and port, for port-based protocols) is taken from
+ * the tuple; the same is then done for the source side with the opposite
+ * NAT status bit.
+ */
+static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
+				       const struct nf_conn *ct,
+				       enum ip_conntrack_dir dir,
+				       unsigned long statusbit,
+				       struct flowi *fl)
+{
+	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
+	struct flowi6 *key = &fl->u.ip6;
+	bool has_ports;
+
+	/* Only these protocols get their port copied into the flow key. */
+	switch (tuple->dst.protonum) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_UDPLITE:
+	case IPPROTO_DCCP:
+	case IPPROTO_SCTP:
+		has_ports = true;
+		break;
+	default:
+		has_ports = false;
+		break;
+	}
+
+	if (ct->status & statusbit) {
+		key->daddr = tuple->dst.u3.in6;
+		if (has_ports)
+			key->fl6_dport = tuple->dst.u.all;
+	}
+
+	/* Flip SRC_NAT <-> DST_NAT to handle the other side. */
+	statusbit ^= IPS_NAT_MASK;
+
+	if (ct->status & statusbit) {
+		key->saddr = tuple->src.u3.in6;
+		if (has_ports)
+			key->fl6_sport = tuple->src.u.all;
+	}
+}
+#endif
+
+/*
+ * Return true when the tuple's source address lies within
+ * [range->min_addr, range->max_addr] according to ipv6_addr_cmp().
+ */
+static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
+				 const struct nf_nat_range *range)
+{
+	const struct in6_addr *addr = &t->src.u3.in6;
+
+	if (ipv6_addr_cmp(addr, &range->min_addr.in6) < 0)
+		return false;
+
+	return ipv6_addr_cmp(addr, &range->max_addr.in6) <= 0;
+}
+
+/*
+ * Thin wrapper: feed the tuple's source/destination addresses and @dport
+ * to secure_ipv6_port_ephemeral() and return its result.
+ */
+static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t,
+				   __be16 dport)
+{
+	const __be32 *saddr = t->src.u3.ip6;
+	const __be32 *daddr = t->dst.u3.ip6;
+
+	return secure_ipv6_port_ephemeral(saddr, daddr, dport);
+}
+
+/*
+ * Apply a NAT manipulation to the IPv6 packet whose header starts at
+ * @iphdroff in @skb.
+ *
+ * The transport header is located with ipv6_skip_exthdr().  If it is found
+ * and the packet is the first fragment (fragment offset bits are zero), the
+ * layer-4 handler rewrites the transport header first.  Afterwards the
+ * source (NF_NAT_MANIP_SRC) or destination address of the IPv6 header is
+ * replaced with the one from @target.
+ *
+ * Returns false if the skb cannot be made writable or the layer-4 handler
+ * fails, true otherwise.
+ */
+static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
+				  unsigned int iphdroff,
+				  const struct nf_nat_l4proto *l4proto,
+				  const struct nf_conntrack_tuple *target,
+				  enum nf_nat_manip_type maniptype)
+{
+	struct ipv6hdr *ipv6h;
+	__be16 frag_off;
+	int hdroff;
+	u8 nexthdr;
+
+	if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
+		return false;
+
+	ipv6h = (void *)skb->data + iphdroff;
+	nexthdr = ipv6h->nexthdr;
+	hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
+				  &nexthdr, &frag_off);
+	/* No transport header found: still rewrite the address. */
+	if (hdroff < 0)
+		goto manip_addr;
+
+	if ((frag_off & htons(~0x7)) == 0 &&
+	    !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
+				target, maniptype))
+		return false;
+
+	/* The l4 handler makes the skb writable again on its own, which may
+	 * relocate the packet data: reload the header pointer before use.
+	 */
+	ipv6h = (void *)skb->data + iphdroff;
+
+manip_addr:
+	if (maniptype == NF_NAT_MANIP_SRC)
+		ipv6h->saddr = target->src.u3.in6;
+	else
+		ipv6h->daddr = target->dst.u3.in6;
+
+	return true;
+}
+
+/*
+ * Adjust the transport checksum at @check for the address that is about to
+ * be rewritten: the header's current source/destination address (chosen by
+ * @maniptype) is replaced by the corresponding address from @t.
+ */
+static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
+				    unsigned int iphdroff, __sum16 *check,
+				    const struct nf_conntrack_tuple *t,
+				    enum nf_nat_manip_type maniptype)
+{
+	const struct ipv6hdr *hdr = (struct ipv6hdr *)(skb->data + iphdroff);
+	bool is_src = maniptype == NF_NAT_MANIP_SRC;
+	const struct in6_addr *from = is_src ? &hdr->saddr : &hdr->daddr;
+	const struct in6_addr *to = is_src ? &t->src.u3.in6 : &t->dst.u3.in6;
+
+	inet_proto_csum_replace16(check, skb, from->s6_addr32,
+				  to->s6_addr32, 1);
+}
+
+/*
+ * Recompute the transport checksum at @check after the payload at @data
+ * changed length from @oldlen to @datalen.
+ *
+ * - CHECKSUM_PARTIAL skbs only need the length delta folded in.
+ * - Otherwise, for non-local routes on devices with NETIF_F_V6_CSUM (or
+ *   with no device set), the skb is switched to CHECKSUM_PARTIAL and only
+ *   the pseudo-header sum is stored.
+ * - Otherwise the full checksum is computed in software.
+ */
+static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
+				    u8 proto, void *data, __sum16 *check,
+				    int datalen, int oldlen)
+{
+	const struct ipv6hdr *hdr = ipv6_hdr(skb);
+	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+	__wsum payload_sum;
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		inet_proto_csum_replace2(check, skb,
+					 htons(oldlen), htons(datalen), 1);
+		return;
+	}
+
+	if (!(rt->rt6i_flags & RTF_LOCAL) &&
+	    (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) {
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		skb->csum_start = skb_headroom(skb) +
+				  skb_network_offset(skb) +
+				  (data - (void *)skb->data);
+		skb->csum_offset = (void *)check - data;
+		*check = ~csum_ipv6_magic(&hdr->saddr, &hdr->daddr,
+					  datalen, proto, 0);
+		return;
+	}
+
+	/* Zero the field first: it is part of the data being summed. */
+	*check = 0;
+	payload_sum = csum_partial(data, datalen, 0);
+	*check = csum_ipv6_magic(&hdr->saddr, &hdr->daddr,
+				 datalen, proto, payload_sum);
+	if (proto == IPPROTO_UDP && !*check)
+		*check = CSUM_MANGLED_0;
+}
+
+/*
+ * Fill the address part of @range from netlink attributes.  A present
+ * CTA_NAT_V6_MINIP sets min_addr and NF_NAT_RANGE_MAP_IPS; max_addr comes
+ * from CTA_NAT_V6_MAXIP when present and otherwise mirrors min_addr.
+ * Always returns 0.
+ */
+static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
+				       struct nf_nat_range *range)
+{
+	if (tb[CTA_NAT_V6_MINIP]) {
+		nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
+			   sizeof(struct in6_addr));
+		range->flags |= NF_NAT_RANGE_MAP_IPS;
+	}
+
+	if (!tb[CTA_NAT_V6_MAXIP]) {
+		range->max_addr = range->min_addr;
+		return 0;
+	}
+
+	nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
+		   sizeof(struct in6_addr));
+	return 0;
+}
+
+/* IPv6 layer-3 NAT operations, registered by nf_nat_l3proto_ipv6_init(). */
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
+	.l3proto		= NFPROTO_IPV6,
+	/* tuple selection */
+	.in_range		= nf_nat_ipv6_in_range,
+	.secure_port		= nf_nat_ipv6_secure_port,
+	/* packet rewriting and checksum fix-up */
+	.manip_pkt		= nf_nat_ipv6_manip_pkt,
+	.csum_update		= nf_nat_ipv6_csum_update,
+	.csum_recalc		= nf_nat_ipv6_csum_recalc,
+	/* configuration */
+	.nlattr_to_range	= nf_nat_ipv6_nlattr_to_range,
+#ifdef CONFIG_XFRM
+	.decode_session		= nf_nat_ipv6_decode_session,
+#endif
+};
+
+/*
+ * Translate an ICMPv6 error that belongs to the NATed connection @ct.
+ * @hdrlen is the offset of the ICMPv6 header in @skb.  The packet embedded
+ * in the error is rewritten first (with the inverse manipulation), then the
+ * ICMPv6 checksum is recomputed unless the skb is CHECKSUM_PARTIAL, and
+ * finally the outer IPv6 header is rewritten.
+ *
+ * Returns 1 on success or when no translation applies to this direction,
+ * 0 on failure.
+ */
+int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
+				    struct nf_conn *ct,
+				    enum ip_conntrack_info ctinfo,
+				    unsigned int hooknum,
+				    unsigned int hdrlen)
+{
+	struct {
+		struct icmp6hdr	icmp6;
+		struct ipv6hdr	ip6;
+	} *inside;
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+	const struct nf_nat_l4proto *l4proto;
+	struct nf_conntrack_tuple target;
+	unsigned long statusbit;
+
+	NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+
+	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)) ||
+	    nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
+		return 0;
+
+	inside = (void *)skb->data + hdrlen;
+	/* Redirects pass only once NAT setup is done and no NAT is applied. */
+	if (inside->icmp6.icmp6_type == NDISC_REDIRECT &&
+	    ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK ||
+	     (ct->status & IPS_NAT_MASK)))
+		return 0;
+
+	statusbit = (manip == NF_NAT_MANIP_SRC) ? IPS_SRC_NAT : IPS_DST_NAT;
+
+	/* Invert if this is reply direction */
+	if (dir == IP_CT_DIR_REPLY)
+		statusbit ^= IPS_NAT_MASK;
+
+	if (!(ct->status & statusbit))
+		return 1;
+
+	/* Inner packet: opposite direction tuple, opposite manipulation. */
+	l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr);
+	if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
+				   l4proto, &ct->tuplehash[!dir].tuple, !manip))
+		return 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		struct ipv6hdr *outer = ipv6_hdr(skb);
+		unsigned int icmp_len = skb->len - hdrlen;
+
+		/* Re-derive the pointer after the inner packet was rewritten. */
+		inside = (void *)skb->data + hdrlen;
+		inside->icmp6.icmp6_cksum = 0;
+		inside->icmp6.icmp6_cksum =
+			csum_ipv6_magic(&outer->saddr, &outer->daddr,
+					icmp_len, IPPROTO_ICMPV6,
+					csum_partial(&inside->icmp6,
+						     icmp_len, 0));
+	}
+
+	/* Outer header: inverted tuple, manipulation of this hook. */
+	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+	l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6);
+
+	return nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip) ? 1 : 0;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
+
+/*
+ * Module load: register the ICMPv6 l4proto, then the IPv6 l3proto.  If the
+ * second registration fails the first one is rolled back.  Returns the
+ * result of the last registration attempted.
+ */
+static int __init nf_nat_l3proto_ipv6_init(void)
+{
+	int rc;
+
+	rc = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
+	if (rc < 0)
+		return rc;
+
+	rc = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
+	if (rc < 0)
+		nf_nat_l4proto_unregister(NFPROTO_IPV6,
+					  &nf_nat_l4proto_icmpv6);
+
+	return rc;
+}
+
+/* Module unload: unregister the IPv6 l3proto, then the ICMPv6 l4proto
+ * (the reverse of the order used in nf_nat_l3proto_ipv6_init()). */
+static void __exit nf_nat_l3proto_ipv6_exit(void)
+{
+ nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
+ nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf-nat-" __stringify(AF_INET6));
+
+module_init(nf_nat_l3proto_ipv6_init);
+module_exit(nf_nat_l3proto_ipv6_exit);
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
new file mode 100644
index 000000000000..5d6da784305b
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6
+ * NAT funded by Astaro.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/icmpv6.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+/*
+ * Return true when the tuple's ICMP id, compared in host byte order, lies
+ * within [min->icmp.id, max->icmp.id].  @maniptype is not used.
+ */
+static bool
+icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
+		enum nf_nat_manip_type maniptype,
+		const union nf_conntrack_man_proto *min,
+		const union nf_conntrack_man_proto *max)
+{
+	u16 icmp_id = ntohs(tuple->src.u.icmp.id);
+
+	if (icmp_id < ntohs(min->icmp.id))
+		return false;
+
+	return icmp_id <= ntohs(max->icmp.id);
+}
+
+/*
+ * Pick an ICMP id for @tuple from @range.  Candidates are tried starting at
+ * a counter kept across calls; the search stops at the first id for which
+ * nf_nat_used_tuple() reports no clash, or after range_size candidates.
+ * Without NF_NAT_RANGE_PROTO_SPECIFIED the range size is 0xffff.
+ */
+static void
+icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
+		    struct nf_conntrack_tuple *tuple,
+		    const struct nf_nat_range *range,
+		    enum nf_nat_manip_type maniptype,
+		    const struct nf_conn *ct)
+{
+	static u16 next_id;
+	unsigned int range_size;
+	unsigned int tried = 0;
+
+	if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)
+		range_size = ntohs(range->max_proto.icmp.id) -
+			     ntohs(range->min_proto.icmp.id) + 1;
+	else
+		range_size = 0xffff;
+
+	for (;;) {
+		tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
+					     (next_id % range_size));
+		tried++;
+		/* Give up after a full pass; keep the last candidate. */
+		if (tried == range_size)
+			return;
+		if (!nf_nat_used_tuple(tuple, ct))
+			return;
+		++next_id;
+	}
+}
+
+/*
+ * Apply a NAT manipulation to the ICMPv6 header at @hdroff in @skb.
+ *
+ * The checksum is first adjusted for the IPv6 address change through
+ * l3proto->csum_update().  For echo request/reply messages the identifier
+ * is additionally replaced with the one from @tuple and the checksum is
+ * adjusted accordingly.
+ *
+ * Returns false if the header cannot be made writable, true otherwise.
+ */
+static bool
+icmpv6_manip_pkt(struct sk_buff *skb,
+		 const struct nf_nat_l3proto *l3proto,
+		 unsigned int iphdroff, unsigned int hdroff,
+		 const struct nf_conntrack_tuple *tuple,
+		 enum nf_nat_manip_type maniptype)
+{
+	struct icmp6hdr *hdr;
+
+	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+		return false;
+
+	hdr = (struct icmp6hdr *)(skb->data + hdroff);
+	l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
+			     tuple, maniptype);
+	/* ICMPV6_ECHO_REQUEST/REPLY are message types, not codes. */
+	if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
+	    hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
+		inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
+					 hdr->icmp6_identifier,
+					 tuple->src.u.icmp.id, 0);
+		hdr->icmp6_identifier = tuple->src.u.icmp.id;
+	}
+	return true;
+}
+
+/* ICMPv6 layer-4 NAT operations; the manipulated "port" is the ICMP id. */
+const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = {
+	.l4proto		= IPPROTO_ICMPV6,
+	/* tuple selection */
+	.in_range		= icmpv6_in_range,
+	.unique_tuple		= icmpv6_unique_tuple,
+	/* packet rewriting */
+	.manip_pkt		= icmpv6_manip_pkt,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
+#endif
+};