From 971f359ddcb2e7a0d577479c7561bda407febe1b Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 8 Nov 2005 09:37:56 -0800 Subject: [IPV6]: Put addr_diff() into common header for future use. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 54 ++---------------------------------------------------- 1 file changed, 2 insertions(+), 52 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 4fcc5a7acf6e..1bf6d9a769e6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -127,56 +127,6 @@ static __inline__ int addr_bit_set(void *token, int fn_bit) return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5]; } -/* - * find the first different bit between two addresses - * length of address must be a multiple of 32bits - */ - -static __inline__ int addr_diff(void *token1, void *token2, int addrlen) -{ - __u32 *a1 = token1; - __u32 *a2 = token2; - int i; - - addrlen >>= 2; - - for (i = 0; i < addrlen; i++) { - __u32 xb; - - xb = a1[i] ^ a2[i]; - - if (xb) { - int j = 31; - - xb = ntohl(xb); - - while ((xb & (1 << j)) == 0) - j--; - - return (i * 32 + 31 - j); - } - } - - /* - * we should *never* get to this point since that - * would mean the addrs are equal - * - * However, we do get to it 8) And exacly, when - * addresses are equal 8) - * - * ip route add 1111::/128 via ... - * ip route add 1111::/64 via ... - * and we are here. - * - * Ideally, this function should stop comparison - * at prefix length. It does not, but it is still OK, - * if returned value is greater than prefix length. - * --ANK (980803) - */ - - return addrlen<<5; -} - static __inline__ struct fib6_node * node_alloc(void) { struct fib6_node *fn; @@ -296,11 +246,11 @@ insert_above: /* find 1st bit in difference between the 2 addrs. - See comment in addr_diff: bit may be an invalid value, + See comment in __ipv6_addr_diff: bit may be an invalid value, but if it is >= plen, the value is ignored in any case. */ - bit = addr_diff(addr, &key->addr, addrlen); + bit = __ipv6_addr_diff(addr, &key->addr, addrlen); /* * (intermediate)[in] -- cgit v1.2.3 From b1cacb6820e0afc4aeeea67bcb5296a316862cad Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 8 Nov 2005 09:38:12 -0800 Subject: [IPV6]: Make ipv6_addr_type() more generic so that we can use it for source address selection. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 90 +++++++++++++++++++++++++++------------------------- net/ipv6/ipv6_syms.c | 2 +- 2 files changed, 48 insertions(+), 44 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2c5f57299d63..ff895da6395b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -35,6 +35,9 @@ * YOSHIFUJI Hideaki @USAGI : ARCnet support * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to * seq_file. + * YOSHIFUJI Hideaki @USAGI : improved source address + * selection; consider scope, + * status etc. */ #include @@ -193,46 +196,51 @@ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; #endif const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; -int ipv6_addr_type(const struct in6_addr *addr) +#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) + +static inline unsigned ipv6_addr_scope2type(unsigned scope) +{ + switch(scope) { + case IPV6_ADDR_SCOPE_NODELOCAL: + return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_NODELOCAL) | + IPV6_ADDR_LOOPBACK); + case IPV6_ADDR_SCOPE_LINKLOCAL: + return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL) | + IPV6_ADDR_LINKLOCAL); + case IPV6_ADDR_SCOPE_SITELOCAL: + return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL) | + IPV6_ADDR_SITELOCAL); + } + return IPV6_ADDR_SCOPE_TYPE(scope); +} + +int __ipv6_addr_type(const struct in6_addr *addr) { - int type; u32 st; st = addr->s6_addr32[0]; - if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) { - type = IPV6_ADDR_MULTICAST; - - switch((st & htonl(0x00FF0000))) { - case __constant_htonl(0x00010000): - type |= IPV6_ADDR_LOOPBACK; - break; - - case __constant_htonl(0x00020000): - type |= IPV6_ADDR_LINKLOCAL; - break; - - case __constant_htonl(0x00050000): - type |= IPV6_ADDR_SITELOCAL; - break; - }; - return type; - } - - type = IPV6_ADDR_UNICAST; - /* Consider all addresses with the first three bits different of - 000 and 111 as finished. + 000 and 111 as unicasts. */ if ((st & htonl(0xE0000000)) != htonl(0x00000000) && (st & htonl(0xE0000000)) != htonl(0xE0000000)) - return type; - - if ((st & htonl(0xFFC00000)) == htonl(0xFE800000)) - return (IPV6_ADDR_LINKLOCAL | type); + return (IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); + if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) { + /* multicast */ + /* addr-select 3.1 */ + return (IPV6_ADDR_MULTICAST | + ipv6_addr_scope2type(IPV6_ADDR_MC_SCOPE(addr))); + } + + if ((st & htonl(0xFFC00000)) == htonl(0xFE800000)) + return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL)); /* addr-select 3.1 */ if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000)) - return (IPV6_ADDR_SITELOCAL | type); + return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL)); /* addr-select 3.1 */ if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) { if (addr->s6_addr32[2] == 0) { @@ -240,24 +248,20 @@ int ipv6_addr_type(const struct in6_addr *addr) return IPV6_ADDR_ANY; if (addr->s6_addr32[3] == htonl(0x00000001)) - return (IPV6_ADDR_LOOPBACK | type); + return (IPV6_ADDR_LOOPBACK | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL)); /* addr-select 3.4 */ - return (IPV6_ADDR_COMPATv4 | type); + return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST | + IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */ } if (addr->s6_addr32[2] == htonl(0x0000ffff)) - return IPV6_ADDR_MAPPED; - } - - st &= htonl(0xFF000000); - if (st == 0) - return IPV6_ADDR_RESERVED; - st &= htonl(0xFE000000); - if (st == htonl(0x02000000)) - return IPV6_ADDR_RESERVED; /* for NSAP */ - if (st == htonl(0x04000000)) - return IPV6_ADDR_RESERVED; /* for IPX */ - return type; + return (IPV6_ADDR_MAPPED | + IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */ + } + + return (IPV6_ADDR_RESERVED | + IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.4 */ } static void addrconf_del_timer(struct inet6_ifaddr *ifp) diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index 37a4a99c9fe9..16482785bdfd 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c @@ -7,7 +7,7 @@ #include #include -EXPORT_SYMBOL(ipv6_addr_type); +EXPORT_SYMBOL(__ipv6_addr_type); EXPORT_SYMBOL(icmpv6_send); EXPORT_SYMBOL(icmpv6_statistics); EXPORT_SYMBOL(icmpv6_err_convert); -- cgit v1.2.3 From 072047e4de3800905e09d0f8ef0e1cc4e91a601e Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 8 Nov 2005 09:38:30 -0800 Subject: [IPV6]: RFC3484 compliant source address selection Choose more appropriate source address; e.g. - outgoing interface - non-deprecated - scope - matching label Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 344 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 240 insertions(+), 104 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ff895da6395b..a34d1504deb9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -809,138 +809,274 @@ out: #endif /* - * Choose an appropriate source address - * should do: - * i) get an address with an appropriate scope - * ii) see if there is a specific route for the destination and use - * an address of the attached interface - * iii) don't use deprecated addresses + * Choose an appropriate source address (RFC3484) */ -static int inline ipv6_saddr_pref(const struct inet6_ifaddr *ifp, u8 invpref) +struct ipv6_saddr_score { + int addr_type; + unsigned int attrs; + int matchlen; + unsigned int scope; + unsigned int rule; +}; + +#define IPV6_SADDR_SCORE_LOCAL 0x0001 +#define IPV6_SADDR_SCORE_PREFERRED 0x0004 +#define IPV6_SADDR_SCORE_HOA 0x0008 +#define IPV6_SADDR_SCORE_OIF 0x0010 +#define IPV6_SADDR_SCORE_LABEL 0x0020 +#define IPV6_SADDR_SCORE_PRIVACY 0x0040 + +static int inline ipv6_saddr_preferred(int type) { - int pref; - pref = ifp->flags&IFA_F_DEPRECATED ? 0 : 2; -#ifdef CONFIG_IPV6_PRIVACY - pref |= (ifp->flags^invpref)&IFA_F_TEMPORARY ? 0 : 1; -#endif - return pref; + if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4| + IPV6_ADDR_LOOPBACK|IPV6_ADDR_RESERVED)) + return 1; + return 0; } -#ifdef CONFIG_IPV6_PRIVACY -#define IPV6_GET_SADDR_MAXSCORE(score) ((score) == 3) -#else -#define IPV6_GET_SADDR_MAXSCORE(score) (score) -#endif +/* static matching label */ +static int inline ipv6_saddr_label(const struct in6_addr *addr, int type) +{ + /* + * prefix (longest match) label + * ----------------------------- + * ::1/128 0 + * ::/0 1 + * 2002::/16 2 + * ::/96 3 + * ::ffff:0:0/96 4 + */ + if (type & IPV6_ADDR_LOOPBACK) + return 0; + else if (type & IPV6_ADDR_COMPATv4) + return 3; + else if (type & IPV6_ADDR_MAPPED) + return 4; + else if (addr->s6_addr16[0] == htons(0x2002)) + return 2; + return 1; +} -int ipv6_dev_get_saddr(struct net_device *dev, +int ipv6_dev_get_saddr(struct net_device *daddr_dev, struct in6_addr *daddr, struct in6_addr *saddr) { - struct inet6_ifaddr *ifp = NULL; - struct inet6_ifaddr *match = NULL; - struct inet6_dev *idev; - int scope; - int err; - int hiscore = -1, score; + struct ipv6_saddr_score hiscore; + struct inet6_ifaddr *ifa_result = NULL; + int daddr_type = __ipv6_addr_type(daddr); + int daddr_scope = __ipv6_addr_src_scope(daddr_type); + u32 daddr_label = ipv6_saddr_label(daddr, daddr_type); + struct net_device *dev; - scope = ipv6_addr_scope(daddr); + memset(&hiscore, 0, sizeof(hiscore)); - /* - * known dev - * search dev and walk through dev addresses - */ + read_lock(&dev_base_lock); + read_lock(&addrconf_lock); - if (dev) { - if (dev->flags & IFF_LOOPBACK) - scope = IFA_HOST; + for (dev = dev_base; dev; dev=dev->next) { + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + + /* Rule 0: Candidate Source Address (section 4) + * - multicast and link-local destination address, + * the set of candidate source address MUST only + * include addresses assigned to interfaces + * belonging to the same link as the outgoing + * interface. + * (- For site-local destination addresses, the + * set of candidate source addresses MUST only + * include addresses assigned to interfaces + * belonging to the same site as the outgoing + * interface.) + */ + if ((daddr_type & IPV6_ADDR_MULTICAST || + daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) && + daddr_dev && dev != daddr_dev) + continue; - read_lock(&addrconf_lock); idev = __in6_dev_get(dev); - if (idev) { - read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == scope) { - if (ifp->flags&IFA_F_TENTATIVE) - continue; -#ifdef CONFIG_IPV6_PRIVACY - score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0); -#else - score = ipv6_saddr_pref(ifp, 0); -#endif - if (score <= hiscore) - continue; + if (!idev) + continue; - if (match) - in6_ifa_put(match); - match = ifp; - hiscore = score; - in6_ifa_hold(ifp); + read_lock_bh(&idev->lock); + for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) { + struct ipv6_saddr_score score; - if (IPV6_GET_SADDR_MAXSCORE(score)) { - read_unlock_bh(&idev->lock); - read_unlock(&addrconf_lock); - goto out; - } + score.addr_type = __ipv6_addr_type(&ifa->addr); + + /* Rule 0: Candidate Source Address (section 4) + * - In any case, anycast addresses, multicast + * addresses, and the unspecified address MUST + * NOT be included in a candidate set. + */ + if (unlikely(score.addr_type == IPV6_ADDR_ANY || + score.addr_type & IPV6_ADDR_MULTICAST)) { + LIMIT_NETDEBUG(KERN_DEBUG + "ADDRCONF: unspecified / multicast address" + "assigned as unicast address on %s", + dev->name); + continue; + } + + score.attrs = 0; + score.matchlen = 0; + score.scope = 0; + score.rule = 0; + + if (ifa_result == NULL) { + /* record it if the first available entry */ + goto record_it; + } + + /* Rule 1: Prefer same address */ + if (hiscore.rule < 1) { + if (ipv6_addr_equal(&ifa_result->addr, daddr)) + hiscore.attrs |= IPV6_SADDR_SCORE_LOCAL; + hiscore.rule++; + } + if (ipv6_addr_equal(&ifa->addr, daddr)) { + score.attrs |= IPV6_SADDR_SCORE_LOCAL; + if (!(hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)) { + score.rule = 1; + goto record_it; } + } else { + if (hiscore.attrs & IPV6_SADDR_SCORE_LOCAL) + continue; } - read_unlock_bh(&idev->lock); - } - read_unlock(&addrconf_lock); - } - if (scope == IFA_LINK) - goto out; + /* Rule 2: Prefer appropriate scope */ + if (hiscore.rule < 2) { + hiscore.scope = __ipv6_addr_src_scope(hiscore.addr_type); + hiscore.rule++; + } + score.scope = __ipv6_addr_src_scope(score.addr_type); + if (hiscore.scope < score.scope) { + if (hiscore.scope < daddr_scope) { + score.rule = 2; + goto record_it; + } else + continue; + } else if (score.scope < hiscore.scope) { + if (score.scope < daddr_scope) + continue; + else { + score.rule = 2; + goto record_it; + } + } - /* - * dev == NULL or search failed for specified dev - */ + /* Rule 3: Avoid deprecated address */ + if (hiscore.rule < 3) { + if (ipv6_saddr_preferred(hiscore.addr_type) || + !(ifa_result->flags & IFA_F_DEPRECATED)) + hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED; + hiscore.rule++; + } + if (ipv6_saddr_preferred(score.addr_type) || + !(ifa->flags & IFA_F_DEPRECATED)) { + score.attrs |= IPV6_SADDR_SCORE_PREFERRED; + if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) { + score.rule = 3; + goto record_it; + } + } else { + if (hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED) + continue; + } - read_lock(&dev_base_lock); - read_lock(&addrconf_lock); - for (dev = dev_base; dev; dev=dev->next) { - idev = __in6_dev_get(dev); - if (idev) { - read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == scope) { - if (ifp->flags&IFA_F_TENTATIVE) - continue; -#ifdef CONFIG_IPV6_PRIVACY - score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0); -#else - score = ipv6_saddr_pref(ifp, 0); -#endif - if (score <= hiscore) - continue; + /* Rule 4: Prefer home address -- not implemented yet */ - if (match) - in6_ifa_put(match); - match = ifp; - hiscore = score; - in6_ifa_hold(ifp); + /* Rule 5: Prefer outgoing interface */ + if (hiscore.rule < 5) { + if (daddr_dev == NULL || + daddr_dev == ifa_result->idev->dev) + hiscore.attrs |= IPV6_SADDR_SCORE_OIF; + hiscore.rule++; + } + if (daddr_dev == NULL || + daddr_dev == ifa->idev->dev) { + score.attrs |= IPV6_SADDR_SCORE_OIF; + if (!(hiscore.attrs & IPV6_SADDR_SCORE_OIF)) { + score.rule = 5; + goto record_it; + } + } else { + if (hiscore.attrs & IPV6_SADDR_SCORE_OIF) + continue; + } - if (IPV6_GET_SADDR_MAXSCORE(score)) { - read_unlock_bh(&idev->lock); - goto out_unlock_base; - } + /* Rule 6: Prefer matching label */ + if (hiscore.rule < 6) { + if (ipv6_saddr_label(&ifa_result->addr, hiscore.addr_type) == daddr_label) + hiscore.attrs |= IPV6_SADDR_SCORE_LABEL; + hiscore.rule++; + } + if (ipv6_saddr_label(&ifa->addr, score.addr_type) == daddr_label) { + score.attrs |= IPV6_SADDR_SCORE_LABEL; + if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) { + score.rule = 6; + goto record_it; } + } else { + if (hiscore.attrs & IPV6_SADDR_SCORE_LABEL) + continue; } - read_unlock_bh(&idev->lock); + + /* Rule 7: Prefer public address + * Note: prefer temprary address if use_tempaddr >= 2 + */ + if (hiscore.rule < 7) { + if ((!(ifa_result->flags & IFA_F_TEMPORARY)) ^ + (ifa_result->idev->cnf.use_tempaddr >= 2)) + hiscore.attrs |= IPV6_SADDR_SCORE_PRIVACY; + hiscore.rule++; + } + if ((!(ifa->flags & IFA_F_TEMPORARY)) ^ + (ifa->idev->cnf.use_tempaddr >= 2)) { + score.attrs |= IPV6_SADDR_SCORE_PRIVACY; + if (!(hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)) { + score.rule = 7; + goto record_it; + } + } else { + if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY) + continue; + } + + /* Rule 8: Use longest matching prefix */ + if (hiscore.rule < 8) + hiscore.matchlen = ipv6_addr_diff(&ifa_result->addr, daddr); + score.rule++; + score.matchlen = ipv6_addr_diff(&ifa->addr, daddr); + if (score.matchlen > hiscore.matchlen) { + score.rule = 8; + goto record_it; + } +#if 0 + else if (score.matchlen < hiscore.matchlen) + continue; +#endif + + /* Final Rule: choose first available one */ + continue; +record_it: + if (ifa_result) + in6_ifa_put(ifa_result); + in6_ifa_hold(ifa); + ifa_result = ifa; + hiscore = score; } + read_unlock_bh(&idev->lock); } - -out_unlock_base: read_unlock(&addrconf_lock); read_unlock(&dev_base_lock); -out: - err = -EADDRNOTAVAIL; - if (match) { - ipv6_addr_copy(saddr, &match->addr); - err = 0; - in6_ifa_put(match); - } - - return err; + if (!ifa_result) + return -EADDRNOTAVAIL; + + ipv6_addr_copy(saddr, &ifa_result->addr); + in6_ifa_put(ifa_result); + return 0; } -- cgit v1.2.3 From a51482bde22f99c63fbbb57d5d46cc666384e379 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Tue, 8 Nov 2005 09:41:34 -0800 Subject: [NET]: kfree cleanup From: Jesper Juhl This is the net/ part of the big kfree cleanup patch. Remove pointless checks for NULL prior to calling kfree() in net/. Signed-off-by: Jesper Juhl Cc: "David S. Miller" Cc: Arnaldo Carvalho de Melo Acked-by: Marcel Holtmann Acked-by: YOSHIFUJI Hideaki Signed-off-by: Andrew Morton --- net/ipv6/addrconf.c | 3 +-- net/ipv6/ip6_output.c | 15 +++++---------- net/ipv6/ip6_tunnel.c | 6 ++---- net/ipv6/ipcomp6.c | 3 +-- net/ipv6/ipv6_sockglue.c | 3 +-- 5 files changed, 10 insertions(+), 20 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a34d1504deb9..b7a5f51238b3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3090,8 +3090,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, nlmsg_failure: rtattr_failure: - if (array) - kfree(array); + kfree(array); skb_trim(skb, b - skb->data); return -1; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 614296a920c6..dbd9767b32e4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -587,8 +587,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb->next = NULL; } - if (tmp_hdr) - kfree(tmp_hdr); + kfree(tmp_hdr); if (err == 0) { IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); @@ -1186,10 +1185,8 @@ int ip6_push_pending_frames(struct sock *sk) out: inet->cork.flags &= ~IPCORK_OPT; - if (np->cork.opt) { - kfree(np->cork.opt); - np->cork.opt = NULL; - } + kfree(np->cork.opt); + np->cork.opt = NULL; if (np->cork.rt) { dst_release(&np->cork.rt->u.dst); np->cork.rt = NULL; @@ -1214,10 +1211,8 @@ void ip6_flush_pending_frames(struct sock *sk) inet->cork.flags &= ~IPCORK_OPT; - if (np->cork.opt) { - kfree(np->cork.opt); - np->cork.opt = NULL; - } + kfree(np->cork.opt); + np->cork.opt = NULL; if (np->cork.rt) { dst_release(&np->cork.rt->u.dst); np->cork.rt = NULL; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index cf94372d1af3..e6b0e3954c02 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -756,8 +756,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) } ip6_tnl_dst_store(t, dst); - if (opt) - kfree(opt); + kfree(opt); t->recursion--; return 0; @@ -766,8 +765,7 @@ tx_err_link_failure: dst_link_failure(skb); tx_err_dst_release: dst_release(dst); - if (opt) - kfree(opt); + kfree(opt); tx_err: stats->tx_errors++; stats->tx_dropped++; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 85bfbc69b2c3..55917fb17094 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -130,8 +130,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, s out_put_cpu: put_cpu(); out: - if (tmp_hdr) - kfree(tmp_hdr); + kfree(tmp_hdr); if (err) goto error_out; return nexthdr; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 8567873d0dd8..003fd99ff597 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -80,8 +80,7 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) if (ra->sk == sk) { if (sel>=0) { write_unlock_bh(&ip6_ra_lock); - if (new_ra) - kfree(new_ra); + kfree(new_ra); return -EADDRINUSE; } -- cgit v1.2.3 From 44fd0261d3509b0b4303fd9ba792058d230186ab Mon Sep 17 00:00:00 2001 From: Peter Chubb Date: Wed, 9 Nov 2005 13:05:47 -0800 Subject: [IPV6]: Fix fallout from CONFIG_IPV6_PRIVACY Trying to build today's 2.6.14+git snapshot gives undefined references to use_tempaddr Looks like an ifdef got left out. Signed-off-by: Peter Chubb Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b7a5f51238b3..ddcf7754eec2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1022,6 +1022,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, continue; } +#ifdef CONFIG_IPV6_PRIVACY /* Rule 7: Prefer public address * Note: prefer temprary address if use_tempaddr >= 2 */ @@ -1042,7 +1043,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY) continue; } - +#endif /* Rule 8: Use longest matching prefix */ if (hiscore.rule < 8) hiscore.matchlen = ipv6_addr_diff(&ifa_result->addr, daddr); -- cgit v1.2.3 From 9f0ede52a0ebfe1fe99ee5bfd99d17e6ac0c503d Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Wed, 9 Nov 2005 13:08:29 -0800 Subject: [IPV6]: ip6ip6_lock is not unlocked in error path. From: Ken-ichirou MATSUZAWA Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e6b0e3954c02..e315d0f80af1 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -525,6 +525,7 @@ ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) if ((t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) { if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + read_unlock(&ip6ip6_lock); kfree_skb(skb); return 0; } -- cgit v1.2.3 From 9fb9cbb1082d6b31fb45aa1a14432449a0df6cf1 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Wed, 9 Nov 2005 16:38:16 -0800 Subject: [NETFILTER]: Add nf_conntrack subsystem. The existing connection tracking subsystem in netfilter can only handle ipv4. There were basically two choices present to add connection tracking support for ipv6. We could either duplicate all of the ipv4 connection tracking code into an ipv6 counterpart, or (the choice taken by these patches) we could design a generic layer that could handle both ipv4 and ipv6 and thus requiring only one sub-protocol (TCP, UDP, etc.) connection tracking helper module to be written. In fact nf_conntrack is capable of working with any layer 3 protocol. The existing ipv4 specific conntrack code could also not deal with the pecularities of doing connection tracking on ipv6, which is also cured here. For example, these issues include: 1) ICMPv6 handling, which is used for neighbour discovery in ipv6 thus some messages such as these should not participate in connection tracking since effectively they are like ARP messages 2) fragmentation must be handled differently in ipv6, because the simplistic "defrag, connection track and NAT, refrag" (which the existing ipv4 connection tracking does) approach simply isn't feasible in ipv6 3) ipv6 extension header parsing must occur at the correct spots before and after connection tracking decisions, and there were no provisions for this in the existing connection tracking design 4) ipv6 has no need for stateful NAT The ipv4 specific conntrack layer is kept around, until all of the ipv4 specific conntrack helpers are ported over to nf_conntrack and it is feature complete. Once that occurs, the old conntrack stuff will get placed into the feature-removal-schedule and we will fully kill it off 6 months later. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Harald Welte Signed-off-by: Arnaldo Carvalho de Melo --- net/ipv6/ip6_input.c | 5 + net/ipv6/ip6_output.c | 6 + net/ipv6/netfilter/Kconfig | 14 + net/ipv6/netfilter/Makefile | 6 + net/ipv6/netfilter/ip6t_MARK.c | 6 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 556 ++++++++++++++++ net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 272 ++++++++ net/ipv6/netfilter/nf_conntrack_reasm.c | 885 +++++++++++++++++++++++++ net/ipv6/raw.c | 4 +- 9 files changed, 1750 insertions(+), 4 deletions(-) create mode 100644 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c create mode 100644 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c create mode 100644 net/ipv6/netfilter/nf_conntrack_reasm.c (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 6e3480426939..a6026d2787d2 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -176,6 +176,11 @@ resubmit: if (ipprot->flags & INET6_PROTO_FINAL) { struct ipv6hdr *hdr; + /* Free reference early: we don't need it any more, + and it may hold ip_conntrack module loaded + indefinitely. */ + nf_reset(skb); + skb_postpull_rcsum(skb, skb->nh.raw, skb->h.raw - skb->nh.raw); hdr = skb->nh.ipv6h; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index dbd9767b32e4..c1fa693511a1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -441,9 +441,15 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) #ifdef CONFIG_NETFILTER to->nfmark = from->nfmark; /* Connection association is same as pre-frag packet */ + nf_conntrack_put(to->nfct); to->nfct = from->nfct; nf_conntrack_get(to->nfct); to->nfctinfo = from->nfctinfo; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + nf_conntrack_put_reasm(to->nfct_reasm); + to->nfct_reasm = from->nfct_reasm; + nf_conntrack_get_reasm(to->nfct_reasm); +#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(to->nf_bridge); to->nf_bridge = from->nf_bridge; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index bb7ccfe33f23..971ba60bf6e9 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -278,5 +278,19 @@ config IP6_NF_RAW If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NF_CONNTRACK_IPV6 + tristate "IPv6 support for new connection tracking (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK + ---help--- + Connection tracking keeps a record of what packets have passed + through your machine, in order to figure out how they are related + into connections. + + This is IPv6 support on Layer 3 independent connection tracking. + Layer 3 independent connection tracking is experimental scheme + which generalize ip_conntrack to support other layer 3 protocols. + + To compile it as a module, choose M here. If unsure, say N. + endmenu diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2b2c370e8b1c..9ab5b2ca1f59 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -27,3 +27,9 @@ obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o + +# objects for l3 independent conntrack +nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o + +# l3 independent conntrack +obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c index 0c7584f92172..eab8fb864ee0 100644 --- a/net/ipv6/netfilter/ip6t_MARK.c +++ b/net/ipv6/netfilter/ip6t_MARK.c @@ -56,9 +56,9 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_target ip6t_mark_reg = { - .name = "MARK", - .target = target, +static struct ip6t_target ip6t_mark_reg = { + .name = "MARK", + .target = target, .checkentry = checkentry, .me = THIS_MODULE }; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c new file mode 100644 index 000000000000..e2c90b3a8074 --- /dev/null +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -0,0 +1,556 @@ +/* + * Copyright (C)2004 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Author: + * Yasuyuki Kozakai @USAGI + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - support Layer 3 protocol independent connection tracking. + * Based on the original ip_conntrack code which had the following + * copyright information: + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * 23 Mar 2004: Yasuyuki Kozakai @USAGI + * - add get_features() to support various size of conntrack + * structures. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); + +static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, + struct nf_conntrack_tuple *tuple) +{ + u_int32_t _addrs[8], *ap; + + ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr), + sizeof(_addrs), _addrs); + if (ap == NULL) + return 0; + + memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6)); + memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6)); + + return 1; +} + +static int ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6)); + memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6)); + + return 1; +} + +static int ipv6_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return seq_printf(s, "src=%x:%x:%x:%x:%x:%x:%x:%x dst=%x:%x:%x:%x:%x:%x:%x:%x ", + NIP6(*((struct in6_addr *)tuple->src.u3.ip6)), + NIP6(*((struct in6_addr *)tuple->dst.u3.ip6))); +} + +static int ipv6_print_conntrack(struct seq_file *s, + const struct nf_conn *conntrack) +{ + return 0; +} + +/* + * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c + * + * This function parses (probably truncated) exthdr set "hdr" + * of length "len". "nexthdrp" initially points to some place, + * where type of the first header can be found. + * + * It skips all well-known exthdrs, and returns pointer to the start + * of unparsable area i.e. the first header with unknown type. + * if success, *nexthdr is updated by type/protocol of this header. + * + * NOTES: - it may return pointer pointing beyond end of packet, + * if the last recognized header is truncated in the middle. + * - if packet is truncated, so that all parsed headers are skipped, + * it returns -1. + * - if packet is fragmented, return pointer of the fragment header. + * - ESP is unparsable for now and considered like + * normal payload protocol. + * - Note also special handling of AUTH header. Thanks to IPsec wizards. + */ + +int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, + int len) +{ + u8 nexthdr = *nexthdrp; + + while (ipv6_ext_hdr(nexthdr)) { + struct ipv6_opt_hdr hdr; + int hdrlen; + + if (len < (int)sizeof(struct ipv6_opt_hdr)) + return -1; + if (nexthdr == NEXTHDR_NONE) + break; + if (nexthdr == NEXTHDR_FRAGMENT) + break; + if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) + BUG(); + if (nexthdr == NEXTHDR_AUTH) + hdrlen = (hdr.hdrlen+2)<<2; + else + hdrlen = ipv6_optlen(&hdr); + + nexthdr = hdr.nexthdr; + len -= hdrlen; + start += hdrlen; + } + + *nexthdrp = nexthdr; + return start; +} + +static int +ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, + u_int8_t *protonum) +{ + unsigned int extoff; + unsigned char pnum; + int protoff; + + extoff = (u8*)((*pskb)->nh.ipv6h + 1) - (*pskb)->data; + pnum = (*pskb)->nh.ipv6h->nexthdr; + + protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, + (*pskb)->len - extoff); + + /* + * (protoff == (*pskb)->len) mean that the packet doesn't have no data + * except of IPv6 & ext headers. but it's tracked anyway. - YK + */ + if ((protoff < 0) || (protoff > (*pskb)->len)) { + DEBUGP("ip6_conntrack_core: can't find proto in pkt\n"); + NF_CT_STAT_INC(error); + NF_CT_STAT_INC(invalid); + return -NF_ACCEPT; + } + + *dataoff = protoff; + *protonum = pnum; + return NF_ACCEPT; +} + +static u_int32_t ipv6_get_features(const struct nf_conntrack_tuple *tuple) +{ + return NF_CT_F_BASIC; +} + +static unsigned int ipv6_confirm(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + + /* This is where we call the helper: as the packet goes out. */ + ct = nf_ct_get(*pskb, &ctinfo); + if (ct && ct->helper) { + unsigned int ret, protoff; + unsigned int extoff = (u8*)((*pskb)->nh.ipv6h + 1) + - (*pskb)->data; + unsigned char pnum = (*pskb)->nh.ipv6h->nexthdr; + + protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, + (*pskb)->len - extoff); + if (protoff < 0 || protoff > (*pskb)->len || + pnum == NEXTHDR_FRAGMENT) { + DEBUGP("proto header not found\n"); + return NF_ACCEPT; + } + + ret = ct->helper->help(pskb, protoff, ct, ctinfo); + if (ret != NF_ACCEPT) + return ret; + } + + /* We've seen it coming out the other side: confirm it */ + + return nf_conntrack_confirm(pskb); +} + +extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb); +extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, + struct net_device *in, + struct net_device *out, + int (*okfn)(struct sk_buff *)); +static unsigned int ipv6_defrag(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *reasm; + + /* Previously seen (loopback)? */ + if ((*pskb)->nfct) + return NF_ACCEPT; + + reasm = nf_ct_frag6_gather(*pskb); + + /* queued */ + if (reasm == NULL) + return NF_STOLEN; + + /* error occured or not fragmented */ + if (reasm == *pskb) + return NF_ACCEPT; + + nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, + (struct net_device *)out, okfn); + + return NF_STOLEN; +} + +static unsigned int ipv6_conntrack_in(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *reasm = (*pskb)->nfct_reasm; + + /* This packet is fragmented and has reassembled packet. */ + if (reasm) { + /* Reassembled packet isn't parsed yet ? */ + if (!reasm->nfct) { + unsigned int ret; + + ret = nf_conntrack_in(PF_INET6, hooknum, &reasm); + if (ret != NF_ACCEPT) + return ret; + } + nf_conntrack_get(reasm->nfct); + (*pskb)->nfct = reasm->nfct; + return NF_ACCEPT; + } + + return nf_conntrack_in(PF_INET6, hooknum, pskb); +} + +static unsigned int ipv6_conntrack_local(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct ipv6hdr)) { + if (net_ratelimit()) + printk("ipv6_conntrack_local: packet too short\n"); + return NF_ACCEPT; + } + return ipv6_conntrack_in(hooknum, pskb, in, out, okfn); +} + +/* Connection tracking may drop packets, but never alters them, so + make it the first hook. */ +static struct nf_hook_ops ipv6_conntrack_defrag_ops = { + .hook = ipv6_defrag, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_PRE_ROUTING, + .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, +}; + +static struct nf_hook_ops ipv6_conntrack_in_ops = { + .hook = ipv6_conntrack_in, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_PRE_ROUTING, + .priority = NF_IP6_PRI_CONNTRACK, +}; + +static struct nf_hook_ops ipv6_conntrack_local_out_ops = { + .hook = ipv6_conntrack_local, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_LOCAL_OUT, + .priority = NF_IP6_PRI_CONNTRACK, +}; + +static struct nf_hook_ops ipv6_conntrack_defrag_local_out_ops = { + .hook = ipv6_defrag, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_LOCAL_OUT, + .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, +}; + +/* Refragmenter; last chance. */ +static struct nf_hook_ops ipv6_conntrack_out_ops = { + .hook = ipv6_confirm, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_POST_ROUTING, + .priority = NF_IP6_PRI_LAST, +}; + +static struct nf_hook_ops ipv6_conntrack_local_in_ops = { + .hook = ipv6_confirm, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_LOCAL_IN, + .priority = NF_IP6_PRI_LAST-1, +}; + +#ifdef CONFIG_SYSCTL + +/* From nf_conntrack_proto_icmpv6.c */ +extern unsigned long nf_ct_icmpv6_timeout; + +/* From nf_conntrack_frag6.c */ +extern unsigned long nf_ct_frag6_timeout; +extern unsigned long nf_ct_frag6_low_thresh; +extern unsigned long nf_ct_frag6_high_thresh; + +static struct ctl_table_header *nf_ct_ipv6_sysctl_header; + +static ctl_table nf_ct_sysctl_table[] = { + { + .ctl_name = NET_NF_CONNTRACK_ICMPV6_TIMEOUT, + .procname = "nf_conntrack_icmpv6_timeout", + .data = &nf_ct_icmpv6_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_FRAG6_TIMEOUT, + .procname = "nf_conntrack_frag6_timeout", + .data = &nf_ct_frag6_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, + .procname = "nf_conntrack_frag6_low_thresh", + .data = &nf_ct_frag6_low_thresh, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, + .procname = "nf_conntrack_frag6_high_thresh", + .data = &nf_ct_frag6_high_thresh, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { .ctl_name = 0 } +}; + +static ctl_table nf_ct_netfilter_table[] = { + { + .ctl_name = NET_NETFILTER, + .procname = "netfilter", + .mode = 0555, + .child = nf_ct_sysctl_table, + }, + { .ctl_name = 0 } +}; + +static ctl_table nf_ct_net_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = nf_ct_netfilter_table, + }, + { .ctl_name = 0 } +}; +#endif + +struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { + .l3proto = PF_INET6, + .name = "ipv6", + .pkt_to_tuple = ipv6_pkt_to_tuple, + .invert_tuple = ipv6_invert_tuple, + .print_tuple = ipv6_print_tuple, + .print_conntrack = ipv6_print_conntrack, + .prepare = ipv6_prepare, + .get_features = ipv6_get_features, + .me = THIS_MODULE, +}; + +extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp6; +extern struct nf_conntrack_protocol nf_conntrack_protocol_udp6; +extern struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6; +extern int nf_ct_frag6_init(void); +extern void nf_ct_frag6_cleanup(void); +static int init_or_cleanup(int init) +{ + int ret = 0; + + if (!init) goto cleanup; + + ret = nf_ct_frag6_init(); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't initialize frag6.\n"); + goto cleanup_nothing; + } + ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp6); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register tcp.\n"); + goto cleanup_frag6; + } + + ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp6); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register udp.\n"); + goto cleanup_tcp; + } + + ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmpv6); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register icmpv6.\n"); + goto cleanup_udp; + } + + ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register ipv6\n"); + goto cleanup_icmpv6; + } + + ret = nf_register_hook(&ipv6_conntrack_defrag_ops); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register pre-routing defrag " + "hook.\n"); + goto cleanup_ipv6; + } + + ret = nf_register_hook(&ipv6_conntrack_defrag_local_out_ops); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register local_out defrag " + "hook.\n"); + goto cleanup_defragops; + } + + ret = nf_register_hook(&ipv6_conntrack_in_ops); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register pre-routing hook.\n"); + goto cleanup_defraglocalops; + } + + ret = nf_register_hook(&ipv6_conntrack_local_out_ops); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register local out hook.\n"); + goto cleanup_inops; + } + + ret = nf_register_hook(&ipv6_conntrack_out_ops); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register post-routing hook.\n"); + goto cleanup_inandlocalops; + } + + ret = nf_register_hook(&ipv6_conntrack_local_in_ops); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register local in hook.\n"); + goto cleanup_inoutandlocalops; + } + +#ifdef CONFIG_SYSCTL + nf_ct_ipv6_sysctl_header = register_sysctl_table(nf_ct_net_table, 0); + if (nf_ct_ipv6_sysctl_header == NULL) { + printk("nf_conntrack: can't register to sysctl.\n"); + ret = -ENOMEM; + goto cleanup_localinops; + } +#endif + return ret; + + cleanup: + synchronize_net(); +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(nf_ct_ipv6_sysctl_header); + cleanup_localinops: +#endif + nf_unregister_hook(&ipv6_conntrack_local_in_ops); + cleanup_inoutandlocalops: + nf_unregister_hook(&ipv6_conntrack_out_ops); + cleanup_inandlocalops: + nf_unregister_hook(&ipv6_conntrack_local_out_ops); + cleanup_inops: + nf_unregister_hook(&ipv6_conntrack_in_ops); + cleanup_defraglocalops: + nf_unregister_hook(&ipv6_conntrack_defrag_local_out_ops); + cleanup_defragops: + nf_unregister_hook(&ipv6_conntrack_defrag_ops); + cleanup_ipv6: + nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6); + cleanup_icmpv6: + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmpv6); + cleanup_udp: + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp6); + cleanup_tcp: + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp6); + cleanup_frag6: + nf_ct_frag6_cleanup(); + cleanup_nothing: + return ret; +} + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI "); + +static int __init init(void) +{ + need_nf_conntrack(); + return init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +module_init(init); +module_exit(fini); + +void need_ip6_conntrack(void) +{ +} + +EXPORT_SYMBOL(need_ip6_conntrack); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c new file mode 100644 index 000000000000..c0f1da5497a9 --- /dev/null +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -0,0 +1,272 @@ +/* + * Copyright (C)2003,2004 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Author: + * Yasuyuki Kozakai @USAGI + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - ICMPv6 tracking support. Derived from the original ip_conntrack code + * net/ipv4/netfilter/ip_conntrack_proto_icmp.c which had the following + * copyright information: + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long nf_ct_icmpv6_timeout = 30*HZ; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +static int icmpv6_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + struct icmp6hdr _hdr, *hp; + + hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (hp == NULL) + return 0; + tuple->dst.u.icmp.type = hp->icmp6_type; + tuple->src.u.icmp.id = hp->icmp6_identifier; + tuple->dst.u.icmp.code = hp->icmp6_code; + + return 1; +} + +static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + /* Add 1; spaces filled with 0. */ + static u_int8_t invmap[] = { + [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, + [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, + [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1, + [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1 + }; + + __u8 type = orig->dst.u.icmp.type - 128; + if (type >= sizeof(invmap) || !invmap[type]) + return 0; + + tuple->src.u.icmp.id = orig->src.u.icmp.id; + tuple->dst.u.icmp.type = invmap[type] - 1; + tuple->dst.u.icmp.code = orig->dst.u.icmp.code; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static int icmpv6_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return seq_printf(s, "type=%u code=%u id=%u ", + tuple->dst.u.icmp.type, + tuple->dst.u.icmp.code, + ntohs(tuple->src.u.icmp.id)); +} + +/* Print out the private part of the conntrack. */ +static int icmpv6_print_conntrack(struct seq_file *s, + const struct nf_conn *conntrack) +{ + return 0; +} + +/* Returns verdict for packet, or -1 for invalid. */ +static int icmpv6_packet(struct nf_conn *ct, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + int pf, + unsigned int hooknum) +{ + /* Try to delete connection immediately after all replies: + won't actually vanish as we still have skb, and del_timer + means this will only run once even if count hits zero twice + (theoretically possible with SMP) */ + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { + if (atomic_dec_and_test(&ct->proto.icmp.count) + && del_timer(&ct->timeout)) + ct->timeout.function((unsigned long)ct); + } else { + atomic_inc(&ct->proto.icmp.count); + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout); + } + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int icmpv6_new(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff) +{ + static u_int8_t valid_new[] = { + [ICMPV6_ECHO_REQUEST - 128] = 1, + [ICMPV6_NI_QUERY - 128] = 1 + }; + + if (conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128 >= sizeof(valid_new) + || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128]) { + /* Can't create a new ICMPv6 `conn' with this. */ + DEBUGP("icmp: can't create new conn with type %u\n", + conntrack->tuplehash[0].tuple.dst.u.icmp.type); + NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); + return 0; + } + atomic_set(&conntrack->proto.icmp.count, 0); + return 1; +} + +extern int +nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, int len); +extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; +static int +icmpv6_error_message(struct sk_buff *skb, + unsigned int icmp6off, + enum ip_conntrack_info *ctinfo, + unsigned int hooknum) +{ + struct nf_conntrack_tuple intuple, origtuple; + struct nf_conntrack_tuple_hash *h; + struct icmp6hdr _hdr, *hp; + unsigned int inip6off; + struct nf_conntrack_protocol *inproto; + u_int8_t inprotonum; + unsigned int inprotoff; + + NF_CT_ASSERT(skb->nfct == NULL); + + hp = skb_header_pointer(skb, icmp6off, sizeof(_hdr), &_hdr); + if (hp == NULL) { + DEBUGP("icmpv6_error: Can't get ICMPv6 hdr.\n"); + return -NF_ACCEPT; + } + + inip6off = icmp6off + sizeof(_hdr); + if (skb_copy_bits(skb, inip6off+offsetof(struct ipv6hdr, nexthdr), + &inprotonum, sizeof(inprotonum)) != 0) { + DEBUGP("icmpv6_error: Can't get nexthdr in inner IPv6 header.\n"); + return -NF_ACCEPT; + } + inprotoff = nf_ct_ipv6_skip_exthdr(skb, + inip6off + sizeof(struct ipv6hdr), + &inprotonum, + skb->len - inip6off + - sizeof(struct ipv6hdr)); + + if ((inprotoff < 0) || (inprotoff > skb->len) || + (inprotonum == NEXTHDR_FRAGMENT)) { + DEBUGP("icmpv6_error: Can't get protocol header in ICMPv6 payload.\n"); + return -NF_ACCEPT; + } + + inproto = nf_ct_find_proto(PF_INET6, inprotonum); + + /* Are they talking about one of our connections? */ + if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum, + &origtuple, &nf_conntrack_l3proto_ipv6, inproto)) { + DEBUGP("icmpv6_error: Can't get tuple\n"); + return -NF_ACCEPT; + } + + /* Ordinarily, we'd expect the inverted tupleproto, but it's + been preserved inside the ICMP. */ + if (!nf_ct_invert_tuple(&intuple, &origtuple, + &nf_conntrack_l3proto_ipv6, inproto)) { + DEBUGP("icmpv6_error: Can't invert tuple\n"); + return -NF_ACCEPT; + } + + *ctinfo = IP_CT_RELATED; + + h = nf_conntrack_find_get(&intuple, NULL); + if (!h) { + DEBUGP("icmpv6_error: no match\n"); + return -NF_ACCEPT; + } else { + if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) + *ctinfo += IP_CT_IS_REPLY; + } + + /* Update skb to refer to this connection */ + skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; + skb->nfctinfo = *ctinfo; + return -NF_ACCEPT; +} + +static int +icmpv6_error(struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) +{ + struct icmp6hdr _ih, *icmp6h; + + icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); + if (icmp6h == NULL) { + if (LOG_INVALID(IPPROTO_ICMPV6)) + nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + "nf_ct_icmpv6: short packet "); + return -NF_ACCEPT; + } + + if (hooknum != NF_IP6_PRE_ROUTING) + goto skipped; + + /* Ignore it if the checksum's bogus. */ + if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, + skb->len - dataoff, IPPROTO_ICMPV6, + skb_checksum(skb, dataoff, + skb->len - dataoff, 0))) { + nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + "nf_ct_icmpv6: ICMPv6 checksum failed\n"); + return -NF_ACCEPT; + } + +skipped: + + /* is not error message ? */ + if (icmp6h->icmp6_type >= 128) + return NF_ACCEPT; + + return icmpv6_error_message(skb, dataoff, ctinfo, hooknum); +} + +struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 = +{ + .l3proto = PF_INET6, + .proto = IPPROTO_ICMPV6, + .name = "icmpv6", + .pkt_to_tuple = icmpv6_pkt_to_tuple, + .invert_tuple = icmpv6_invert_tuple, + .print_tuple = icmpv6_print_tuple, + .print_conntrack = icmpv6_print_conntrack, + .packet = icmpv6_packet, + .new = icmpv6_new, + .error = icmpv6_error, +}; + +EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c new file mode 100644 index 000000000000..7640b9bb7694 --- /dev/null +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -0,0 +1,885 @@ +/* + * IPv6 fragment reassembly for connection tracking + * + * Copyright (C)2004 USAGI/WIDE Project + * + * Author: + * Yasuyuki Kozakai @USAGI + * + * Based on: net/ipv6/reassembly.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +#define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */ +#define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ +#define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT + +int nf_ct_frag6_high_thresh = 256*1024; +int nf_ct_frag6_low_thresh = 192*1024; +int nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT; + +struct nf_ct_frag6_skb_cb +{ + struct inet6_skb_parm h; + int offset; + struct sk_buff *orig; +}; + +#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb)) + +struct nf_ct_frag6_queue +{ + struct nf_ct_frag6_queue *next; + struct list_head lru_list; /* lru list member */ + + __u32 id; /* fragment id */ + struct in6_addr saddr; + struct in6_addr daddr; + + spinlock_t lock; + atomic_t refcnt; + struct timer_list timer; /* expire timer */ + struct sk_buff *fragments; + int len; + int meat; + struct timeval stamp; + unsigned int csum; + __u8 last_in; /* has first/last segment arrived? */ +#define COMPLETE 4 +#define FIRST_IN 2 +#define LAST_IN 1 + __u16 nhoffset; + struct nf_ct_frag6_queue **pprev; +}; + +/* Hash table. */ + +#define FRAG6Q_HASHSZ 64 + +static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ]; +static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED; +static u32 nf_ct_frag6_hash_rnd; +static LIST_HEAD(nf_ct_frag6_lru_list); +int nf_ct_frag6_nqueues = 0; + +static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq) +{ + if (fq->next) + fq->next->pprev = fq->pprev; + *fq->pprev = fq->next; + list_del(&fq->lru_list); + nf_ct_frag6_nqueues--; +} + +static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq) +{ + write_lock(&nf_ct_frag6_lock); + __fq_unlink(fq); + write_unlock(&nf_ct_frag6_lock); +} + +static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr, + struct in6_addr *daddr) +{ + u32 a, b, c; + + a = saddr->s6_addr32[0]; + b = saddr->s6_addr32[1]; + c = saddr->s6_addr32[2]; + + a += JHASH_GOLDEN_RATIO; + b += JHASH_GOLDEN_RATIO; + c += nf_ct_frag6_hash_rnd; + __jhash_mix(a, b, c); + + a += saddr->s6_addr32[3]; + b += daddr->s6_addr32[0]; + c += daddr->s6_addr32[1]; + __jhash_mix(a, b, c); + + a += daddr->s6_addr32[2]; + b += daddr->s6_addr32[3]; + c += id; + __jhash_mix(a, b, c); + + return c & (FRAG6Q_HASHSZ - 1); +} + +static struct timer_list nf_ct_frag6_secret_timer; +int nf_ct_frag6_secret_interval = 10 * 60 * HZ; + +static void nf_ct_frag6_secret_rebuild(unsigned long dummy) +{ + unsigned long now = jiffies; + int i; + + write_lock(&nf_ct_frag6_lock); + get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32)); + for (i = 0; i < FRAG6Q_HASHSZ; i++) { + struct nf_ct_frag6_queue *q; + + q = nf_ct_frag6_hash[i]; + while (q) { + struct nf_ct_frag6_queue *next = q->next; + unsigned int hval = ip6qhashfn(q->id, + &q->saddr, + &q->daddr); + + if (hval != i) { + /* Unlink. */ + if (q->next) + q->next->pprev = q->pprev; + *q->pprev = q->next; + + /* Relink to new hash chain. */ + if ((q->next = nf_ct_frag6_hash[hval]) != NULL) + q->next->pprev = &q->next; + nf_ct_frag6_hash[hval] = q; + q->pprev = &nf_ct_frag6_hash[hval]; + } + + q = next; + } + } + write_unlock(&nf_ct_frag6_lock); + + mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval); +} + +atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0); + +/* Memory Tracking Functions. */ +static inline void frag_kfree_skb(struct sk_buff *skb) +{ + atomic_sub(skb->truesize, &nf_ct_frag6_mem); + if (NFCT_FRAG6_CB(skb)->orig) + kfree_skb(NFCT_FRAG6_CB(skb)->orig); + + kfree_skb(skb); +} + +static inline void frag_free_queue(struct nf_ct_frag6_queue *fq) +{ + atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); + kfree(fq); +} + +static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) +{ + struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC); + + if (!fq) + return NULL; + atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); + return fq; +} + +/* Destruction primitives. */ + +/* Complete destruction of fq. */ +static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq) +{ + struct sk_buff *fp; + + BUG_TRAP(fq->last_in&COMPLETE); + BUG_TRAP(del_timer(&fq->timer) == 0); + + /* Release all fragment data. */ + fp = fq->fragments; + while (fp) { + struct sk_buff *xp = fp->next; + + frag_kfree_skb(fp); + fp = xp; + } + + frag_free_queue(fq); +} + +static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) +{ + if (atomic_dec_and_test(&fq->refcnt)) + nf_ct_frag6_destroy(fq); +} + +/* Kill fq entry. It is not destroyed immediately, + * because caller (and someone more) holds reference count. + */ +static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) +{ + if (del_timer(&fq->timer)) + atomic_dec(&fq->refcnt); + + if (!(fq->last_in & COMPLETE)) { + fq_unlink(fq); + atomic_dec(&fq->refcnt); + fq->last_in |= COMPLETE; + } +} + +static void nf_ct_frag6_evictor(void) +{ + struct nf_ct_frag6_queue *fq; + struct list_head *tmp; + + for (;;) { + if (atomic_read(&nf_ct_frag6_mem) <= nf_ct_frag6_low_thresh) + return; + read_lock(&nf_ct_frag6_lock); + if (list_empty(&nf_ct_frag6_lru_list)) { + read_unlock(&nf_ct_frag6_lock); + return; + } + tmp = nf_ct_frag6_lru_list.next; + fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list); + atomic_inc(&fq->refcnt); + read_unlock(&nf_ct_frag6_lock); + + spin_lock(&fq->lock); + if (!(fq->last_in&COMPLETE)) + fq_kill(fq); + spin_unlock(&fq->lock); + + fq_put(fq); + } +} + +static void nf_ct_frag6_expire(unsigned long data) +{ + struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data; + + spin_lock(&fq->lock); + + if (fq->last_in & COMPLETE) + goto out; + + fq_kill(fq); + +out: + spin_unlock(&fq->lock); + fq_put(fq); +} + +/* Creation primitives. */ + + +static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, + struct nf_ct_frag6_queue *fq_in) +{ + struct nf_ct_frag6_queue *fq; + + write_lock(&nf_ct_frag6_lock); +#ifdef CONFIG_SMP + for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) { + if (fq->id == fq_in->id && + !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) && + !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) { + atomic_inc(&fq->refcnt); + write_unlock(&nf_ct_frag6_lock); + fq_in->last_in |= COMPLETE; + fq_put(fq_in); + return fq; + } + } +#endif + fq = fq_in; + + if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout)) + atomic_inc(&fq->refcnt); + + atomic_inc(&fq->refcnt); + if ((fq->next = nf_ct_frag6_hash[hash]) != NULL) + fq->next->pprev = &fq->next; + nf_ct_frag6_hash[hash] = fq; + fq->pprev = &nf_ct_frag6_hash[hash]; + INIT_LIST_HEAD(&fq->lru_list); + list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list); + nf_ct_frag6_nqueues++; + write_unlock(&nf_ct_frag6_lock); + return fq; +} + + +static struct nf_ct_frag6_queue * +nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst) +{ + struct nf_ct_frag6_queue *fq; + + if ((fq = frag_alloc_queue()) == NULL) { + DEBUGP("Can't alloc new queue\n"); + goto oom; + } + + memset(fq, 0, sizeof(struct nf_ct_frag6_queue)); + + fq->id = id; + ipv6_addr_copy(&fq->saddr, src); + ipv6_addr_copy(&fq->daddr, dst); + + init_timer(&fq->timer); + fq->timer.function = nf_ct_frag6_expire; + fq->timer.data = (long) fq; + fq->lock = SPIN_LOCK_UNLOCKED; + atomic_set(&fq->refcnt, 1); + + return nf_ct_frag6_intern(hash, fq); + +oom: + return NULL; +} + +static __inline__ struct nf_ct_frag6_queue * +fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst) +{ + struct nf_ct_frag6_queue *fq; + unsigned int hash = ip6qhashfn(id, src, dst); + + read_lock(&nf_ct_frag6_lock); + for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) { + if (fq->id == id && + !ipv6_addr_cmp(src, &fq->saddr) && + !ipv6_addr_cmp(dst, &fq->daddr)) { + atomic_inc(&fq->refcnt); + read_unlock(&nf_ct_frag6_lock); + return fq; + } + } + read_unlock(&nf_ct_frag6_lock); + + return nf_ct_frag6_create(hash, id, src, dst); +} + + +static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, + struct frag_hdr *fhdr, int nhoff) +{ + struct sk_buff *prev, *next; + int offset, end; + + if (fq->last_in & COMPLETE) { + DEBUGP("Allready completed\n"); + goto err; + } + + offset = ntohs(fhdr->frag_off) & ~0x7; + end = offset + (ntohs(skb->nh.ipv6h->payload_len) - + ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); + + if ((unsigned int)end > IPV6_MAXPLEN) { + DEBUGP("offset is too large.\n"); + return -1; + } + + if (skb->ip_summed == CHECKSUM_HW) + skb->csum = csum_sub(skb->csum, + csum_partial(skb->nh.raw, + (u8*)(fhdr + 1) - skb->nh.raw, + 0)); + + /* Is this the final fragment? */ + if (!(fhdr->frag_off & htons(IP6_MF))) { + /* If we already have some bits beyond end + * or have different end, the segment is corrupted. + */ + if (end < fq->len || + ((fq->last_in & LAST_IN) && end != fq->len)) { + DEBUGP("already received last fragment\n"); + goto err; + } + fq->last_in |= LAST_IN; + fq->len = end; + } else { + /* Check if the fragment is rounded to 8 bytes. + * Required by the RFC. + */ + if (end & 0x7) { + /* RFC2460 says always send parameter problem in + * this case. -DaveM + */ + DEBUGP("the end of this fragment is not rounded to 8 bytes.\n"); + return -1; + } + if (end > fq->len) { + /* Some bits beyond end -> corruption. */ + if (fq->last_in & LAST_IN) { + DEBUGP("last packet already reached.\n"); + goto err; + } + fq->len = end; + } + } + + if (end == offset) + goto err; + + /* Point into the IP datagram 'data' part. */ + if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) { + DEBUGP("queue: message is too short.\n"); + goto err; + } + if (end-offset < skb->len) { + if (pskb_trim(skb, end - offset)) { + DEBUGP("Can't trim\n"); + goto err; + } + if (skb->ip_summed != CHECKSUM_UNNECESSARY) + skb->ip_summed = CHECKSUM_NONE; + } + + /* Find out which fragments are in front and at the back of us + * in the chain of fragments so far. We must know where to put + * this fragment, right? + */ + prev = NULL; + for (next = fq->fragments; next != NULL; next = next->next) { + if (NFCT_FRAG6_CB(next)->offset >= offset) + break; /* bingo! */ + prev = next; + } + + /* We found where to put this one. Check for overlap with + * preceding fragment, and, if needed, align things so that + * any overlaps are eliminated. + */ + if (prev) { + int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset; + + if (i > 0) { + offset += i; + if (end <= offset) { + DEBUGP("overlap\n"); + goto err; + } + if (!pskb_pull(skb, i)) { + DEBUGP("Can't pull\n"); + goto err; + } + if (skb->ip_summed != CHECKSUM_UNNECESSARY) + skb->ip_summed = CHECKSUM_NONE; + } + } + + /* Look for overlap with succeeding segments. + * If we can merge fragments, do it. + */ + while (next && NFCT_FRAG6_CB(next)->offset < end) { + /* overlap is 'i' bytes */ + int i = end - NFCT_FRAG6_CB(next)->offset; + + if (i < next->len) { + /* Eat head of the next overlapped fragment + * and leave the loop. The next ones cannot overlap. + */ + DEBUGP("Eat head of the overlapped parts.: %d", i); + if (!pskb_pull(next, i)) + goto err; + + /* next fragment */ + NFCT_FRAG6_CB(next)->offset += i; + fq->meat -= i; + if (next->ip_summed != CHECKSUM_UNNECESSARY) + next->ip_summed = CHECKSUM_NONE; + break; + } else { + struct sk_buff *free_it = next; + + /* Old fragmnet is completely overridden with + * new one drop it. + */ + next = next->next; + + if (prev) + prev->next = next; + else + fq->fragments = next; + + fq->meat -= free_it->len; + frag_kfree_skb(free_it); + } + } + + NFCT_FRAG6_CB(skb)->offset = offset; + + /* Insert this fragment in the chain of fragments. */ + skb->next = next; + if (prev) + prev->next = skb; + else + fq->fragments = skb; + + skb->dev = NULL; + skb_get_timestamp(skb, &fq->stamp); + fq->meat += skb->len; + atomic_add(skb->truesize, &nf_ct_frag6_mem); + + /* The first fragment. + * nhoffset is obtained from the first fragment, of course. + */ + if (offset == 0) { + fq->nhoffset = nhoff; + fq->last_in |= FIRST_IN; + } + write_lock(&nf_ct_frag6_lock); + list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list); + write_unlock(&nf_ct_frag6_lock); + return 0; + +err: + return -1; +} + +/* + * Check if this packet is complete. + * Returns NULL on failure by any reason, and pointer + * to current nexthdr field in reassembled frame. + * + * It is called with locked fq, and caller must check that + * queue is eligible for reassembly i.e. it is not COMPLETE, + * the last and the first frames arrived and all the bits are here. + */ +static struct sk_buff * +nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) +{ + struct sk_buff *fp, *op, *head = fq->fragments; + int payload_len; + + fq_kill(fq); + + BUG_TRAP(head != NULL); + BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0); + + /* Unfragmented part is taken from the first segment. */ + payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr); + if (payload_len > IPV6_MAXPLEN) { + DEBUGP("payload len is too large.\n"); + goto out_oversize; + } + + /* Head of list must not be cloned. */ + if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) { + DEBUGP("skb is cloned but can't expand head"); + goto out_oom; + } + + /* If the first fragment is fragmented itself, we split + * it to two chunks: the first with data and paged part + * and the second, holding only fragments. */ + if (skb_shinfo(head)->frag_list) { + struct sk_buff *clone; + int i, plen = 0; + + if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) { + DEBUGP("Can't alloc skb\n"); + goto out_oom; + } + clone->next = head->next; + head->next = clone; + skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; + skb_shinfo(head)->frag_list = NULL; + for (i=0; inr_frags; i++) + plen += skb_shinfo(head)->frags[i].size; + clone->len = clone->data_len = head->data_len - plen; + head->data_len -= clone->len; + head->len -= clone->len; + clone->csum = 0; + clone->ip_summed = head->ip_summed; + + NFCT_FRAG6_CB(clone)->orig = NULL; + atomic_add(clone->truesize, &nf_ct_frag6_mem); + } + + /* We have to remove fragment header from datagram and to relocate + * header in order to calculate ICV correctly. */ + head->nh.raw[fq->nhoffset] = head->h.raw[0]; + memmove(head->head + sizeof(struct frag_hdr), head->head, + (head->data - head->head) - sizeof(struct frag_hdr)); + head->mac.raw += sizeof(struct frag_hdr); + head->nh.raw += sizeof(struct frag_hdr); + + skb_shinfo(head)->frag_list = head->next; + head->h.raw = head->data; + skb_push(head, head->data - head->nh.raw); + atomic_sub(head->truesize, &nf_ct_frag6_mem); + + for (fp=head->next; fp; fp = fp->next) { + head->data_len += fp->len; + head->len += fp->len; + if (head->ip_summed != fp->ip_summed) + head->ip_summed = CHECKSUM_NONE; + else if (head->ip_summed == CHECKSUM_HW) + head->csum = csum_add(head->csum, fp->csum); + head->truesize += fp->truesize; + atomic_sub(fp->truesize, &nf_ct_frag6_mem); + } + + head->next = NULL; + head->dev = dev; + skb_set_timestamp(head, &fq->stamp); + head->nh.ipv6h->payload_len = htons(payload_len); + + /* Yes, and fold redundant checksum back. 8) */ + if (head->ip_summed == CHECKSUM_HW) + head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); + + fq->fragments = NULL; + + /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ + fp = skb_shinfo(head)->frag_list; + if (NFCT_FRAG6_CB(fp)->orig == NULL) + /* at above code, head skb is divided into two skbs. */ + fp = fp->next; + + op = NFCT_FRAG6_CB(head)->orig; + for (; fp; fp = fp->next) { + struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig; + + op->next = orig; + op = orig; + NFCT_FRAG6_CB(fp)->orig = NULL; + } + + return head; + +out_oversize: + if (net_ratelimit()) + printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len); + goto out_fail; +out_oom: + if (net_ratelimit()) + printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n"); +out_fail: + return NULL; +} + +/* + * find the header just before Fragment Header. + * + * if success return 0 and set ... + * (*prevhdrp): the value of "Next Header Field" in the header + * just before Fragment Header. + * (*prevhoff): the offset of "Next Header Field" in the header + * just before Fragment Header. + * (*fhoff) : the offset of Fragment Header. + * + * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c + * + */ +static int +find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) +{ + u8 nexthdr = skb->nh.ipv6h->nexthdr; + u8 prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data; + int start = (u8 *)(skb->nh.ipv6h+1) - skb->data; + int len = skb->len - start; + u8 prevhdr = NEXTHDR_IPV6; + + while (nexthdr != NEXTHDR_FRAGMENT) { + struct ipv6_opt_hdr hdr; + int hdrlen; + + if (!ipv6_ext_hdr(nexthdr)) { + return -1; + } + if (len < (int)sizeof(struct ipv6_opt_hdr)) { + DEBUGP("too short\n"); + return -1; + } + if (nexthdr == NEXTHDR_NONE) { + DEBUGP("next header is none\n"); + return -1; + } + if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) + BUG(); + if (nexthdr == NEXTHDR_AUTH) + hdrlen = (hdr.hdrlen+2)<<2; + else + hdrlen = ipv6_optlen(&hdr); + + prevhdr = nexthdr; + prev_nhoff = start; + + nexthdr = hdr.nexthdr; + len -= hdrlen; + start += hdrlen; + } + + if (len < 0) + return -1; + + *prevhdrp = prevhdr; + *prevhoff = prev_nhoff; + *fhoff = start; + + return 0; +} + +struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) +{ + struct sk_buff *clone; + struct net_device *dev = skb->dev; + struct frag_hdr *fhdr; + struct nf_ct_frag6_queue *fq; + struct ipv6hdr *hdr; + int fhoff, nhoff; + u8 prevhdr; + struct sk_buff *ret_skb = NULL; + + /* Jumbo payload inhibits frag. header */ + if (skb->nh.ipv6h->payload_len == 0) { + DEBUGP("payload len = 0\n"); + return skb; + } + + if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) + return skb; + + clone = skb_clone(skb, GFP_ATOMIC); + if (clone == NULL) { + DEBUGP("Can't clone skb\n"); + return skb; + } + + NFCT_FRAG6_CB(clone)->orig = skb; + + if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) { + DEBUGP("message is too short.\n"); + goto ret_orig; + } + + clone->h.raw = clone->data + fhoff; + hdr = clone->nh.ipv6h; + fhdr = (struct frag_hdr *)clone->h.raw; + + if (!(fhdr->frag_off & htons(0xFFF9))) { + DEBUGP("Invalid fragment offset\n"); + /* It is not a fragmented frame */ + goto ret_orig; + } + + if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh) + nf_ct_frag6_evictor(); + + fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); + if (fq == NULL) { + DEBUGP("Can't find and can't create new queue\n"); + goto ret_orig; + } + + spin_lock(&fq->lock); + + if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { + spin_unlock(&fq->lock); + DEBUGP("Can't insert skb to queue\n"); + fq_put(fq); + goto ret_orig; + } + + if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) { + ret_skb = nf_ct_frag6_reasm(fq, dev); + if (ret_skb == NULL) + DEBUGP("Can't reassemble fragmented packets\n"); + } + spin_unlock(&fq->lock); + + fq_put(fq); + return ret_skb; + +ret_orig: + kfree_skb(clone); + return skb; +} + +void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, + struct net_device *in, struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *s, *s2; + + for (s = NFCT_FRAG6_CB(skb)->orig; s;) { + nf_conntrack_put_reasm(s->nfct_reasm); + nf_conntrack_get_reasm(skb); + s->nfct_reasm = skb; + + s2 = s->next; + NF_HOOK_THRESH(PF_INET6, hooknum, s, in, out, okfn, + NF_IP6_PRI_CONNTRACK_DEFRAG + 1); + s = s2; + } + nf_conntrack_put_reasm(skb); +} + +int nf_ct_frag6_kfree_frags(struct sk_buff *skb) +{ + struct sk_buff *s, *s2; + + for (s = NFCT_FRAG6_CB(skb)->orig; s; s = s2) { + + s2 = s->next; + kfree_skb(s); + } + + kfree_skb(skb); + + return 0; +} + +int nf_ct_frag6_init(void) +{ + nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ + (jiffies ^ (jiffies >> 6))); + + init_timer(&nf_ct_frag6_secret_timer); + nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild; + nf_ct_frag6_secret_timer.expires = jiffies + + nf_ct_frag6_secret_interval; + add_timer(&nf_ct_frag6_secret_timer); + + return 0; +} + +void nf_ct_frag6_cleanup(void) +{ + del_timer(&nf_ct_frag6_secret_timer); + nf_ct_frag6_evictor(); +} diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a1265a320b11..651c79b41eeb 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -174,8 +174,10 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ - if (clone) + if (clone) { + nf_reset(clone); rawv6_rcv(sk, clone); + } } sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr, IP6CB(skb)->iif); -- cgit v1.2.3 From a8f74b228826eef1cbe04a05647d61e896f5fd63 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 10 Nov 2005 02:25:52 +0100 Subject: [NETLINK]: Make netlink_callback->done() optional Most netlink families make no use of the done() callback, making it optional gets rid of all unnecessary dummy implementations. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 227e99ed510c..f7f42c3e96cb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1710,7 +1710,7 @@ static void fib6_dump_end(struct netlink_callback *cb) static int fib6_dump_done(struct netlink_callback *cb) { fib6_dump_end(cb); - return cb->done(cb); + return cb->done ? cb->done(cb) : 0; } int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) -- cgit v1.2.3