From 1b203c138c5a0d78aa34d1c4346ff6b32ac74869 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 6 Oct 2016 15:40:14 +0200 Subject: netfilter: xt_hashlimit: Add missing ULL suffixes for 64-bit constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32-bit (e.g. with m68k-linux-gnu-gcc-4.1): net/netfilter/xt_hashlimit.c: In function ‘user2credits’: net/netfilter/xt_hashlimit.c:476: warning: integer constant is too large for ‘long’ type ... net/netfilter/xt_hashlimit.c:478: warning: integer constant is too large for ‘long’ type ... net/netfilter/xt_hashlimit.c:480: warning: integer constant is too large for ‘long’ type ... net/netfilter/xt_hashlimit.c: In function ‘rateinfo_recalc’: net/netfilter/xt_hashlimit.c:513: warning: integer constant is too large for ‘long’ type Fixes: 11d5f15723c9f39d ("netfilter: xt_hashlimit: Create revision 2 to support higher pps rates") Signed-off-by: Geert Uytterhoeven Acked-by: Vishwanath Pai Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 2fab0c65aa94..b89b688e9d01 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -431,7 +431,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie. */ #define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24)) -#define MAX_CPJ (0xFFFFFFFFFFFFFFFF / (HZ*60*60*24)) +#define MAX_CPJ (0xFFFFFFFFFFFFFFFFULL / (HZ*60*60*24)) /* Repeated shift and or gives us all 1s, final shift and add 1 gives * us the power of 2 below the theoretical max, so GCC simply does a @@ -473,7 +473,7 @@ static u64 user2credits(u64 user, int revision) return div64_u64(user * HZ * CREDITS_PER_JIFFY_v1, XT_HASHLIMIT_SCALE); } else { - if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) + if (user > 0xFFFFFFFFFFFFFFFFULL / (HZ*CREDITS_PER_JIFFY)) return div64_u64(user, XT_HASHLIMIT_SCALE_v2) * HZ * CREDITS_PER_JIFFY; -- cgit v1.2.3 From a8b1e36d0d1d6f51490e7adce35367ed6adb10e7 Mon Sep 17 00:00:00 2001 From: "Anders K. Pedersen" Date: Sun, 9 Oct 2016 13:49:02 +0000 Subject: netfilter: nft_dynset: fix element timeout for HZ != 1000 With HZ=100 element timeout in dynamic sets (i.e. flow tables) is 10 times higher than configured. Add proper conversion to/from jiffies, when interacting with userspace. I tested this on Linux 4.8.1, and it applies cleanly to current nf and nf-next trees. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Anders K. Pedersen Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_dynset.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index e3b83c31da2e..517f08767a3c 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -158,7 +158,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; - timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT])); + timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( + tb[NFTA_DYNSET_TIMEOUT]))); } priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); @@ -246,7 +247,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) goto nla_put_failure; - if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout), + if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, + cpu_to_be64(jiffies_to_msecs(priv->timeout)), NFTA_DYNSET_PAD)) goto nla_put_failure; if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) -- cgit v1.2.3 From 4f76de5f237615643d2243582cee331815312ad0 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 10 Oct 2016 12:18:23 +0200 Subject: netfilter: conntrack: remove obsolete sysctl (nf_conntrack_events_retry_timeout) This entry has been removed in commit 9500507c6138. Fixes: 9500507c6138 ("netfilter: conntrack: remove timer from ecache extension") Signed-off-by: Nicolas Dichtel Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_conntrack-sysctl.txt | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt index 4fb51d32fccc..399e4e866a9c 100644 --- a/Documentation/networking/nf_conntrack-sysctl.txt +++ b/Documentation/networking/nf_conntrack-sysctl.txt @@ -33,24 +33,6 @@ nf_conntrack_events - BOOLEAN If this option is enabled, the connection tracking code will provide userspace with connection tracking events via ctnetlink. -nf_conntrack_events_retry_timeout - INTEGER (seconds) - default 15 - - This option is only relevant when "reliable connection tracking - events" are used. Normally, ctnetlink is "lossy", that is, - events are normally dropped when userspace listeners can't keep up. - - Userspace can request "reliable event mode". When this mode is - active, the conntrack will only be destroyed after the event was - delivered. If event delivery fails, the kernel periodically - re-tries to send the event to userspace. - - This is the maximum interval the kernel should use when re-trying - to deliver the destroy event. - - A higher number means there will be fewer delivery retries and it - will take longer for a backlog to be processed. - nf_conntrack_expect_max - INTEGER Maximum size of expectation table. Default value is nf_conntrack_buckets / 256. Minimum is 1. -- cgit v1.2.3 From 6d19375b58763fefc2f215fb45117d3353ced888 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 11 Oct 2016 21:03:45 +0800 Subject: netfilter: xt_NFLOG: fix unexpected truncated packet Justin and Chris spotted that iptables NFLOG target was broken when they upgraded the kernel to 4.8: "ulogd-2.0.5- IPs are no longer logged" or "results in segfaults in ulogd-2.0.5". Because "struct nf_loginfo li;" is a local variable, and flags will be filled with garbage value, not inited to zero. So if it contains 0x1, packets will not be logged to the userspace anymore. Fixes: 7643507fe8b5 ("netfilter: xt_NFLOG: nflog-range does not truncate packets") Reported-by: Justin Piszcz Reported-by: Chris Caputo Tested-by: Chris Caputo Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_NFLOG.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 018eed7e1ff1..8668a5c18dc3 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -32,6 +32,7 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; + li.u.ulog.flags = 0; if (info->flags & XT_NFLOG_F_COPY_LEN) li.u.ulog.flags |= NF_LOG_F_COPY_LEN; -- cgit v1.2.3 From f434ed0a00b232efc9843c869d05048c7cd79bb7 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 12 Oct 2016 21:09:22 +0800 Subject: netfilter: xt_ipcomp: add "ip[6]t_ipcomp" module alias name Otherwise, user cannot add related rules if xt_ipcomp.ko is not loaded: # iptables -A OUTPUT -p 108 -m ipcomp --ipcompspi 1 iptables: No chain/target/match by that name. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_ipcomp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c index 89d53104c6b3..000e70377f85 100644 --- a/net/netfilter/xt_ipcomp.c +++ b/net/netfilter/xt_ipcomp.c @@ -26,6 +26,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Fan Du "); MODULE_DESCRIPTION("Xtables: IPv4/6 IPsec-IPComp SPI match"); +MODULE_ALIAS("ipt_ipcomp"); +MODULE_ALIAS("ip6t_ipcomp"); /* Returns 1 if the spi is matched by the range, 0 otherwise */ static inline bool -- cgit v1.2.3 From 5751e175c60be1b4113d1e9bfa011b58ec01f104 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 12 Oct 2016 21:10:45 +0800 Subject: netfilter: nft_hash: add missing NFTA_HASH_OFFSET's nla_policy Missing the nla_policy description will also miss the validation check in kernel. Fixes: 70ca767ea1b2 ("netfilter: nft_hash: Add hash offset value") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_hash.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 09473b415b95..baf694de3935 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -44,6 +44,7 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_LEN] = { .type = NLA_U32 }, [NFTA_HASH_MODULUS] = { .type = NLA_U32 }, [NFTA_HASH_SEED] = { .type = NLA_U32 }, + [NFTA_HASH_OFFSET] = { .type = NLA_U32 }, }; static int nft_hash_init(const struct nft_ctx *ctx, -- cgit v1.2.3 From 09525a09ad3099efd9ba49b0b90bddc350d6b53a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Oct 2016 12:14:29 +0300 Subject: netfilter: nf_tables: underflow in nft_parse_u32_check() We don't want to allow negatives here. Fixes: 36b701fae12a ('netfilter: nf_tables: validate maximum value of u32 netlink attributes') Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b70d3ea1430e..24db22257586 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4423,7 +4423,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, */ unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) { - int val; + u32 val; val = ntohl(nla_get_be32(attr)); if (val > max) -- cgit v1.2.3 From 21a9e0f1568eaa0aad970c06e4cc8d77de8d9fa1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Oct 2016 09:09:12 +0300 Subject: netfilter: nft_exthdr: fix error handling in nft_exthdr_init() "err" needs to be signed for the error handling to work. Fixes: 36b701fae12a ('netfilter: nf_tables: validate maximum value of u32 netlink attributes') Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_exthdr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index a84cf3d66056..47beb3abcc9d 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -59,7 +59,8 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); - u32 offset, len, err; + u32 offset, len; + int err; if (tb[NFTA_EXTHDR_DREG] == NULL || tb[NFTA_EXTHDR_TYPE] == NULL || -- cgit v1.2.3 From ccca6607c545534e5b74ef04e0f2f6ba11344be4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 13 Oct 2016 08:42:17 +0200 Subject: netfilter: nft_range: validate operation netlink attribute Use nft_parse_u32_check() to make sure we don't get a value over the unsigned 8-bit integer. Moreover, make sure this value doesn't go over the two supported range comparison modes. Fixes: 9286c2eb1fda ("netfilter: nft_range: validate operation netlink attribute") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_range.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index c6d5358482d1..9bc4586c3006 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -59,6 +59,7 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr struct nft_range_expr *priv = nft_expr_priv(expr); struct nft_data_desc desc_from, desc_to; int err; + u32 op; err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from), &desc_from, tb[NFTA_RANGE_FROM_DATA]); @@ -80,7 +81,20 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr if (err < 0) goto err2; - priv->op = ntohl(nla_get_be32(tb[NFTA_RANGE_OP])); + err = nft_parse_u32_check(tb[NFTA_RANGE_OP], U8_MAX, &op); + if (err < 0) + goto err2; + + switch (op) { + case NFT_RANGE_EQ: + case NFT_RANGE_NEQ: + break; + default: + err = -EINVAL; + goto err2; + } + + priv->op = op; priv->len = desc_from.len; return 0; err2: -- cgit v1.2.3 From d2e4d593516e877f1f6fb40031eb495f36606e16 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:05:30 +0200 Subject: netfilter: nf_tables: avoid uninitialized variable warning The newly added nft_range_eval() function handles the two possible nft range operations, but as the compiler warning points out, any unexpected value would lead to the 'mismatch' variable being used without being initialized: net/netfilter/nft_range.c: In function 'nft_range_eval': net/netfilter/nft_range.c:45:5: error: 'mismatch' may be used uninitialized in this function [-Werror=maybe-uninitialized] This removes the variable in question and instead moves the condition into the switch itself, which is potentially more efficient than adding a bogus 'default' clause as in my first approach, and is nicer than using the 'uninitialized_var' macro. Fixes: 0f3cd9b36977 ("netfilter: nf_tables: add range expression") Link: http://patchwork.ozlabs.org/patch/677114/ Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_range.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 9bc4586c3006..fbc88009ca2e 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -28,22 +28,20 @@ static void nft_range_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { const struct nft_range_expr *priv = nft_expr_priv(expr); - bool mismatch; int d1, d2; d1 = memcmp(®s->data[priv->sreg], &priv->data_from, priv->len); d2 = memcmp(®s->data[priv->sreg], &priv->data_to, priv->len); switch (priv->op) { case NFT_RANGE_EQ: - mismatch = (d1 < 0 || d2 > 0); + if (d1 < 0 || d2 > 0) + regs->verdict.code = NFT_BREAK; break; case NFT_RANGE_NEQ: - mismatch = (d1 >= 0 && d2 <= 0); + if (d1 >= 0 && d2 <= 0) + regs->verdict.code = NFT_BREAK; break; } - - if (mismatch) - regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = { -- cgit v1.2.3 From 1ecc281ec2f52d715dfc8cb67b6820ea813b9e52 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Oct 2016 21:50:23 +0200 Subject: netfilter: x_tables: suppress kmemcheck warning Markus Trippelsdorf reports: WARNING: kmemcheck: Caught 64-bit read from uninitialized memory (ffff88001e605480) 4055601e0088ffff000000000000000090686d81ffffffff0000000000000000 u u u u u u u u u u u u u u u u i i i i i i i i u u u u u u u u ^ |RIP: 0010:[] [] nf_register_net_hook+0x51/0x160 [..] [] nf_register_net_hook+0x51/0x160 [] nf_register_net_hooks+0x3f/0xa0 [] ipt_register_table+0xe5/0x110 [..] This warning is harmless; we copy 'uninitialized' data from the hook ops but it will not be used. Long term the structures keeping run-time data should be disentangled from those only containing config-time data (such as where in the list to insert a hook), but thats -next material. Reported-by: Markus Trippelsdorf Suggested-by: Al Viro Signed-off-by: Florian Westphal Acked-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e0aa7c1d0224..fc4977456c30 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1513,7 +1513,7 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn) if (!num_hooks) return ERR_PTR(-EINVAL); - ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); + ops = kcalloc(num_hooks, sizeof(*ops), GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From 7bb6615d395a7c130053728f3a64f6df6b2e1f18 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 18 Oct 2016 14:37:32 +0200 Subject: netfilter: conntrack: restart gc immediately if GC_MAX_EVICTS is reached When the maximum evictions number is reached, do not wait 5 seconds before the next run. CC: Florian Westphal Signed-off-by: Nicolas Dichtel Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ba6a1d421222..df2f5a3901df 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -983,7 +983,7 @@ static void gc_worker(struct work_struct *work) return; ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio >= 90) + if (ratio >= 90 || expired_count == GC_MAX_EVICTS) next_run = 0; gc_work->last_bucket = i; -- cgit v1.2.3 From 7034b566a4e7d550621c2dfafd380b77b3787cd9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 17 Oct 2016 18:05:32 +0100 Subject: netfilter: fix nf_queue handling nf_queue handling is broken since e3b37f11e6e4 ("netfilter: replace list_head with single linked list") for two reasons: 1) If the bypass flag is set on, there are no userspace listeners and we still have more hook entries to iterate over, then jump to the next hook. Otherwise accept the packet. On nf_reinject() path, the okfn() needs to be invoked. 2) We should not re-enter the same hook on packet reinjection. If the packet is accepted, we have to skip the current hook from where the packet was enqueued, otherwise the packets gets enqueued over and over again. This restores the previous list_for_each_entry_continue() behaviour happening from nf_iterate() that was dealing with these two cases. This patch introduces a new nf_queue() wrapper function so this fix becomes simpler. Fixes: e3b37f11e6e4 ("netfilter: replace list_head with single linked list") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/core.c | 13 +++--------- net/netfilter/nf_internals.h | 2 +- net/netfilter/nf_queue.c | 48 +++++++++++++++++++++++++++++--------------- 3 files changed, 36 insertions(+), 27 deletions(-) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index fcb5d1df11e9..004af030ef1a 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -361,16 +361,9 @@ next_hook: if (ret == 0) ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - int err; - - RCU_INIT_POINTER(state->hook_entries, entry); - err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); - if (err < 0) { - if (err == -ESRCH && - (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) - goto next_hook; - kfree_skb(skb); - } + ret = nf_queue(skb, state, &entry, verdict); + if (ret == 1 && entry) + goto next_hook; } return ret; } diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index e0adb5959342..9fdb655f85bc 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -18,7 +18,7 @@ unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, /* nf_queue.c */ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, - unsigned int queuenum); + struct nf_hook_entry **entryp, unsigned int verdict); void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry); int __init netfilter_queue_init(void); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 96964a0070e1..8f08d759844a 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -107,13 +107,8 @@ void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry) rcu_read_unlock(); } -/* - * Any packet that leaves via this function must come back - * through nf_reinject(). - */ -int nf_queue(struct sk_buff *skb, - struct nf_hook_state *state, - unsigned int queuenum) +static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, + unsigned int queuenum) { int status = -ENOENT; struct nf_queue_entry *entry = NULL; @@ -161,6 +156,27 @@ err: return status; } +/* Packets leaving via this function must come back through nf_reinject(). */ +int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry **entryp, unsigned int verdict) +{ + struct nf_hook_entry *entry = *entryp; + int ret; + + RCU_INIT_POINTER(state->hook_entries, entry); + ret = __nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); + if (ret < 0) { + if (ret == -ESRCH && + (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) { + *entryp = rcu_dereference(entry->next); + return 1; + } + kfree_skb(skb); + } + + return 0; +} + void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { struct nf_hook_entry *hook_entry; @@ -187,26 +203,26 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) entry->state.thresh = INT_MIN; if (verdict == NF_ACCEPT) { - next_hook: - verdict = nf_iterate(skb, &entry->state, &hook_entry); + hook_entry = rcu_dereference(hook_entry->next); + if (hook_entry) +next_hook: + verdict = nf_iterate(skb, &entry->state, &hook_entry); } switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_STOP: +okfn: local_bh_disable(); entry->state.okfn(entry->state.net, entry->state.sk, skb); local_bh_enable(); break; case NF_QUEUE: - RCU_INIT_POINTER(entry->state.hook_entries, hook_entry); - err = nf_queue(skb, &entry->state, - verdict >> NF_VERDICT_QBITS); - if (err < 0) { - if (err == -ESRCH && - (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) + err = nf_queue(skb, &entry->state, &hook_entry, verdict); + if (err == 1) { + if (hook_entry) goto next_hook; - kfree_skb(skb); + goto okfn; } break; case NF_STOLEN: -- cgit v1.2.3