diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2025-09-11 17:37:09 -0700 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2025-09-11 17:40:13 -0700 |
| commit | fc3a2810412c163b5df1b377d332e048860f45db (patch) | |
| tree | 9eeb81c7f965176a32ca3062aefcc3532c637b01 /net/netfilter | |
| parent | 5f790208d68fe1526c751dc2af366c7b552b8631 (diff) | |
| parent | db87bd2ad1f736c2f7ab231f9b40c885934f6b2c (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR (net-6.17-rc6).
Conflicts:
net/netfilter/nft_set_pipapo.c
net/netfilter/nft_set_pipapo_avx2.c
c4eaca2e1052 ("netfilter: nft_set_pipapo: don't check genbit from packetpath lookups")
84c1da7b38d9 ("netfilter: nft_set_pipapo: use avx2 algorithm for insertions too")
Only trivial adjacent changes (in a doc and a Makefile).
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/netfilter')
| -rw-r--r-- | net/netfilter/nf_tables_api.c | 66 | ||||
| -rw-r--r-- | net/netfilter/nft_lookup.c | 46 | ||||
| -rw-r--r-- | net/netfilter/nft_set_bitmap.c | 3 | ||||
| -rw-r--r-- | net/netfilter/nft_set_pipapo.c | 20 | ||||
| -rw-r--r-- | net/netfilter/nft_set_pipapo_avx2.c | 3 | ||||
| -rw-r--r-- | net/netfilter/nft_set_rbtree.c | 6 |
6 files changed, 99 insertions, 45 deletions
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 68e273d8821a..eed434e0a970 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1123,11 +1123,14 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla, return ERR_PTR(-ENOENT); } -static __be16 nft_base_seq(const struct net *net) +static unsigned int nft_base_seq(const struct net *net) { - struct nftables_pernet *nft_net = nft_pernet(net); + return READ_ONCE(net->nft.base_seq); +} - return htons(nft_net->base_seq & 0xffff); +static __be16 nft_base_seq_be16(const struct net *net) +{ + return htons(nft_base_seq(net) & 0xffff); } static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { @@ -1147,7 +1150,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, nlh = nfnl_msg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), - flags, family, NFNETLINK_V0, nft_base_seq(net)); + flags, family, NFNETLINK_V0, nft_base_seq_be16(net)); if (!nlh) goto nla_put_failure; @@ -1240,7 +1243,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = READ_ONCE(nft_net->base_seq); + cb->seq = nft_base_seq(net); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) @@ -2022,7 +2025,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, nlh = nfnl_msg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), - flags, family, NFNETLINK_V0, nft_base_seq(net)); + flags, family, NFNETLINK_V0, nft_base_seq_be16(net)); if (!nlh) goto nla_put_failure; @@ -2125,7 +2128,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = READ_ONCE(nft_net->base_seq); + cb->seq = nft_base_seq(net); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) @@ -3663,7 +3666,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nfnl_msg_put(skb, portid, seq, type, flags, family, NFNETLINK_V0, - nft_base_seq(net)); + nft_base_seq_be16(net)); if (!nlh) goto nla_put_failure; @@ -3831,7 +3834,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = READ_ONCE(nft_net->base_seq); + cb->seq = nft_base_seq(net); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) @@ -4042,7 +4045,7 @@ static int nf_tables_getrule_reset(struct sk_buff *skb, buf = kasprintf(GFP_ATOMIC, "%.*s:%u", nla_len(nla[NFTA_RULE_TABLE]), (char *)nla_data(nla[NFTA_RULE_TABLE]), - nft_net->base_seq); + nft_base_seq(net)); audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1, AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC); kfree(buf); @@ -4879,7 +4882,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, nlh = nfnl_msg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), flags, ctx->family, NFNETLINK_V0, - nft_base_seq(ctx->net)); + nft_base_seq_be16(ctx->net)); if (!nlh) goto nla_put_failure; @@ -5024,7 +5027,7 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = READ_ONCE(nft_net->base_seq); + cb->seq = nft_base_seq(net); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (ctx->family != NFPROTO_UNSPEC && @@ -6201,7 +6204,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = READ_ONCE(nft_net->base_seq); + cb->seq = nft_base_seq(net); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (dump_ctx->ctx.family != NFPROTO_UNSPEC && @@ -6230,7 +6233,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) seq = cb->nlh->nlmsg_seq; nlh = nfnl_msg_put(skb, portid, seq, event, NLM_F_MULTI, - table->family, NFNETLINK_V0, nft_base_seq(net)); + table->family, NFNETLINK_V0, nft_base_seq_be16(net)); if (!nlh) goto nla_put_failure; @@ -6323,7 +6326,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family, - NFNETLINK_V0, nft_base_seq(ctx->net)); + NFNETLINK_V0, nft_base_seq_be16(ctx->net)); if (!nlh) goto nla_put_failure; @@ -6622,7 +6625,7 @@ static int nf_tables_getsetelem_reset(struct sk_buff *skb, } nelems++; } - audit_log_nft_set_reset(dump_ctx.ctx.table, nft_net->base_seq, nelems); + audit_log_nft_set_reset(dump_ctx.ctx.table, nft_base_seq(info->net), nelems); out_unlock: rcu_read_unlock(); @@ -8372,7 +8375,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, nlh = nfnl_msg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), - flags, family, NFNETLINK_V0, nft_base_seq(net)); + flags, family, NFNETLINK_V0, nft_base_seq_be16(net)); if (!nlh) goto nla_put_failure; @@ -8437,7 +8440,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = READ_ONCE(nft_net->base_seq); + cb->seq = nft_base_seq(net); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) @@ -8471,7 +8474,7 @@ cont: idx++; } if (ctx->reset && entries) - audit_log_obj_reset(table, nft_net->base_seq, entries); + audit_log_obj_reset(table, nft_base_seq(net), entries); if (rc < 0) break; } @@ -8640,7 +8643,7 @@ static int nf_tables_getobj_reset(struct sk_buff *skb, buf = kasprintf(GFP_ATOMIC, "%.*s:%u", nla_len(nla[NFTA_OBJ_TABLE]), (char *)nla_data(nla[NFTA_OBJ_TABLE]), - nft_net->base_seq); + nft_base_seq(net)); audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1, AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC); kfree(buf); @@ -8745,9 +8748,8 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, u16 flags, int family, int report, gfp_t gfp) { - struct nftables_pernet *nft_net = nft_pernet(net); char *buf = kasprintf(gfp, "%s:%u", - table->name, nft_net->base_seq); + table->name, nft_base_seq(net)); audit_log_nfcfg(buf, family, @@ -9433,7 +9435,7 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, nlh = nfnl_msg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), - flags, family, NFNETLINK_V0, nft_base_seq(net)); + flags, family, NFNETLINK_V0, nft_base_seq_be16(net)); if (!nlh) goto nla_put_failure; @@ -9502,7 +9504,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = READ_ONCE(nft_net->base_seq); + cb->seq = nft_base_seq(net); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) @@ -9687,17 +9689,16 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq) { - struct nftables_pernet *nft_net = nft_pernet(net); struct nlmsghdr *nlh; char buf[TASK_COMM_LEN]; int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN); nlh = nfnl_msg_put(skb, portid, seq, event, 0, AF_UNSPEC, - NFNETLINK_V0, nft_base_seq(net)); + NFNETLINK_V0, nft_base_seq_be16(net)); if (!nlh) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_net->base_seq)) || + if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_base_seq(net))) || nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) || nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current))) goto nla_put_failure; @@ -10959,11 +10960,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) * Bump generation counter, invalidate any dump in progress. * Cannot fail after this point. */ - base_seq = READ_ONCE(nft_net->base_seq); + base_seq = nft_base_seq(net); while (++base_seq == 0) ; - WRITE_ONCE(nft_net->base_seq, base_seq); + /* pairs with smp_load_acquire in nft_lookup_eval */ + smp_store_release(&net->nft.base_seq, base_seq); gc_seq = nft_gc_seq_begin(nft_net); @@ -11172,7 +11174,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); - nf_tables_commit_audit_log(&adl, nft_net->base_seq); + nf_tables_commit_audit_log(&adl, nft_base_seq(net)); nft_gc_seq_end(nft_net, gc_seq); nft_net->validate_state = NFT_VALIDATE_SKIP; @@ -11497,7 +11499,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid) mutex_lock(&nft_net->commit_mutex); nft_net->tstamp = get_jiffies_64(); - genid_ok = genid == 0 || nft_net->base_seq == genid; + genid_ok = genid == 0 || nft_base_seq(net) == genid; if (!genid_ok) mutex_unlock(&nft_net->commit_mutex); @@ -12134,7 +12136,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->module_list); INIT_LIST_HEAD(&nft_net->notify_list); mutex_init(&nft_net->commit_mutex); - nft_net->base_seq = 1; + net->nft.base_seq = 1; nft_net->gc_seq = 0; nft_net->validate_state = NFT_VALIDATE_SKIP; INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work); diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 40c602ffbcba..58c5b14889c4 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -24,11 +24,11 @@ struct nft_lookup { struct nft_set_binding binding; }; -#ifdef CONFIG_MITIGATION_RETPOLINE -const struct nft_set_ext * -nft_set_do_lookup(const struct net *net, const struct nft_set *set, - const u32 *key) +static const struct nft_set_ext * +__nft_set_do_lookup(const struct net *net, const struct nft_set *set, + const u32 *key) { +#ifdef CONFIG_MITIGATION_RETPOLINE if (set->ops == &nft_set_hash_fast_type.ops) return nft_hash_lookup_fast(net, set, key); if (set->ops == &nft_set_hash_type.ops) @@ -51,10 +51,46 @@ nft_set_do_lookup(const struct net *net, const struct nft_set *set, return nft_rbtree_lookup(net, set, key); WARN_ON_ONCE(1); +#endif return set->ops->lookup(net, set, key); } + +static unsigned int nft_base_seq(const struct net *net) +{ + /* pairs with smp_store_release() in nf_tables_commit() */ + return smp_load_acquire(&net->nft.base_seq); +} + +static bool nft_lookup_should_retry(const struct net *net, unsigned int seq) +{ + return unlikely(seq != nft_base_seq(net)); +} + +const struct nft_set_ext * +nft_set_do_lookup(const struct net *net, const struct nft_set *set, + const u32 *key) +{ + const struct nft_set_ext *ext; + unsigned int base_seq; + + do { + base_seq = nft_base_seq(net); + + ext = __nft_set_do_lookup(net, set, key); + if (ext) + break; + /* No match? There is a small chance that lookup was + * performed in the old generation, but nf_tables_commit() + * already unlinked a (matching) element. + * + * We need to repeat the lookup to make sure that we didn't + * miss a matching element in the new generation. + */ + } while (nft_lookup_should_retry(net, base_seq)); + + return ext; +} EXPORT_SYMBOL_GPL(nft_set_do_lookup); -#endif void nft_lookup_eval(const struct nft_expr *expr, struct nft_regs *regs, diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index c24c922f895d..8d3f040a904a 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -226,7 +226,8 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx, const struct nft_bitmap *priv = nft_set_priv(set); struct nft_bitmap_elem *be; - list_for_each_entry_rcu(be, &priv->list, head) { + list_for_each_entry_rcu(be, &priv->list, head, + lockdep_is_held(&nft_pernet(ctx->net)->commit_mutex)) { if (iter->count < iter->skip) goto cont; diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 4b64c3bd8e70..a7b8fa8cab7c 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -549,6 +549,23 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, * * This function is called from the data path. It will search for * an element matching the given key in the current active copy. + * Unlike other set types, this uses NFT_GENMASK_ANY instead of + * nft_genmask_cur(). + * + * This is because new (future) elements are not reachable from + * priv->match, they get added to priv->clone instead. + * When the commit phase flips the generation bitmask, the + * 'now old' entries are skipped but without the 'now current' + * elements becoming visible. Using nft_genmask_cur() thus creates + * inconsistent state: matching old entries get skipped but thew + * newly matching entries are unreachable. + * + * GENMASK will still find the 'now old' entries which ensures consistent + * priv->match view. + * + * nft_pipapo_commit swaps ->clone and ->match shortly after the + * genbit flip. As ->clone doesn't contain the old entries in the first + * place, lookup will only find the now-current ones. * * Return: ntables API extension pointer or NULL if no match. */ @@ -557,12 +574,11 @@ nft_pipapo_lookup(const struct net *net, const struct nft_set *set, const u32 *key) { struct nft_pipapo *priv = nft_set_priv(set); - u8 genmask = nft_genmask_cur(net); const struct nft_pipapo_match *m; const struct nft_pipapo_elem *e; m = rcu_dereference(priv->match); - e = pipapo_get_slow(m, (const u8 *)key, genmask, get_jiffies_64()); + e = pipapo_get_slow(m, (const u8 *)key, NFT_GENMASK_ANY, get_jiffies_64()); return e ? &e->ext : NULL; } diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 7559306d0aed..27dab3667548 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1275,7 +1275,6 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const u32 *key) { struct nft_pipapo *priv = nft_set_priv(set); - u8 genmask = nft_genmask_cur(net); const struct nft_pipapo_match *m; const u8 *rp = (const u8 *)key; const struct nft_pipapo_elem *e; @@ -1293,7 +1292,7 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, m = rcu_dereference(priv->match); - e = pipapo_get_avx2(m, rp, genmask, get_jiffies_64()); + e = pipapo_get_avx2(m, rp, NFT_GENMASK_ANY, get_jiffies_64()); local_bh_enable(); return e ? &e->ext : NULL; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index b311b66df3e9..ca594161b840 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -77,7 +77,9 @@ __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, nft_rbtree_interval_end(rbe) && nft_rbtree_interval_start(interval)) continue; - interval = rbe; + if (nft_set_elem_active(&rbe->ext, genmask) && + !nft_rbtree_elem_expired(rbe)) + interval = rbe; } else if (d > 0) parent = rcu_dereference_raw(parent->rb_right); else { @@ -102,8 +104,6 @@ __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, } if (set->flags & NFT_SET_INTERVAL && interval != NULL && - nft_set_elem_active(&interval->ext, genmask) && - !nft_rbtree_elem_expired(interval) && nft_rbtree_interval_start(interval)) return &interval->ext; |
