summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/net/netfilter/nf_tables.h7
-rw-r--r--net/netfilter/nf_tables_api.c45
-rw-r--r--net/netfilter/nft_set_hash.c1
-rw-r--r--net/netfilter/nft_set_pipapo.c62
-rw-r--r--net/netfilter/nft_set_pipapo.h2
-rw-r--r--net/netfilter/nft_set_rbtree.c8
6 files changed, 92 insertions, 33 deletions
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 426534a711b0..e2d2bfc1f989 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -320,11 +320,13 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv)
* @NFT_ITER_UNSPEC: unspecified, to catch errors
* @NFT_ITER_READ: read-only iteration over set elements
* @NFT_ITER_UPDATE: iteration under mutex to update set element state
+ * @NFT_ITER_UPDATE_CLONE: clone set before iteration under mutex to update element
*/
enum nft_iter_type {
NFT_ITER_UNSPEC,
NFT_ITER_READ,
NFT_ITER_UPDATE,
+ NFT_ITER_UPDATE_CLONE,
};
struct nft_set;
@@ -1861,6 +1863,11 @@ struct nft_trans_gc {
struct rcu_head rcu;
};
+static inline int nft_trans_gc_space(const struct nft_trans_gc *trans)
+{
+ return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
static inline void nft_ctx_update(struct nft_ctx *ctx,
const struct nft_trans *trans)
{
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index fd7f7e4e2a43..1862bd7fe804 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -833,6 +833,11 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
}
}
+/* Use NFT_ITER_UPDATE iterator even if this may be called from the preparation
+ * phase, the set clone might already exist from a previous command, or it might
+ * be a set that is going away and does not require a clone. The netns and
+ * netlink release paths also need to work on the live set.
+ */
static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
{
struct nft_set_iter iter = {
@@ -7170,6 +7175,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data_desc desc;
enum nft_registers dreg;
struct nft_trans *trans;
+ bool set_full = false;
u64 expiration;
u64 timeout;
int err, i;
@@ -7461,10 +7467,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err_elem_free;
+ if (!(flags & NFT_SET_ELEM_CATCHALL)) {
+ unsigned int max = nft_set_maxsize(set), nelems;
+
+ nelems = atomic_inc_return(&set->nelems);
+ if (nelems > max)
+ set_full = true;
+ }
+
trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
if (trans == NULL) {
err = -ENOMEM;
- goto err_elem_free;
+ goto err_set_size;
}
ext->genmask = nft_genmask_cur(ctx->net);
@@ -7516,7 +7530,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
ue->priv = elem_priv;
nft_trans_commit_list_add_elem(ctx->net, trans);
- goto err_elem_free;
+ goto err_set_size;
}
}
}
@@ -7534,23 +7548,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_element_clash;
}
- if (!(flags & NFT_SET_ELEM_CATCHALL)) {
- unsigned int max = nft_set_maxsize(set);
-
- if (!atomic_add_unless(&set->nelems, 1, max)) {
- err = -ENFILE;
- goto err_set_full;
- }
- }
-
nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
nft_trans_commit_list_add_elem(ctx->net, trans);
- return 0;
-err_set_full:
- nft_setelem_remove(ctx->net, set, elem.priv);
+ return set_full ? -ENFILE : 0;
+
err_element_clash:
kfree(trans);
+err_set_size:
+ if (!(flags & NFT_SET_ELEM_CATCHALL))
+ atomic_dec(&set->nelems);
err_elem_free:
nf_tables_set_elem_destroy(ctx, set, elem.priv);
err_parse_data:
@@ -7901,9 +7908,12 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask)
{
+ /* The set backend might need to clone the set, do it now from the
+ * preparation phase, use NFT_ITER_UPDATE_CLONE iterator type.
+ */
struct nft_set_iter iter = {
.genmask = genmask,
- .type = NFT_ITER_UPDATE,
+ .type = NFT_ITER_UPDATE_CLONE,
.fn = nft_setelem_flush,
};
@@ -10483,11 +10493,6 @@ static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
schedule_work(&trans_gc_work);
}
-static int nft_trans_gc_space(struct nft_trans_gc *trans)
-{
- return NFT_TRANS_GC_BATCHCOUNT - trans->count;
-}
-
struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
unsigned int gc_seq, gfp_t gfp)
{
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 739b992bde59..b0e571c8e3f3 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -374,6 +374,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
{
switch (iter->type) {
case NFT_ITER_UPDATE:
+ case NFT_ITER_UPDATE_CLONE:
/* only relevant for netlink dumps which use READ type */
WARN_ON_ONCE(iter->skip != 0);
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 7ef4b44471d3..a34632ae6048 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1680,11 +1680,11 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
}
/**
- * pipapo_gc() - Drop expired entries from set, destroy start and end elements
+ * pipapo_gc_scan() - Drop expired entries from set and link them to gc list
* @set: nftables API set representation
* @m: Matching data
*/
-static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
+static void pipapo_gc_scan(struct nft_set *set, struct nft_pipapo_match *m)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
@@ -1697,6 +1697,8 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
if (!gc)
return;
+ list_add(&gc->list, &priv->gc_head);
+
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const struct nft_pipapo_field *f;
@@ -1724,9 +1726,13 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
* NFT_SET_ELEM_DEAD_BIT.
*/
if (__nft_set_elem_expired(&e->ext, tstamp)) {
- gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
- if (!gc)
- return;
+ if (!nft_trans_gc_space(gc)) {
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+ if (!gc)
+ return;
+
+ list_add(&gc->list, &priv->gc_head);
+ }
nft_pipapo_gc_deactivate(net, set, e);
pipapo_drop(m, rulemap);
@@ -1740,10 +1746,30 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
}
}
- gc = nft_trans_gc_catchall_sync(gc);
+ priv->last_gc = jiffies;
+}
+
+/**
+ * pipapo_gc_queue() - Free expired elements
+ * @set: nftables API set representation
+ */
+static void pipapo_gc_queue(struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_trans_gc *gc, *next;
+
+ /* always do a catchall cycle: */
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
if (gc) {
+ gc = nft_trans_gc_catchall_sync(gc);
+ if (gc)
+ nft_trans_gc_queue_sync_done(gc);
+ }
+
+ /* always purge queued gc elements. */
+ list_for_each_entry_safe(gc, next, &priv->gc_head, list) {
+ list_del(&gc->list);
nft_trans_gc_queue_sync_done(gc);
- priv->last_gc = jiffies;
}
}
@@ -1797,6 +1823,10 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
*
* We also need to create a new working copy for subsequent insertions and
* deletions.
+ *
+ * After the live copy has been replaced by the clone, we can safely queue
+ * expired elements that have been collected by pipapo_gc_scan() for
+ * memory reclaim.
*/
static void nft_pipapo_commit(struct nft_set *set)
{
@@ -1807,7 +1837,7 @@ static void nft_pipapo_commit(struct nft_set *set)
return;
if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
- pipapo_gc(set, priv->clone);
+ pipapo_gc_scan(set, priv->clone);
old = rcu_replace_pointer(priv->match, priv->clone,
nft_pipapo_transaction_mutex_held(set));
@@ -1815,6 +1845,8 @@ static void nft_pipapo_commit(struct nft_set *set)
if (old)
call_rcu(&old->rcu, pipapo_reclaim_match);
+
+ pipapo_gc_queue(set);
}
static void nft_pipapo_abort(const struct nft_set *set)
@@ -2144,13 +2176,20 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_pipapo_match *m;
switch (iter->type) {
- case NFT_ITER_UPDATE:
+ case NFT_ITER_UPDATE_CLONE:
m = pipapo_maybe_clone(set);
if (!m) {
iter->err = -ENOMEM;
return;
}
-
+ nft_pipapo_do_walk(ctx, set, m, iter);
+ break;
+ case NFT_ITER_UPDATE:
+ if (priv->clone)
+ m = priv->clone;
+ else
+ m = rcu_dereference_protected(priv->match,
+ nft_pipapo_transaction_mutex_held(set));
nft_pipapo_do_walk(ctx, set, m, iter);
break;
case NFT_ITER_READ:
@@ -2272,6 +2311,7 @@ static int nft_pipapo_init(const struct nft_set *set,
f->mt = NULL;
}
+ INIT_LIST_HEAD(&priv->gc_head);
rcu_assign_pointer(priv->match, m);
return 0;
@@ -2321,6 +2361,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
+ WARN_ON_ONCE(!list_empty(&priv->gc_head));
+
m = rcu_dereference_protected(priv->match, true);
if (priv->clone) {
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index eaab422aa56a..9aee9a9eaeb7 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -156,12 +156,14 @@ struct nft_pipapo_match {
* @clone: Copy where pending insertions and deletions are kept
* @width: Total bytes to be matched for one packet, including padding
* @last_gc: Timestamp of last garbage collection run, jiffies
+ * @gc_head: list of nft_trans_gc to queue up for mem reclaim
*/
struct nft_pipapo {
struct nft_pipapo_match __rcu *match;
struct nft_pipapo_match *clone;
int width;
unsigned long last_gc;
+ struct list_head gc_head;
};
struct nft_pipapo_elem;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 3f02e4478216..ee3d4f5b9ff7 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -861,13 +861,15 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
struct nft_rbtree *priv = nft_set_priv(set);
switch (iter->type) {
- case NFT_ITER_UPDATE:
- lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex);
-
+ case NFT_ITER_UPDATE_CLONE:
if (nft_array_may_resize(set) < 0) {
iter->err = -ENOMEM;
break;
}
+ fallthrough;
+ case NFT_ITER_UPDATE:
+ lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex);
+
nft_rbtree_do_walk(ctx, set, iter);
break;
case NFT_ITER_READ: