summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorJozsef Kadlecsik <kadlec@netfilter.org>2024-01-29 10:57:01 +0100
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2024-02-23 09:12:49 +0100
commit653bc5e6d9995d7d5f497c665b321875a626161c (patch)
treea22d226db5cf38ea5aabc550d6b44f30e8019abe /net
parente7d2e87abc6fddec0e661f9fd8c1657db42d674b (diff)
netfilter: ipset: fix performance regression in swap operation
commit 97f7cf1cd80eeed3b7c808b7c12463295c751001 upstream. The patch "netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test", commit 28628fa9 fixes a race condition. But the synchronize_rcu() added to the swap function unnecessarily slows it down: it can safely be moved to destroy and use call_rcu() instead. Eric Dumazet pointed out that simply calling the destroy functions as rcu callback does not work: sets with timeout use garbage collectors which need cancelling at destroy which can wait. Therefore the destroy functions are split into two: cancelling garbage collectors safely at executing the command received by netlink and moving the remaining part only into the rcu callback. Link: https://lore.kernel.org/lkml/C0829B10-EAA6-4809-874E-E1E9C05A8D84@automattic.com/ Fixes: 28628fa952fe ("netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test") Reported-by: Ale Crismani <ale.crismani@automattic.com> Reported-by: David Wang <00107082@163.com> Tested-by: David Wang <00107082@163.com> Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'net')
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h14
-rw-r--r--net/netfilter/ipset/ip_set_core.c37
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h15
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c13
4 files changed, 61 insertions, 18 deletions
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 26ab0e9612d8..9523104a90da 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -28,6 +28,7 @@
#define mtype_del IPSET_TOKEN(MTYPE, _del)
#define mtype_list IPSET_TOKEN(MTYPE, _list)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype MTYPE
#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
@@ -57,9 +58,6 @@ mtype_destroy(struct ip_set *set)
{
struct mtype *map = set->data;
- if (SET_WITH_TIMEOUT(set))
- del_timer_sync(&map->gc);
-
if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
ip_set_free(map->members);
@@ -288,6 +286,15 @@ mtype_gc(struct timer_list *t)
add_timer(&map->gc);
}
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct mtype *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ del_timer_sync(&map->gc);
+}
+
static const struct ip_set_type_variant mtype = {
.kadt = mtype_kadt,
.uadt = mtype_uadt,
@@ -301,6 +308,7 @@ static const struct ip_set_type_variant mtype = {
.head = mtype_head,
.list = mtype_list,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
};
#endif /* __IP_SET_BITMAP_IP_GEN_H */
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index d47dfdcb899b..bddd1131e825 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1184,6 +1184,14 @@ ip_set_destroy_set(struct ip_set *set)
kfree(set);
}
+static void
+ip_set_destroy_set_rcu(struct rcu_head *head)
+{
+ struct ip_set *set = container_of(head, struct ip_set, rcu);
+
+ ip_set_destroy_set(set);
+}
+
static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const attr[])
{
@@ -1195,8 +1203,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
- /* Must wait for flush to be really finished in list:set */
- rcu_barrier();
/* Commands are serialized and references are
* protected by the ip_set_ref_lock.
@@ -1208,8 +1214,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
* counter, so if it's already zero, we can proceed
* without holding the lock.
*/
- read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) {
+ /* Must wait for flush to be really finished in list:set */
+ rcu_barrier();
+ read_lock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
if (s && (s->ref || s->ref_netlink)) {
@@ -1223,6 +1231,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
s = ip_set(inst, i);
if (s) {
ip_set(inst, i) = NULL;
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
ip_set_destroy_set(s);
}
}
@@ -1230,6 +1240,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
inst->is_destroyed = false;
} else {
u32 flags = flag_exist(info->nlh);
+ u16 features = 0;
+
+ read_lock_bh(&ip_set_ref_lock);
s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&i);
if (!s) {
@@ -1240,10 +1253,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
ret = -IPSET_ERR_BUSY;
goto out;
}
+ features = s->type->features;
ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
-
- ip_set_destroy_set(s);
+ if (features & IPSET_TYPE_NAME) {
+ /* Must wait for flush to be really finished */
+ rcu_barrier();
+ }
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
+ call_rcu(&s->rcu, ip_set_destroy_set_rcu);
}
return 0;
out:
@@ -1396,9 +1415,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
ip_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock);
- /* Make sure all readers of the old set pointers are completed. */
- synchronize_rcu();
-
return 0;
}
@@ -2411,8 +2427,11 @@ ip_set_fini(void)
{
nf_unregister_sockopt(&so_set);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-
unregister_pernet_subsys(&ip_set_net_ops);
+
+ /* Wait for call_rcu() in destroy */
+ rcu_barrier();
+
pr_debug("these are the famous last words\n");
}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 7499192af586..48fda58b91a0 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -210,6 +210,7 @@ htable_size(u8 hbits)
#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
+#undef mtype_cancel_gc
#undef mtype_variant
#undef mtype_data_match
@@ -254,6 +255,7 @@ htable_size(u8 hbits)
#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
@@ -437,9 +439,6 @@ mtype_destroy(struct ip_set *set)
struct htype *h = set->data;
struct list_head *l, *lt;
- if (SET_WITH_TIMEOUT(set))
- cancel_delayed_work_sync(&h->gc.dwork);
-
mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
list_for_each_safe(l, lt, &h->ad) {
list_del(l);
@@ -586,6 +585,15 @@ mtype_gc_init(struct htable_gc *gc)
queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
}
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct htype *h = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ cancel_delayed_work_sync(&h->gc.dwork);
+}
+
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags);
@@ -1416,6 +1424,7 @@ static const struct ip_set_type_variant mtype_variant = {
.uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
.region_lock = true,
};
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 5a67f7966574..6bc7019982b0 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set)
struct list_set *map = set->data;
struct set_elem *e, *n;
- if (SET_WITH_TIMEOUT(set))
- del_timer_sync(&map->gc);
-
list_for_each_entry_safe(e, n, &map->members, list) {
list_del(&e->list);
ip_set_put_byindex(map->net, e->id);
@@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
a->extensions == b->extensions;
}
+static void
+list_set_cancel_gc(struct ip_set *set)
+{
+ struct list_set *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ del_timer_sync(&map->gc);
+}
+
static const struct ip_set_type_variant set_variant = {
.kadt = list_set_kadt,
.uadt = list_set_uadt,
@@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = {
.head = list_set_head,
.list = list_set_list,
.same_set = list_set_same_set,
+ .cancel_gc = list_set_cancel_gc,
};
static void