summary refs log tree commit diff
path: root/include
diff options
context:
space:
mode:
author Jakub Kicinski <kuba@kernel.org> 2026-02-25 19:36:28 -0800
committer Jakub Kicinski <kuba@kernel.org> 2026-02-25 19:36:29 -0800
commit 2cd63825c7c6105bc298474fbb073d25f48114cf (patch)
tree 971376082b3d8f2783a97ff8b6f2e2fe4983094a /include
parent 7717fbb14028be5735acb911aeb7553b7c662418 (diff)
parent 6b94d081f81dd524626f7aab2b98a9de335edb72 (diff)
Merge branch 'netfilter-updates-for-net-next'
Florian Westphal says: ==================== netfilter: updates for net-next including IPVS updates from and via Julian Anastasov. First updates for IPVS. From Julian's cover-letter: * Convert the global __ip_vs_mutex to per-net service_mutex and switch the service tables to be per-net, joint work by Jiejian Wu and Dust Li * Convert some code that walks the service lists to use RCU instead of the service_mutex * We used two tables for services (non-fwmark and fwmark), merge them into a single svc_table * The list for unavailable destinations (dest_trash) holds dsts and thus dev references causing extra work for the ip_vs_dst_event() dev notifier handler. Change this by dropping the reference when dest is removed and saved into dest_trash. The dest_trash will need more changes to make it light for lookups. TODO. * On new connection we can do multiple lookups for services by trying different fallback options. Add more counters for service types, so that we can avoid unneeded lookups for services. * The no_cport and dropentry counters can be per-net and also we can avoid extra conn lookups Then, a few cleanups for nf_tables: * keep BH enabled during nft_set_rbtree inserts, this is possible because the root lock is now only taken from the control plane. * toss a few EXPORT_SYMBOLs from nf_tables; these were historic leftovers from back in the day when e.g. set backends were still residing in their own modules. * remove the register tracking infra from nftables. It was disabled years ago in 5.18 and there are no plans to salvage this work; the idea was good (remove redundant register stores), but there are just too many pitfalls, and better rule structuring (verdict maps) largely avoids the scenarios where this would have helped. ==================== Link: https://patch.msgid.link/20260224205048.4718-1-fw@strlen.de Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
-rw-r--r-- include/net/ip_vs.h 39
-rw-r--r-- include/net/netfilter/nf_tables.h 32
-rw-r--r-- include/net/netfilter/nft_fib.h 2
-rw-r--r-- include/net/netfilter/nft_meta.h 3
4 files changed, 31 insertions, 45 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 29a36709e7f3..ad8a16146ac5 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -33,6 +33,12 @@
#define IP_VS_HDR_INVERSE 1
#define IP_VS_HDR_ICMP 2
+/*
+ * Hash table: for virtual service lookups
+ */
+#define IP_VS_SVC_TAB_BITS 8
+#define IP_VS_SVC_TAB_SIZE BIT(IP_VS_SVC_TAB_BITS)
+#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
/* Generic access of ipvs struct */
static inline struct netns_ipvs *net_ipvs(struct net* net)
@@ -265,6 +271,18 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
pr_err(msg, ##__VA_ARGS__); \
} while (0)
+/* For arrays per family */
+enum {
+ IP_VS_AF_INET,
+ IP_VS_AF_INET6,
+ IP_VS_AF_MAX
+};
+
+static inline int ip_vs_af_index(int af)
+{
+ return af == AF_INET6 ? IP_VS_AF_INET6 : IP_VS_AF_INET;
+}
+
/* The port number of FTP service (in network order). */
#define FTPPORT cpu_to_be16(21)
#define FTPDATA cpu_to_be16(20)
@@ -673,8 +691,7 @@ struct ip_vs_dest_user_kern {
* forwarding entries.
*/
struct ip_vs_service {
- struct hlist_node s_list; /* for normal service table */
- struct hlist_node f_list; /* for fwmark-based service table */
+ struct hlist_node s_list; /* node in service table */
atomic_t refcnt; /* reference counter */
u16 af; /* address family */
@@ -931,21 +948,22 @@ struct netns_ipvs {
#endif
/* ip_vs_conn */
atomic_t conn_count; /* connection counter */
+ atomic_t no_cport_conns[IP_VS_AF_MAX];
/* ip_vs_ctl */
struct ip_vs_stats_rcu *tot_stats; /* Statistics & est. */
- int num_services; /* no of virtual services */
- int num_services6; /* IPv6 virtual services */
-
/* Trash for destinations */
struct list_head dest_trash;
spinlock_t dest_trash_lock;
struct timer_list dest_trash_timer; /* expiration timer */
/* Service counters */
- atomic_t ftpsvc_counter;
- atomic_t nullsvc_counter;
- atomic_t conn_out_counter;
+ atomic_t num_services[IP_VS_AF_MAX]; /* Services */
+ atomic_t fwm_services[IP_VS_AF_MAX]; /* Services */
+ atomic_t nonfwm_services[IP_VS_AF_MAX];/* Services */
+ atomic_t ftpsvc_counter[IP_VS_AF_MAX]; /* FTPPORT */
+ atomic_t nullsvc_counter[IP_VS_AF_MAX];/* Zero port */
+ atomic_t conn_out_counter[IP_VS_AF_MAX];/* out conn */
#ifdef CONFIG_SYSCTL
/* delayed work for expiring no dest connections */
@@ -956,6 +974,7 @@ struct netns_ipvs {
int drop_counter;
int old_secure_tcp;
atomic_t dropentry;
+ s8 dropentry_counters[8];
/* locks in ctl.c */
spinlock_t dropentry_lock; /* drop entry handling */
spinlock_t droppacket_lock; /* drop packet handling */
@@ -1041,6 +1060,10 @@ struct netns_ipvs {
*/
unsigned int mixed_address_family_dests;
unsigned int hooks_afmask; /* &1=AF_INET, &2=AF_INET6 */
+
+ /* the service mutex that protects svc_table */
+ struct mutex service_mutex;
+ struct hlist_head svc_table[IP_VS_SVC_TAB_SIZE]; /* Services */
};
#define DEFAULT_SYNC_THRESHOLD 3
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 426534a711b0..40e8106e71f0 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -122,17 +122,6 @@ struct nft_regs {
};
};
-struct nft_regs_track {
- struct {
- const struct nft_expr *selector;
- const struct nft_expr *bitwise;
- u8 num_reg;
- } regs[NFT_REG32_NUM];
-
- const struct nft_expr *cur;
- const struct nft_expr *last;
-};
-
/* Store/load an u8, u16 or u64 integer to/from the u32 data register.
*
* Note, when using concatenations, register allocation happens at 32-bit
@@ -427,8 +416,6 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp);
void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr);
int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
const struct nft_expr *expr, bool reset);
-bool nft_expr_reduce_bitwise(struct nft_regs_track *track,
- const struct nft_expr *expr);
struct nft_set_ext;
@@ -941,7 +928,6 @@ struct nft_offload_ctx;
* @destroy_clone: destruction clone function
* @dump: function to dump parameters
* @validate: validate expression, called during loop detection
- * @reduce: reduce expression
* @gc: garbage collection expression
* @offload: hardware offload expression
* @offload_action: function to report true/false to allocate one slot or not in the flow
@@ -975,8 +961,6 @@ struct nft_expr_ops {
bool reset);
int (*validate)(const struct nft_ctx *ctx,
const struct nft_expr *expr);
- bool (*reduce)(struct nft_regs_track *track,
- const struct nft_expr *expr);
bool (*gc)(struct net *net,
const struct nft_expr *expr);
int (*offload)(struct nft_offload_ctx *ctx,
@@ -1954,20 +1938,4 @@ static inline u64 nft_net_tstamp(const struct net *net)
return nft_pernet(net)->tstamp;
}
-#define __NFT_REDUCE_READONLY 1UL
-#define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY
-
-void nft_reg_track_update(struct nft_regs_track *track,
- const struct nft_expr *expr, u8 dreg, u8 len);
-void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len);
-void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg);
-
-static inline bool nft_reg_track_cmp(struct nft_regs_track *track,
- const struct nft_expr *expr, u8 dreg)
-{
- return track->regs[dreg].selector &&
- track->regs[dreg].selector->ops == expr->ops &&
- track->regs[dreg].num_reg == 0;
-}
-
#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
index 7370fba844ef..e0422456f27b 100644
--- a/include/net/netfilter/nft_fib.h
+++ b/include/net/netfilter/nft_fib.h
@@ -66,6 +66,4 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
void nft_fib_store_result(void *reg, const struct nft_fib *priv,
const struct net_device *dev);
-bool nft_fib_reduce(struct nft_regs_track *track,
- const struct nft_expr *expr);
#endif
diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
index d602263590fe..f74e63290603 100644
--- a/include/net/netfilter/nft_meta.h
+++ b/include/net/netfilter/nft_meta.h
@@ -43,9 +43,6 @@ void nft_meta_set_destroy(const struct nft_ctx *ctx,
int nft_meta_set_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr);
-bool nft_meta_get_reduce(struct nft_regs_track *track,
- const struct nft_expr *expr);
-
struct nft_inner_tun_ctx;
void nft_meta_inner_eval(const struct nft_expr *expr,
struct nft_regs *regs, const struct nft_pktinfo *pkt,