From 023223dfbfb34fcc9b7dd41e21fbf9a5d5237989 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 17 Dec 2021 20:37:34 +0100 Subject: netfilter: nf_tables: make counter support built-in Make counter support built-in to allow for direct call in case of CONFIG_RETPOLINE. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 0fa5a6d98a00..b6fb1fdff9b2 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -7,6 +7,7 @@ extern struct nft_expr_type nft_imm_type; extern struct nft_expr_type nft_cmp_type; +extern struct nft_expr_type nft_counter_type; extern struct nft_expr_type nft_lookup_type; extern struct nft_expr_type nft_bitwise_type; extern struct nft_expr_type nft_byteorder_type; @@ -21,6 +22,7 @@ extern struct nft_expr_type nft_last_type; #ifdef CONFIG_NETWORK_SECMARK extern struct nft_object_type nft_secmark_obj_type; #endif +extern struct nft_object_type nft_counter_obj_type; int nf_tables_core_module_init(void); void nf_tables_core_module_exit(void); @@ -120,6 +122,8 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext); +void nft_counter_init_seqcount(void); + struct nft_expr; struct nft_regs; struct nft_pktinfo; @@ -143,4 +147,6 @@ void nft_dynset_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); void nft_rt_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); #endif /* _NET_NF_TABLES_CORE_H */ -- cgit v1.2.3 From 4a6fbdd801e882ee6ca5cdfdc3374f0ae263174c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 17 Dec 2021 11:29:56 +0100 Subject: netfilter: conntrack: tag conntracks picked up in local out hook This allows to identify flows that originate from local machine in a followup patch. It would be possible to make this a ->status bit instead. For now I did not do that yet because I don't have a use-case for exposing this info to userspace. If one comes up the toggle can be replaced with a status bit. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index d24b0a34c8f0..871489df63c6 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -95,6 +95,7 @@ struct nf_conn { unsigned long status; u16 cpu; + u16 local_origin:1; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) -- cgit v1.2.3 From 613a0c67d12f33dcbeec2836f5fe60d05b4c18c0 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Sun, 26 Dec 2021 01:12:41 +0800 Subject: netfilter: conntrack: Use max() instead of doing it manually Fix following coccicheck warning: ./include/net/netfilter/nf_conntrack.h:282:16-17: WARNING opportunity for max(). Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 871489df63c6..a4a14f3a5e38 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -279,7 +279,7 @@ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; - return timeout > 0 ? timeout : 0; + return max(timeout, 0); } static inline bool nf_ct_is_expired(const struct nf_conn *ct) -- cgit v1.2.3 From 719774377622bc4025d2a74f551b5dc2158c6c30 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 7 Jan 2022 05:03:22 +0100 Subject: netfilter: conntrack: convert to refcount_t api Convert nf_conn reference counting from atomic_t to refcount_t based api. refcount_t api provides more runtime sanity checks and will warn on certain constructs, e.g. refcount_inc() on a zero reference count, which usually indicates use-after-free. For this reason template allocation is changed to init the refcount to 1, the subsequenct add operations are removed. Likewise, init_conntrack() is changed to set the initial refcount to 1 instead refcount_inc(). This is safe because the new entry is not (yet) visible to other cpus. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 700ea077ce2d..a03f7a80b9ab 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -2,7 +2,7 @@ #ifndef _NF_CONNTRACK_COMMON_H #define _NF_CONNTRACK_COMMON_H -#include +#include #include struct ip_conntrack_stat { @@ -25,19 +25,19 @@ struct ip_conntrack_stat { #define NFCT_PTRMASK ~(NFCT_INFOMASK) struct nf_conntrack { - atomic_t use; + refcount_t use; }; void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) { - if (nfct && atomic_dec_and_test(&nfct->use)) + if (nfct && refcount_dec_and_test(&nfct->use)) nf_conntrack_destroy(nfct); } static inline void nf_conntrack_get(struct nf_conntrack *nfct) { if (nfct) - atomic_inc(&nfct->use); + refcount_inc(&nfct->use); } #endif /* _NF_CONNTRACK_COMMON_H */ -- cgit v1.2.3 From 3fce16493dc1aa2c9af3d7e7bd360dfe203a3e6a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 7 Jan 2022 05:03:23 +0100 Subject: netfilter: core: move ip_ct_attach indirection to struct nf_ct_hook ip_ct_attach predates struct nf_ct_hook, we can place it there and remove the exported symbol. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 3fda1a508733..e0e3f3355ab1 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -440,7 +440,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include -extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu; void nf_ct_attach(struct sk_buff *, const struct sk_buff *); struct nf_conntrack_tuple; bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, @@ -463,6 +462,7 @@ struct nf_ct_hook { void (*destroy)(struct nf_conntrack *); bool (*get_tuple_skb)(struct nf_conntrack_tuple *, const struct sk_buff *); + void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); }; extern struct nf_ct_hook __rcu *nf_ct_hook; -- cgit v1.2.3 From 285c8a7a58158cb1805c97ff03875df2ba2ea1fe Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 7 Jan 2022 05:03:24 +0100 Subject: netfilter: make function op structures const No functional changes, these structures should be const. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index e0e3f3355ab1..15e71bfff726 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -381,13 +381,13 @@ struct nf_nat_hook { enum ip_conntrack_dir dir); }; -extern struct nf_nat_hook __rcu *nf_nat_hook; +extern const struct nf_nat_hook __rcu *nf_nat_hook; static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { #if IS_ENABLED(CONFIG_NF_NAT) - struct nf_nat_hook *nat_hook; + const struct nf_nat_hook *nat_hook; rcu_read_lock(); nat_hook = rcu_dereference(nf_nat_hook); @@ -464,7 +464,7 @@ struct nf_ct_hook { const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); }; -extern struct nf_ct_hook __rcu *nf_ct_hook; +extern const struct nf_ct_hook __rcu *nf_ct_hook; struct nlattr; @@ -479,7 +479,7 @@ struct nfnl_ct_hook { void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, s32 off); }; -extern struct nfnl_ct_hook __rcu *nfnl_ct_hook; +extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook; /** * nf_skb_duplicated - TEE target has sent a packet -- cgit v1.2.3 From 6ae7989c9af0d98ab64196f4f4c6f6499454bd23 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 7 Jan 2022 05:03:25 +0100 Subject: netfilter: conntrack: avoid useless indirection during conntrack destruction nf_ct_put() results in a usesless indirection: nf_ct_put -> nf_conntrack_put -> nf_conntrack_destroy -> rcu readlock + indirect call of ct_hooks->destroy(). There are two _put helpers: nf_ct_put and nf_conntrack_put. The latter is what should be used in code that MUST NOT cause a linker dependency on the conntrack module (e.g. calls from core network stack). Everyone else should call nf_ct_put() instead. A followup patch will convert a few nf_conntrack_put() calls to nf_ct_put(), in particular from modules that already have a conntrack dependency such as act_ct or even nf_conntrack itself. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 2 ++ include/net/netfilter/nf_conntrack.h | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index a03f7a80b9ab..2770db2fa080 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -29,6 +29,8 @@ struct nf_conntrack { }; void nf_conntrack_destroy(struct nf_conntrack *nfct); + +/* like nf_ct_put, but without module dependency on nf_conntrack */ static inline void nf_conntrack_put(struct nf_conntrack *nfct) { if (nfct && refcount_dec_and_test(&nfct->use)) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index a4a14f3a5e38..8731d5bcb47d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -76,6 +76,8 @@ struct nf_conn { * Hint, SKB address this struct and refcnt via skb->_nfct and * helpers nf_conntrack_get() and nf_conntrack_put(). * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, + * except that the latter uses internal indirection and does not + * result in a conntrack module dependency. * beware nf_ct_get() is different and don't inc refcnt. */ struct nf_conntrack ct_general; @@ -170,11 +172,13 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) return (struct nf_conn *)(nfct & NFCT_PTRMASK); } +void nf_ct_destroy(struct nf_conntrack *nfct); + /* decrement reference count on a conntrack */ static inline void nf_ct_put(struct nf_conn *ct) { - WARN_ON(!ct); - nf_conntrack_put(&ct->ct_general); + if (ct && refcount_dec_and_test(&ct->ct_general.use)) + nf_ct_destroy(&ct->ct_general); } /* Protocol module loading */ -- cgit v1.2.3 From 6316136ec6e3dd1c302f7e7289a9ee46ecc610ae Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 7 Jan 2022 15:46:16 +0100 Subject: netfilter: egress: avoid a lockdep splat include/linux/netfilter_netdev.h:97 suspicious rcu_dereference_check() usage! 2 locks held by sd-resolve/1100: 0: ..(rcu_read_lock_bh){1:3}, at: ip_finish_output2 1: ..(rcu_read_lock_bh){1:3}, at: __dev_queue_xmit __dev_queue_xmit+0 .. The helper has two callers, one uses rcu_read_lock, the other rcu_read_lock_bh(). Annotate the dereference to reflect this. Fixes: 42df6e1d221dd ("netfilter: Introduce egress hook") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_netdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index b71b57a83bb4..b4dd96e4dc8d 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -94,7 +94,7 @@ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, return skb; #endif - e = rcu_dereference(dev->nf_hooks_egress); + e = rcu_dereference_check(dev->nf_hooks_egress, rcu_read_lock_bh_held()); if (!e) return skb; -- cgit v1.2.3 From 2c865a8a28a10e9800a3dd07ca339d24563e3d65 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 9 Jan 2022 17:11:19 +0100 Subject: netfilter: nf_tables: add rule blob layout This patch adds a blob layout per chain to represent the ruleset in the packet datapath. size (unsigned long) struct nft_rule_dp struct nft_expr ... struct nft_rule_dp struct nft_expr ... struct nft_rule_dp (is_last=1) The new structure nft_rule_dp represents the rule in a more compact way (smaller memory footprint) compared to the control-plane nft_rule structure. The ruleset blob is a read-only data structure. The first field contains the blob size, then the rules containing expressions. There is a trailing rule which is used by the tracing infrastructure which is equivalent to the NULL rule marker in the previous representation. The blob size field does not include the size of this trailing rule marker. The ruleset blob is generated from the commit path. This patch reuses the infrastructure available since 0cbc06b3faba ("netfilter: nf_tables: remove synchronize_rcu in commit phase") to build the array of rules per chain. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a0d9e0b47ab8..5a046b01bdab 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -974,6 +974,20 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, #define NFT_CHAIN_POLICY_UNSET U8_MAX +struct nft_rule_dp { + u64 is_last:1, + dlen:12, + handle:42; /* for tracing */ + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_expr)))); +}; + +struct nft_rule_blob { + unsigned long size; + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_rule_dp)))); +}; + /** * struct nft_chain - nf_tables chain * @@ -987,8 +1001,8 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, * @name: name of the chain */ struct nft_chain { - struct nft_rule *__rcu *rules_gen_0; - struct nft_rule *__rcu *rules_gen_1; + struct nft_rule_blob __rcu *blob_gen_0; + struct nft_rule_blob __rcu *blob_gen_1; struct list_head rules; struct list_head list; struct rhlist_head rhlhead; @@ -1003,7 +1017,7 @@ struct nft_chain { u8 *udata; /* Only used during control plane commit phase: */ - struct nft_rule **rules_next; + struct nft_rule_blob *blob_next; }; int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); @@ -1321,7 +1335,7 @@ struct nft_traceinfo { const struct nft_pktinfo *pkt; const struct nft_base_chain *basechain; const struct nft_chain *chain; - const struct nft_rule *rule; + const struct nft_rule_dp *rule; const struct nft_verdict *verdict; enum nft_trace_types type; bool packet_dumped; -- cgit v1.2.3 From 642c8eff5c6099dfde386ca3906fa55dc98f9ade Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 9 Jan 2022 17:11:20 +0100 Subject: netfilter: nf_tables: add NFT_REG32_NUM Add a definition including the maximum number of 32-bits registers that are used a scratchpad memory area to store data. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5a046b01bdab..515e5db97e01 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -105,6 +105,8 @@ struct nft_data { }; } __attribute__((aligned(__alignof__(u64)))); +#define NFT_REG32_NUM 20 + /** * struct nft_regs - nf_tables register set * @@ -115,7 +117,7 @@ struct nft_data { */ struct nft_regs { union { - u32 data[20]; + u32 data[NFT_REG32_NUM]; struct nft_verdict verdict; }; }; -- cgit v1.2.3 From 12e4ecfa244be2f117ef5304d2d866b65e70bff3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 9 Jan 2022 17:11:21 +0100 Subject: netfilter: nf_tables: add register tracking infrastructure This patch adds new infrastructure to skip redundant selector store operations on the same register to achieve a performance boost from the packet path. This is particularly noticeable in pure linear rulesets but it also helps in rulesets which are already heaving relying in maps to avoid ruleset linear inspection. The idea is to keep data of the most recurrent store operations on register to reuse them with cmp and lookup expressions. This infrastructure allows for dynamic ruleset updates since the ruleset blob reduction happens from the kernel. Userspace still needs to be updated to maximize register utilization to cooperate to improve register data reuse / reduce number of store on register operations. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 515e5db97e01..1c37ce61daea 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -122,6 +122,16 @@ struct nft_regs { }; }; +struct nft_regs_track { + struct { + const struct nft_expr *selector; + const struct nft_expr *bitwise; + } regs[NFT_REG32_NUM]; + + const struct nft_expr *cur; + const struct nft_expr *last; +}; + /* Store/load an u8, u16 or u64 integer to/from the u32 data register. * * Note, when using concatenations, register allocation happens at 32-bit @@ -886,6 +896,8 @@ struct nft_expr_ops { int (*validate)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data); + bool (*reduce)(struct nft_regs_track *track, + const struct nft_expr *expr); bool (*gc)(struct net *net, const struct nft_expr *expr); int (*offload)(struct nft_offload_ctx *ctx, -- cgit v1.2.3 From be5650f8f47e8cffbbbcad08b71103685e971f20 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 9 Jan 2022 17:11:24 +0100 Subject: netfilter: nft_bitwise: track register operations Check if the destination register already contains the data that this bitwise expression performs. This allows to skip this redundant operation. If the destination contains a different bitwise operation, cancel the register tracking information. If the destination contains no bitwise operation, update the register tracking information. Update the payload and meta expression to check if this bitwise operation has been already performed on the register. Hence, both the payload/meta and the bitwise expressions are reduced. There is also a special case: If source register != destination register and source register is not updated by a previous bitwise operation, then transfer selector from the source register to the destination register. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1c37ce61daea..eaf55da9a205 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -358,6 +358,8 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src); void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr); +bool nft_expr_reduce_bitwise(struct nft_regs_track *track, + const struct nft_expr *expr); struct nft_set_ext; -- cgit v1.2.3