From 5fb9a9fb71a33be61d7d8e8ba4597bfb18d604d0 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 22 Jun 2023 18:59:19 +0200 Subject: wifi: cfg80211: fix sband iftype data lookup for AP_VLAN AP_VLAN interfaces are virtual, so doesn't really exist as a type for capabilities. When passed in as a type, AP is the one that's really intended. Fixes: c4cbaf7973a7 ("cfg80211: Add support for HE") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20230622165919.46841-1-nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7c7d03aa9d06..d6fa7c8767ad 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -562,6 +562,9 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband, if (WARN_ON(iftype >= NL80211_IFTYPE_MAX)) return NULL; + if (iftype == NL80211_IFTYPE_AP_VLAN) + iftype = NL80211_IFTYPE_AP; + for (i = 0; i < sband->n_iftype_data; i++) { const struct ieee80211_sband_iftype_data *data = &sband->iftype_data[i]; -- cgit v1.2.3 From 8a70ed9520c5fafaac91053cacdd44625c39e188 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Aug 2023 08:49:23 +0000 Subject: tcp: add missing family to tcp_set_ca_state() tracepoint Before this code is copied, add the missing family, as we did in commit 3dd344ea84e1 ("net: tracepoint: exposing sk_family in all tcp:tracepoints") Fixes: 15fcdf6ae116 ("tcp: Add tracepoint for tcp_set_ca_state") Signed-off-by: Eric Dumazet Cc: Ping Gan Cc: Manjusaka Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230808084923.2239142-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/trace/events/tcp.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index bf06db8d2046..7b1ddffa3dfc 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -381,6 +381,7 @@ TRACE_EVENT(tcp_cong_state_set, __field(const void *, skaddr) __field(__u16, sport) __field(__u16, dport) + __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) @@ -396,6 +397,7 @@ TRACE_EVENT(tcp_cong_state_set, __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); + __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; @@ -409,7 +411,8 @@ TRACE_EVENT(tcp_cong_state_set, __entry->cong_state = ca_state; ), - TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u", + TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u", + show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, -- cgit v1.2.3 From 809e4dc71a0f2b8d2836035d98603694fff11d5d Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Fri, 4 Aug 2023 03:37:38 -0400 Subject: bpf, sockmap: Fix bug that strp_done cannot be called strp_done is only called when psock->progs.stream_parser is not NULL, but stream_parser was set to NULL by sk_psock_stop_strp(), called by sk_psock_drop() earlier. So, strp_done can never be called. Introduce SK_PSOCK_RX_ENABLED to mark whether there is strp on psock. Change the condition for calling strp_done from judging whether stream_parser is set to judging whether this flag is set. This flag is only set once when strp_init() succeeds, and will never be cleared later. Fixes: c0d95d3380ee ("bpf, sockmap: Re-evaluate proto ops when psock is removed from sockmap") Signed-off-by: Xu Kuohai Reviewed-by: John Fastabend Link: https://lore.kernel.org/r/20230804073740.194770-3-xukuohai@huaweicloud.com Signed-off-by: Martin KaFai Lau --- include/linux/skmsg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 054d7911bfc9..c1637515a8a4 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -62,6 +62,7 @@ struct sk_psock_progs { enum sk_psock_state_bits { SK_PSOCK_TX_ENABLED, + SK_PSOCK_RX_STRP_ENABLED, }; struct sk_psock_link { -- cgit v1.2.3 From 5f68718b34a531a556f2f50300ead2862278da26 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 9 Aug 2023 14:31:54 +0200 Subject: netfilter: nf_tables: GC transaction API to avoid race with control plane The set types rhashtable and rbtree use a GC worker to reclaim memory. From system work queue, in periodic intervals, a scan of the table is done. The major caveat here is that the nft transaction mutex is not held. This causes a race between control plane and GC when they attempt to delete the same element. We cannot grab the netlink mutex from the work queue, because the control plane has to wait for the GC work queue in case the set is to be removed, so we get following deadlock: cpu 1 cpu2 GC work transaction comes in , lock nft mutex `acquire nft mutex // BLOCKS transaction asks to remove the set set destruction calls cancel_work_sync() cancel_work_sync will now block forever, because it is waiting for the mutex the caller already owns. This patch adds a new API that deals with garbage collection in two steps: 1) Lockless GC of expired elements sets on the NFT_SET_ELEM_DEAD_BIT so they are not visible via lookup. Annotate current GC sequence in the GC transaction. Enqueue GC transaction work as soon as it is full. If ruleset is updated, then GC transaction is aborted and retried later. 2) GC work grabs the mutex. If GC sequence has changed then this GC transaction lost race with control plane, abort it as it contains stale references to objects and let GC try again later. If the ruleset is intact, then this GC transaction deactivates and removes the elements and it uses call_rcu() to destroy elements. Note that no elements are removed from GC lockless path, the _DEAD bit is set and pointers are collected. GC catchall does not remove the elements anymore too. There is a new set->dead flag that is set on to abort the GC transaction to deal with set->ops->destroy() path which removes the remaining elements in the set from commit_release, where no mutex is held. To deal with GC when mutex is held, which allows safe deactivate and removal, add sync GC API which releases the set element object via call_rcu(). This is used by rbtree and pipapo backends which also perform garbage collection from control plane path. Since element removal from sets can happen from control plane and element garbage collection/timeout, it is necessary to keep the set structure alive until all elements have been deactivated and destroyed. We cannot do a cancel_work_sync or flush_work in nft_set_destroy because its called with the transaction mutex held, but the aforementioned async work queue might be blocked on the very mutex that nft_set_destroy() callchain is sitting on. This gives us the choice of ABBA deadlock or UaF. To avoid both, add set->refs refcount_t member. The GC API can then increment the set refcount and release it once the elements have been free'd. Set backends are adapted to use the GC transaction API in a follow up patch entitled: ("netfilter: nf_tables: use gc transaction API in set backends") This is joint work with Florian Westphal. Fixes: cfed7e1b1f8e ("netfilter: nf_tables: add set garbage collection helpers") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 64 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 640441a2f926..7256e9c80477 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -512,6 +512,7 @@ struct nft_set_elem_expr { * * @list: table set list node * @bindings: list of set bindings + * @refs: internal refcounting for async set destruction * @table: table this set belongs to * @net: netnamespace this set belongs to * @name: name of the set @@ -541,6 +542,7 @@ struct nft_set_elem_expr { struct nft_set { struct list_head list; struct list_head bindings; + refcount_t refs; struct nft_table *table; possible_net_t net; char *name; @@ -562,7 +564,8 @@ struct nft_set { struct list_head pending_update; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - u16 flags:14, + u16 flags:13, + dead:1, genmask:2; u8 klen; u8 dlen; @@ -1592,6 +1595,32 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) clear_bit(NFT_SET_ELEM_BUSY_BIT, word); } +#define NFT_SET_ELEM_DEAD_MASK (1 << 3) + +#if defined(__LITTLE_ENDIAN_BITFIELD) +#define NFT_SET_ELEM_DEAD_BIT 3 +#elif defined(__BIG_ENDIAN_BITFIELD) +#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 3) +#else +#error +#endif + +static inline void nft_set_elem_dead(struct nft_set_ext *ext) +{ + unsigned long *word = (unsigned long *)ext; + + BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); + set_bit(NFT_SET_ELEM_DEAD_BIT, word); +} + +static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) +{ + unsigned long *word = (unsigned long *)ext; + + BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); + return test_bit(NFT_SET_ELEM_DEAD_BIT, word); +} + /** * struct nft_trans - nf_tables object update in transaction * @@ -1732,6 +1761,38 @@ struct nft_trans_flowtable { #define nft_trans_flowtable_flags(trans) \ (((struct nft_trans_flowtable *)trans->data)->flags) +#define NFT_TRANS_GC_BATCHCOUNT 256 + +struct nft_trans_gc { + struct list_head list; + struct net *net; + struct nft_set *set; + u32 seq; + u8 count; + void *priv[NFT_TRANS_GC_BATCHCOUNT]; + struct rcu_head rcu; +}; + +struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, + unsigned int gc_seq, gfp_t gfp); +void nft_trans_gc_destroy(struct nft_trans_gc *trans); + +struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, + unsigned int gc_seq, gfp_t gfp); +void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc); + +struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp); +void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans); + +void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv); + +struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, + unsigned int gc_seq); + +void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); @@ -1758,6 +1819,7 @@ struct nftables_pernet { struct mutex commit_mutex; u64 table_handle; unsigned int base_seq; + unsigned int gc_seq; }; extern unsigned int nf_tables_net_id; -- cgit v1.2.3 From a2dd0233cbc4d8a0abb5f64487487ffc9265beb5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 9 Aug 2023 15:00:36 +0200 Subject: netfilter: nf_tables: remove busy mark and gc batch API Ditch it, it has been replace it by the GC transaction API and it has no clients anymore. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 98 ++------------------------------------- 1 file changed, 3 insertions(+), 95 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 7256e9c80477..35870858ddf2 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -599,7 +599,6 @@ struct nft_set *nft_set_lookup_global(const struct net *net, struct nft_set_ext *nft_set_catchall_lookup(const struct net *net, const struct nft_set *set); -void *nft_set_catchall_gc(const struct nft_set *set); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { @@ -816,62 +815,6 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, const struct nft_set *set, void *elem); -/** - * struct nft_set_gc_batch_head - nf_tables set garbage collection batch - * - * @rcu: rcu head - * @set: set the elements belong to - * @cnt: count of elements - */ -struct nft_set_gc_batch_head { - struct rcu_head rcu; - const struct nft_set *set; - unsigned int cnt; -}; - -#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \ - sizeof(struct nft_set_gc_batch_head)) / \ - sizeof(void *)) - -/** - * struct nft_set_gc_batch - nf_tables set garbage collection batch - * - * @head: GC batch head - * @elems: garbage collection elements - */ -struct nft_set_gc_batch { - struct nft_set_gc_batch_head head; - void *elems[NFT_SET_GC_BATCH_SIZE]; -}; - -struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, - gfp_t gfp); -void nft_set_gc_batch_release(struct rcu_head *rcu); - -static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb) -{ - if (gcb != NULL) - call_rcu(&gcb->head.rcu, nft_set_gc_batch_release); -} - -static inline struct nft_set_gc_batch * -nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb, - gfp_t gfp) -{ - if (gcb != NULL) { - if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems)) - return gcb; - nft_set_gc_batch_complete(gcb); - } - return nft_set_gc_batch_alloc(set, gfp); -} - -static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, - void *elem) -{ - gcb->elems[gcb->head.cnt++] = elem; -} - struct nft_expr_ops; /** * struct nft_expr_type - nf_tables expression type @@ -1560,47 +1503,12 @@ static inline void nft_set_elem_change_active(const struct net *net, #endif /* IS_ENABLED(CONFIG_NF_TABLES) */ -/* - * We use a free bit in the genmask field to indicate the element - * is busy, meaning it is currently being processed either by - * the netlink API or GC. - * - * Even though the genmask is only a single byte wide, this works - * because the extension structure if fully constant once initialized, - * so there are no non-atomic write accesses unless it is already - * marked busy. - */ -#define NFT_SET_ELEM_BUSY_MASK (1 << 2) - -#if defined(__LITTLE_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_BUSY_BIT 2 -#elif defined(__BIG_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) -#else -#error -#endif - -static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext) -{ - unsigned long *word = (unsigned long *)ext; - - BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); - return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word); -} - -static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) -{ - unsigned long *word = (unsigned long *)ext; - - clear_bit(NFT_SET_ELEM_BUSY_BIT, word); -} - -#define NFT_SET_ELEM_DEAD_MASK (1 << 3) +#define NFT_SET_ELEM_DEAD_MASK (1 << 2) #if defined(__LITTLE_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_DEAD_BIT 3 +#define NFT_SET_ELEM_DEAD_BIT 2 #elif defined(__BIG_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 3) +#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) #else #error #endif -- cgit v1.2.3