From ed3557c947e1d4164d370cc2d69dd7eb92706f0a Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 3 Jan 2023 17:56:39 +0100
Subject: ieee802154: Add support for user scanning requests

The ieee802154 layer should be able to scan a set of channels in order
to look for beacons advertizing PANs. Supporting this involves adding
two user commands: triggering scans and aborting scans. The user should
also be notified when a new beacon is received and also upon scan
termination.

A scan request structure is created to list the requirements and to be
accessed asynchronously when changing channels or receiving beacons.

Mac layers may now implement the ->trigger_scan() and ->abort_scan()
hooks.

Co-developed-by: David Girault <david.girault@qorvo.com>
Signed-off-by: David Girault <david.girault@qorvo.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Alexander Aring <aahringo@redhat.com>
Link: https://lore.kernel.org/r/20230103165644.432209-2-miquel.raynal@bootlin.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 include/net/cfg802154.h | 25 +++++++++++++++++++++
 include/net/nl802154.h  | 58 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index d09c393d229f..76d4f95e9974 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -18,6 +18,7 @@
 
 struct wpan_phy;
 struct wpan_phy_cca;
+struct cfg802154_scan_request;
 
 #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
 struct ieee802154_llsec_device_key;
@@ -67,6 +68,10 @@ struct cfg802154_ops {
 				struct wpan_dev *wpan_dev, bool mode);
 	int	(*set_ackreq_default)(struct wpan_phy *wpan_phy,
 				      struct wpan_dev *wpan_dev, bool ackreq);
+	int	(*trigger_scan)(struct wpan_phy *wpan_phy,
+				struct cfg802154_scan_request *request);
+	int	(*abort_scan)(struct wpan_phy *wpan_phy,
+			      struct wpan_dev *wpan_dev);
 #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
 	void	(*get_llsec_table)(struct wpan_phy *wpan_phy,
 				   struct wpan_dev *wpan_dev,
@@ -278,6 +283,26 @@ struct ieee802154_coord_desc {
 	bool gts_permit;
 };
 
+/**
+ * struct cfg802154_scan_request - Scan request
+ *
+ * @type: type of scan to be performed
+ * @page: page on which to perform the scan
+ * @channels: channels in te %page to be scanned
+ * @duration: time spent on each channel, calculated with:
+ *            aBaseSuperframeDuration * (2 ^ duration + 1)
+ * @wpan_dev: the wpan device on which to perform the scan
+ * @wpan_phy: the wpan phy on which to perform the scan
+ */
+struct cfg802154_scan_request {
+	enum nl802154_scan_types type;
+	u8 page;
+	u32 channels;
+	u8 duration;
+	struct wpan_dev *wpan_dev;
+	struct wpan_phy *wpan_phy;
+};
+
 struct ieee802154_llsec_key_id {
 	u8 mode;
 	u8 id;
diff --git a/include/net/nl802154.h b/include/net/nl802154.h
index b79a89d5207c..c267fa1c5aac 100644
--- a/include/net/nl802154.h
+++ b/include/net/nl802154.h
@@ -73,6 +73,9 @@ enum nl802154_commands {
 	NL802154_CMD_DEL_SEC_LEVEL,
 
 	NL802154_CMD_SCAN_EVENT,
+	NL802154_CMD_TRIGGER_SCAN,
+	NL802154_CMD_ABORT_SCAN,
+	NL802154_CMD_SCAN_DONE,
 
 	/* add new commands above here */
 
@@ -134,6 +137,13 @@ enum nl802154_attrs {
 	NL802154_ATTR_NETNS_FD,
 
 	NL802154_ATTR_COORDINATOR,
+	NL802154_ATTR_SCAN_TYPE,
+	NL802154_ATTR_SCAN_FLAGS,
+	NL802154_ATTR_SCAN_CHANNELS,
+	NL802154_ATTR_SCAN_PREAMBLE_CODES,
+	NL802154_ATTR_SCAN_MEAN_PRF,
+	NL802154_ATTR_SCAN_DURATION,
+	NL802154_ATTR_SCAN_DONE_REASON,
 
 	/* add attributes here, update the policy in nl802154.c */
 
@@ -259,6 +269,54 @@ enum nl802154_coord {
 	NL802154_COORD_MAX,
 };
 
+/**
+ * enum nl802154_scan_types - Scan types
+ *
+ * @__NL802154_SCAN_INVALID: scan type number 0 is reserved
+ * @NL802154_SCAN_ED: An ED scan allows a device to obtain a measure of the peak
+ *	energy in each requested channel
+ * @NL802154_SCAN_ACTIVE: Locate any coordinator transmitting Beacon frames using
+ *	a Beacon Request command
+ * @NL802154_SCAN_PASSIVE: Locate any coordinator transmitting Beacon frames
+ * @NL802154_SCAN_ORPHAN: Relocate coordinator following a loss of synchronisation
+ * @NL802154_SCAN_ENHANCED_ACTIVE: Same as Active using Enhanced Beacon Request
+ *	command instead of Beacon Request command
+ * @NL802154_SCAN_RIT_PASSIVE: Passive scan for RIT Data Request command frames
+ *	instead of Beacon frames
+ * @NL802154_SCAN_ATTR_MAX: Maximum SCAN attribute number
+ */
+enum nl802154_scan_types {
+	__NL802154_SCAN_INVALID,
+	NL802154_SCAN_ED,
+	NL802154_SCAN_ACTIVE,
+	NL802154_SCAN_PASSIVE,
+	NL802154_SCAN_ORPHAN,
+	NL802154_SCAN_ENHANCED_ACTIVE,
+	NL802154_SCAN_RIT_PASSIVE,
+
+	/* keep last */
+	NL802154_SCAN_ATTR_MAX,
+};
+
+/**
+ * enum nl802154_scan_done_reasons - End of scan reasons
+ *
+ * @__NL802154_SCAN_DONE_REASON_INVALID: scan done reason number 0 is reserved.
+ * @NL802154_SCAN_DONE_REASON_FINISHED: The scan just finished naturally after
+ *	going through all the requested and possible (complex) channels.
+ * @NL802154_SCAN_DONE_REASON_ABORTED: The scan was aborted upon user request.
+ *	a Beacon Request command
+ * @NL802154_SCAN_DONE_REASON_MAX: Maximum scan done reason attribute number.
+ */
+enum nl802154_scan_done_reasons {
+	__NL802154_SCAN_DONE_REASON_INVALID,
+	NL802154_SCAN_DONE_REASON_FINISHED,
+	NL802154_SCAN_DONE_REASON_ABORTED,
+
+	/* keep last */
+	NL802154_SCAN_DONE_REASON_MAX,
+};
+
 /**
  * enum nl802154_cca_modes - cca modes
  *
-- 
cgit v1.2.3


From 44def58f5835bbfaf81902c88460fd86a551f4b7 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 3 Jan 2023 17:56:40 +0100
Subject: ieee802154: Define a beacon frame header

This definition will be used when adding support for scanning and defines
the content of a beacon frame header as in the 802.15.4 specification.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Alexander Aring <aahringo@redhat.com>
Link: https://lore.kernel.org/r/20230103165644.432209-3-miquel.raynal@bootlin.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 include/net/ieee802154_netdev.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h
index 4c33a20ea57f..2f2196049a86 100644
--- a/include/net/ieee802154_netdev.h
+++ b/include/net/ieee802154_netdev.h
@@ -38,6 +38,42 @@
 
 #include <net/cfg802154.h>
 
+struct ieee802154_beacon_hdr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	u16 beacon_order:4,
+	    superframe_order:4,
+	    final_cap_slot:4,
+	    battery_life_ext:1,
+	    reserved0:1,
+	    pan_coordinator:1,
+	    assoc_permit:1;
+	u8  gts_count:3,
+	    gts_reserved:4,
+	    gts_permit:1;
+	u8  pend_short_addr_count:3,
+	    reserved1:1,
+	    pend_ext_addr_count:3,
+	    reserved2:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	u16 assoc_permit:1,
+	    pan_coordinator:1,
+	    reserved0:1,
+	    battery_life_ext:1,
+	    final_cap_slot:4,
+	    superframe_order:4,
+	    beacon_order:4;
+	u8  gts_permit:1,
+	    gts_reserved:4,
+	    gts_count:3;
+	u8  reserved2:1,
+	    pend_ext_addr_count:3,
+	    reserved1:1,
+	    pend_short_addr_count:3;
+#else
+#error	"Please fix <asm/byteorder.h>"
+#endif
+} __packed;
+
 struct ieee802154_sechdr {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
 	u8 level:3,
-- 
cgit v1.2.3


From d2aaf2a01792ccf214f933d0b1ca2d41788c7b16 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 3 Jan 2023 17:56:41 +0100
Subject: ieee802154: Introduce a helper to validate a channel

This helper for now only checks if the page member and channel member
are valid (in the specification range) and supported (by checking the
device capabilities). Soon two new parameters will be introduced and
having this helper will let us only modify its content rather than
modifying the logic everywhere else in the subsystem.

There is not functional change.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Alexander Aring <aahringo@redhat.com>
Link: https://lore.kernel.org/r/20230103165644.432209-4-miquel.raynal@bootlin.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 include/net/cfg802154.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index 76d4f95e9974..1184b543fba7 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -246,6 +246,17 @@ static inline void wpan_phy_net_set(struct wpan_phy *wpan_phy, struct net *net)
 	write_pnet(&wpan_phy->_net, net);
 }
 
+static inline bool ieee802154_chan_is_valid(struct wpan_phy *phy,
+					    u8 page, u8 channel)
+{
+	if (page > IEEE802154_MAX_PAGE ||
+	    channel > IEEE802154_MAX_CHANNEL ||
+	    !(phy->supported.channels[page] & BIT(channel)))
+		return false;
+
+	return true;
+}
+
 /**
  * struct ieee802154_addr - IEEE802.15.4 device address
  * @mode: Address mode from frame header. Can be one of:
-- 
cgit v1.2.3


From 5755cd4d9432779027771e43e51d81a2994ed795 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 3 Jan 2023 17:56:42 +0100
Subject: mac802154: Prepare forcing specific symbol duration

The scan logic will bypass the whole ->set_channel() logic from the top
by calling the driver hook to just switch between channels when
required.

We can no longer rely on the "current" page/channel settings to set the
right symbol duration. Let's add these as new parameters to allow
providing the page/channel couple that we want.

There is no functional change.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Alexander Aring <aahringo@redhat.com>
Link: https://lore.kernel.org/r/20230103165644.432209-5-miquel.raynal@bootlin.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 include/net/cfg802154.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index 1184b543fba7..c16ae5d2dc86 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -483,6 +483,7 @@ static inline const char *wpan_phy_name(struct wpan_phy *phy)
 	return dev_name(&phy->dev);
 }
 
-void ieee802154_configure_durations(struct wpan_phy *phy);
+void ieee802154_configure_durations(struct wpan_phy *phy,
+				    unsigned int page, unsigned int channel);
 
 #endif /* __NET_CFG802154_H */
-- 
cgit v1.2.3


From 57588c71177f0bfc08509c2c3a9bfe32850c0786 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 3 Jan 2023 17:56:44 +0100
Subject: mac802154: Handle passive scanning

Implement the core hooks in order to provide the softMAC layer support
for passive scans. Scans are requested by the user and can be aborted.

Changing channels manually is prohibited during scans.

The implementation uses a workqueue triggered at a certain interval
depending on the symbol duration for the current channel and the
duration order provided. More advanced drivers with internal scheduling
capabilities might require additional care but there is none mainline
yet.

Received beacons during a passive scan are processed in a work queue and
their result forwarded to the upper layer.

Active scanning is not supported yet.

Co-developed-by: David Girault <david.girault@qorvo.com>
Signed-off-by: David Girault <david.girault@qorvo.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Alexander Aring <aahringo@redhat.com>
Link: https://lore.kernel.org/r/20230103165644.432209-7-miquel.raynal@bootlin.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 include/net/cfg802154.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index c16ae5d2dc86..0b0f81a945b6 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -314,6 +314,22 @@ struct cfg802154_scan_request {
 	struct wpan_phy *wpan_phy;
 };
 
+/**
+ * struct cfg802154_mac_pkt - MAC packet descriptor (beacon/command)
+ * @node: MAC packets to process list member
+ * @skb: the received sk_buff
+ * @sdata: the interface on which @skb was received
+ * @page: page configuration when @skb was received
+ * @channel: channel configuration when @skb was received
+ */
+struct cfg802154_mac_pkt {
+	struct list_head node;
+	struct sk_buff *skb;
+	struct ieee802154_sub_if_data *sdata;
+	u8 page;
+	u8 channel;
+};
+
 struct ieee802154_llsec_key_id {
 	u8 mode;
 	u8 id;
-- 
cgit v1.2.3


From 57af281e5389b6fefedb3685f86847cbb0055f75 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 6 Oct 2022 21:45:42 +0100
Subject: rxrpc: Tidy up abort generation infrastructure

Tidy up the abort generation infrastructure in the following ways:

 (1) Create an enum and string mapping table to list the reasons an abort
     might be generated in tracing.

 (2) Replace the 3-char string with the values from (1) in the places that
     use that to log the abort source.  This gets rid of a memcpy() in the
     tracepoint.

 (3) Subsume the rxrpc_rx_eproto tracepoint with the rxrpc_abort tracepoint
     and use values from (1) to indicate the trace reason.

 (4) Always make a call to an abort function at the point of the abort
     rather than stashing the values into variables and using goto to get
     to a place where it reported.  The C optimiser will collapse the calls
     together as appropriate.  The abort functions return a value that can
     be returned directly if appropriate.

Note that this extends into afs also at the points where that generates an
abort.  To aid with this, the afs sources need to #define
RXRPC_TRACE_ONLY_DEFINE_ENUMS before including the rxrpc tracing header
because they don't have access to the rxrpc internal structures that some
of the tracepoints make use of.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---
 include/net/af_rxrpc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index d5a5ae926380..ba717eac0229 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -15,6 +15,7 @@ struct key;
 struct sock;
 struct socket;
 struct rxrpc_call;
+enum rxrpc_abort_reason;
 
 enum rxrpc_interruptibility {
 	RXRPC_INTERRUPTIBLE,	/* Call is interruptible */
@@ -55,7 +56,7 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
 int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
 			   struct iov_iter *, size_t *, bool, u32 *, u16 *);
 bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
-			     u32, int, const char *);
+			     u32, int, enum rxrpc_abort_reason);
 void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
 void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
 			   struct sockaddr_rxrpc *);
-- 
cgit v1.2.3


From 9053637e0da783efdb37bbfea6a27b856c0228d7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 5 Jan 2023 22:33:58 -0800
Subject: devlink: remove the registration guarantee of references

The objective of exposing the devlink instance locks to
drivers was to let them use these locks to prevent user space
from accessing the device before it's fully initialized.
This is difficult because devlink_unregister() waits for all
references to be released, meaning that devlink_unregister()
can't itself be called under the instance lock.

To avoid this issue devlink_register() was moved after subobject
registration a while ago. Unfortunately the netdev paths get
a hold of the devlink instances _before_ they are registered.
Ideally netdev should wait for devlink init to finish (synchronizing
on the instance lock). This can't work because we don't know if the
instance will _ever_ be registered (in case of failures it may not).
The other option of returning an error until devlink_register()
is called is unappealing (user space would get a notification
netdev exist but would have to wait arbitrary amount of time
before accessing some of its attributes).

Weaken the guarantees of the devlink references.

Holding a reference will now only guarantee that the memory
of the object is around. Another way of looking at it is that
the reference now protects the object not its "registered" status.
Use devlink instance lock to synchronize unregistration.

This implies that releasing of the "main" reference of the devlink
instance moves from devlink_unregister() to devlink_free().

Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 6a2e4f21779f..425ecef431b7 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1647,6 +1647,8 @@ static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
 	return devlink_alloc_ns(ops, priv_size, &init_net, dev);
 }
 void devlink_set_features(struct devlink *devlink, u64 features);
+int devl_register(struct devlink *devlink);
+void devl_unregister(struct devlink *devlink);
 void devlink_register(struct devlink *devlink);
 void devlink_unregister(struct devlink *devlink);
 void devlink_free(struct devlink *devlink);
-- 
cgit v1.2.3


From 4444bc2116aecdcde87dce80373540adc8bd478b Mon Sep 17 00:00:00 2001
From: Alexander Wetzel <alexander@wetzel-home.de>
Date: Fri, 30 Dec 2022 13:18:49 +0100
Subject: wifi: mac80211: Proper mark iTXQs for resumption

When a running wake_tx_queue() call is aborted due to a hw queue stop
the corresponding iTXQ is not always correctly marked for resumption:
wake_tx_push_queue() can stops the queue run without setting
@IEEE80211_TXQ_STOP_NETIF_TX.

Without the @IEEE80211_TXQ_STOP_NETIF_TX flag __ieee80211_wake_txqs()
will not schedule a new queue run and remaining frames in the queue get
stuck till another frame is queued to it.

Fix the issue for all drivers - also the ones with custom wake_tx_queue
callbacks - by moving the logic into ieee80211_tx_dequeue() and drop the
redundant @txqs_stopped.

@IEEE80211_TXQ_STOP_NETIF_TX is also renamed to @IEEE80211_TXQ_DIRTY to
better describe the flag.

Fixes: c850e31f79f0 ("wifi: mac80211: add internal handler for wake_tx_queue")
Signed-off-by: Alexander Wetzel <alexander@wetzel-home.de>
Link: https://lore.kernel.org/r/20221230121850.218810-1-alexander@wetzel-home.de
Cc: stable@vger.kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 689da327ce2e..e3235b9c02c2 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1832,8 +1832,6 @@ struct ieee80211_vif_cfg {
  * @drv_priv: data area for driver use, will always be aligned to
  *	sizeof(void \*).
  * @txq: the multicast data TX queue
- * @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped,
- *	protected by fq->lock.
  * @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see
  *	&enum ieee80211_offload_flags.
  * @mbssid_tx_vif: Pointer to the transmitting interface if MBSSID is enabled.
@@ -1863,8 +1861,6 @@ struct ieee80211_vif {
 	bool probe_req_reg;
 	bool rx_mcast_action_reg;
 
-	bool txqs_stopped[IEEE80211_NUM_ACS];
-
 	struct ieee80211_vif *mbssid_tx_vif;
 
 	/* must be last */
-- 
cgit v1.2.3


From 952f6c9daf509f7919887c26753884fa530f8622 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Mon, 26 Dec 2022 20:16:09 +0100
Subject: wifi: mac80211: Drop stations iterator where the iterator function
 may sleep

This reverts commit acb99b9b2a08f ("mac80211: Add stations iterator
where the iterator function may sleep"). A different approach was found
for the rtw88 driver where most of the problematic locks were converted
to a driver-local mutex. Drop ieee80211_iterate_stations() because there
are no users of that function.

Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://lore.kernel.org/r/20221226191609.2934234-1-martin.blumenstingl@googlemail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 689da327ce2e..b421a1bfc7c5 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -5888,9 +5888,6 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw,
  * This function iterates over the interfaces associated with a given
  * hardware that are currently active and calls the callback for them.
  * This version can only be used while holding the wiphy mutex.
- * The driver must not call this with a lock held that it can also take in
- * response to callbacks from mac80211, and it must not call this within
- * callbacks made by mac80211 - both would result in deadlocks.
  *
  * @hw: the hardware struct of which the interfaces should be iterated over
  * @iter_flags: iteration flags, see &enum ieee80211_interface_iteration_flags
@@ -5904,24 +5901,6 @@ void ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw,
 						struct ieee80211_vif *vif),
 					     void *data);
 
-/**
- * ieee80211_iterate_stations - iterate stations
- *
- * This function iterates over all stations associated with a given
- * hardware that are currently uploaded to the driver and calls the callback
- * function for them.
- * This function allows the iterator function to sleep, when the iterator
- * function is atomic @ieee80211_iterate_stations_atomic can be used.
- *
- * @hw: the hardware struct of which the interfaces should be iterated over
- * @iterator: the iterator function to call, cannot sleep
- * @data: first argument of the iterator function
- */
-void ieee80211_iterate_stations(struct ieee80211_hw *hw,
-				void (*iterator)(void *data,
-						 struct ieee80211_sta *sta),
-				void *data);
-
 /**
  * ieee80211_iterate_stations_atomic - iterate stations
  *
-- 
cgit v1.2.3


From af6d10345ca76670c1b7c37799f0d5576ccef277 Mon Sep 17 00:00:00 2001
From: Jon Maxwell <jmaxwell37@gmail.com>
Date: Thu, 12 Jan 2023 12:25:32 +1100
Subject: ipv6: remove max_size check inline with ipv4

In ip6_dst_gc() replace:

  if (entries > gc_thresh)

With:

  if (entries > ops->gc_thresh)

Sending Ipv6 packets in a loop via a raw socket triggers an issue where a
route is cloned by ip6_rt_cache_alloc() for each packet sent. This quickly
consumes the Ipv6 max_size threshold which defaults to 4096 resulting in
these warnings:

[1]   99.187805] dst_alloc: 7728 callbacks suppressed
[2] Route cache is full: consider increasing sysctl net.ipv6.route.max_size.
.
.
[300] Route cache is full: consider increasing sysctl net.ipv6.route.max_size.

When this happens the packet is dropped and sendto() gets a network is
unreachable error:

remaining pkt 200557 errno 101
remaining pkt 196462 errno 101
.
.
remaining pkt 126821 errno 101

Implement David Aherns suggestion to remove max_size check seeing that Ipv6
has a GC to manage memory usage. Ipv4 already does not check max_size.

Here are some memory comparisons for Ipv4 vs Ipv6 with the patch:

Test by running 5 instances of a program that sends UDP packets to a raw
socket 5000000 times. Compare Ipv4 and Ipv6 performance with a similar
program.

Ipv4:

Before test:

MemFree:        29427108 kB
Slab:             237612 kB

ip6_dst_cache       1912   2528    256   32    2 : tunables    0    0    0
xfrm_dst_cache         0      0    320   25    2 : tunables    0    0    0
ip_dst_cache        2881   3990    192   42    2 : tunables    0    0    0

During test:

MemFree:        29417608 kB
Slab:             247712 kB

ip6_dst_cache       1912   2528    256   32    2 : tunables    0    0    0
xfrm_dst_cache         0      0    320   25    2 : tunables    0    0    0
ip_dst_cache       44394  44394    192   42    2 : tunables    0    0    0

After test:

MemFree:        29422308 kB
Slab:             238104 kB

ip6_dst_cache       1912   2528    256   32    2 : tunables    0    0    0
xfrm_dst_cache         0      0    320   25    2 : tunables    0    0    0
ip_dst_cache        3048   4116    192   42    2 : tunables    0    0    0

Ipv6 with patch:

Errno 101 errors are not observed anymore with the patch.

Before test:

MemFree:        29422308 kB
Slab:             238104 kB

ip6_dst_cache       1912   2528    256   32    2 : tunables    0    0    0
xfrm_dst_cache         0      0    320   25    2 : tunables    0    0    0
ip_dst_cache        3048   4116    192   42    2 : tunables    0    0    0

During Test:

MemFree:        29431516 kB
Slab:             240940 kB

ip6_dst_cache      11980  12064    256   32    2 : tunables    0    0    0
xfrm_dst_cache         0      0    320   25    2 : tunables    0    0    0
ip_dst_cache        3048   4116    192   42    2 : tunables    0    0    0

After Test:

MemFree:        29441816 kB
Slab:             238132 kB

ip6_dst_cache       1902   2432    256   32    2 : tunables    0    0    0
xfrm_dst_cache         0      0    320   25    2 : tunables    0    0    0
ip_dst_cache        3048   4116    192   42    2 : tunables    0    0    0

Tested-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jon Maxwell <jmaxwell37@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20230112012532.311021-1-jmaxwell37@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dst_ops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 88ff7bb2bb9b..632086b2f644 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -16,7 +16,7 @@ struct dst_ops {
 	unsigned short		family;
 	unsigned int		gc_thresh;
 
-	int			(*gc)(struct dst_ops *ops);
+	void			(*gc)(struct dst_ops *ops);
 	struct dst_entry *	(*check)(struct dst_entry *, __u32 cookie);
 	unsigned int		(*default_advmss)(const struct dst_entry *);
 	unsigned int		(*mtu)(const struct dst_entry *);
-- 
cgit v1.2.3


From 3a415d59c1dbec9d772dbfab2d2520d98360caae Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 13 Jan 2023 16:48:49 +0000
Subject: net/sched: sch_taprio: fix possible use-after-free

syzbot reported a nasty crash [1] in net_tx_action() which
made little sense until we got a repro.

This repro installs a taprio qdisc, but providing an
invalid TCA_RATE attribute.

qdisc_create() has to destroy the just initialized
taprio qdisc, and taprio_destroy() is called.

However, the hrtimer used by taprio had already fired,
therefore advance_sched() called __netif_schedule().

Then net_tx_action was trying to use a destroyed qdisc.

We can not undo the __netif_schedule(), so we must wait
until one cpu serviced the qdisc before we can proceed.

Many thanks to Alexander Potapenko for his help.

[1]
BUG: KMSAN: uninit-value in queued_spin_trylock include/asm-generic/qspinlock.h:94 [inline]
BUG: KMSAN: uninit-value in do_raw_spin_trylock include/linux/spinlock.h:191 [inline]
BUG: KMSAN: uninit-value in __raw_spin_trylock include/linux/spinlock_api_smp.h:89 [inline]
BUG: KMSAN: uninit-value in _raw_spin_trylock+0x92/0xa0 kernel/locking/spinlock.c:138
 queued_spin_trylock include/asm-generic/qspinlock.h:94 [inline]
 do_raw_spin_trylock include/linux/spinlock.h:191 [inline]
 __raw_spin_trylock include/linux/spinlock_api_smp.h:89 [inline]
 _raw_spin_trylock+0x92/0xa0 kernel/locking/spinlock.c:138
 spin_trylock include/linux/spinlock.h:359 [inline]
 qdisc_run_begin include/net/sch_generic.h:187 [inline]
 qdisc_run+0xee/0x540 include/net/pkt_sched.h:125
 net_tx_action+0x77c/0x9a0 net/core/dev.c:5086
 __do_softirq+0x1cc/0x7fb kernel/softirq.c:571
 run_ksoftirqd+0x2c/0x50 kernel/softirq.c:934
 smpboot_thread_fn+0x554/0x9f0 kernel/smpboot.c:164
 kthread+0x31b/0x430 kernel/kthread.c:376
 ret_from_fork+0x1f/0x30

Uninit was created at:
 slab_post_alloc_hook mm/slab.h:732 [inline]
 slab_alloc_node mm/slub.c:3258 [inline]
 __kmalloc_node_track_caller+0x814/0x1250 mm/slub.c:4970
 kmalloc_reserve net/core/skbuff.c:358 [inline]
 __alloc_skb+0x346/0xcf0 net/core/skbuff.c:430
 alloc_skb include/linux/skbuff.h:1257 [inline]
 nlmsg_new include/net/netlink.h:953 [inline]
 netlink_ack+0x5f3/0x12b0 net/netlink/af_netlink.c:2436
 netlink_rcv_skb+0x55d/0x6c0 net/netlink/af_netlink.c:2507
 rtnetlink_rcv+0x30/0x40 net/core/rtnetlink.c:6108
 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
 netlink_unicast+0xf3b/0x1270 net/netlink/af_netlink.c:1345
 netlink_sendmsg+0x1288/0x1440 net/netlink/af_netlink.c:1921
 sock_sendmsg_nosec net/socket.c:714 [inline]
 sock_sendmsg net/socket.c:734 [inline]
 ____sys_sendmsg+0xabc/0xe90 net/socket.c:2482
 ___sys_sendmsg+0x2a1/0x3f0 net/socket.c:2536
 __sys_sendmsg net/socket.c:2565 [inline]
 __do_sys_sendmsg net/socket.c:2574 [inline]
 __se_sys_sendmsg net/socket.c:2572 [inline]
 __x64_sys_sendmsg+0x367/0x540 net/socket.c:2572
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

CPU: 0 PID: 13 Comm: ksoftirqd/0 Not tainted 6.0.0-rc2-syzkaller-47461-gac3859c02d7f #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/22/2022

Fixes: 5a781ccbd19e ("tc: Add support for configuring the taprio scheduler")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index d5517719af4e..af4aa66aaa4e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1288,4 +1288,11 @@ void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx);
 
 int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb));
 
+/* Make sure qdisc is no longer in SCHED state. */
+static inline void qdisc_synchronize(const struct Qdisc *q)
+{
+	while (test_bit(__QDISC_STATE_SCHED, &q->state))
+		msleep(1);
+}
+
 #endif
-- 
cgit v1.2.3


From 2032e907d8d498fcabfe24b43550c50947817c6d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 3 Jan 2023 13:47:16 +0100
Subject: netfilter: nf_tables: avoid retpoline overhead for objref calls

objref expression is builtin, so avoid calls to it for
RETOLINE=y builds.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_tables_core.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 3e825381ac5c..bedef373ec21 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -164,4 +164,8 @@ void nft_payload_inner_eval(const struct nft_expr *expr, struct nft_regs *regs,
 			    const struct nft_pktinfo *pkt,
 			    struct nft_inner_tun_ctx *ctx);
 
+void nft_objref_eval(const struct nft_expr *expr, struct nft_regs *regs,
+		     const struct nft_pktinfo *pkt);
+void nft_objref_map_eval(const struct nft_expr *expr, struct nft_regs *regs,
+			 const struct nft_pktinfo *pkt);
 #endif /* _NET_NF_TABLES_CORE_H */
-- 
cgit v1.2.3


From d9e7891476057b24a1acbf10a491e5b9a1c4ae77 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 3 Jan 2023 13:47:17 +0100
Subject: netfilter: nf_tables: avoid retpoline overhead for some ct expression
 calls

nft_ct expression cannot be made builtin to nf_tables without also
forcing the conntrack itself to be builtin.

However, this can be avoided by splitting retrieval of a few
selector keys that only need to access the nf_conn structure,
i.e. no function calls to nf_conntrack code.

Many rulesets start with something like
"ct status established,related accept"

With this change, this no longer requires an indirect call, which
gives about 1.8% more throughput with a simple conntrack-enabled
forwarding test (retpoline thunk used).

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_tables_core.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index bedef373ec21..780a5f6ad4a6 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -61,6 +61,16 @@ struct nft_immediate_expr {
 extern const struct nft_expr_ops nft_cmp_fast_ops;
 extern const struct nft_expr_ops nft_cmp16_fast_ops;
 
+struct nft_ct {
+	enum nft_ct_keys	key:8;
+	enum ip_conntrack_dir	dir:8;
+	u8			len;
+	union {
+		u8		dreg;
+		u8		sreg;
+	};
+};
+
 struct nft_payload {
 	enum nft_payload_bases	base:8;
 	u8			offset;
@@ -140,6 +150,8 @@ void nft_rt_get_eval(const struct nft_expr *expr,
 		     struct nft_regs *regs, const struct nft_pktinfo *pkt);
 void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs,
                       const struct nft_pktinfo *pkt);
+void nft_ct_get_fast_eval(const struct nft_expr *expr,
+			  struct nft_regs *regs, const struct nft_pktinfo *pkt);
 
 enum {
 	NFT_PAYLOAD_CTX_INNER_TUN	= (1 << 0),
-- 
cgit v1.2.3


From 585b6e1304dcc46e65dc1aaca5973b33abd0c48d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 16 Jan 2023 15:24:11 +0100
Subject: wifi: cfg80211: remove support for static WEP

This reverts commit b8676221f00d ("cfg80211: Add support for
static WEP in the driver") since no driver ever ended up using
it.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 03d4f4deadae..1f8f827290a2 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1075,7 +1075,6 @@ struct survey_info {
 	s8 noise;
 };
 
-#define CFG80211_MAX_WEP_KEYS	4
 #define CFG80211_MAX_NUM_AKM_SUITES	10
 
 /**
@@ -1099,9 +1098,6 @@ struct survey_info {
  *	port frames over NL80211 instead of the network interface.
  * @control_port_no_preauth: disables pre-auth rx over the nl80211 control
  *	port for mac80211
- * @wep_keys: static WEP keys, if not NULL points to an array of
- *	CFG80211_MAX_WEP_KEYS WEP keys
- * @wep_tx_key: key index (0..3) of the default TX static WEP key
  * @psk: PSK (for devices supporting 4-way-handshake offload)
  * @sae_pwd: password for SAE authentication (for devices supporting SAE
  *	offload)
@@ -1134,8 +1130,6 @@ struct cfg80211_crypto_settings {
 	bool control_port_no_encrypt;
 	bool control_port_over_nl80211;
 	bool control_port_no_preauth;
-	struct key_params *wep_keys;
-	int wep_tx_key;
 	const u8 *psk;
 	const u8 *sae_pwd;
 	u8 sae_pwd_len;
@@ -4683,8 +4677,6 @@ struct cfg80211_ops {
  * @WIPHY_FLAG_SUPPORTS_5_10_MHZ: Device supports 5 MHz and 10 MHz channels.
  * @WIPHY_FLAG_HAS_CHANNEL_SWITCH: Device supports channel switch in
  *	beaconing mode (AP, IBSS, Mesh, ...).
- * @WIPHY_FLAG_HAS_STATIC_WEP: The device supports static WEP key installation
- *	before connection.
  * @WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK: The device supports bigger kek and kck keys
  * @WIPHY_FLAG_SUPPORTS_MLO: This is a temporary flag gating the MLO APIs,
  *	in order to not have them reachable in normal drivers, until we have
@@ -4715,7 +4707,6 @@ enum wiphy_flags {
 	WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL	= BIT(21),
 	WIPHY_FLAG_SUPPORTS_5_10_MHZ		= BIT(22),
 	WIPHY_FLAG_HAS_CHANNEL_SWITCH		= BIT(23),
-	WIPHY_FLAG_HAS_STATIC_WEP		= BIT(24),
 };
 
 /**
-- 
cgit v1.2.3


From 648fba791cb0f5ef6166449d056f82e6639fe268 Mon Sep 17 00:00:00 2001
From: Shivani Baranwal <quic_shivbara@quicinc.com>
Date: Tue, 6 Dec 2022 20:07:15 +0530
Subject: wifi: cfg80211: Support 32 bytes KCK key in GTK rekey offload

Currently, maximum KCK key length supported for GTK rekey offload is 24
bytes but with some newer AKMs the KCK key length can be 32 bytes. e.g.,
00-0F-AC:24 AKM suite with SAE finite cyclic group 21. Add support to
allow 32 bytes KCK keys in GTK rekey offload.

Signed-off-by: Shivani Baranwal <quic_shivbara@quicinc.com>
Signed-off-by: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Link: https://lore.kernel.org/r/20221206143715.1802987-3-quic_vjakkam@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 1f8f827290a2..f96db7ad64f1 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4682,6 +4682,7 @@ struct cfg80211_ops {
  *	in order to not have them reachable in normal drivers, until we have
  *	complete feature/interface combinations/etc. advertisement. No driver
  *	should set this flag for now.
+ * @WIPHY_FLAG_SUPPORTS_EXT_KCK_32: The device supports 32-byte KCK keys.
  */
 enum wiphy_flags {
 	WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK		= BIT(0),
@@ -4694,7 +4695,7 @@ enum wiphy_flags {
 	WIPHY_FLAG_CONTROL_PORT_PROTOCOL	= BIT(7),
 	WIPHY_FLAG_IBSS_RSN			= BIT(8),
 	WIPHY_FLAG_MESH_AUTH			= BIT(10),
-	/* use hole at 11 */
+	WIPHY_FLAG_SUPPORTS_EXT_KCK_32          = BIT(11),
 	/* use hole at 12 */
 	WIPHY_FLAG_SUPPORTS_FW_ROAM		= BIT(13),
 	WIPHY_FLAG_AP_UAPSD			= BIT(14),
-- 
cgit v1.2.3


From bfc551679cd63ca3a4b3e7f338aa2bb06ce43e25 Mon Sep 17 00:00:00 2001
From: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Date: Tue, 6 Dec 2022 13:32:26 +0530
Subject: wifi: cfg80211: Use MLD address to indicate MLD STA disconnection

We use station's MLD address to report disconnection of MLD station.
Update the documentation in multiple places to indicate this.

Signed-off-by: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Link: https://lore.kernel.org/r/20221206080226.1702646-4-quic_vjakkam@quicinc.com
[update commit message]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f96db7ad64f1..54a77d906b2d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7775,7 +7775,7 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
 /**
  * cfg80211_del_sta_sinfo - notify userspace about deletion of a station
  * @dev: the netdev
- * @mac_addr: the station's address
+ * @mac_addr: the station's address. For MLD station, MLD address is used.
  * @sinfo: the station information/statistics
  * @gfp: allocation flags
  */
@@ -7786,7 +7786,7 @@ void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr,
  * cfg80211_del_sta - notify userspace about deletion of a station
  *
  * @dev: the netdev
- * @mac_addr: the station's address
+ * @mac_addr: the station's address. For MLD station, MLD address is used.
  * @gfp: allocation flags
  */
 static inline void cfg80211_del_sta(struct net_device *dev,
-- 
cgit v1.2.3


From 42470fa093248807f668825ff14de9bc623c0d53 Mon Sep 17 00:00:00 2001
From: Muna Sinada <quic_msinada@quicinc.com>
Date: Wed, 5 Oct 2022 14:54:45 -0700
Subject: wifi: mac80211: Add VHT MU-MIMO related flags in ieee80211_bss_conf

Adding flags for SU Beamformer, SU Beamformee, MU Beamformer and
MU Beamformee for VHT. This is utilized to pass MU-MIMO
configurations from user space to driver in AP mode.

Signed-off-by: Muna Sinada <quic_msinada@quicinc.com>
Link: https://lore.kernel.org/r/1665006886-23874-1-git-send-email-quic_msinada@quicinc.com
[fixed indentation, removed redundant !!]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index b5b80f943e82..a0f67d49be05 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -653,6 +653,14 @@ struct ieee80211_fils_discovery {
  *	write-protected by sdata_lock and local->mtx so holding either is fine
  *	for read access.
  * @color_change_color: the bss color that will be used after the change.
+ * @vht_su_beamformer: in AP mode, does this BSS support operation as an VHT SU
+ *	beamformer
+ * @vht_su_beamformee: in AP mode, does this BSS support operation as an VHT SU
+ *	beamformee
+ * @vht_mu_beamformer: in AP mode, does this BSS support operation as an VHT MU
+ *	beamformer
+ * @vht_mu_beamformee: in AP mode, does this BSS support operation as an VHT MU
+ *	beamformee
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
@@ -726,6 +734,11 @@ struct ieee80211_bss_conf {
 
 	bool color_change_active;
 	u8 color_change_color;
+
+	bool vht_su_beamformer;
+	bool vht_su_beamformee;
+	bool vht_mu_beamformer;
+	bool vht_mu_beamformee;
 };
 
 /**
-- 
cgit v1.2.3


From b1b3297df7db7065476666ddbca5a61d081347ef Mon Sep 17 00:00:00 2001
From: Muna Sinada <quic_msinada@quicinc.com>
Date: Wed, 5 Oct 2022 14:54:46 -0700
Subject: wifi: mac80211: Add HE MU-MIMO related flags in ieee80211_bss_conf

Adding flags for SU Beamformer, SU Beamformee, MU Beamformer and Full
Bandwidth UL MU-MIMO for HE. This is utilized to pass MU-MIMO
configurations from user space to driver in AP mode.

Signed-off-by: Muna Sinada <quic_msinada@quicinc.com>
Link: https://lore.kernel.org/r/1665006886-23874-2-git-send-email-quic_msinada@quicinc.com
[fixed indentation, removed redundant !!]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a0f67d49be05..65dd3982391f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -661,6 +661,15 @@ struct ieee80211_fils_discovery {
  *	beamformer
  * @vht_mu_beamformee: in AP mode, does this BSS support operation as an VHT MU
  *	beamformee
+ * @he_su_beamformer: in AP-mode, does this BSS support operation as an HE SU
+ *	beamformer
+ * @he_su_beamformee: in AP-mode, does this BSS support operation as an HE SU
+ *	beamformee
+ * @he_mu_beamformer: in AP-mode, does this BSS support operation as an HE MU
+ *	beamformer
+ * @he_full_ul_mumimo: does this BSS support the reception (AP) or transmission
+ *	(non-AP STA) of an HE TB PPDU on an RU that spans the entire PPDU
+ *	bandwidth
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
@@ -739,6 +748,10 @@ struct ieee80211_bss_conf {
 	bool vht_su_beamformee;
 	bool vht_mu_beamformer;
 	bool vht_mu_beamformee;
+	bool he_su_beamformer;
+	bool he_su_beamformee;
+	bool he_mu_beamformer;
+	bool he_full_ul_mumimo;
 };
 
 /**
-- 
cgit v1.2.3


From f66c48af7a110c0d694c4ac4a1257affb272a2ea Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 9 Jan 2023 13:07:21 +0200
Subject: mac80211: support minimal EHT rate reporting on RX

Add minimal support for RX EHT rate reporting, not yet
adding (modifying) any radiotap headers, just statistics
for cfg80211.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 65dd3982391f..e83cb9519e31 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1462,6 +1462,7 @@ enum mac80211_rx_encoding {
 	RX_ENC_HT,
 	RX_ENC_VHT,
 	RX_ENC_HE,
+	RX_ENC_EHT,
 };
 
 /**
@@ -1495,7 +1496,7 @@ enum mac80211_rx_encoding {
  * @antenna: antenna used
  * @rate_idx: index of data rate into band's supported rates or MCS index if
  *	HT or VHT is used (%RX_FLAG_HT/%RX_FLAG_VHT)
- * @nss: number of streams (VHT and HE only)
+ * @nss: number of streams (VHT, HE and EHT only)
  * @flag: %RX_FLAG_\*
  * @encoding: &enum mac80211_rx_encoding
  * @bw: &enum rate_info_bw
@@ -1503,6 +1504,8 @@ enum mac80211_rx_encoding {
  * @he_ru: HE RU, from &enum nl80211_he_ru_alloc
  * @he_gi: HE GI, from &enum nl80211_he_gi
  * @he_dcm: HE DCM value
+ * @eht.ru: EHT RU, from &enum nl80211_eht_ru_alloc
+ * @eht.gi: EHT GI, from &enum nl80211_eht_gi
  * @rx_flags: internal RX flags for mac80211
  * @ampdu_reference: A-MPDU reference number, must be a different value for
  *	each A-MPDU but the same for each subframe within one A-MPDU
@@ -1524,8 +1527,18 @@ struct ieee80211_rx_status {
 	u32 flag;
 	u16 freq: 13, freq_offset: 1;
 	u8 enc_flags;
-	u8 encoding:2, bw:3, he_ru:3;
-	u8 he_gi:2, he_dcm:1;
+	u8 encoding:3, bw:4;
+	union {
+		struct {
+			u8 he_ru:3;
+			u8 he_gi:2;
+			u8 he_dcm:1;
+		};
+		struct {
+			u8 ru:4;
+			u8 gi:2;
+		} eht;
+	};
 	u8 rate_idx;
 	u8 nss;
 	u8 rx_flags;
-- 
cgit v1.2.3


From 41ade47c1273ca0e61c36f2cccad37473f0b2422 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 19 Jan 2023 08:52:01 +0100
Subject: wifi: mac80211: add kernel-doc for EHT structure

Looks like this is required, even if all of the members
are separately described. Add a line to avoid the warning.

Fixes: f66c48af7a11 ("mac80211: support minimal EHT rate reporting on RX")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e83cb9519e31..a945f1b1b4d8 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1504,6 +1504,7 @@ enum mac80211_rx_encoding {
  * @he_ru: HE RU, from &enum nl80211_he_ru_alloc
  * @he_gi: HE GI, from &enum nl80211_he_gi
  * @he_dcm: HE DCM value
+ * @eht: EHT specific rate information
  * @eht.ru: EHT RU, from &enum nl80211_eht_ru_alloc
  * @eht.gi: EHT GI, from &enum nl80211_eht_gi
  * @rx_flags: internal RX flags for mac80211
-- 
cgit v1.2.3


From 82253ddaff582147cd3fd0e629c4e65d62b1d015 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 19 Jan 2023 14:57:11 +0100
Subject: wifi: mac80211: drop extra 'e' from ieeee80211... name

Somehow an extra 'e' slipped in there without anyone noticing,
drop that from ieeee80211_obss_color_collision_notify().

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a945f1b1b4d8..2635e6de8101 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -7210,7 +7210,7 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw,
 					  struct ieee80211_vif *vif);
 
 /**
- * ieeee80211_obss_color_collision_notify - notify userland about a BSS color
+ * ieee80211_obss_color_collision_notify - notify userland about a BSS color
  * collision.
  *
  * @vif: &struct ieee80211_vif pointer from the add_interface callback.
@@ -7219,8 +7219,8 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw,
  * @gfp: allocation flags
  */
 void
-ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
-				       u64 color_bitmap, gfp_t gfp);
+ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
+				      u64 color_bitmap, gfp_t gfp);
 
 /**
  * ieee80211_is_tx_data - check if frame is a data frame
-- 
cgit v1.2.3


From 5cc9049cb9021a46ad5711a946eb3ded47eed0de Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Wed, 18 Jan 2023 16:21:04 +0100
Subject: devlink: remove linecards lock

Similar to other devlink objects, convert the linecards list to be
protected by devlink instance lock. Alongside with that rename the
create/destroy() functions to devl_* to indicate the devlink instance
lock needs to be held while calling them.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/devlink.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 425ecef431b7..d7c9572e5bea 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1687,9 +1687,9 @@ void devl_rate_nodes_destroy(struct devlink *devlink);
 void devlink_port_linecard_set(struct devlink_port *devlink_port,
 			       struct devlink_linecard *linecard);
 struct devlink_linecard *
-devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index,
-			const struct devlink_linecard_ops *ops, void *priv);
-void devlink_linecard_destroy(struct devlink_linecard *linecard);
+devl_linecard_create(struct devlink *devlink, unsigned int linecard_index,
+		     const struct devlink_linecard_ops *ops, void *priv);
+void devl_linecard_destroy(struct devlink_linecard *linecard);
 void devlink_linecard_provision_set(struct devlink_linecard *linecard,
 				    const char *type);
 void devlink_linecard_provision_clear(struct devlink_linecard *linecard);
-- 
cgit v1.2.3


From dfdfd1305ddecb990566193f2ba8a11bccba4cde Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Wed, 18 Jan 2023 16:21:08 +0100
Subject: devlink: protect health reporter operation with instance lock

Similar to other devlink objects, protect the reporters list
by devlink instance lock. Alongside add unlocked versions
of health reporter create/destroy functions and use them in drivers
on call paths where the instance lock is held.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/devlink.h | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index d7c9572e5bea..0d64feaef7cb 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1865,18 +1865,34 @@ int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
 				 const void *value, u32 value_len);
 
 struct devlink_health_reporter *
-devlink_health_reporter_create(struct devlink *devlink,
-			       const struct devlink_health_reporter_ops *ops,
-			       u64 graceful_period, void *priv);
+devl_port_health_reporter_create(struct devlink_port *port,
+				 const struct devlink_health_reporter_ops *ops,
+				 u64 graceful_period, void *priv);
 
 struct devlink_health_reporter *
 devlink_port_health_reporter_create(struct devlink_port *port,
 				    const struct devlink_health_reporter_ops *ops,
 				    u64 graceful_period, void *priv);
 
+struct devlink_health_reporter *
+devl_health_reporter_create(struct devlink *devlink,
+			    const struct devlink_health_reporter_ops *ops,
+			    u64 graceful_period, void *priv);
+
+struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+			       const struct devlink_health_reporter_ops *ops,
+			       u64 graceful_period, void *priv);
+
+void
+devl_health_reporter_destroy(struct devlink_health_reporter *reporter);
+
 void
 devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
 
+void
+devl_port_health_reporter_destroy(struct devlink_health_reporter *reporter);
+
 void
 devlink_port_health_reporter_destroy(struct devlink_health_reporter *reporter);
 
-- 
cgit v1.2.3


From 1dea3b4e4c52f4bed64d1c527d548e82ccaea15a Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Wed, 18 Jan 2023 16:21:09 +0100
Subject: devlink: remove reporters_lock

Similar to other devlink objects, rely on devlink instance lock
and remove object specific reporters_lock.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/devlink.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 0d64feaef7cb..d9ea76bea36e 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -146,7 +146,6 @@ struct devlink_port {
 	   initialized:1;
 	struct delayed_work type_warn_dw;
 	struct list_head reporter_list;
-	struct mutex reporters_lock; /* Protects reporter_list */
 
 	struct devlink_rate *devlink_rate;
 	struct devlink_linecard *linecard;
-- 
cgit v1.2.3


From 9f167327efecc3977deff0c852760e3759b0c2a7 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Wed, 18 Jan 2023 16:21:10 +0100
Subject: devlink: remove devl*_port_health_reporter_destroy()

Remove port-specific health reporter destroy function as it is
currently the same as the instance one so no longer needed. Inline
__devlink_health_reporter_destroy() as it is no longer called from
multiple places.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/devlink.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index d9ea76bea36e..608a0c198be8 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1889,12 +1889,6 @@ devl_health_reporter_destroy(struct devlink_health_reporter *reporter);
 void
 devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
 
-void
-devl_port_health_reporter_destroy(struct devlink_health_reporter *reporter);
-
-void
-devlink_port_health_reporter_destroy(struct devlink_health_reporter *reporter);
-
 void *
 devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
 int devlink_health_report(struct devlink_health_reporter *reporter,
-- 
cgit v1.2.3


From 622f1b2fae2eea28a80b04f130e3bb54227699f8 Mon Sep 17 00:00:00 2001
From: Daniel Machon <daniel.machon@microchip.com>
Date: Wed, 18 Jan 2023 22:08:27 +0100
Subject: net: dcb: add new rewrite table

Add new rewrite table and all the required functions, offload hooks and
bookkeeping for maintaining it. The rewrite table reuses the app struct,
and the entire set of app selectors. As such, some bookeeping code can
be shared between the rewrite- and the APP table.

New functions for getting, setting and deleting entries has been added.
Apart from operating on the rewrite list, these functions do not emit a
DCB_APP_EVENT when the list os modified. The new dcb_getrewr does a
lookup based on selector and priority and returns the protocol, so that
mappings from priority to protocol, for a given selector and ifindex is
obtained.

Also, a new nested attribute has been added, that encapsulates one or
more app structs. This attribute is used to distinguish the two tables.

The dcb_lock used for the APP table is reused for the rewrite table.

Signed-off-by: Daniel Machon <daniel.machon@microchip.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dcbnl.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index 8841ab6c2de7..fe7dfb8bcb5b 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -19,6 +19,10 @@ struct dcb_app_type {
 	u8	dcbx;
 };
 
+u16 dcb_getrewr(struct net_device *dev, struct dcb_app *app);
+int dcb_setrewr(struct net_device *dev, struct dcb_app *app);
+int dcb_delrewr(struct net_device *dev, struct dcb_app *app);
+
 int dcb_setapp(struct net_device *, struct dcb_app *);
 u8 dcb_getapp(struct net_device *, struct dcb_app *);
 int dcb_ieee_setapp(struct net_device *, struct dcb_app *);
@@ -113,6 +117,10 @@ struct dcbnl_rtnl_ops {
 	/* apptrust */
 	int (*dcbnl_setapptrust)(struct net_device *, u8 *, int);
 	int (*dcbnl_getapptrust)(struct net_device *, u8 *, int *);
+
+	/* rewrite */
+	int (*dcbnl_setrewr)(struct net_device *dev, struct dcb_app *app);
+	int (*dcbnl_delrewr)(struct net_device *dev, struct dcb_app *app);
 };
 
 #endif /* __NET_DCBNL_H__ */
-- 
cgit v1.2.3


From 1df99338e6d4e96178b68b3e17bab33e9f1eb628 Mon Sep 17 00:00:00 2001
From: Daniel Machon <daniel.machon@microchip.com>
Date: Wed, 18 Jan 2023 22:08:28 +0100
Subject: net: dcb: add helper functions to retrieve PCP and DSCP rewrite maps

Add two new helper functions to retrieve a mapping of priority to PCP
and DSCP bitmasks, where each bitmap contains ones in positions that
match a rewrite entry.

dcb_ieee_getrewr_prio_dscp_mask_map() reuses the dcb_ieee_app_prio_map,
as this struct is already used for a similar mapping in the app table.

Signed-off-by: Daniel Machon <daniel.machon@microchip.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dcbnl.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index fe7dfb8bcb5b..42207fc44660 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -29,12 +29,22 @@ int dcb_ieee_setapp(struct net_device *, struct dcb_app *);
 int dcb_ieee_delapp(struct net_device *, struct dcb_app *);
 u8 dcb_ieee_getapp_mask(struct net_device *, struct dcb_app *);
 
+struct dcb_rewr_prio_pcp_map {
+	u16 map[IEEE_8021QAZ_MAX_TCS];
+};
+
+void dcb_getrewr_prio_pcp_mask_map(const struct net_device *dev,
+				   struct dcb_rewr_prio_pcp_map *p_map);
+
 struct dcb_ieee_app_prio_map {
 	u64 map[IEEE_8021QAZ_MAX_TCS];
 };
 void dcb_ieee_getapp_prio_dscp_mask_map(const struct net_device *dev,
 					struct dcb_ieee_app_prio_map *p_map);
 
+void dcb_getrewr_prio_dscp_mask_map(const struct net_device *dev,
+				    struct dcb_ieee_app_prio_map *p_map);
+
 struct dcb_ieee_app_dscp_map {
 	u8 map[64];
 };
-- 
cgit v1.2.3


From 20e3028c39a5bf882e91e717da96d14f1acec40e Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Thu, 19 Jan 2023 12:59:10 -0800
Subject: net: mana: Fix IRQ name - add PCI and queue number

The PCI and queue number info is missing in IRQ names.

Add PCI and queue number to IRQ names, to allow CPU affinity
tuning scripts to work.

Cc: stable@vger.kernel.org
Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Link: https://lore.kernel.org/r/1674161950-19708-1-git-send-email-haiyangz@microsoft.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/mana/gdma.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index b3ba04615caa..56189e4252da 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -336,9 +336,12 @@ struct gdma_queue_spec {
 	};
 };
 
+#define MANA_IRQ_NAME_SZ 32
+
 struct gdma_irq_context {
 	void (*handler)(void *arg);
 	void *arg;
+	char name[MANA_IRQ_NAME_SZ];
 };
 
 struct gdma_context {
-- 
cgit v1.2.3


From 5f6c2d498ad97cf9f85b81c0fbb205abbcdfe3f8 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 19 Jan 2023 14:27:00 +0200
Subject: net: dsa: add plumbing for changing and getting MAC merge layer state

The DSA core is in charge of the ethtool_ops of the net devices
associated with switch ports, so in case a hardware driver supports the
MAC merge layer, DSA must pass the callbacks through to the driver.
Add support for precisely that.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 96086289aa9b..a15f17a38eca 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -937,6 +937,17 @@ struct dsa_switch_ops {
 	int	(*get_ts_info)(struct dsa_switch *ds, int port,
 			       struct ethtool_ts_info *ts);
 
+	/*
+	 * ethtool MAC merge layer
+	 */
+	int	(*get_mm)(struct dsa_switch *ds, int port,
+			  struct ethtool_mm_state *state);
+	int	(*set_mm)(struct dsa_switch *ds, int port,
+			  struct ethtool_mm_cfg *cfg,
+			  struct netlink_ext_ack *extack);
+	void	(*get_mm_stats)(struct dsa_switch *ds, int port,
+				struct ethtool_mm_stats *stats);
+
 	/*
 	 * DCB ops
 	 */
-- 
cgit v1.2.3


From 3d76a4d3d4e591af3e789698affaad88a5a8e8ab Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 19 Jan 2023 14:15:26 -0800
Subject: bpf: XDP metadata RX kfuncs

Define a new kfunc set (xdp_metadata_kfunc_ids) which implements all possible
XDP metatada kfuncs. Not all devices have to implement them. If kfunc is not
supported by the target device, the default implementation is called instead.
The verifier, at load time, replaces a call to the generic kfunc with a call
to the per-device one. Per-device kfunc pointers are stored in separate
struct xdp_metadata_ops.

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
Cc: Maryam Tahhan <mtahhan@redhat.com>
Cc: xdp-hints@xdp-project.net
Cc: netdev@vger.kernel.org
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20230119221536.3349901-8-sdf@google.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/net/xdp.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/net')

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 55dbc68bfffc..91292aa13bc0 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -409,4 +409,25 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
 
 #define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE
 
+#define XDP_METADATA_KFUNC_xxx	\
+	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \
+			   bpf_xdp_metadata_rx_timestamp) \
+	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
+			   bpf_xdp_metadata_rx_hash) \
+
+enum {
+#define XDP_METADATA_KFUNC(name, _) name,
+XDP_METADATA_KFUNC_xxx
+#undef XDP_METADATA_KFUNC
+MAX_XDP_METADATA_KFUNC,
+};
+
+#ifdef CONFIG_NET
+u32 bpf_xdp_metadata_kfunc_id(int id);
+bool bpf_dev_bound_kfunc_id(u32 btf_id);
+#else
+static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; }
+static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; }
+#endif
+
 #endif /* __LINUX_NET_XDP_H__ */
-- 
cgit v1.2.3


From 94ecc5ca4dbf1f01bae6e32f5cd88c0fc5dc3cc9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 19 Jan 2023 14:15:33 -0800
Subject: xsk: Add cb area to struct xdp_buff_xsk
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an area after the xdp_buff in struct xdp_buff_xsk that drivers can use
to stash extra information to use in metadata kfuncs. The maximum size of
24 bytes means the full xdp_buff_xsk structure will take up exactly two
cache lines (with the cb field spanning both). Also add a macro drivers can
use to check their own wrapping structs against the available size.

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
Cc: Maryam Tahhan <mtahhan@redhat.com>
Cc: xdp-hints@xdp-project.net
Cc: netdev@vger.kernel.org
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20230119221536.3349901-15-sdf@google.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/net/xsk_buff_pool.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index f787c3f524b0..3e952e569418 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -19,8 +19,11 @@ struct xdp_sock;
 struct device;
 struct page;
 
+#define XSK_PRIV_MAX 24
+
 struct xdp_buff_xsk {
 	struct xdp_buff xdp;
+	u8 cb[XSK_PRIV_MAX];
 	dma_addr_t dma;
 	dma_addr_t frame_dma;
 	struct xsk_buff_pool *pool;
@@ -28,6 +31,8 @@ struct xdp_buff_xsk {
 	struct list_head free_list_node;
 };
 
+#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
+
 struct xsk_dma_map {
 	dma_addr_t *dma_pages;
 	struct device *dev;
-- 
cgit v1.2.3


From 90317bcdbd337b9e88f253650f6ab9dfe667be64 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Mon, 23 Jan 2023 18:47:09 +0100
Subject: ipv6: Make ip6_route_output_flags_noref() static.

This function is only used in net/ipv6/route.c and has no reason to be
visible outside of it.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/50706db7f675e40b3594d62011d9363dce32b92e.1674495822.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip6_route.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 035d61d50a98..81ee387a1fc4 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -84,10 +84,6 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
 					 struct flowi6 *fl6,
 					 const struct sk_buff *skb, int flags);
 
-struct dst_entry *ip6_route_output_flags_noref(struct net *net,
-					       const struct sock *sk,
-					       struct flowi6 *fl6, int flags);
-
 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
 					 struct flowi6 *fl6, int flags);
 
-- 
cgit v1.2.3


From 89e7d2ba61b742a7525ff06ea4d4378c4a5560d0 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <raspl@linux.ibm.com>
Date: Mon, 23 Jan 2023 19:17:48 +0100
Subject: net/ism: Add new API for client registration

Add a new API that allows other drivers to concurrently access ISM devices.
To do so, we introduce a new API that allows other modules to register for
ISM device usage. Furthermore, we move the GID to struct ism, where it
belongs conceptually, and rename and relocate struct smcd_event to struct
ism_event.
This is the first part of a bigger overhaul of the interfaces between SMC
and ISM.

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
Signed-off-by: Wenjia Zhang <wenjia@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/smc.h | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'include/net')

diff --git a/include/net/smc.h b/include/net/smc.h
index c926d3313e05..98689b16b841 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -15,6 +15,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/wait.h>
+#include "linux/ism.h"
 
 struct sock;
 
@@ -48,14 +49,6 @@ struct smcd_dmb {
 
 #define ISM_ERROR	0xFFFF
 
-struct smcd_event {
-	u32 type;
-	u32 code;
-	u64 tok;
-	u64 time;
-	u64 info;
-};
-
 struct smcd_dev;
 
 struct smcd_ops {
@@ -100,6 +93,6 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
 int smcd_register_dev(struct smcd_dev *smcd);
 void smcd_unregister_dev(struct smcd_dev *smcd);
 void smcd_free_dev(struct smcd_dev *smcd);
-void smcd_handle_event(struct smcd_dev *dev, struct smcd_event *event);
+void smcd_handle_event(struct smcd_dev *dev, struct ism_event *event);
 void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit, u16 dmbemask);
 #endif	/* _SMC_H */
-- 
cgit v1.2.3


From 8747716f3942a610efdd12e3655df47269c268ac Mon Sep 17 00:00:00 2001
From: Stefan Raspl <raspl@linux.ibm.com>
Date: Mon, 23 Jan 2023 19:17:49 +0100
Subject: net/smc: Register SMC-D as ISM client

Register the smc module with the new ism device driver API.
This is the second part of a bigger overhaul of the interfaces between SMC
and ISM.

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
Signed-off-by: Wenjia Zhang <wenjia@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/smc.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/smc.h b/include/net/smc.h
index 98689b16b841..151aa54d9ad2 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -90,9 +90,6 @@ struct smcd_dev {
 
 struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
 				const struct smcd_ops *ops, int max_dmbs);
-int smcd_register_dev(struct smcd_dev *smcd);
-void smcd_unregister_dev(struct smcd_dev *smcd);
 void smcd_free_dev(struct smcd_dev *smcd);
-void smcd_handle_event(struct smcd_dev *dev, struct ism_event *event);
-void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit, u16 dmbemask);
+
 #endif	/* _SMC_H */
-- 
cgit v1.2.3


From 9de4df7b6be1cfca500f8ba21137d53eec45418a Mon Sep 17 00:00:00 2001
From: Stefan Raspl <raspl@linux.ibm.com>
Date: Mon, 23 Jan 2023 19:17:50 +0100
Subject: net/smc: Separate SMC-D and ISM APIs

We separate the code implementing the struct smcd_ops API in the ISM
device driver from the functions that may be used by other exploiters of
ISM devices.
Note: We start out small, and don't offer the whole breadth of the ISM
device for public use, as many functions are specific to or likely only
ever used in the context of SMC-D.
This is the third part of a bigger overhaul of the interfaces between SMC
and ISM.

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
Signed-off-by: Wenjia Zhang <wenjia@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/smc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/smc.h b/include/net/smc.h
index 151aa54d9ad2..d5f8f18169d7 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -66,14 +66,15 @@ struct smcd_ops {
 			 bool sf, unsigned int offset, void *data,
 			 unsigned int size);
 	u8* (*get_system_eid)(void);
+	u64 (*get_local_gid)(struct smcd_dev *dev);
 	u16 (*get_chid)(struct smcd_dev *dev);
 };
 
 struct smcd_dev {
 	const struct smcd_ops *ops;
 	struct device dev;
+	struct ism_dev *ism;
 	void *priv;
-	u64 local_gid;
 	struct list_head list;
 	spinlock_t lock;
 	struct smc_connection **conn;
-- 
cgit v1.2.3


From 820f21009f1bc7a69e28752f6c6d9544401ca526 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <raspl@linux.ibm.com>
Date: Mon, 23 Jan 2023 19:17:51 +0100
Subject: s390/ism: Consolidate SMC-D-related code

The ism module had SMC-D-specific code sprinkled across the entire module.
We are now consolidating the SMC-D-specific parts into the latter parts
of the module, so it becomes more clear what code is intended for use with
ISM, and which parts are glue code for usage in the context of SMC-D.
This is the fourth part of a bigger overhaul of the interfaces between SMC
and ISM.

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
Signed-off-by: Wenjia Zhang <wenjia@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/smc.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/smc.h b/include/net/smc.h
index d5f8f18169d7..556b96c12279 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -50,11 +50,13 @@ struct smcd_dmb {
 #define ISM_ERROR	0xFFFF
 
 struct smcd_dev;
+struct ism_client;
 
 struct smcd_ops {
 	int (*query_remote_gid)(struct smcd_dev *dev, u64 rgid, u32 vid_valid,
 				u32 vid);
-	int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
+	int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb,
+			    struct ism_client *client);
 	int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
 	int (*add_vlan_id)(struct smcd_dev *dev, u64 vlan_id);
 	int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id);
@@ -73,7 +75,6 @@ struct smcd_ops {
 struct smcd_dev {
 	const struct smcd_ops *ops;
 	struct device dev;
-	struct ism_dev *ism;
 	void *priv;
 	struct list_head list;
 	spinlock_t lock;
-- 
cgit v1.2.3


From 8c81ba20349daf9f7e58bb05a0c12f4b71813a30 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <raspl@linux.ibm.com>
Date: Mon, 23 Jan 2023 19:17:52 +0100
Subject: net/smc: De-tangle ism and smc device initialization

The struct device for ISM devices was part of struct smcd_dev. Move to
struct ism_dev, provide a new API call in struct smcd_ops, and convert
existing SMCD code accordingly.
Furthermore, remove struct smcd_dev from struct ism_dev.
This is the final part of a bigger overhaul of the interfaces between SMC
and ISM.

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
Signed-off-by: Wenjia Zhang <wenjia@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/smc.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/smc.h b/include/net/smc.h
index 556b96c12279..597cb9381182 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -70,11 +70,11 @@ struct smcd_ops {
 	u8* (*get_system_eid)(void);
 	u64 (*get_local_gid)(struct smcd_dev *dev);
 	u16 (*get_chid)(struct smcd_dev *dev);
+	struct device* (*get_dev)(struct smcd_dev *dev);
 };
 
 struct smcd_dev {
 	const struct smcd_ops *ops;
-	struct device dev;
 	void *priv;
 	struct list_head list;
 	spinlock_t lock;
@@ -90,8 +90,4 @@ struct smcd_dev {
 	u8 going_away : 1;
 };
 
-struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
-				const struct smcd_ops *ops, int max_dmbs);
-void smcd_free_dev(struct smcd_dev *smcd);
-
 #endif	/* _SMC_H */
-- 
cgit v1.2.3


From 91d0b78c5177f3e42a4d8738af8ac19c3a90d002 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Tue, 24 Jan 2023 14:36:43 +0100
Subject: inet: Add IP_LOCAL_PORT_RANGE socket option

Users who want to share a single public IP address for outgoing connections
between several hosts traditionally reach for SNAT. However, SNAT requires
state keeping on the node(s) performing the NAT.

A stateless alternative exists, where a single IP address used for egress
can be shared between several hosts by partitioning the available ephemeral
port range. In such a setup:

1. Each host gets assigned a disjoint range of ephemeral ports.
2. Applications open connections from the host-assigned port range.
3. Return traffic gets routed to the host based on both, the destination IP
   and the destination port.

An application which wants to open an outgoing connection (connect) from a
given port range today can choose between two solutions:

1. Manually pick the source port by bind()'ing to it before connect()'ing
   the socket.

   This approach has a couple of downsides:

   a) Search for a free port has to be implemented in the user-space. If
      the chosen 4-tuple happens to be busy, the application needs to retry
      from a different local port number.

      Detecting if 4-tuple is busy can be either easy (TCP) or hard
      (UDP). In TCP case, the application simply has to check if connect()
      returned an error (EADDRNOTAVAIL). That is assuming that the local
      port sharing was enabled (REUSEADDR) by all the sockets.

        # Assume desired local port range is 60_000-60_511
        s = socket(AF_INET, SOCK_STREAM)
        s.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)
        s.bind(("192.0.2.1", 60_000))
        s.connect(("1.1.1.1", 53))
        # Fails only if 192.0.2.1:60000 -> 1.1.1.1:53 is busy
        # Application must retry with another local port

      In case of UDP, the network stack allows binding more than one socket
      to the same 4-tuple, when local port sharing is enabled
      (REUSEADDR). Hence detecting the conflict is much harder and involves
      querying sock_diag and toggling the REUSEADDR flag [1].

   b) For TCP, bind()-ing to a port within the ephemeral port range means
      that no connecting sockets, that is those which leave it to the
      network stack to find a free local port at connect() time, can use
      the this port.

      IOW, the bind hash bucket tb->fastreuse will be 0 or 1, and the port
      will be skipped during the free port search at connect() time.

2. Isolate the app in a dedicated netns and use the use the per-netns
   ip_local_port_range sysctl to adjust the ephemeral port range bounds.

   The per-netns setting affects all sockets, so this approach can be used
   only if:

   - there is just one egress IP address, or
   - the desired egress port range is the same for all egress IP addresses
     used by the application.

   For TCP, this approach avoids the downsides of (1). Free port search and
   4-tuple conflict detection is done by the network stack:

     system("sysctl -w net.ipv4.ip_local_port_range='60000 60511'")

     s = socket(AF_INET, SOCK_STREAM)
     s.setsockopt(SOL_IP, IP_BIND_ADDRESS_NO_PORT, 1)
     s.bind(("192.0.2.1", 0))
     s.connect(("1.1.1.1", 53))
     # Fails if all 4-tuples 192.0.2.1:60000-60511 -> 1.1.1.1:53 are busy

  For UDP this approach has limited applicability. Setting the
  IP_BIND_ADDRESS_NO_PORT socket option does not result in local source
  port being shared with other connected UDP sockets.

  Hence relying on the network stack to find a free source port, limits the
  number of outgoing UDP flows from a single IP address down to the number
  of available ephemeral ports.

To put it another way, partitioning the ephemeral port range between hosts
using the existing Linux networking API is cumbersome.

To address this use case, add a new socket option at the SOL_IP level,
named IP_LOCAL_PORT_RANGE. The new option can be used to clamp down the
ephemeral port range for each socket individually.

The option can be used only to narrow down the per-netns local port
range. If the per-socket range lies outside of the per-netns range, the
latter takes precedence.

UAPI-wise, the low and high range bounds are passed to the kernel as a pair
of u16 values in host byte order packed into a u32. This avoids pointer
passing.

  PORT_LO = 40_000
  PORT_HI = 40_511

  s = socket(AF_INET, SOCK_STREAM)
  v = struct.pack("I", PORT_HI << 16 | PORT_LO)
  s.setsockopt(SOL_IP, IP_LOCAL_PORT_RANGE, v)
  s.bind(("127.0.0.1", 0))
  s.getsockname()
  # Local address between ("127.0.0.1", 40_000) and ("127.0.0.1", 40_511),
  # if there is a free port. EADDRINUSE otherwise.

[1] https://github.com/cloudflare/cloudflare-blog/blob/232b432c1d57/2022-02-connectx/connectx.py#L116

Reviewed-by: Marek Majkowski <marek@cloudflare.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet_sock.h | 4 ++++
 include/net/ip.h        | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index bf5654ce711e..51857117ac09 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -249,6 +249,10 @@ struct inet_sock {
 	__be32			mc_addr;
 	struct ip_mc_socklist __rcu	*mc_list;
 	struct inet_cork_full	cork;
+	struct {
+		__u16 lo;
+		__u16 hi;
+	}			local_port_range;
 };
 
 #define IPCORK_OPT	1	/* ip-options has been held in ipcork.opt */
diff --git a/include/net/ip.h b/include/net/ip.h
index 144bdfbb25af..c3fffaa92d6e 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -340,7 +340,8 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
 	} \
 }
 
-void inet_get_local_port_range(struct net *net, int *low, int *high);
+void inet_get_local_port_range(const struct net *net, int *low, int *high);
+void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
 
 #ifdef CONFIG_SYSCTL
 static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port)
-- 
cgit v1.2.3


From 68f4eae781dd25aca2eb84ca2279663689db8d19 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 25 Jan 2023 23:14:16 -0800
Subject: net: checksum: drop the linux/uaccess.h include

net/checksum.h pulls in linux/uaccess.h which is large.

In the x86 header the include seems to not be needed at all.
ARM on the other hand does not include uaccess.h, even tho
it calls access_ok().

In the generic implementation guard the include of linux/uaccess.h
with the same condition as the code that needs it.

With this change pre-processed net/checksum.h shrinks on x86
from 30616 lines to just 1193.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/checksum.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/checksum.h b/include/net/checksum.h
index 6bc783b7a06c..1338cb92c8e7 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -18,8 +18,10 @@
 #include <linux/errno.h>
 #include <asm/types.h>
 #include <asm/byteorder.h>
-#include <linux/uaccess.h>
 #include <asm/checksum.h>
+#if !defined(_HAVE_ARCH_COPY_AND_CSUM_FROM_USER) || !defined(HAVE_CSUM_COPY_USER)
+#include <linux/uaccess.h>
+#endif
 
 #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
 static __always_inline
-- 
cgit v1.2.3


From 21bf73158fe7ae8a3d55618e58edc958f84739a8 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 25 Jan 2023 23:14:24 -0800
Subject: net: remove unnecessary includes from net/flow.h

This file is included by a lot of other commonly included
headers, it doesn't need socket.h or flow_dissector.h.

This reduces the size of this file after pre-processing
from 28165 to 4663.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/flow.h b/include/net/flow.h
index 2f0da4f0318b..bb8651a6eaa7 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -8,12 +8,13 @@
 #ifndef _NET_FLOW_H
 #define _NET_FLOW_H
 
-#include <linux/socket.h>
 #include <linux/in6.h>
 #include <linux/atomic.h>
-#include <net/flow_dissector.h>
+#include <linux/container_of.h>
 #include <linux/uidgid.h>
 
+struct flow_keys;
+
 /*
  * ifindex generation is per-net namespace, and loopback is
  * always the 1st device in ns (see net_dev_init), thus any
-- 
cgit v1.2.3


From 020dd127a3fef9dfc6c03cd5d1c231d5e55d7632 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Thu, 26 Jan 2023 08:58:29 +0100
Subject: devlink: make devlink_param_register/unregister static

There is no user outside the devlink code, so remove the export and make
the functions static. Move them before callers to avoid forward
declarations.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 608a0c198be8..cf74b6391896 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1773,10 +1773,6 @@ int devlink_params_register(struct devlink *devlink,
 void devlink_params_unregister(struct devlink *devlink,
 			       const struct devlink_param *params,
 			       size_t params_count);
-int devlink_param_register(struct devlink *devlink,
-			   const struct devlink_param *param);
-void devlink_param_unregister(struct devlink *devlink,
-			      const struct devlink_param *param);
 int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value *init_val);
 int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
-- 
cgit v1.2.3


From 85fe0b324c830ac671b811efc70ea80c3dcb2390 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Thu, 26 Jan 2023 08:58:33 +0100
Subject: devlink: make devlink_param_driverinit_value_set() return void

devlink_param_driverinit_value_set() currently returns int with possible
error, but no user is checking it anyway. The only reason for a fail is
a driver bug. So convert the function to return void and put WARN_ONs
on error paths.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index cf74b6391896..e0d773dfa637 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1775,8 +1775,8 @@ void devlink_params_unregister(struct devlink *devlink,
 			       size_t params_count);
 int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value *init_val);
-int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
-				       union devlink_param_value init_val);
+void devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+					union devlink_param_value init_val);
 void devlink_param_value_changed(struct devlink *devlink, u32 param_id);
 struct devlink_region *devl_region_create(struct devlink *devlink,
 					  const struct devlink_region_ops *ops,
-- 
cgit v1.2.3


From 075935f0ae0fbbe469a911d685f6cc59de892700 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Thu, 26 Jan 2023 08:58:35 +0100
Subject: devlink: protect devlink param list by instance lock

Commit 1d18bb1a4ddd ("devlink: allow registering parameters after
the instance") as the subject implies introduced possibility to register
devlink params even for already registered devlink instance. This is a
bit problematic, as the consistency or params list was originally
secured by the fact it is static during devlink lifetime. So in order to
protect the params list, take devlink instance lock during the params
operations. Introduce unlocked function variants and use them in drivers
in locked context. Put lock assertions to appropriate places.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Tested-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index e0d773dfa637..ab654cf552b8 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1767,17 +1767,23 @@ void devl_resource_occ_get_unregister(struct devlink *devlink,
 
 void devlink_resource_occ_get_unregister(struct devlink *devlink,
 					 u64 resource_id);
+int devl_params_register(struct devlink *devlink,
+			 const struct devlink_param *params,
+			 size_t params_count);
 int devlink_params_register(struct devlink *devlink,
 			    const struct devlink_param *params,
 			    size_t params_count);
+void devl_params_unregister(struct devlink *devlink,
+			    const struct devlink_param *params,
+			    size_t params_count);
 void devlink_params_unregister(struct devlink *devlink,
 			       const struct devlink_param *params,
 			       size_t params_count);
-int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
-				       union devlink_param_value *init_val);
-void devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
-					union devlink_param_value init_val);
-void devlink_param_value_changed(struct devlink *devlink, u32 param_id);
+int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+				    union devlink_param_value *init_val);
+void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+				     union devlink_param_value init_val);
+void devl_param_value_changed(struct devlink *devlink, u32 param_id);
 struct devlink_region *devl_region_create(struct devlink *devlink,
 					  const struct devlink_region_ops *ops,
 					  u32 region_max_snapshots,
-- 
cgit v1.2.3


From 70eb3911d80f548a76fb9a40c8a3fd93ac061a42 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 27 Jan 2023 08:45:06 +0100
Subject: net: netlink: recommend policy range validation

For large ranges (outside of s16) the documentation currently
recommends open-coding the validation, but it's better to use
the NLA_POLICY_FULL_RANGE() or NLA_POLICY_FULL_RANGE_SIGNED()
policy validation instead; recommend that.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://lore.kernel.org/r/20230127084506.09f280619d64.I5dece85f06efa8ab0f474ca77df9e26d3553d4ab@changeid
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/netlink.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 6e1e670e06bc..b12cd957abb4 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -276,7 +276,8 @@ enum nla_policy_validation {
  *                         Note that in the interest of code simplicity and
  *                         struct size both limits are s16, so you cannot
  *                         enforce a range that doesn't fall within the range
- *                         of s16 - do that as usual in the code instead.
+ *                         of s16 - do that using the NLA_POLICY_FULL_RANGE()
+ *                         or NLA_POLICY_FULL_RANGE_SIGNED() macros instead.
  *                         Use the NLA_POLICY_MIN(), NLA_POLICY_MAX() and
  *                         NLA_POLICY_RANGE() macros.
  *    NLA_U8,
-- 
cgit v1.2.3


From 9bc114504b07207d671593f6f6d787d55dcf91bd Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Wed, 25 Jan 2023 11:29:22 +0100
Subject: ieee802154: Add support for user beaconing requests

Parse user requests for sending beacons, start sending beacons at a
regular pace. If needed, the pace can be updated with a new request. The
process can also be interrupted at any moment.

The page and channel must be changed beforehands if needed. Interval
orders above 14 are reserved to tell a device it must answer BEACON_REQ
coming from another device as part of an active scan procedure and this
is not yet supported.

A netlink "beacon request" structure is created to list the
requirements.

Mac layers may now implement the ->send_beacons() and
->stop_beacons() hooks.

Co-developed-by: David Girault <david.girault@qorvo.com>
Signed-off-by: David Girault <david.girault@qorvo.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Alexander Aring <aahringo@redhat.com>
Link: https://lore.kernel.org/r/20230125102923.135465-2-miquel.raynal@bootlin.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 include/net/cfg802154.h | 23 +++++++++++++++++++++++
 include/net/nl802154.h  |  3 +++
 2 files changed, 26 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index 0b0f81a945b6..0c2778a836db 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -19,6 +19,7 @@
 struct wpan_phy;
 struct wpan_phy_cca;
 struct cfg802154_scan_request;
+struct cfg802154_beacon_request;
 
 #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
 struct ieee802154_llsec_device_key;
@@ -72,6 +73,10 @@ struct cfg802154_ops {
 				struct cfg802154_scan_request *request);
 	int	(*abort_scan)(struct wpan_phy *wpan_phy,
 			      struct wpan_dev *wpan_dev);
+	int	(*send_beacons)(struct wpan_phy *wpan_phy,
+				struct cfg802154_beacon_request *request);
+	int	(*stop_beacons)(struct wpan_phy *wpan_phy,
+				struct wpan_dev *wpan_dev);
 #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
 	void	(*get_llsec_table)(struct wpan_phy *wpan_phy,
 				   struct wpan_dev *wpan_dev,
@@ -314,6 +319,24 @@ struct cfg802154_scan_request {
 	struct wpan_phy *wpan_phy;
 };
 
+/**
+ * struct cfg802154_beacon_request - Beacon request descriptor
+ *
+ * @interval: interval n between sendings, in multiple order of the super frame
+ *            duration: aBaseSuperframeDuration * (2^n) unless the interval
+ *            order is greater or equal to 15, in this case beacons won't be
+ *            passively sent out at a fixed rate but instead inform the device
+ *            that it should answer beacon requests as part of active scan
+ *            procedures
+ * @wpan_dev: the concerned wpan device
+ * @wpan_phy: the wpan phy this was for
+ */
+struct cfg802154_beacon_request {
+	u8 interval;
+	struct wpan_dev *wpan_dev;
+	struct wpan_phy *wpan_phy;
+};
+
 /**
  * struct cfg802154_mac_pkt - MAC packet descriptor (beacon/command)
  * @node: MAC packets to process list member
diff --git a/include/net/nl802154.h b/include/net/nl802154.h
index c267fa1c5aac..8cd9d141f5af 100644
--- a/include/net/nl802154.h
+++ b/include/net/nl802154.h
@@ -76,6 +76,8 @@ enum nl802154_commands {
 	NL802154_CMD_TRIGGER_SCAN,
 	NL802154_CMD_ABORT_SCAN,
 	NL802154_CMD_SCAN_DONE,
+	NL802154_CMD_SEND_BEACONS,
+	NL802154_CMD_STOP_BEACONS,
 
 	/* add new commands above here */
 
@@ -144,6 +146,7 @@ enum nl802154_attrs {
 	NL802154_ATTR_SCAN_MEAN_PRF,
 	NL802154_ATTR_SCAN_DURATION,
 	NL802154_ATTR_SCAN_DONE_REASON,
+	NL802154_ATTR_BEACON_INTERVAL,
 
 	/* add attributes here, update the policy in nl802154.c */
 
-- 
cgit v1.2.3


From 3accf4762734a69ebd03cba989249c78ac7dfc7e Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Wed, 25 Jan 2023 11:29:23 +0100
Subject: mac802154: Handle basic beaconing

Implement the core hooks in order to provide the softMAC layer support
for sending beacons. Coordinators may be requested to send beacons in a
beacon enabled PAN in order for the other devices around to self
discover the available PANs automatically.

Changing the channels is prohibited while a beacon operation is
ongoing.

The implementation uses a workqueue triggered at a certain interval
depending on the symbol duration for the current channel and the
interval order provided.

Sending beacons in response to a BEACON_REQ frame (ie. answering active
scans) is not yet supported.

This initial patchset has no security support (llsec).

Co-developed-by: David Girault <david.girault@qorvo.com>
Signed-off-by: David Girault <david.girault@qorvo.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Alexander Aring <aahringo@redhat.com>
Link: https://lore.kernel.org/r/20230125102923.135465-3-miquel.raynal@bootlin.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 include/net/ieee802154_netdev.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h
index 2f2196049a86..da8a3e648c7a 100644
--- a/include/net/ieee802154_netdev.h
+++ b/include/net/ieee802154_netdev.h
@@ -129,6 +129,13 @@ enum ieee802154_frame_version {
 	IEEE802154_MULTIPURPOSE_STD = IEEE802154_2003_STD,
 };
 
+enum ieee802154_addressing_mode {
+	IEEE802154_NO_ADDRESSING,
+	IEEE802154_RESERVED,
+	IEEE802154_SHORT_ADDRESSING,
+	IEEE802154_EXTENDED_ADDRESSING,
+};
+
 struct ieee802154_hdr {
 	struct ieee802154_hdr_fc fc;
 	u8 seq;
@@ -137,6 +144,11 @@ struct ieee802154_hdr {
 	struct ieee802154_sechdr sec;
 };
 
+struct ieee802154_beacon_frame {
+	struct ieee802154_hdr mhr;
+	struct ieee802154_beacon_hdr mac_pl;
+};
+
 /* pushes hdr onto the skb. fields of hdr->fc that can be calculated from
  * the contents of hdr will be, and the actual value of those bits in
  * hdr->fc will be ignored. this includes the INTRA_PAN bit and the frame
@@ -162,6 +174,10 @@ int ieee802154_hdr_peek_addrs(const struct sk_buff *skb,
  */
 int ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr);
 
+/* pushes a beacon frame into an skb */
+int ieee802154_beacon_push(struct sk_buff *skb,
+			   struct ieee802154_beacon_frame *beacon);
+
 int ieee802154_max_payload(const struct ieee802154_hdr *hdr);
 
 static inline int
-- 
cgit v1.2.3


From fb8421a94c5613fee86e192bab0892ecb1d56e4c Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Fri, 27 Jan 2023 16:50:42 +0100
Subject: devlink: remove devlink features

Devlink features were introduced to disallow devlink reload calls of
userspace before the devlink was fully initialized. The reason for this
workaround was the fact that devlink reload was originally called
without devlink instance lock held.

However, with recent changes that converted devlink reload to be
performed under devlink instance lock, this is redundant so remove
devlink features entirely.

Note that mlx5 used this to enable devlink reload conditionally only
when device didn't act as multi port slave. Move the multi port check
into mlx5_devlink_reload_down() callback alongside with the other
checks preventing the device from reload in certain states.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index ab654cf552b8..2e85a5970a32 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1645,7 +1645,7 @@ static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
 {
 	return devlink_alloc_ns(ops, priv_size, &init_net, dev);
 }
-void devlink_set_features(struct devlink *devlink, u64 features);
+
 int devl_register(struct devlink *devlink);
 void devl_unregister(struct devlink *devlink);
 void devlink_register(struct devlink *devlink);
-- 
cgit v1.2.3


From 058a8f7f73aae1cc22b53fcefec031b9e391b54d Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 28 Jan 2023 10:58:30 -0500
Subject: net: add a couple of helpers for iph tot_len

This patch adds three APIs to replace the iph->tot_len setting
and getting in all places where IPv4 BIG TCP packets may reach,
they will be used in the following patches.

Note that iph_totlen() will be used when iph is not in linear
data of the skb.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/route.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/route.h b/include/net/route.h
index 6e92dd5bcd61..fe00b0a2e475 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -35,9 +35,6 @@
 #include <linux/cache.h>
 #include <linux/security.h>
 
-/* IPv4 datagram length is stored into 16bit field (tot_len) */
-#define IP_MAX_MTU	0xFFFFU
-
 #define RTO_ONLINK	0x01
 
 #define RT_CONN_FLAGS(sk)   (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
-- 
cgit v1.2.3


From a13fbf5ed5b4fc9095f12e955ca3a59b5507ff01 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 28 Jan 2023 10:58:34 -0500
Subject: netfilter: use skb_ip_totlen and iph_totlen

There are also quite some places in netfilter that may process IPv4 TCP
GSO packets, we need to replace them too.

In length_mt(), we have to use u_int32_t/int to accept skb_ip_totlen()
return value, otherwise it may overflow and mismatch. This change will
also help us add selftest for IPv4 BIG TCP in the following patch.

Note that we don't need to replace the one in tcpmss_tg4(), as it will
return if there is data after tcphdr in tcpmss_mangle_packet(). The
same in mangle_contents() in nf_nat_helper.c, it returns false when
skb->len + extra > 65535 in enlarge_skb().

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/netfilter/nf_tables_ipv4.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
index 112708f7a6b4..947973623dc7 100644
--- a/include/net/netfilter/nf_tables_ipv4.h
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -29,7 +29,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
 	if (iph->ihl < 5 || iph->version != 4)
 		return -1;
 
-	len = ntohs(iph->tot_len);
+	len = iph_totlen(pkt->skb, iph);
 	thoff = iph->ihl * 4;
 	if (pkt->skb->len < len)
 		return -1;
@@ -64,7 +64,7 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt)
 	if (iph->ihl < 5 || iph->version != 4)
 		goto inhdr_error;
 
-	len = ntohs(iph->tot_len);
+	len = iph_totlen(pkt->skb, iph);
 	thoff = iph->ihl * 4;
 	if (pkt->skb->len < len) {
 		__IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INTRUNCATEDPKTS);
-- 
cgit v1.2.3


From 52cf89f78c01bf39973f3e70d366921d70faff7a Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@mojatatu.com>
Date: Tue, 31 Jan 2023 16:05:11 -0300
Subject: net/sched: transition act_pedit to rcu and percpu stats

The software pedit action didn't get the same love as some of the
other actions and it's still using spinlocks and shared stats in the
datapath.
Transition the action to rcu and percpu stats as this improves the
action's performance dramatically on multiple cpu deployments.

Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/tc_act/tc_pedit.h | 81 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 66 insertions(+), 15 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h
index 3e02709a1df6..83fe39931781 100644
--- a/include/net/tc_act/tc_pedit.h
+++ b/include/net/tc_act/tc_pedit.h
@@ -4,22 +4,29 @@
 
 #include <net/act_api.h>
 #include <linux/tc_act/tc_pedit.h>
+#include <linux/types.h>
 
 struct tcf_pedit_key_ex {
 	enum pedit_header_type htype;
 	enum pedit_cmd cmd;
 };
 
-struct tcf_pedit {
-	struct tc_action	common;
-	unsigned char		tcfp_nkeys;
-	unsigned char		tcfp_flags;
-	u32			tcfp_off_max_hint;
+struct tcf_pedit_parms {
 	struct tc_pedit_key	*tcfp_keys;
 	struct tcf_pedit_key_ex	*tcfp_keys_ex;
+	u32 tcfp_off_max_hint;
+	unsigned char tcfp_nkeys;
+	unsigned char tcfp_flags;
+	struct rcu_head rcu;
+};
+
+struct tcf_pedit {
+	struct tc_action common;
+	struct tcf_pedit_parms __rcu *parms;
 };
 
 #define to_pedit(a) ((struct tcf_pedit *)a)
+#define to_pedit_parms(a) (rcu_dereference(to_pedit(a)->parms))
 
 static inline bool is_tcf_pedit(const struct tc_action *a)
 {
@@ -32,37 +39,81 @@ static inline bool is_tcf_pedit(const struct tc_action *a)
 
 static inline int tcf_pedit_nkeys(const struct tc_action *a)
 {
-	return to_pedit(a)->tcfp_nkeys;
+	struct tcf_pedit_parms *parms;
+	int nkeys;
+
+	rcu_read_lock();
+	parms = to_pedit_parms(a);
+	nkeys = parms->tcfp_nkeys;
+	rcu_read_unlock();
+
+	return nkeys;
 }
 
 static inline u32 tcf_pedit_htype(const struct tc_action *a, int index)
 {
-	if (to_pedit(a)->tcfp_keys_ex)
-		return to_pedit(a)->tcfp_keys_ex[index].htype;
+	u32 htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
+	struct tcf_pedit_parms *parms;
+
+	rcu_read_lock();
+	parms = to_pedit_parms(a);
+	if (parms->tcfp_keys_ex)
+		htype = parms->tcfp_keys_ex[index].htype;
+	rcu_read_unlock();
 
-	return TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
+	return htype;
 }
 
 static inline u32 tcf_pedit_cmd(const struct tc_action *a, int index)
 {
-	if (to_pedit(a)->tcfp_keys_ex)
-		return to_pedit(a)->tcfp_keys_ex[index].cmd;
+	struct tcf_pedit_parms *parms;
+	u32 cmd = __PEDIT_CMD_MAX;
 
-	return __PEDIT_CMD_MAX;
+	rcu_read_lock();
+	parms = to_pedit_parms(a);
+	if (parms->tcfp_keys_ex)
+		cmd = parms->tcfp_keys_ex[index].cmd;
+	rcu_read_unlock();
+
+	return cmd;
 }
 
 static inline u32 tcf_pedit_mask(const struct tc_action *a, int index)
 {
-	return to_pedit(a)->tcfp_keys[index].mask;
+	struct tcf_pedit_parms *parms;
+	u32 mask;
+
+	rcu_read_lock();
+	parms = to_pedit_parms(a);
+	mask = parms->tcfp_keys[index].mask;
+	rcu_read_unlock();
+
+	return mask;
 }
 
 static inline u32 tcf_pedit_val(const struct tc_action *a, int index)
 {
-	return to_pedit(a)->tcfp_keys[index].val;
+	struct tcf_pedit_parms *parms;
+	u32 val;
+
+	rcu_read_lock();
+	parms = to_pedit_parms(a);
+	val = parms->tcfp_keys[index].val;
+	rcu_read_unlock();
+
+	return val;
 }
 
 static inline u32 tcf_pedit_offset(const struct tc_action *a, int index)
 {
-	return to_pedit(a)->tcfp_keys[index].off;
+	struct tcf_pedit_parms *parms;
+	u32 off;
+
+	rcu_read_lock();
+	parms = to_pedit_parms(a);
+	off = parms->tcfp_keys[index].off;
+	rcu_read_unlock();
+
+	return off;
 }
 #endif /* __NET_TC_PED_H */
-- 
cgit v1.2.3


From e4d0fe71f59dc5137a2793ff7560730d80d1e1f4 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Wed, 1 Feb 2023 19:56:53 +0200
Subject: ipvs: avoid kfree_rcu without 2nd arg

Avoid possible synchronize_rcu() as part from the
kfree_rcu() call when 2nd arg is not provided.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ip_vs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index c6c61100d244..6d71a5ff52df 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -461,6 +461,7 @@ void ip_vs_stats_free(struct ip_vs_stats *stats);
 
 /* Multiple chains processed in same tick */
 struct ip_vs_est_tick_data {
+	struct rcu_head		rcu_head;
 	struct hlist_head	chains[IPVS_EST_TICK_CHAINS];
 	DECLARE_BITMAP(present, IPVS_EST_TICK_CHAINS);
 	DECLARE_BITMAP(full, IPVS_EST_TICK_CHAINS);
-- 
cgit v1.2.3


From d3d854fd6a1d97157f790604e07f6386e8df8fe4 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 1 Feb 2023 11:24:17 +0100
Subject: netdev-genl: create a simple family for netdev stuff

Add a Netlink spec-compatible family for netdevs.
This is a very simple implementation without much
thought going into it.

It allows us to reap all the benefits of Netlink specs,
one can use the generic client to issue the commands:

  $ ./cli.py --spec netdev.yaml --dump dev_get
  [{'ifindex': 1, 'xdp-features': set()},
   {'ifindex': 2, 'xdp-features': {'basic', 'ndo-xmit', 'redirect'}},
   {'ifindex': 3, 'xdp-features': {'rx-sg'}}]

the generic python library does not have flags-by-name
support, yet, but we also don't have to carry strings
in the messages, as user space can get the names from
the spec.

Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Co-developed-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Co-developed-by: Marek Majtyka <alardam@gmail.com>
Signed-off-by: Marek Majtyka <alardam@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/327ad9c9868becbe1e601b580c962549c8cd81f2.1675245258.git.lorenzo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/xdp.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 91292aa13bc0..8d1c86914f4c 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -7,6 +7,7 @@
 #define __LINUX_NET_XDP_H__
 
 #include <linux/skbuff.h> /* skb_shared_info */
+#include <uapi/linux/netdev.h>
 
 /**
  * DOC: XDP RX-queue information
@@ -43,6 +44,8 @@ enum xdp_mem_type {
 	MEM_TYPE_MAX,
 };
 
+typedef u32 xdp_features_t;
+
 /* XDP flags for ndo_xdp_xmit */
 #define XDP_XMIT_FLUSH		(1U << 0)	/* doorbell signal consumer */
 #define XDP_XMIT_FLAGS_MASK	XDP_XMIT_FLUSH
-- 
cgit v1.2.3


From 66c0e13ad236c74ea88c7c1518f3cef7f372e3da Mon Sep 17 00:00:00 2001
From: Marek Majtyka <alardam@gmail.com>
Date: Wed, 1 Feb 2023 11:24:18 +0100
Subject: drivers: net: turn on XDP features

A summary of the flags being set for various drivers is given below.
Note that XDP_F_REDIRECT_TARGET and XDP_F_FRAG_TARGET are features
that can be turned off and on at runtime. This means that these flags
may be set and unset under RTNL lock protection by the driver. Hence,
READ_ONCE must be used by code loading the flag value.

Also, these flags are not used for synchronization against the availability
of XDP resources on a device. It is merely a hint, and hence the read
may race with the actual teardown of XDP resources on the device. This
may change in the future, e.g. operations taking a reference on the XDP
resources of the driver, and in turn inhibiting turning off this flag.
However, for now, it can only be used as a hint to check whether device
supports becoming a redirection target.

Turn 'hw-offload' feature flag on for:
 - netronome (nfp)
 - netdevsim.

Turn 'native' and 'zerocopy' features flags on for:
 - intel (i40e, ice, ixgbe, igc)
 - mellanox (mlx5).
 - stmmac
 - netronome (nfp)

Turn 'native' features flags on for:
 - amazon (ena)
 - broadcom (bnxt)
 - freescale (dpaa, dpaa2, enetc)
 - funeth
 - intel (igb)
 - marvell (mvneta, mvpp2, octeontx2)
 - mellanox (mlx4)
 - mtk_eth_soc
 - qlogic (qede)
 - sfc
 - socionext (netsec)
 - ti (cpsw)
 - tap
 - tsnep
 - veth
 - xen
 - virtio_net.

Turn 'basic' (tx, pass, aborted and drop) features flags on for:
 - netronome (nfp)
 - cavium (thunder)
 - hyperv.

Turn 'redirect_target' feature flag on for:
 - amanzon (ena)
 - broadcom (bnxt)
 - freescale (dpaa, dpaa2)
 - intel (i40e, ice, igb, ixgbe)
 - ti (cpsw)
 - marvell (mvneta, mvpp2)
 - sfc
 - socionext (netsec)
 - qlogic (qede)
 - mellanox (mlx5)
 - tap
 - veth
 - virtio_net
 - xen

Reviewed-by: Gerhard Engleder <gerhard@engleder-embedded.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Acked-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Co-developed-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Marek Majtyka <alardam@gmail.com>
Link: https://lore.kernel.org/r/3eca9fafb308462f7edb1f58e451d59209aa07eb.1675245258.git.lorenzo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/xdp.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/net')

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 8d1c86914f4c..d517bfac937b 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -428,9 +428,21 @@ MAX_XDP_METADATA_KFUNC,
 #ifdef CONFIG_NET
 u32 bpf_xdp_metadata_kfunc_id(int id);
 bool bpf_dev_bound_kfunc_id(u32 btf_id);
+void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg);
+void xdp_features_clear_redirect_target(struct net_device *dev);
 #else
 static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; }
 static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; }
+
+static inline void
+xdp_features_set_redirect_target(struct net_device *dev, bool support_sg)
+{
+}
+
+static inline void
+xdp_features_clear_redirect_target(struct net_device *dev)
+{
+}
 #endif
 
 #endif /* __LINUX_NET_XDP_H__ */
-- 
cgit v1.2.3


From 2798e36dc233a409a5d3f26f73029596dc504020 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 1 Feb 2023 17:43:45 +0000
Subject: tcp: add TCP_MINTTL drop reason

In the unlikely case incoming packets are dropped because
of IP_MINTTL / IPV6_MINHOPCOUNT constraints...

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20230201174345.2708943-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dropreason.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dropreason.h b/include/net/dropreason.h
index 70539288f995..94bc3d5d8803 100644
--- a/include/net/dropreason.h
+++ b/include/net/dropreason.h
@@ -71,6 +71,7 @@
 	FN(DUP_FRAG)			\
 	FN(FRAG_REASM_TIMEOUT)		\
 	FN(FRAG_TOO_FAR)		\
+	FN(TCP_MINTTL)			\
 	FNe(MAX)
 
 /**
@@ -312,6 +313,11 @@ enum skb_drop_reason {
 	 * (/proc/sys/net/ipv4/ipfrag_max_dist)
 	 */
 	SKB_DROP_REASON_FRAG_TOO_FAR,
+	/**
+	 * @SKB_DROP_REASON_TCP_MINTTL: ipv4 ttl or ipv6 hoplimit below
+	 * the threshold (IP_MINTTL or IPV6_MINHOPCOUNT).
+	 */
+	SKB_DROP_REASON_TCP_MINTTL,
 	/**
 	 * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be
 	 * used as a real 'reason'
-- 
cgit v1.2.3


From 8f84780b84d645d6e35467f4a6f3236b20d7f4b2 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Wed, 1 Feb 2023 17:30:56 +0100
Subject: netfilter: flowtable: allow unidirectional rules

Modify flow table offload to support unidirectional connections by
extending enum nf_flow_flags with new "NF_FLOW_HW_BIDIRECTIONAL" flag. Only
offload reply direction when the flag is set. This infrastructure change is
necessary to support offloading UDP NEW connections in original direction
in following patches in series.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_flow_table.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index cd982f4a0f50..88ab98ab41d9 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -164,6 +164,7 @@ enum nf_flow_flags {
 	NF_FLOW_HW_DYING,
 	NF_FLOW_HW_DEAD,
 	NF_FLOW_HW_PENDING,
+	NF_FLOW_HW_BIDIRECTIONAL,
 };
 
 enum flow_offload_type {
-- 
cgit v1.2.3


From 1a441a9b8be8849957a01413a144f84932c324cb Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Wed, 1 Feb 2023 17:30:57 +0100
Subject: netfilter: flowtable: cache info of last offload

Modify flow table offload to cache the last ct info status that was passed
to the driver offload callbacks by extending enum nf_flow_flags with new
"NF_FLOW_HW_ESTABLISHED" flag. Set the flag if ctinfo was 'established'
during last act_ct meta actions fill call. This infrastructure change is
necessary to optimize promoting of UDP connections from 'new' to
'established' in following patches in this series.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_flow_table.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 88ab98ab41d9..ebb28ec5b6fa 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -57,7 +57,7 @@ struct nf_flowtable_type {
 						 struct net_device *dev,
 						 enum flow_block_command cmd);
 	int				(*action)(struct net *net,
-						  const struct flow_offload *flow,
+						  struct flow_offload *flow,
 						  enum flow_offload_tuple_dir dir,
 						  struct nf_flow_rule *flow_rule);
 	void				(*free)(struct nf_flowtable *ft);
@@ -165,6 +165,7 @@ enum nf_flow_flags {
 	NF_FLOW_HW_DEAD,
 	NF_FLOW_HW_PENDING,
 	NF_FLOW_HW_BIDIRECTIONAL,
+	NF_FLOW_HW_ESTABLISHED,
 };
 
 enum flow_offload_type {
@@ -313,10 +314,10 @@ void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable);
 int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
 				struct net_device *dev,
 				enum flow_block_command cmd);
-int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
+int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
 			    enum flow_offload_tuple_dir dir,
 			    struct nf_flow_rule *flow_rule);
-int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
+int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
 			    enum flow_offload_tuple_dir dir,
 			    struct nf_flow_rule *flow_rule);
 
-- 
cgit v1.2.3


From 6579f5bacc2c4cbc5ef6abb45352416939d1f844 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 2 Feb 2023 09:41:00 +0000
Subject: raw: use net_hash_mix() in hash function

Some applications seem to rely on RAW sockets.

If they use private netns, we can avoid piling all RAW
sockets bound to a given protocol into a single bucket.

Also place (struct raw_hashinfo).lock into its own
cache line to limit false sharing.

Alternative would be to have per-netns hashtables,
but this seems too expensive for most netns
where RAW sockets are not used.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/raw.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/raw.h b/include/net/raw.h
index 5e665934ebc7..2c004c20ed99 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -15,6 +15,8 @@
 
 #include <net/inet_sock.h>
 #include <net/protocol.h>
+#include <net/netns/hash.h>
+#include <linux/hash.h>
 #include <linux/icmp.h>
 
 extern struct proto raw_prot;
@@ -29,13 +31,20 @@ int raw_local_deliver(struct sk_buff *, int);
 
 int raw_rcv(struct sock *, struct sk_buff *);
 
-#define RAW_HTABLE_SIZE	MAX_INET_PROTOS
+#define RAW_HTABLE_LOG	8
+#define RAW_HTABLE_SIZE	(1U << RAW_HTABLE_LOG)
 
 struct raw_hashinfo {
 	spinlock_t lock;
-	struct hlist_nulls_head ht[RAW_HTABLE_SIZE];
+
+	struct hlist_nulls_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned;
 };
 
+static inline u32 raw_hashfunc(const struct net *net, u32 proto)
+{
+	return hash_32(net_hash_mix(net) ^ proto, RAW_HTABLE_LOG);
+}
+
 static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo)
 {
 	int i;
-- 
cgit v1.2.3


From 542bcea4be866b14b3a5c8e90773329066656c43 Mon Sep 17 00:00:00 2001
From: Qingfang DENG <qingfang.deng@siflower.com.cn>
Date: Fri, 3 Feb 2023 09:16:11 +0800
Subject: net: page_pool: use in_softirq() instead

We use BH context only for synchronization, so we don't care if it's
actually serving softirq or not.

As a side node, in case of threaded NAPI, in_serving_softirq() will
return false because it's in process context with BH off, making
page_pool_recycle_in_cache() unreachable.

Signed-off-by: Qingfang DENG <qingfang.deng@siflower.com.cn>
Tested-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/page_pool.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 813c93499f20..34bf531ffc8d 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -386,7 +386,7 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
 static inline void page_pool_ring_lock(struct page_pool *pool)
 	__acquires(&pool->ring.producer_lock)
 {
-	if (in_serving_softirq())
+	if (in_softirq())
 		spin_lock(&pool->ring.producer_lock);
 	else
 		spin_lock_bh(&pool->ring.producer_lock);
@@ -395,7 +395,7 @@ static inline void page_pool_ring_lock(struct page_pool *pool)
 static inline void page_pool_ring_unlock(struct page_pool *pool)
 	__releases(&pool->ring.producer_lock)
 {
-	if (in_serving_softirq())
+	if (in_softirq())
 		spin_unlock(&pool->ring.producer_lock);
 	else
 		spin_unlock_bh(&pool->ring.producer_lock);
-- 
cgit v1.2.3


From 9adafe2b85460be5b9bed9b6f6597526e7d4f7c5 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 4 Feb 2023 15:52:57 +0200
Subject: net/sched: move struct tc_mqprio_qopt_offload from pkt_cls.h to
 pkt_sched.h

Since mqprio is a scheduler and not a classifier, move its offload
structure to pkt_sched.h, where struct tc_taprio_qopt_offload also lies.

Also update some header inclusions in drivers that access this
structure, to the best of my abilities.

Cc: Igor Russkikh <irusskikh@marvell.com>
Cc: Yisen Zhuang <yisen.zhuang@huawei.com>
Cc: Salil Mehta <salil.mehta@huawei.com>
Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
Cc: Tony Nguyen <anthony.l.nguyen@intel.com>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Horatiu Vultur <horatiu.vultur@microchip.com>
Cc: Lars Povlsen <lars.povlsen@microchip.com>
Cc: Steen Hegelund <Steen.Hegelund@microchip.com>
Cc: Daniel Machon <daniel.machon@microchip.com>
Cc: UNGLinuxDriver@microchip.com
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h   | 10 ----------
 include/net/pkt_sched.h | 10 ++++++++++
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 4cabb32a2ad9..cd410a87517b 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -788,16 +788,6 @@ struct tc_cls_bpf_offload {
 	bool exts_integrated;
 };
 
-struct tc_mqprio_qopt_offload {
-	/* struct tc_mqprio_qopt must always be the first element */
-	struct tc_mqprio_qopt qopt;
-	u16 mode;
-	u16 shaper;
-	u32 flags;
-	u64 min_rate[TC_QOPT_MAX_QUEUE];
-	u64 max_rate[TC_QOPT_MAX_QUEUE];
-};
-
 /* This structure holds cookie structure that is passed from user
  * to the kernel for actions and classifiers
  */
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 38207873eda6..6c5e64e0a0bb 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -160,6 +160,16 @@ struct tc_etf_qopt_offload {
 	s32 queue;
 };
 
+struct tc_mqprio_qopt_offload {
+	/* struct tc_mqprio_qopt must always be the first element */
+	struct tc_mqprio_qopt qopt;
+	u16 mode;
+	u16 shaper;
+	u32 flags;
+	u64 min_rate[TC_QOPT_MAX_QUEUE];
+	u64 max_rate[TC_QOPT_MAX_QUEUE];
+};
+
 struct tc_taprio_caps {
 	bool supports_queue_max_sdu:1;
 };
-- 
cgit v1.2.3


From 19278d76915d6b28269e1af1d7b6754c16576572 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 4 Feb 2023 15:52:59 +0200
Subject: net/sched: mqprio: allow offloading drivers to request queue count
 validation

mqprio_parse_opt() proudly has a comment:

	/* If hardware offload is requested we will leave it to the device
	 * to either populate the queue counts itself or to validate the
	 * provided queue counts.
	 */

Unfortunately some device drivers did not get this memo, and don't
validate the queue counts, or populate them.

In case drivers don't want to populate the queue counts themselves, just
act upon the requested configuration, it makes sense to introduce a tc
capability, and make mqprio query it, so they don't have to do the
validation themselves.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 6c5e64e0a0bb..02e3ccfbc7d1 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -160,6 +160,10 @@ struct tc_etf_qopt_offload {
 	s32 queue;
 };
 
+struct tc_mqprio_caps {
+	bool validate_queue_counts:1;
+};
+
 struct tc_mqprio_qopt_offload {
 	/* struct tc_mqprio_qopt must always be the first element */
 	struct tc_mqprio_qopt qopt;
-- 
cgit v1.2.3


From 09c794c0a88d959a603ec49b23df8e6bba68e7b7 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 4 Feb 2023 15:53:03 +0200
Subject: net/sched: taprio: pass mqprio queue configuration to ndo_setup_tc()

The taprio qdisc does not currently pass the mqprio queue configuration
down to the offloading device driver. So the driver cannot act upon the
TXQ counts/offsets per TC, or upon the prio->tc map. It was probably
assumed that the driver only wants to offload num_tc (see
TC_MQPRIO_HW_OFFLOAD_TCS), which it can get from netdev_get_num_tc(),
but there's clearly more to the mqprio configuration than that.

I've considered 2 mechanisms to remedy that. First is to pass a struct
tc_mqprio_qopt_offload as part of the tc_taprio_qopt_offload. The second
is to make taprio actually call TC_SETUP_QDISC_MQPRIO, *in addition to*
TC_SETUP_QDISC_TAPRIO.

The difference is that in the first case, existing drivers (offloading
or not) all ignore taprio's mqprio portion currently, whereas in the
second case, we could control whether to call TC_SETUP_QDISC_MQPRIO,
based on a new capability. The question is which approach would be
better.

I'm afraid that calling TC_SETUP_QDISC_MQPRIO unconditionally (not based
on a taprio capability bit) would risk introducing regressions. For
example, taprio doesn't populate (or validate) qopt->hw, as well as
mqprio.flags, mqprio.shaper, mqprio.min_rate, mqprio.max_rate.

In comparison, adding a capability is functionally equivalent to just
passing the mqprio in a way that drivers can ignore it, except it's
slightly more complicated to use it (need to set the capability).

Ultimately, what made me go for the "mqprio in taprio" variant was that
it's easier for offloading drivers to interpret the mqprio qopt slightly
differently when it comes from taprio vs when it comes from mqprio,
should that ever become necessary.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 02e3ccfbc7d1..ace8be520fb0 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -187,6 +187,7 @@ struct tc_taprio_sched_entry {
 };
 
 struct tc_taprio_qopt_offload {
+	struct tc_mqprio_qopt_offload mqprio;
 	u8 enable;
 	ktime_t base_time;
 	u64 cycle_time;
-- 
cgit v1.2.3


From 522d15ea831f88717084304f105b1d195104880e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 4 Feb 2023 15:53:04 +0200
Subject: net/sched: taprio: only pass gate mask per TXQ for igc, stmmac,
 tsnep, am65_cpsw

There are 2 classes of in-tree drivers currently:

- those who act upon struct tc_taprio_sched_entry :: gate_mask as if it
  holds a bit mask of TXQs

- those who act upon the gate_mask as if it holds a bit mask of TCs

When it comes to the standard, IEEE 802.1Q-2018 does say this in the
second paragraph of section 8.6.8.4 Enhancements for scheduled traffic:

| A gate control list associated with each Port contains an ordered list
| of gate operations. Each gate operation changes the transmission gate
| state for the gate associated with each of the Port's traffic class
| queues and allows associated control operations to be scheduled.

In typically obtuse language, it refers to a "traffic class queue"
rather than a "traffic class" or a "queue". But careful reading of
802.1Q clarifies that "traffic class" and "queue" are in fact
synonymous (see 8.6.6 Queuing frames):

| A queue in this context is not necessarily a single FIFO data structure.
| A queue is a record of all frames of a given traffic class awaiting
| transmission on a given Bridge Port. The structure of this record is not
| specified.

i.o.w. their definition of "queue" isn't the Linux TX queue.

The gate_mask really is input into taprio via its UAPI as a mask of
traffic classes, but taprio_sched_to_offload() converts it into a TXQ
mask.

The breakdown of drivers which handle TC_SETUP_QDISC_TAPRIO is:

- hellcreek, felix, sja1105: these are DSA switches, it's not even very
  clear what TXQs correspond to, other than purely software constructs.
  Only the mqprio configuration with 8 TCs and 1 TXQ per TC makes sense.
  So it's fine to convert these to a gate mask per TC.

- enetc: I have the hardware and can confirm that the gate mask is per
  TC, and affects all TXQs (BD rings) configured for that priority.

- igc: in igc_save_qbv_schedule(), the gate_mask is clearly interpreted
  to be per-TXQ.

- tsnep: Gerhard Engleder clarifies that even though this hardware
  supports at most 1 TXQ per TC, the TXQ indices may be different from
  the TC values themselves, and it is the TXQ indices that matter to
  this hardware. So keep it per-TXQ as well.

- stmmac: I have a GMAC datasheet, and in the EST section it does
  specify that the gate events are per TXQ rather than per TC.

- lan966x: again, this is a switch, and while not a DSA one, the way in
  which it implements lan966x_mqprio_add() - by only allowing num_tc ==
  NUM_PRIO_QUEUES (8) - makes it clear to me that TXQs are a purely
  software construct here as well. They seem to map 1:1 with TCs.

- am65_cpsw: from looking at am65_cpsw_est_set_sched_cmds(), I get the
  impression that the fetch_allow variable is treated like a prio_mask.
  This definitely sounds closer to a per-TC gate mask rather than a
  per-TXQ one, and TI documentation does seem to recomment an identity
  mapping between TCs and TXQs. However, Roger Quadros would like to do
  some testing before making changes, so I'm leaving this driver to
  operate as it did before, for now. Link with more details at the end.

Based on this breakdown, we have 5 drivers with a gate mask per TC and
4 with a gate mask per TXQ. So let's make the gate mask per TXQ the
opt-in and the gate mask per TC the default.

Benefit from the TC_QUERY_CAPS feature that Jakub suggested we add, and
query the device driver before calling the proper ndo_setup_tc(), and
figure out if it expects one or the other format.

Link: https://patchwork.kernel.org/project/netdevbpf/patch/20230202003621.2679603-15-vladimir.oltean@nxp.com/#25193204
Cc: Horatiu Vultur <horatiu.vultur@microchip.com>
Cc: Siddharth Vadapalli <s-vadapalli@ti.com>
Cc: Roger Quadros <rogerq@kernel.org>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Gerhard Engleder <gerhard@engleder-embedded.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index ace8be520fb0..fd889fc4912b 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -176,6 +176,7 @@ struct tc_mqprio_qopt_offload {
 
 struct tc_taprio_caps {
 	bool supports_queue_max_sdu:1;
+	bool gate_mask_per_txq:1;
 };
 
 struct tc_taprio_sched_entry {
-- 
cgit v1.2.3


From 584f3742890e966d2f0a1f3c418c9ead70b2d99e Mon Sep 17 00:00:00 2001
From: Pietro Borrello <borrello@diag.uniroma1.it>
Date: Sat, 4 Feb 2023 17:39:20 +0000
Subject: net: add sock_init_data_uid()

Add sock_init_data_uid() to explicitly initialize the socket uid.
To initialise the socket uid, sock_init_data() assumes a the struct
socket* sock is always embedded in a struct socket_alloc, used to
access the corresponding inode uid. This may not be true.
Examples are sockets created in tun_chr_open() and tap_open().

Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.")
Signed-off-by: Pietro Borrello <borrello@diag.uniroma1.it>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index dcd72e6285b2..937e842dc930 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1956,7 +1956,12 @@ void sk_common_release(struct sock *sk);
  *	Default socket callbacks and setup code
  */
 
-/* Initialise core socket variables */
+/* Initialise core socket variables using an explicit uid. */
+void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid);
+
+/* Initialise core socket variables.
+ * Assumes struct socket *sock is embedded in a struct socket_alloc.
+ */
 void sock_init_data(struct socket *sock, struct sock *sk);
 
 /*
-- 
cgit v1.2.3


From 2f530df76c8cb5551d7d9395c77eb02282c3dc68 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 7 Feb 2023 15:54:30 +0200
Subject: net/sched: taprio: give higher priority to higher TCs in software
 dequeue mode

Current taprio software implementation is haunted by the shadow of the
igb/igc hardware model. It iterates over child qdiscs in increasing
order of TXQ index, therefore giving higher xmit priority to TXQ 0 and
lower to TXQ N. According to discussions with Vinicius, that is the
default (perhaps even unchangeable) prioritization scheme used for the
NICs that taprio was first written for (igb, igc), and we have a case of
two bugs canceling out, resulting in a functional setup on igb/igc, but
a less sane one on other NICs.

To the best of my understanding, taprio should prioritize based on the
traffic class, so it should really dequeue starting with the highest
traffic class and going down from there. We get to the TXQ using the
tc_to_txq[] netdev property.

TXQs within the same TC have the same (strict) priority, so we should
pick from them as fairly as we can. We can achieve that by implementing
something very similar to q->curband from multiq_dequeue().

Since igb/igc really do have TXQ 0 of higher hardware priority than
TXQ 1 etc, we need to preserve the behavior for them as well. We really
have no choice, because in txtime-assist mode, taprio is essentially a
software scheduler towards offloaded child tc-etf qdiscs, so the TXQ
selection really does matter (not all igb TXQs support ETF/SO_TXTIME,
says Kurt Kanzenbach).

To preserve the behavior, we need a capability bit so that taprio can
determine if it's running on igb/igc, or on something else. Because igb
doesn't offload taprio at all, we can't piggyback on the
qdisc_offload_query_caps() call from taprio_enable_offload(), but
instead we need a separate call which is also made for software
scheduling.

Introduce two static keys to minimize the performance penalty on systems
which only have igb/igc NICs, and on systems which only have other NICs.
For mixed systems, taprio will have to dynamically check whether to
dequeue using one prioritization algorithm or using the other.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index fd889fc4912b..2016839991a4 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -177,6 +177,11 @@ struct tc_mqprio_qopt_offload {
 struct tc_taprio_caps {
 	bool supports_queue_max_sdu:1;
 	bool gate_mask_per_txq:1;
+	/* Device expects lower TXQ numbers to have higher priority over higher
+	 * TXQs, regardless of their TC mapping. DO NOT USE FOR NEW DRIVERS,
+	 * INSTEAD ENFORCE A PROPER TC:TXQ MAPPING COMING FROM USER SPACE.
+	 */
+	bool broken_mqprio:1;
 };
 
 struct tc_taprio_sched_entry {
-- 
cgit v1.2.3


From 969cf3e670b5532dc345b8fafaf96a94278a7e09 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 9 Jan 2023 19:37:52 -0600
Subject: Bluetooth: HCI: Replace zero-length arrays with flexible-array
 members

Zero-length arrays are deprecated[1] and we are moving towards
adopting C99 flexible-array members instead. So, replace zero-length
arrays in a couple of structures with flex-array members.

This helps with the ongoing efforts to tighten the FORTIFY_SOURCE
routines on memcpy() and help us make progress towards globally
enabling -fstrict-flex-arrays=3 [2].

Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays [1]
Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [2]
Link: https://github.com/KSPP/linux/issues/78
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 8d773b042c85..400f8a7d0c3f 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -2156,7 +2156,7 @@ struct hci_cp_le_big_create_sync {
 	__u8    mse;
 	__le16  timeout;
 	__u8    num_bis;
-	__u8    bis[0];
+	__u8    bis[];
 } __packed;
 
 #define HCI_OP_LE_BIG_TERM_SYNC			0x206c
@@ -2174,7 +2174,7 @@ struct hci_cp_le_setup_iso_path {
 	__le16  codec_vid;
 	__u8    delay[3];
 	__u8    codec_cfg_len;
-	__u8    codec_cfg[0];
+	__u8    codec_cfg[];
 } __packed;
 
 struct hci_rp_le_setup_iso_path {
-- 
cgit v1.2.3


From 2394186a2cefb9a45a029281a55749804dd8c556 Mon Sep 17 00:00:00 2001
From: Pauli Virtanen <pav@iki.fi>
Date: Mon, 30 Jan 2023 20:37:01 +0200
Subject: Bluetooth: MGMT: add CIS feature bits to controller information

Userspace needs to know whether the adapter has feature support for
Connected Isochronous Stream - Central/Peripheral, so it can set up
LE Audio features accordingly.

Expose these feature bits as settings in MGMT controller info.

Signed-off-by: Pauli Virtanen <pav@iki.fi>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/mgmt.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 743f6f59dff8..e18a927669c0 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -109,6 +109,8 @@ struct mgmt_rp_read_index_list {
 #define MGMT_SETTING_STATIC_ADDRESS	0x00008000
 #define MGMT_SETTING_PHY_CONFIGURATION	0x00010000
 #define MGMT_SETTING_WIDEBAND_SPEECH	0x00020000
+#define MGMT_SETTING_CIS_CENTRAL	0x00040000
+#define MGMT_SETTING_CIS_PERIPHERAL	0x00080000
 
 #define MGMT_OP_READ_INFO		0x0004
 #define MGMT_READ_INFO_SIZE		0
-- 
cgit v1.2.3


From 67fc5d7ffbd4f9cf52adf166f5bc9a35fef37f24 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 7 Feb 2023 17:52:07 -0500
Subject: net: extract nf_ct_skb_network_trim function to nf_conntrack_ovs

There are almost the same code in ovs_skb_network_trim() and
tcf_ct_skb_network_trim(), this patch extracts them into a function
nf_ct_skb_network_trim() and moves the function to nf_conntrack_ovs.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Reviewed-by: Aaron Conole <aconole@redhat.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/netfilter/nf_conntrack.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 6a2019aaa464..a6e89d7212f8 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -362,6 +362,8 @@ static inline struct nf_conntrack_net *nf_ct_pernet(const struct net *net)
 	return net_generic(net, nf_conntrack_net_id);
 }
 
+int nf_ct_skb_network_trim(struct sk_buff *skb, int family);
+
 #define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v))
-- 
cgit v1.2.3


From 0785407e78d4bce56e04d92a6c961900b3d513dd Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 7 Feb 2023 17:52:10 -0500
Subject: net: extract nf_ct_handle_fragments to nf_conntrack_ovs

Now handle_fragments() in OVS and TC have the similar code, and
this patch removes the duplicate code by moving the function
to nf_conntrack_ovs.

Note that skb_clear_hash(skb) or skb->ignore_df = 1 should be
done only when defrag returns 0, as it does in other places
in kernel.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Reviewed-by: Aaron Conole <aconole@redhat.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/netfilter/nf_conntrack.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index a6e89d7212f8..7bbab8f2b73d 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -363,6 +363,8 @@ static inline struct nf_conntrack_net *nf_ct_pernet(const struct net *net)
 }
 
 int nf_ct_skb_network_trim(struct sk_buff *skb, int family);
+int nf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
+			   u16 zone, u8 family, u8 *proto, u16 *mru);
 
 #define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
-- 
cgit v1.2.3


From ca43ccf41224b023fc290073d5603a755fd12eed Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 9 Feb 2023 16:22:01 -0800
Subject: dccp/tcp: Avoid negative sk_forward_alloc by ipv6_pinfo.pktoptions.

Eric Dumazet pointed out [0] that when we call skb_set_owner_r()
for ipv6_pinfo.pktoptions, sk_rmem_schedule() has not been called,
resulting in a negative sk_forward_alloc.

We add a new helper which clones a skb and sets its owner only
when sk_rmem_schedule() succeeds.

Note that we move skb_set_owner_r() forward in (dccp|tcp)_v6_do_rcv()
because tcp_send_synack() can make sk_forward_alloc negative before
ipv6_opt_accepted() in the crossed SYN-ACK or self-connect() cases.

[0]: https://lore.kernel.org/netdev/CANn89iK9oc20Jdi_41jb9URdF210r7d1Y-+uypbMSbOfY6jqrg@mail.gmail.com/

Fixes: 323fbd0edf3f ("net: dccp: Add handling of IPV6_PKTOPTIONS to dccp_v6_do_rcv()")
Fixes: 3df80d9320bc ("[DCCP]: Introduce DCCPv6")
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sock.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index dcd72e6285b2..556209727633 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2434,6 +2434,19 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struc
 	return false;
 }
 
+static inline struct sk_buff *skb_clone_and_charge_r(struct sk_buff *skb, struct sock *sk)
+{
+	skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
+	if (skb) {
+		if (sk_rmem_schedule(sk, skb, skb->truesize)) {
+			skb_set_owner_r(skb, sk);
+			return skb;
+		}
+		__kfree_skb(skb);
+	}
+	return NULL;
+}
+
 static inline void skb_prepare_for_gro(struct sk_buff *skb)
 {
 	if (skb->destructor != sock_wfree) {
-- 
cgit v1.2.3


From afd888c3e19ceb5247158fe2fabbf7234937a515 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Fri, 10 Feb 2023 11:01:26 +0100
Subject: devlink: make sure driver does not read updated driverinit param
 before reload

The driverinit param purpose is to serve the driver during init/reload
time to provide a value, either default or set by user.

Make sure that driver does not read value updated by user before the
reload is performed. Hold the new value in a separate struct and switch
it during reload.

Note that this is required to be eventually possible to call
devl_param_driverinit_value_get() without holding instance lock.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 2e85a5970a32..8ed960345f37 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -489,6 +489,10 @@ struct devlink_param_item {
 	const struct devlink_param *param;
 	union devlink_param_value driverinit_value;
 	bool driverinit_value_valid;
+	union devlink_param_value driverinit_value_new; /* Not reachable
+							 * until reload.
+							 */
+	bool driverinit_value_new_valid;
 };
 
 enum devlink_param_generic_id {
-- 
cgit v1.2.3


From 94ba1c316b9c0f9b017f7cd7eac84adae693e80f Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Fri, 10 Feb 2023 11:01:27 +0100
Subject: devlink: fix the name of value arg of
 devl_param_driverinit_value_get()

Probably due to copy-paste error, the name of the arg is "init_val"
which is misleading, as the pointer is used to point to struct where to
store the current value. Rename it to "val" and change the arg comment
a bit on the way.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 8ed960345f37..6a942e70e451 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1784,7 +1784,7 @@ void devlink_params_unregister(struct devlink *devlink,
 			       const struct devlink_param *params,
 			       size_t params_count);
 int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
-				    union devlink_param_value *init_val);
+				    union devlink_param_value *val);
 void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 				     union devlink_param_value init_val);
 void devl_param_value_changed(struct devlink *devlink, u32 param_id);
-- 
cgit v1.2.3


From dc68eaf2c29f410fb078fd6da8e56201d3282e0b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 10 Feb 2023 18:47:05 +0000
Subject: net: dropreason: add SKB_DROP_REASON_IPV6_BAD_EXTHDR

This drop reason can be used whenever an IPv6 packet
has a malformed extension header.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dropreason.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dropreason.h b/include/net/dropreason.h
index 94bc3d5d8803..6c41e535175c 100644
--- a/include/net/dropreason.h
+++ b/include/net/dropreason.h
@@ -72,6 +72,7 @@
 	FN(FRAG_REASM_TIMEOUT)		\
 	FN(FRAG_TOO_FAR)		\
 	FN(TCP_MINTTL)			\
+	FN(IPV6_BAD_EXTHDR)		\
 	FNe(MAX)
 
 /**
@@ -318,6 +319,8 @@ enum skb_drop_reason {
 	 * the threshold (IP_MINTTL or IPV6_MINHOPCOUNT).
 	 */
 	SKB_DROP_REASON_TCP_MINTTL,
+	/** @SKB_DROP_REASON_IPV6_BAD_EXTHDR: Bad IPv6 extension header. */
+	SKB_DROP_REASON_IPV6_BAD_EXTHDR,
 	/**
 	 * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be
 	 * used as a real 'reason'
-- 
cgit v1.2.3


From 30c89bad3ea2ef7a2d4686f9c3cc08420fe627bc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 10 Feb 2023 18:47:07 +0000
Subject: ipv6: icmp6: add drop reason support to icmpv6_notify()

Accurately reports what happened in icmpv6_notify() when handling
a packet.

This makes use of the new IPV6_BAD_EXTHDR drop reason.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ipv6.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 03f3af02a9a6..7332296eca44 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -436,7 +436,8 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl)
 		atomic_dec(&fl->users);
 }
 
-void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info);
+enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
+				   u8 code, __be32 info);
 
 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
 				struct icmp6hdr *thdr, int len);
-- 
cgit v1.2.3


From 545dbcd124b02c9dc93c8a5894c71d682effc3e6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 10 Feb 2023 18:47:08 +0000
Subject: ipv6: icmp6: add drop reason support to ndisc_rcv()

Creates three new drop reasons:

SKB_DROP_REASON_IPV6_NDISC_FRAG: invalid frag (suppress_frag_ndisc).

SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT: invalid hop limit.

SKB_DROP_REASON_IPV6_NDISC_BAD_CODE: invalid NDISC icmp6 code.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dropreason.h | 9 +++++++++
 include/net/ndisc.h      | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/dropreason.h b/include/net/dropreason.h
index 6c41e535175c..ef3f65d135d3 100644
--- a/include/net/dropreason.h
+++ b/include/net/dropreason.h
@@ -73,6 +73,9 @@
 	FN(FRAG_TOO_FAR)		\
 	FN(TCP_MINTTL)			\
 	FN(IPV6_BAD_EXTHDR)		\
+	FN(IPV6_NDISC_FRAG)		\
+	FN(IPV6_NDISC_HOP_LIMIT)	\
+	FN(IPV6_NDISC_BAD_CODE)		\
 	FNe(MAX)
 
 /**
@@ -321,6 +324,12 @@ enum skb_drop_reason {
 	SKB_DROP_REASON_TCP_MINTTL,
 	/** @SKB_DROP_REASON_IPV6_BAD_EXTHDR: Bad IPv6 extension header. */
 	SKB_DROP_REASON_IPV6_BAD_EXTHDR,
+	/** @SKB_DROP_REASON_IPV6_NDISC_FRAG: invalid frag (suppress_frag_ndisc). */
+	SKB_DROP_REASON_IPV6_NDISC_FRAG,
+	/** @SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT: invalid hop limit. */
+	SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT,
+	/** @SKB_DROP_REASON_IPV6_NDISC_BAD_CODE: invalid NDISC icmp6 code. */
+	SKB_DROP_REASON_IPV6_NDISC_BAD_CODE,
 	/**
 	 * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be
 	 * used as a real 'reason'
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index da7eec8669ec..07e5168cdaf9 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -445,7 +445,7 @@ int ndisc_late_init(void);
 void ndisc_late_cleanup(void);
 void ndisc_cleanup(void);
 
-int ndisc_rcv(struct sk_buff *skb);
+enum skb_drop_reason ndisc_rcv(struct sk_buff *skb);
 
 struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit,
 				const struct in6_addr *saddr, u64 nonce);
-- 
cgit v1.2.3


From ac7d27907d5445d0accaf998e1dc3ea570ed1ba6 Mon Sep 17 00:00:00 2001
From: Oz Shlomo <ozsh@nvidia.com>
Date: Sun, 12 Feb 2023 15:25:14 +0200
Subject: net/sched: pass flow_stats instead of multiple stats args

Instead of passing 6 stats related args, pass the flow_stats.

Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/pkt_cls.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index cd410a87517b..bf50829d9255 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -294,8 +294,7 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts)
 
 static inline void
 tcf_exts_hw_stats_update(const struct tcf_exts *exts,
-			 u64 bytes, u64 packets, u64 drops, u64 lastuse,
-			 u8 used_hw_stats, bool used_hw_stats_valid)
+			 struct flow_stats *stats)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	int i;
@@ -306,12 +305,12 @@ tcf_exts_hw_stats_update(const struct tcf_exts *exts,
 		/* if stats from hw, just skip */
 		if (tcf_action_update_hw_stats(a)) {
 			preempt_disable();
-			tcf_action_stats_update(a, bytes, packets, drops,
-						lastuse, true);
+			tcf_action_stats_update(a, stats->bytes, stats->pkts, stats->drops,
+						stats->lastused, true);
 			preempt_enable();
 
-			a->used_hw_stats = used_hw_stats;
-			a->used_hw_stats_valid = used_hw_stats_valid;
+			a->used_hw_stats = stats->used_hw_stats;
+			a->used_hw_stats_valid = stats->used_hw_stats_valid;
 		}
 	}
 #endif
-- 
cgit v1.2.3


From d307b2c6f962ad5d83d7a7df71c2e9c9e4106d82 Mon Sep 17 00:00:00 2001
From: Oz Shlomo <ozsh@nvidia.com>
Date: Sun, 12 Feb 2023 15:25:15 +0200
Subject: net/sched: introduce flow_offload action cookie

Currently a hardware action is uniquely identified by the <id, hw_index>
tuple. However, the id is set by the flow_act_setup callback and tc core
cannot enforce this, and it is possible that a future change could break
this. In addition, <id, hw_index> are not unique across network namespaces.

Uniquely identify the action by setting an action cookie by the tc core.
Use the unique action cookie to query the action's hardware stats.

Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/flow_offload.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 0400a0ac8a29..d177bf5f0e1a 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -228,6 +228,7 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie);
 struct flow_action_entry {
 	enum flow_action_id		id;
 	u32				hw_index;
+	unsigned long			act_cookie;
 	enum flow_action_hw_stats	hw_stats;
 	action_destr			destructor;
 	void				*destructor_priv;
@@ -610,6 +611,7 @@ struct flow_offload_action {
 	enum offload_act_command  command;
 	enum flow_action_id id;
 	u32 index;
+	unsigned long cookie;
 	struct flow_stats stats;
 	struct flow_action action;
 };
-- 
cgit v1.2.3


From 5246c896b805b043a87fa78af32a33cbce00de05 Mon Sep 17 00:00:00 2001
From: Oz Shlomo <ozsh@nvidia.com>
Date: Sun, 12 Feb 2023 15:25:16 +0200
Subject: net/sched: support per action hw stats

There are currently two mechanisms for populating hardware stats:
1. Using flow_offload api to query the flow's statistics.
   The api assumes that the same stats values apply to all
   the flow's actions.
   This assumption breaks when action drops or jumps over following
   actions.
2. Using hw_action api to query specific action stats via a driver
   callback method. This api assures the correct action stats for
   the offloaded action, however, it does not apply to the rest of the
   actions in the flow's actions array.

Extend the flow_offload stats callback to indicate that a per action
stats update is required.
Use the existing flow_offload_action api to query the action's hw stats.
In addition, currently the tc action stats utility only updates hw actions.
Reuse the existing action stats cb infrastructure to query any action
stats.

Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/flow_offload.h |  1 +
 include/net/pkt_cls.h      | 29 +++++++++++++++++++----------
 2 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index d177bf5f0e1a..8c05455b1e34 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -594,6 +594,7 @@ struct flow_cls_common_offload {
 struct flow_cls_offload {
 	struct flow_cls_common_offload common;
 	enum flow_cls_command command;
+	bool use_act_stats;
 	unsigned long cookie;
 	struct flow_rule *rule;
 	struct flow_stats stats;
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index bf50829d9255..ace437c6754b 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -292,9 +292,15 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts)
 #define tcf_act_for_each_action(i, a, actions) \
 	for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = actions[i]); i++)
 
+static inline bool tc_act_in_hw(struct tc_action *act)
+{
+	return !!act->in_hw_count;
+}
+
 static inline void
 tcf_exts_hw_stats_update(const struct tcf_exts *exts,
-			 struct flow_stats *stats)
+			 struct flow_stats *stats,
+			 bool use_act_stats)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	int i;
@@ -302,16 +308,18 @@ tcf_exts_hw_stats_update(const struct tcf_exts *exts,
 	for (i = 0; i < exts->nr_actions; i++) {
 		struct tc_action *a = exts->actions[i];
 
-		/* if stats from hw, just skip */
-		if (tcf_action_update_hw_stats(a)) {
-			preempt_disable();
-			tcf_action_stats_update(a, stats->bytes, stats->pkts, stats->drops,
-						stats->lastused, true);
-			preempt_enable();
-
-			a->used_hw_stats = stats->used_hw_stats;
-			a->used_hw_stats_valid = stats->used_hw_stats_valid;
+		if (use_act_stats || tc_act_in_hw(a)) {
+			if (!tcf_action_update_hw_stats(a))
+				continue;
 		}
+
+		preempt_disable();
+		tcf_action_stats_update(a, stats->bytes, stats->pkts, stats->drops,
+					stats->lastused, true);
+		preempt_enable();
+
+		a->used_hw_stats = stats->used_hw_stats;
+		a->used_hw_stats_valid = stats->used_hw_stats_valid;
 	}
 #endif
 }
@@ -769,6 +777,7 @@ struct tc_cls_matchall_offload {
 	enum tc_matchall_command command;
 	struct flow_rule *rule;
 	struct flow_stats stats;
+	bool use_act_stats;
 	unsigned long cookie;
 };
 
-- 
cgit v1.2.3


From 9a47c1ef5a95d1fd229ee5e375985f809a9d8177 Mon Sep 17 00:00:00 2001
From: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Date: Mon, 16 Jan 2023 18:20:58 +0530
Subject: wifi: cfg80211: Authentication offload to user space for MLO
 connection in STA mode

Currently authentication request event interface doesn't have support to
indicate the user space whether it should enable MLO or not during the
authentication with the specified AP. But driver needs such capability
since the connection is MLO or not decided by the driver in case of SME
offload to the driver.

Add support for driver to indicate MLD address of the AP in
authentication offload request to inform user space to enable MLO during
authentication process. Driver shall look at NL80211_ATTR_MLO_SUPPORT
flag capability in NL80211_CMD_CONNECT to know whether the user space
supports enabling MLO during the authentication offload.

User space should enable MLO during the authentication only when it
receives the AP MLD address in authentication offload request. User
space shouldn't enable MLO if the authentication offload request doesn't
indicate the AP MLD address even if the AP is MLO capable.

When MLO is enabled, user space should use the MAC address of the
interface (on which driver sent request) as self MLD address. User space
and driver to use MLD addresses in RA, TA and BSSID fields of the frames
between them, and driver translates the MLD addresses to/from link
addresses based on the link chosen for the authentication.

Signed-off-by: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Link: https://lore.kernel.org/r/20230116125058.1604843-1-quic_vjakkam@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 54a77d906b2d..bcde215475c6 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3600,6 +3600,17 @@ struct cfg80211_pmk_conf {
  *	the real status code for failures. Used only for the authentication
  *	response command interface (user space to driver).
  * @pmkid: The identifier to refer a PMKSA.
+ * @mld_addr: MLD address of the peer. Used by the authentication request event
+ *	interface. Driver indicates this to enable MLO during the authentication
+ *	offload to user space. Driver shall look at %NL80211_ATTR_MLO_SUPPORT
+ *	flag capability in NL80211_CMD_CONNECT to know whether the user space
+ *	supports enabling MLO during the authentication offload.
+ *	User space should use the address of the interface (on which the
+ *	authentication request event reported) as self MLD address. User space
+ *	and driver should use MLD addresses in RA, TA and BSSID fields of
+ *	authentication frames sent or received via cfg80211. The driver
+ *	translates the MLD addresses to/from link addresses based on the link
+ *	chosen for the authentication.
  */
 struct cfg80211_external_auth_params {
 	enum nl80211_external_auth_action action;
@@ -3608,6 +3619,7 @@ struct cfg80211_external_auth_params {
 	unsigned int key_mgmt_suite;
 	u16 status;
 	const u8 *pmkid;
+	u8 mld_addr[ETH_ALEN] __aligned(2);
 };
 
 /**
-- 
cgit v1.2.3


From a42e59eb9689e54279227e2af5ed75128d92a82b Mon Sep 17 00:00:00 2001
From: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Date: Thu, 26 Jan 2023 20:02:55 +0530
Subject: wifi: cfg80211: Extend cfg80211_new_sta() for MLD AP

Add support for drivers to indicate STA connection(MLO/non-MLO) when
user space SME (e.g., hostapd) is not used for MLD AP.

Add new parameters in struct station_info to provide below information
in cfg80211_new_sta() call:
- MLO link ID of the AP, with which station completed (re)association.
  This is applicable for both MLO and non-MLO station connections when
  the AP affiliated with an MLD.
- Station's MLD address if the connection is MLO capable.
- (Re)Association Response IEs sent to the station. User space needs
  this to determine rejected and accepted affiliated links information
  of the connected station if the connection is MLO capable.

Signed-off-by: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Link: https://lore.kernel.org/r/20230126143256.960563-2-quic_vjakkam@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index bcde215475c6..28529194eb89 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1876,6 +1876,24 @@ struct cfg80211_tid_stats {
  *	received packet with an FCS error matches the peer MAC address.
  * @airtime_link_metric: mesh airtime link metric.
  * @connected_to_as: true if mesh STA has a path to authentication server
+ * @mlo_params_valid: Indicates @assoc_link_id and @mld_addr fields are filled
+ *	by driver. Drivers use this only in cfg80211_new_sta() calls when AP
+ *	MLD's MLME/SME is offload to driver. Drivers won't fill this
+ *	information in cfg80211_del_sta_sinfo(), get_station() and
+ *	dump_station() callbacks.
+ * @assoc_link_id: Indicates MLO link ID of the AP, with which the station
+ *	completed (re)association. This information filled for both MLO
+ *	and non-MLO STA connections when the AP affiliated with an MLD.
+ * @mld_addr: For MLO STA connection, filled with MLD address of the station.
+ *	For non-MLO STA connection, filled with all zeros.
+ * @assoc_resp_ies: IEs from (Re)Association Response.
+ *	This is used only when in AP mode with drivers that do not use user
+ *	space MLME/SME implementation. The information is provided only for the
+ *	cfg80211_new_sta() calls to notify user space of the IEs. Drivers won't
+ *	fill this information in cfg80211_del_sta_sinfo(), get_station() and
+ *	dump_station() callbacks. User space needs this information to determine
+ *	the accepted and rejected affiliated links of the connected station.
+ * @assoc_resp_ies_len: Length of @assoc_resp_ies buffer in octets.
  */
 struct station_info {
 	u64 filled;
@@ -1935,6 +1953,12 @@ struct station_info {
 	u32 airtime_link_metric;
 
 	u8 connected_to_as;
+
+	bool mlo_params_valid;
+	u8 assoc_link_id;
+	u8 mld_addr[ETH_ALEN] __aligned(2);
+	const u8 *assoc_resp_ies;
+	size_t assoc_resp_ies_len;
 };
 
 /**
-- 
cgit v1.2.3


From 8bb588d975019748ebdab9448e9a274b7463c13b Mon Sep 17 00:00:00 2001
From: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Date: Thu, 26 Jan 2023 20:02:56 +0530
Subject: wifi: cfg80211: Extend cfg80211_update_owe_info_event() for MLD AP

Add support to offload OWE processing to user space for MLD AP when
driver's SME in use.

Add new parameters in struct cfg80211_update_owe_info to provide below
information in cfg80211_update_owe_info_event() call:
- MLO link ID of the AP, with which station requested (re)association.
  This is applicable for both MLO and non-MLO station connections when
  the AP affiliated with an MLD.
- Station's MLD address if the connection is MLO capable.

Signed-off-by: Veerendranath Jakkam <quic_vjakkam@quicinc.com>
Link: https://lore.kernel.org/r/20230126143256.960563-3-quic_vjakkam@quicinc.com
[reformat the trace event macro]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 28529194eb89..70f01ea5ba5c 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3898,12 +3898,22 @@ struct cfg80211_pmsr_request {
  *	the IEs of the remote peer in the event from the host driver and
  *	the constructed IEs by the user space in the request interface.
  * @ie_len: Length of IEs in octets.
+ * @assoc_link_id: MLO link ID of the AP, with which (re)association requested
+ *	by peer. This will be filled by driver for both MLO and non-MLO station
+ *	connections when the AP affiliated with an MLD. For non-MLD AP mode, it
+ *	will be -1. Used only with OWE update event (driver to user space).
+ * @peer_mld_addr: For MLO connection, MLD address of the peer. For non-MLO
+ *	connection, it will be all zeros. This is applicable only when
+ *	@assoc_link_id is not -1, i.e., the AP affiliated with an MLD. Used only
+ *	with OWE update event (driver to user space).
  */
 struct cfg80211_update_owe_info {
 	u8 peer[ETH_ALEN] __aligned(2);
 	u16 status;
 	const u8 *ie;
 	size_t ie_len;
+	int assoc_link_id;
+	u8 peer_mld_addr[ETH_ALEN] __aligned(2);
 };
 
 /**
-- 
cgit v1.2.3


From aa87cd8b35736a5183745ab0ec4b82419024dfd7 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 27 Jan 2023 12:39:31 +0100
Subject: wifi: mac80211: mlme: handle EHT channel puncturing

Handle the Puncturing info received from the AP in the
EHT Operation element in beacons.

If the info is invalid:
 - during association: disable EHT connection for the AP
 - after association: disconnect

This commit includes many (internal) bugfixes and spec
updates various people.

Co-developed-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://lore.kernel.org/r/20230127123930.4fbc74582331.I3547481d49f958389f59dfeba3fcc75e72b0aa6e@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 2635e6de8101..54ffc0cc2918 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -340,7 +340,7 @@ struct ieee80211_vif_chanctx_switch {
  * @BSS_CHANGED_FILS_DISCOVERY: FILS discovery status changed.
  * @BSS_CHANGED_UNSOL_BCAST_PROBE_RESP: Unsolicited broadcast probe response
  *	status changed.
- *
+ * @BSS_CHANGED_EHT_PUNCTURING: The channel puncturing bitmap changed.
  */
 enum ieee80211_bss_change {
 	BSS_CHANGED_ASSOC		= 1<<0,
@@ -375,6 +375,7 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_HE_BSS_COLOR	= 1<<29,
 	BSS_CHANGED_FILS_DISCOVERY      = 1<<30,
 	BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31,
+	BSS_CHANGED_EHT_PUNCTURING	= BIT_ULL(32),
 
 	/* when adding here, make sure to change ieee80211_reconfig */
 };
@@ -640,6 +641,7 @@ struct ieee80211_fils_discovery {
  * @tx_pwr_env_num: number of @tx_pwr_env.
  * @pwr_reduction: power constraint of BSS.
  * @eht_support: does this BSS support EHT
+ * @eht_puncturing: bitmap to indicate which channels are punctured in this BSS
  * @csa_active: marks whether a channel switch is going on. Internally it is
  *	write-protected by sdata_lock and local->mtx so holding either is fine
  *	for read access.
@@ -736,6 +738,7 @@ struct ieee80211_bss_conf {
 	u8 tx_pwr_env_num;
 	u8 pwr_reduction;
 	bool eht_support;
+	u16 eht_puncturing;
 
 	bool csa_active;
 	bool mu_mimo_owner;
-- 
cgit v1.2.3


From b25413fed3d43e1ed3340df4d928971bb8639f66 Mon Sep 17 00:00:00 2001
From: Aloka Dixit <quic_alokad@quicinc.com>
Date: Mon, 30 Jan 2023 16:12:24 -0800
Subject: wifi: cfg80211: move puncturing bitmap validation from mac80211

- Move ieee80211_valid_disable_subchannel_bitmap() from mlme.c to
  chan.c, rename it as cfg80211_valid_disable_subchannel_bitmap()
  and export it.
- Modify the prototype to include struct cfg80211_chan_def instead
  of only bandwidth to support a check which returns false if the
  primary channel is punctured.

Signed-off-by: Aloka Dixit <quic_alokad@quicinc.com>
Link: https://lore.kernel.org/r/20230131001227.25014-2-quic_alokad@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 70f01ea5ba5c..191603f32b37 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -8952,4 +8952,16 @@ static inline int cfg80211_color_change_notify(struct net_device *dev)
 					 0, 0);
 }
 
+/**
+ * cfg80211_valid_disable_subchannel_bitmap - validate puncturing bitmap
+ * @bitmap: bitmap to be validated
+ * @chandef: channel definition
+ *
+ * Validate the puncturing bitmap.
+ *
+ * Return: %true if the bitmap is valid. %false otherwise.
+ */
+bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap,
+					      const struct cfg80211_chan_def *chandef);
+
 #endif /* __NET_CFG80211_H */
-- 
cgit v1.2.3


From d7c1a9a0ed180d8884798ce97afe7283622a484f Mon Sep 17 00:00:00 2001
From: Aloka Dixit <quic_alokad@quicinc.com>
Date: Mon, 30 Jan 2023 16:12:25 -0800
Subject: wifi: nl80211: validate and configure puncturing bitmap

- New feature flag, NL80211_EXT_FEATURE_PUNCT, to advertise
  driver support for preamble puncturing in AP mode.
- New attribute, NL80211_ATTR_PUNCT_BITMAP, to receive a puncturing
  bitmap from the userspace during AP bring up (NL80211_CMD_START_AP)
  and channel switch (NL80211_CMD_CHANNEL_SWITCH) operations. Each bit
  corresponds to a 20 MHz channel in the operating bandwidth, lowest
  bit for the lowest channel. Bit set to 1 indicates that the channel
  is punctured. Higher 16 bits are reserved.
- New members added to structures cfg80211_ap_settings and
  cfg80211_csa_settings to propagate the bitmap to the driver after
  validation.

Signed-off-by: Aloka Dixit <quic_alokad@quicinc.com>
Signed-off-by: Muna Sinada <quic_msinada@quicinc.com>
Link: https://lore.kernel.org/r/20230131001227.25014-3-quic_alokad@quicinc.com
[move validation against 0xffff into policy]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 191603f32b37..fb8b79e6ac36 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1316,6 +1316,9 @@ struct cfg80211_unsol_bcast_probe_resp {
  * @fils_discovery: FILS discovery transmission parameters
  * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters
  * @mbssid_config: AP settings for multiple bssid
+ * @punct_bitmap: Preamble puncturing bitmap. Each bit represents
+ *	a 20 MHz channel, lowest bit corresponding to the lowest channel.
+ *	Bit set to 1 indicates that the channel is punctured.
  */
 struct cfg80211_ap_settings {
 	struct cfg80211_chan_def chandef;
@@ -1350,6 +1353,7 @@ struct cfg80211_ap_settings {
 	struct cfg80211_fils_discovery fils_discovery;
 	struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp;
 	struct cfg80211_mbssid_config mbssid_config;
+	u16 punct_bitmap;
 };
 
 /**
@@ -1367,6 +1371,9 @@ struct cfg80211_ap_settings {
  * @radar_required: whether radar detection is required on the new channel
  * @block_tx: whether transmissions should be blocked while changing
  * @count: number of beacons until switch
+ * @punct_bitmap: Preamble puncturing bitmap. Each bit represents
+ *	a 20 MHz channel, lowest bit corresponding to the lowest channel.
+ *	Bit set to 1 indicates that the channel is punctured.
  */
 struct cfg80211_csa_settings {
 	struct cfg80211_chan_def chandef;
@@ -1379,6 +1386,7 @@ struct cfg80211_csa_settings {
 	bool radar_required;
 	bool block_tx;
 	u8 count;
+	u16 punct_bitmap;
 };
 
 /**
-- 
cgit v1.2.3


From b345f0637c0042f9e6b78378a32256d90f485774 Mon Sep 17 00:00:00 2001
From: Aloka Dixit <quic_alokad@quicinc.com>
Date: Mon, 30 Jan 2023 16:12:26 -0800
Subject: wifi: cfg80211: include puncturing bitmap in channel switch events

Add puncturing bitmap in channel switch notifications
and corresponding trace functions.

Signed-off-by: Aloka Dixit <quic_alokad@quicinc.com>
Link: https://lore.kernel.org/r/20230131001227.25014-4-quic_alokad@quicinc.com
[fix qtnfmac]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index fb8b79e6ac36..e570530191ec 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -8326,13 +8326,14 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
  * @dev: the device which switched channels
  * @chandef: the new channel definition
  * @link_id: the link ID for MLO, must be 0 for non-MLO
+ * @punct_bitmap: the new puncturing bitmap
  *
  * Caller must acquire wdev_lock, therefore must only be called from sleepable
  * driver context!
  */
 void cfg80211_ch_switch_notify(struct net_device *dev,
 			       struct cfg80211_chan_def *chandef,
-			       unsigned int link_id);
+			       unsigned int link_id, u16 punct_bitmap);
 
 /*
  * cfg80211_ch_switch_started_notify - notify channel switch start
@@ -8341,6 +8342,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
  * @link_id: the link ID for MLO, must be 0 for non-MLO
  * @count: the number of TBTTs until the channel switch happens
  * @quiet: whether or not immediate quiet was requested by the AP
+ * @punct_bitmap: the future puncturing bitmap
  *
  * Inform the userspace about the channel switch that has just
  * started, so that it can take appropriate actions (eg. starting
@@ -8349,7 +8351,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
 void cfg80211_ch_switch_started_notify(struct net_device *dev,
 				       struct cfg80211_chan_def *chandef,
 				       unsigned int link_id, u8 count,
-				       bool quiet);
+				       bool quiet, u16 punct_bitmap);
 
 /**
  * ieee80211_operating_class_to_band - convert operating class to band
-- 
cgit v1.2.3


From 2cc25e4b2a04cdd90dbb2916678745565cc4aeed Mon Sep 17 00:00:00 2001
From: Aloka Dixit <quic_alokad@quicinc.com>
Date: Mon, 30 Jan 2023 16:12:27 -0800
Subject: wifi: mac80211: configure puncturing bitmap

- Configure the bitmap in link_conf and notify the driver.
- Modify 'change' in ieee80211_start_ap() from u32 to u64 to support
BSS_CHANGED_EHT_PUNCTURING.
- Propagate the bitmap in channel switch events to userspace.

Signed-off-by: Aloka Dixit <quic_alokad@quicinc.com>
Signed-off-by: Muna Sinada <quic_msinada@quicinc.com>
Link: https://lore.kernel.org/r/20230131001227.25014-5-quic_alokad@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 54ffc0cc2918..219fd15893b0 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -645,6 +645,7 @@ struct ieee80211_fils_discovery {
  * @csa_active: marks whether a channel switch is going on. Internally it is
  *	write-protected by sdata_lock and local->mtx so holding either is fine
  *	for read access.
+ * @csa_punct_bitmap: new puncturing bitmap for channel switch
  * @mu_mimo_owner: indicates interface owns MU-MIMO capability
  * @chanctx_conf: The channel context this interface is assigned to, or %NULL
  *	when it is not assigned. This pointer is RCU-protected due to the TX
@@ -741,6 +742,8 @@ struct ieee80211_bss_conf {
 	u16 eht_puncturing;
 
 	bool csa_active;
+	u16 csa_punct_bitmap;
+
 	bool mu_mimo_owner;
 	struct ieee80211_chanctx_conf __rcu *chanctx_conf;
 
-- 
cgit v1.2.3


From 986e43b19ae9176093da35e0a844e65c8bf9ede7 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 13 Feb 2023 11:08:54 +0100
Subject: wifi: mac80211: fix receiving A-MSDU frames on mesh interfaces

The current mac80211 mesh A-MSDU receive path fails to parse A-MSDU packets
on mesh interfaces, because it assumes that the Mesh Control field is always
directly after the 802.11 header.
802.11-2020 9.3.2.2.2 Figure 9-70 shows that the Mesh Control field is
actually part of the A-MSDU subframe header.
This makes more sense, since it allows packets for multiple different
destinations to be included in the same A-MSDU, as long as RA and TID are
still the same.
Another issue is the fact that the A-MSDU subframe length field was apparently
accidentally defined as little-endian in the standard.

In order to fix this, the mesh forwarding path needs happen at a different
point in the receive path.

ieee80211_data_to_8023_exthdr is changed to ignore the mesh control field
and leave it in after the ethernet header. This also affects the source/dest
MAC address fields, which now in the case of mesh point to the mesh SA/DA.

ieee80211_amsdu_to_8023s is changed to deal with the endian difference and
to add the Mesh Control length to the subframe length, since it's not covered
by the MSDU length field.

With these changes, the mac80211 will get the same packet structure for
converted regular data packets and unpacked A-MSDU subframes.

The mesh forwarding checks are now only performed after the A-MSDU decap.
For locally received packets, the Mesh Control header is stripped away.
For forwarded packets, a new 802.11 header gets added.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20230213100855.34315-4-nbd@nbd.name
[fix fortify build error]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index e570530191ec..c506dc128685 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -6251,11 +6251,36 @@ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
  * @extra_headroom: The hardware extra headroom for SKBs in the @list.
  * @check_da: DA to check in the inner ethernet header, or NULL
  * @check_sa: SA to check in the inner ethernet header, or NULL
+ * @mesh_control: A-MSDU subframe header includes the mesh control field
  */
 void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
 			      const u8 *addr, enum nl80211_iftype iftype,
 			      const unsigned int extra_headroom,
-			      const u8 *check_da, const u8 *check_sa);
+			      const u8 *check_da, const u8 *check_sa,
+			      bool mesh_control);
+
+/**
+ * ieee80211_get_8023_tunnel_proto - get RFC1042 or bridge tunnel encap protocol
+ *
+ * Check for RFC1042 or bridge tunnel header and fetch the encapsulated
+ * protocol.
+ *
+ * @hdr: pointer to the MSDU payload
+ * @proto: destination pointer to store the protocol
+ * Return: true if encapsulation was found
+ */
+bool ieee80211_get_8023_tunnel_proto(const void *hdr, __be16 *proto);
+
+/**
+ * ieee80211_strip_8023_mesh_hdr - strip mesh header from converted 802.3 frames
+ *
+ * Strip the mesh header, which was left in by ieee80211_data_to_8023 as part
+ * of the MSDU data. Also move any source/destination addresses from the mesh
+ * header to the ethernet header (if present).
+ *
+ * @skb: The 802.3 frame with embedded mesh header
+ */
+int ieee80211_strip_8023_mesh_hdr(struct sk_buff *skb);
 
 /**
  * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame
-- 
cgit v1.2.3


From 6e4c0d0460bd32ca9244dff3ba2d2da27235de11 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 13 Feb 2023 11:08:55 +0100
Subject: wifi: mac80211: add a workaround for receiving non-standard mesh
 A-MSDU

At least ath10k and ath11k supported hardware (maybe more) does not implement
mesh A-MSDU aggregation in a standard compliant way.
802.11-2020 9.3.2.2.2 declares that the Mesh Control field is part of the
A-MSDU header (and little-endian).
As such, its length must not be included in the subframe length field.
Hardware affected by this bug treats the mesh control field as part of the
MSDU data and sets the length accordingly.
In order to avoid packet loss, keep track of which stations are affected
by this and take it into account when converting A-MSDU to 802.3 + mesh control
packets.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20230213100855.34315-5-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c506dc128685..9b015bb877db 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -6236,6 +6236,19 @@ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
 	return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0, false);
 }
 
+/**
+ * ieee80211_is_valid_amsdu - check if subframe lengths of an A-MSDU are valid
+ *
+ * This is used to detect non-standard A-MSDU frames, e.g. the ones generated
+ * by ath10k and ath11k, where the subframe length includes the length of the
+ * mesh control field.
+ *
+ * @skb: The input A-MSDU frame without any headers.
+ * @mesh_hdr: use standard compliant mesh A-MSDU subframe header
+ * Returns: true if subframe header lengths are valid for the @mesh_hdr mode
+ */
+bool ieee80211_is_valid_amsdu(struct sk_buff *skb, bool mesh_hdr);
+
 /**
  * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame
  *
-- 
cgit v1.2.3


From 935ef47b16cc5bc15fcd2b3dbc61abb0b7ea671a Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 1 Feb 2023 13:48:30 +0100
Subject: wifi: cfg80211: get rid of gfp in cfg80211_bss_color_notify

Since cfg80211_bss_color_notify() is now always run in non-atomic
context, get rid of gfp_t flags in the routine signature and always use
GFP_KERNEL for netlink message allocation.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/c687724e7b53556f7a2d9cbe3d11cdcf065cb687.1675255390.git.lorenzo@kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 9b015bb877db..c65f17d74191 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -8936,12 +8936,11 @@ void cfg80211_bss_flush(struct wiphy *wiphy);
 /**
  * cfg80211_bss_color_notify - notify about bss color event
  * @dev: network device
- * @gfp: allocation flags
  * @cmd: the actual event we want to notify
  * @count: the number of TBTTs until the color change happens
  * @color_bitmap: representations of the colors that the local BSS is aware of
  */
-int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp,
+int cfg80211_bss_color_notify(struct net_device *dev,
 			      enum nl80211_commands cmd, u8 count,
 			      u64 color_bitmap);
 
@@ -8952,10 +8951,9 @@ int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp,
  * @gfp: allocation flags
  */
 static inline int cfg80211_obss_color_collision_notify(struct net_device *dev,
-						       u64 color_bitmap, gfp_t gfp)
+						       u64 color_bitmap)
 {
-	return cfg80211_bss_color_notify(dev, gfp,
-					 NL80211_CMD_OBSS_COLOR_COLLISION,
+	return cfg80211_bss_color_notify(dev, NL80211_CMD_OBSS_COLOR_COLLISION,
 					 0, color_bitmap);
 }
 
@@ -8969,8 +8967,7 @@ static inline int cfg80211_obss_color_collision_notify(struct net_device *dev,
 static inline int cfg80211_color_change_started_notify(struct net_device *dev,
 						       u8 count)
 {
-	return cfg80211_bss_color_notify(dev, GFP_KERNEL,
-					 NL80211_CMD_COLOR_CHANGE_STARTED,
+	return cfg80211_bss_color_notify(dev, NL80211_CMD_COLOR_CHANGE_STARTED,
 					 count, 0);
 }
 
@@ -8982,8 +8979,7 @@ static inline int cfg80211_color_change_started_notify(struct net_device *dev,
  */
 static inline int cfg80211_color_change_aborted_notify(struct net_device *dev)
 {
-	return cfg80211_bss_color_notify(dev, GFP_KERNEL,
-					 NL80211_CMD_COLOR_CHANGE_ABORTED,
+	return cfg80211_bss_color_notify(dev, NL80211_CMD_COLOR_CHANGE_ABORTED,
 					 0, 0);
 }
 
@@ -8995,7 +8991,7 @@ static inline int cfg80211_color_change_aborted_notify(struct net_device *dev)
  */
 static inline int cfg80211_color_change_notify(struct net_device *dev)
 {
-	return cfg80211_bss_color_notify(dev, GFP_KERNEL,
+	return cfg80211_bss_color_notify(dev,
 					 NL80211_CMD_COLOR_CHANGE_COMPLETED,
 					 0, 0);
 }
-- 
cgit v1.2.3


From d99975c4953eb79e389d4630e848435c700e2dfc Mon Sep 17 00:00:00 2001
From: Wen Gong <quic_wgong@quicinc.com>
Date: Wed, 1 Feb 2023 01:53:13 -0500
Subject: wifi: cfg80211: call reg_notifier for self managed wiphy from driver
 hint

Currently the regulatory driver does not call the regulatory callback
reg_notifier for self managed wiphys. Sometimes driver needs cfg80211
to calculate the info of ieee80211_channel such as flags and power,
and driver needs to get the info of ieee80211_channel after hint of
driver, but driver does not know when calculation of the info of
ieee80211_channel become finished, so add notify to driver in
reg_process_self_managed_hint() from cfg80211 is a good way, then
driver could get the correct info in callback of reg_notifier.

Signed-off-by: Wen Gong <quic_wgong@quicinc.com>
Link: https://lore.kernel.org/r/20230201065313.27203-1-quic_wgong@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c65f17d74191..15fb019ce28d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4737,6 +4737,8 @@ struct cfg80211_ops {
  *	complete feature/interface combinations/etc. advertisement. No driver
  *	should set this flag for now.
  * @WIPHY_FLAG_SUPPORTS_EXT_KCK_32: The device supports 32-byte KCK keys.
+ * @WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER: The device could handle reg notify for
+ *	NL80211_REGDOM_SET_BY_DRIVER.
  */
 enum wiphy_flags {
 	WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK		= BIT(0),
@@ -4762,6 +4764,7 @@ enum wiphy_flags {
 	WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL	= BIT(21),
 	WIPHY_FLAG_SUPPORTS_5_10_MHZ		= BIT(22),
 	WIPHY_FLAG_HAS_CHANNEL_SWITCH		= BIT(23),
+	WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER	= BIT(24),
 };
 
 /**
-- 
cgit v1.2.3


From fe33311c3e371855c4f4c0ab8a5fce5b9a9fdafd Mon Sep 17 00:00:00 2001
From: Jason Xing <kernelxing@tencent.com>
Date: Tue, 14 Feb 2023 12:14:10 +0800
Subject: net: no longer support SOCK_REFCNT_DEBUG feature

Commit e48c414ee61f ("[INET]: Generalise the TCP sock ID lookup routines")
commented out the definition of SOCK_REFCNT_DEBUG in 2005 and later another
commit 463c84b97f24 ("[NET]: Introduce inet_connection_sock") removed it.
Since we could track all of them through bpf and kprobe related tools
and the feature could print loads of information which might not be
that helpful even under a little bit pressure, the whole feature which
has been inactive for many years is no longer supported.

Link: https://lore.kernel.org/lkml/20230211065153.54116-1-kerneljasonxing@gmail.com/
Suggested-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Jason Xing <kernelxing@tencent.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Acked-by: Wenjia Zhang <wenjia@linux.ibm.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 28 ----------------------------
 1 file changed, 28 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 937e842dc930..2cb258fde072 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1349,9 +1349,6 @@ struct proto {
 	char			name[32];
 
 	struct list_head	node;
-#ifdef SOCK_REFCNT_DEBUG
-	atomic_t		socks;
-#endif
 	int			(*diag_destroy)(struct sock *sk, int err);
 } __randomize_layout;
 
@@ -1359,31 +1356,6 @@ int proto_register(struct proto *prot, int alloc_slab);
 void proto_unregister(struct proto *prot);
 int sock_load_diag_module(int family, int protocol);
 
-#ifdef SOCK_REFCNT_DEBUG
-static inline void sk_refcnt_debug_inc(struct sock *sk)
-{
-	atomic_inc(&sk->sk_prot->socks);
-}
-
-static inline void sk_refcnt_debug_dec(struct sock *sk)
-{
-	atomic_dec(&sk->sk_prot->socks);
-	printk(KERN_DEBUG "%s socket %p released, %d are still alive\n",
-	       sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks));
-}
-
-static inline void sk_refcnt_debug_release(const struct sock *sk)
-{
-	if (refcount_read(&sk->sk_refcnt) != 1)
-		printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
-		       sk->sk_prot->name, sk, refcount_read(&sk->sk_refcnt));
-}
-#else /* SOCK_REFCNT_DEBUG */
-#define sk_refcnt_debug_inc(sk) do { } while (0)
-#define sk_refcnt_debug_dec(sk) do { } while (0)
-#define sk_refcnt_debug_release(sk) do { } while (0)
-#endif /* SOCK_REFCNT_DEBUG */
-
 INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake));
 
 static inline int sk_forward_alloc_get(const struct sock *sk)
-- 
cgit v1.2.3


From 4048a6a7380c8be2f06c1c386dd63d1bd3cdb0a0 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 15 Feb 2023 10:32:07 +0100
Subject: wifi: cfg80211: remove gfp parameter from
 cfg80211_obss_color_collision_notify description

Get rid of gfp parameter from cfg80211_obss_color_collision_notify
routine description.

Fixes: 935ef47b16cc ("wifi: cfg80211: get rid of gfp in cfg80211_bss_color_notify")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/2da652e2cd5c7903191091ae9757718f1be802a1.1676453359.git.lorenzo@kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 15fb019ce28d..f115b2550309 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -8951,7 +8951,6 @@ int cfg80211_bss_color_notify(struct net_device *dev,
  * cfg80211_obss_color_collision_notify - notify about bss color collision
  * @dev: network device
  * @color_bitmap: representations of the colors that the local BSS is aware of
- * @gfp: allocation flags
  */
 static inline int cfg80211_obss_color_collision_notify(struct net_device *dev,
 						       u64 color_bitmap)
-- 
cgit v1.2.3


From 8c710f75256bb3cf05ac7b1672c82b92c43f3d28 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Tue, 14 Feb 2023 08:49:14 -0500
Subject: net/sched: Retire tcindex classifier

The tcindex classifier has served us well for about a quarter of a century
but has not been getting much TLC due to lack of known users. Most recently
it has become easy prey to syzkaller. For this reason, we are retiring it.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/tc_wrapper.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
index d323fffb839a..8ba241760d0a 100644
--- a/include/net/tc_wrapper.h
+++ b/include/net/tc_wrapper.h
@@ -154,7 +154,6 @@ TC_INDIRECT_FILTER_DECLARE(mall_classify);
 TC_INDIRECT_FILTER_DECLARE(route4_classify);
 TC_INDIRECT_FILTER_DECLARE(rsvp_classify);
 TC_INDIRECT_FILTER_DECLARE(rsvp6_classify);
-TC_INDIRECT_FILTER_DECLARE(tcindex_classify);
 TC_INDIRECT_FILTER_DECLARE(u32_classify);
 
 static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -207,10 +206,6 @@ static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	if (tp->classify == rsvp6_classify)
 		return rsvp6_classify(skb, tp, res);
 #endif
-#if IS_BUILTIN(CONFIG_NET_CLS_TCINDEX)
-	if (tp->classify == tcindex_classify)
-		return tcindex_classify(skb, tp, res);
-#endif
 
 skip:
 	return tp->classify(skb, tp, res);
-- 
cgit v1.2.3


From 265b4da82dbf5df04bee5a5d46b7474b1aaf326a Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Tue, 14 Feb 2023 08:49:15 -0500
Subject: net/sched: Retire rsvp classifier

The rsvp classifier has served us well for about a quarter of a century but has
has not been getting much maintenance attention due to lack of known users.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/tc_wrapper.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
index 8ba241760d0a..a6d481b5bcbc 100644
--- a/include/net/tc_wrapper.h
+++ b/include/net/tc_wrapper.h
@@ -152,8 +152,6 @@ TC_INDIRECT_FILTER_DECLARE(flow_classify);
 TC_INDIRECT_FILTER_DECLARE(fw_classify);
 TC_INDIRECT_FILTER_DECLARE(mall_classify);
 TC_INDIRECT_FILTER_DECLARE(route4_classify);
-TC_INDIRECT_FILTER_DECLARE(rsvp_classify);
-TC_INDIRECT_FILTER_DECLARE(rsvp6_classify);
 TC_INDIRECT_FILTER_DECLARE(u32_classify);
 
 static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -198,14 +196,6 @@ static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	if (tp->classify == route4_classify)
 		return route4_classify(skb, tp, res);
 #endif
-#if IS_BUILTIN(CONFIG_NET_CLS_RSVP)
-	if (tp->classify == rsvp_classify)
-		return rsvp_classify(skb, tp, res);
-#endif
-#if IS_BUILTIN(CONFIG_NET_CLS_RSVP6)
-	if (tp->classify == rsvp6_classify)
-		return rsvp6_classify(skb, tp, res);
-#endif
 
 skip:
 	return tp->classify(skb, tp, res);
-- 
cgit v1.2.3


From 7d12057b45fbc1e1315d327dca13e8d6b5019113 Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@mojatatu.com>
Date: Tue, 14 Feb 2023 18:15:31 -0300
Subject: net/sched: act_nat: transition to percpu stats and rcu

The tc action act_nat was using shared stats and taking the per action
lock in the datapath. Improve it by using percpu stats and rcu.

perf before:
- 10.48% tcf_nat_act
   - 81.83% _raw_spin_lock
        81.08% native_queued_spin_lock_slowpath

perf after:
- 0.48% tcf_nat_act

tdc results:
1..27
ok 1 7565 - Add nat action on ingress with default control action
ok 2 fd79 - Add nat action on ingress with pipe control action
ok 3 eab9 - Add nat action on ingress with continue control action
ok 4 c53a - Add nat action on ingress with reclassify control action
ok 5 76c9 - Add nat action on ingress with jump control action
ok 6 24c6 - Add nat action on ingress with drop control action
ok 7 2120 - Add nat action on ingress with maximum index value
ok 8 3e9d - Add nat action on ingress with invalid index value
ok 9 f6c9 - Add nat action on ingress with invalid IP address
ok 10 be25 - Add nat action on ingress with invalid argument
ok 11 a7bd - Add nat action on ingress with DEFAULT IP address
ok 12 ee1e - Add nat action on ingress with ANY IP address
ok 13 1de8 - Add nat action on ingress with ALL IP address
ok 14 8dba - Add nat action on egress with default control action
ok 15 19a7 - Add nat action on egress with pipe control action
ok 16 f1d9 - Add nat action on egress with continue control action
ok 17 6d4a - Add nat action on egress with reclassify control action
ok 18 b313 - Add nat action on egress with jump control action
ok 19 d9fc - Add nat action on egress with drop control action
ok 20 a895 - Add nat action on egress with DEFAULT IP address
ok 21 2572 - Add nat action on egress with ANY IP address
ok 22 37f3 - Add nat action on egress with ALL IP address
ok 23 6054 - Add nat action on egress with cookie
ok 24 79d6 - Add nat action on ingress with cookie
ok 25 4b12 - Replace nat action with invalid goto chain control
ok 26 b811 - Delete nat action with valid index
ok 27 a521 - Delete nat action with invalid index

Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/tc_act/tc_nat.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tc_act/tc_nat.h b/include/net/tc_act/tc_nat.h
index c14407160812..c869274ac529 100644
--- a/include/net/tc_act/tc_nat.h
+++ b/include/net/tc_act/tc_nat.h
@@ -5,13 +5,17 @@
 #include <linux/types.h>
 #include <net/act_api.h>
 
-struct tcf_nat {
-	struct tc_action common;
-
+struct tcf_nat_parms {
 	__be32 old_addr;
 	__be32 new_addr;
 	__be32 mask;
 	u32 flags;
+	struct rcu_head rcu;
+};
+
+struct tcf_nat {
+	struct tc_action common;
+	struct tcf_nat_parms __rcu *parms;
 };
 
 #define to_tcf_nat(a) ((struct tcf_nat *)a)
-- 
cgit v1.2.3


From 288864effe33885988d53faf7830b35cb9a84c7a Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@mojatatu.com>
Date: Tue, 14 Feb 2023 18:15:32 -0300
Subject: net/sched: act_connmark: transition to percpu stats and rcu

The tc action act_connmark was using shared stats and taking the per
action lock in the datapath. Improve it by using percpu stats and rcu.

perf before:
- 13.55% tcf_connmark_act
   - 81.18% _raw_spin_lock
       80.46% native_queued_spin_lock_slowpath

perf after:
- 2.85% tcf_connmark_act

tdc results:
1..15
ok 1 2002 - Add valid connmark action with defaults
ok 2 56a5 - Add valid connmark action with control pass
ok 3 7c66 - Add valid connmark action with control drop
ok 4 a913 - Add valid connmark action with control pipe
ok 5 bdd8 - Add valid connmark action with control reclassify
ok 6 b8be - Add valid connmark action with control continue
ok 7 d8a6 - Add valid connmark action with control jump
ok 8 aae8 - Add valid connmark action with zone argument
ok 9 2f0b - Add valid connmark action with invalid zone argument
ok 10 9305 - Add connmark action with unsupported argument
ok 11 71ca - Add valid connmark action and replace it
ok 12 5f8f - Add valid connmark action with cookie
ok 13 c506 - Replace connmark with invalid goto chain control
ok 14 6571 - Delete connmark action with valid index
ok 15 3426 - Delete connmark action with invalid index

Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/tc_act/tc_connmark.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tc_act/tc_connmark.h b/include/net/tc_act/tc_connmark.h
index 1f4cb477bb5d..e8dd77a96748 100644
--- a/include/net/tc_act/tc_connmark.h
+++ b/include/net/tc_act/tc_connmark.h
@@ -4,10 +4,15 @@
 
 #include <net/act_api.h>
 
-struct tcf_connmark_info {
-	struct tc_action common;
+struct tcf_connmark_parms {
 	struct net *net;
 	u16 zone;
+	struct rcu_head rcu;
+};
+
+struct tcf_connmark_info {
+	struct tc_action common;
+	struct tcf_connmark_parms __rcu *parms;
 };
 
 #define to_connmark(a) ((struct tcf_connmark_info *)a)
-- 
cgit v1.2.3


From 2954fe60e33da0f4de4d81a4c95c7dddb517d00c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 1 Feb 2023 14:45:22 +0100
Subject: netfilter: let reset rules clean out conntrack entries

iptables/nftables support responding to tcp packets with tcp resets.

The generated tcp reset packet passes through both output and postrouting
netfilter hooks, but conntrack will never see them because the generated
skb has its ->nfct pointer copied over from the packet that triggered the
reset rule.

If the reset rule is used for established connections, this
may result in the conntrack entry to be around for a very long
time (default timeout is 5 days).

One way to avoid this would be to not copy the nf_conn pointer
so that the rest packet passes through conntrack too.

Problem is that output rules might not have the same conntrack
zone setup as the prerouting ones, so its possible that the
reset skb won't find the correct entry.  Generating a template
entry for the skb seems error prone as well.

Add an explicit "closing" function that switches a confirmed
conntrack entry to closed state and wire this up for tcp.

If the entry isn't confirmed, no action is needed because
the conntrack entry will never be committed to the table.

Reported-by: Russel King <linux@armlinux.org.uk>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 6a2019aaa464..3dbf947285be 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -125,6 +125,12 @@ struct nf_conn {
 	union nf_conntrack_proto proto;
 };
 
+static inline struct nf_conn *
+nf_ct_to_nf_conn(const struct nf_conntrack *nfct)
+{
+	return container_of(nfct, struct nf_conn, ct_general);
+}
+
 static inline struct nf_conn *
 nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash)
 {
@@ -175,6 +181,8 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
 
 void nf_ct_destroy(struct nf_conntrack *nfct);
 
+void nf_conntrack_tcp_set_closing(struct nf_conn *ct);
+
 /* decrement reference count on a conntrack */
 static inline void nf_ct_put(struct nf_conn *ct)
 {
-- 
cgit v1.2.3


From 784d4477f07b930df73bc77e842e03f1dacb83aa Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 16 Feb 2023 16:28:40 +0000
Subject: ipv6: icmp6: add SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS

This is a generic drop reason for any error detected
in ndisc_parse_options().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dropreason.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dropreason.h b/include/net/dropreason.h
index ef3f65d135d3..239a5c0ea83e 100644
--- a/include/net/dropreason.h
+++ b/include/net/dropreason.h
@@ -76,6 +76,7 @@
 	FN(IPV6_NDISC_FRAG)		\
 	FN(IPV6_NDISC_HOP_LIMIT)	\
 	FN(IPV6_NDISC_BAD_CODE)		\
+	FN(IPV6_NDISC_BAD_OPTIONS)	\
 	FNe(MAX)
 
 /**
@@ -330,6 +331,8 @@ enum skb_drop_reason {
 	SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT,
 	/** @SKB_DROP_REASON_IPV6_NDISC_BAD_CODE: invalid NDISC icmp6 code. */
 	SKB_DROP_REASON_IPV6_NDISC_BAD_CODE,
+	/** @SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS: invalid NDISC options. */
+	SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS,
 	/**
 	 * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be
 	 * used as a real 'reason'
-- 
cgit v1.2.3


From c34b8bb11ebc135e970653bd6fc8e3f863fb6a81 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 16 Feb 2023 16:28:41 +0000
Subject: ipv6: icmp6: add SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST

Hosts can often receive neighbour discovery messages
that are not for them.

Use a dedicated drop reason to make clear the packet is dropped
for this normal case.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dropreason.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dropreason.h b/include/net/dropreason.h
index 239a5c0ea83e..c0a3ea806cd5 100644
--- a/include/net/dropreason.h
+++ b/include/net/dropreason.h
@@ -77,6 +77,7 @@
 	FN(IPV6_NDISC_HOP_LIMIT)	\
 	FN(IPV6_NDISC_BAD_CODE)		\
 	FN(IPV6_NDISC_BAD_OPTIONS)	\
+	FN(IPV6_NDISC_NS_OTHERHOST)	\
 	FNe(MAX)
 
 /**
@@ -333,6 +334,10 @@ enum skb_drop_reason {
 	SKB_DROP_REASON_IPV6_NDISC_BAD_CODE,
 	/** @SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS: invalid NDISC options. */
 	SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS,
+	/** @SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST: NEIGHBOUR SOLICITATION
+	 * for another host.
+	 */
+	SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST,
 	/**
 	 * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be
 	 * used as a real 'reason'
-- 
cgit v1.2.3


From 50bcfe8df7c73ce51762f65d218b4ef0cc5da3ee Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 17 Feb 2023 13:28:49 +0100
Subject: net: make default_rps_mask a per netns attribute

That really was meant to be a per netns attribute from the beginning.

The idea is that once proper isolation is in place in the main
namespace, additional demux in the child namespaces will be redundant.
Let's make child netns default rps mask empty by default.

To avoid bloating the netns with a possibly large cpumask, allocate
it on-demand during the first write operation.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/core.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 8249060cf5d0..a91ef9f8de60 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -6,6 +6,7 @@
 
 struct ctl_table_header;
 struct prot_inuse;
+struct cpumask;
 
 struct netns_core {
 	/* core sysctls */
@@ -17,6 +18,10 @@ struct netns_core {
 #ifdef CONFIG_PROC_FS
 	struct prot_inuse __percpu *prot_inuse;
 #endif
+
+#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
+	struct cpumask *rps_default_mask;
+#endif
 };
 
 #endif
-- 
cgit v1.2.3


From db4b49025c0c7116f1d2dfe8d5bbfc983ac054de Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Sat, 18 Feb 2023 00:36:13 +0200
Subject: net/sched: Rename user cookie and act cookie

struct tc_action->act_cookie is a user defined cookie,
and the related struct flow_action_entry->act_cookie is
used as an handle similar to struct flow_cls_offload->cookie.

Rename tc_action->act_cookie to user_cookie, and
flow_action_entry->act_cookie to cookie so their names
would better fit their usage.

Signed-off-by: Paul Blakey <paulb@nvidia.com>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/act_api.h      | 2 +-
 include/net/flow_offload.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 2a6f443f0ef6..4ae0580b63ca 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -39,7 +39,7 @@ struct tc_action {
 	struct gnet_stats_basic_sync __percpu *cpu_bstats;
 	struct gnet_stats_basic_sync __percpu *cpu_bstats_hw;
 	struct gnet_stats_queue __percpu *cpu_qstats;
-	struct tc_cookie	__rcu *act_cookie;
+	struct tc_cookie	__rcu *user_cookie;
 	struct tcf_chain	__rcu *goto_chain;
 	u32			tcfa_flags;
 	u8			hw_stats;
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 8c05455b1e34..9c5cb12f8a90 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -228,7 +228,7 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie);
 struct flow_action_entry {
 	enum flow_action_id		id;
 	u32				hw_index;
-	unsigned long			act_cookie;
+	unsigned long			cookie;
 	enum flow_action_hw_stats	hw_stats;
 	action_destr			destructor;
 	void				*destructor_priv;
@@ -321,7 +321,7 @@ struct flow_action_entry {
 			u16		sid;
 		} pppoe;
 	};
-	struct flow_action_cookie *cookie; /* user defined action cookie */
+	struct flow_action_cookie *user_cookie; /* user defined action cookie */
 };
 
 struct flow_action {
-- 
cgit v1.2.3


From 80cd22c35c9001fe72bf614d29439de41933deca Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Sat, 18 Feb 2023 00:36:14 +0200
Subject: net/sched: cls_api: Support hardware miss to tc action

For drivers to support partial offload of a filter's action list,
add support for action miss to specify an action instance to
continue from in sw.

CT action in particular can't be fully offloaded, as new connections
need to be handled in software. This imposes other limitations on
the actions that can be offloaded together with the CT action, such
as packet modifications.

Assign each action on a filter's action list a unique miss_cookie
which drivers can then use to fill action_miss part of the tc skb
extension. On getting back this miss_cookie, find the action
instance with relevant cookie and continue classifying from there.

Signed-off-by: Paul Blakey <paulb@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/flow_offload.h |  1 +
 include/net/pkt_cls.h      | 34 ++++++++++++++++++++--------------
 include/net/sch_generic.h  |  2 ++
 3 files changed, 23 insertions(+), 14 deletions(-)

(limited to 'include/net')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 9c5cb12f8a90..118082eae48c 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -229,6 +229,7 @@ struct flow_action_entry {
 	enum flow_action_id		id;
 	u32				hw_index;
 	unsigned long			cookie;
+	u64				miss_cookie;
 	enum flow_action_hw_stats	hw_stats;
 	action_destr			destructor;
 	void				*destructor_priv;
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index ace437c6754b..b3b5b0b62f16 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -59,6 +59,8 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
 void tcf_block_put(struct tcf_block *block);
 void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 		       struct tcf_block_ext_info *ei);
+int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action,
+		     int police, struct tcf_proto *tp, u32 handle, bool used_action_miss);
 
 static inline bool tcf_block_shared(struct tcf_block *block)
 {
@@ -229,6 +231,7 @@ struct tcf_exts {
 	struct tc_action **actions;
 	struct net	*net;
 	netns_tracker	ns_tracker;
+	struct tcf_exts_miss_cookie_node *miss_cookie_node;
 #endif
 	/* Map to export classifier specific extension TLV types to the
 	 * generic extensions API. Unsupported extensions must be set to 0.
@@ -240,21 +243,11 @@ struct tcf_exts {
 static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net,
 				int action, int police)
 {
-#ifdef CONFIG_NET_CLS_ACT
-	exts->type = 0;
-	exts->nr_actions = 0;
-	/* Note: we do not own yet a reference on net.
-	 * This reference might be taken later from tcf_exts_get_net().
-	 */
-	exts->net = net;
-	exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
-				GFP_KERNEL);
-	if (!exts->actions)
-		return -ENOMEM;
+#ifdef CONFIG_NET_CLS
+	return tcf_exts_init_ex(exts, net, action, police, NULL, 0, false);
+#else
+	return -EOPNOTSUPP;
 #endif
-	exts->action = action;
-	exts->police = police;
-	return 0;
 }
 
 /* Return false if the netns is being destroyed in cleanup_net(). Callers
@@ -360,6 +353,18 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
 	return TC_ACT_OK;
 }
 
+static inline int
+tcf_exts_exec_ex(struct sk_buff *skb, struct tcf_exts *exts, int act_index,
+		 struct tcf_result *res)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	return tcf_action_exec(skb, exts->actions + act_index,
+			       exts->nr_actions - act_index, res);
+#else
+	return TC_ACT_OK;
+#endif
+}
+
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
 		      struct nlattr **tb, struct nlattr *rate_tlv,
 		      struct tcf_exts *exts, u32 flags,
@@ -584,6 +589,7 @@ int tc_setup_offload_action(struct flow_action *flow_action,
 void tc_cleanup_offload_action(struct flow_action *flow_action);
 int tc_setup_action(struct flow_action *flow_action,
 		    struct tc_action *actions[],
+		    u32 miss_cookie_base,
 		    struct netlink_ext_ack *extack);
 
 int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index af4aa66aaa4e..fab5ba3e61b7 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -369,6 +369,8 @@ struct tcf_proto_ops {
 						struct nlattr **tca,
 						struct netlink_ext_ack *extack);
 	void			(*tmplt_destroy)(void *tmplt_priv);
+	struct tcf_exts *	(*get_exts)(const struct tcf_proto *tp,
+					    u32 handle);
 
 	/* rtnetlink specific */
 	int			(*dump)(struct net*, struct tcf_proto*, void *,
-- 
cgit v1.2.3


From 4d4266e3fd321fadb628ce02de641b129522c39c Mon Sep 17 00:00:00 2001
From: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Date: Sat, 18 Feb 2023 00:21:30 +0200
Subject: page_pool: add a comment explaining the fragment counter usage

When reading the page_pool code the first impression is that keeping
two separate counters, one being the page refcnt and the other being
fragment pp_frag_count, is counter-intuitive.

However without that fragment counter we don't know when to reliably
destroy or sync the outstanding DMA mappings.  So let's add a comment
explaining this part.

Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/r/20230217222130.85205-1-ilias.apalodimas@linaro.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/page_pool.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/net')

diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 34bf531ffc8d..ddfa0b328677 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -277,6 +277,16 @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
 				  unsigned int dma_sync_size,
 				  bool allow_direct);
 
+/* pp_frag_count represents the number of writers who can update the page
+ * either by updating skb->data or via DMA mappings for the device.
+ * We can't rely on the page refcnt for that as we don't know who might be
+ * holding page references and we can't reliably destroy or sync DMA mappings
+ * of the fragments.
+ *
+ * When pp_frag_count reaches 0 we can either recycle the page if the page
+ * refcnt is 1 or return it back to the memory allocator and destroy any
+ * mappings we have.
+ */
 static inline void page_pool_fragment_page(struct page *page, long nr)
 {
 	atomic_long_set(&page->pp_frag_count, nr);
-- 
cgit v1.2.3


From fdf6491193e411087ae77bcbc6468e3e1cff99ed Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 20 Feb 2023 17:24:00 +0100
Subject: netfilter: ctnetlink: make event listener tracking global

pernet tracking doesn't work correctly because other netns might have
set NETLINK_LISTEN_ALL_NSID on its event socket.

In this case its expected that events originating in other net
namespaces are also received.

Making pernet-tracking work while also honoring NETLINK_LISTEN_ALL_NSID
requires much more intrusive changes both in netlink and nfnetlink,
f.e. adding a 'setsockopt' callback that lets nfnetlink know that the
event socket entered (or left) ALL_NSID mode.

Move to global tracking instead: if there is an event socket anywhere
on the system, all net namespaces which have conntrack enabled and
use autobind mode will allocate the ecache extension.

netlink_has_listeners() returns false only if the given group has no
subscribers in any net namespace, the 'net' argument passed to
nfnetlink_has_listeners is only used to derive the protocol (nfnetlink),
it has no other effect.

For proper NETLINK_LISTEN_ALL_NSID-aware pernet tracking of event
listeners a new netlink_has_net_listeners() is also needed.

Fixes: 90d1daa45849 ("netfilter: conntrack: add nf_conntrack_events autodetect mode")
Reported-by: Bryce Kahle <bryce.kahle@datadoghq.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netns/conntrack.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index e1290c159184..1f463b3957c7 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -95,7 +95,6 @@ struct nf_ip_net {
 
 struct netns_ct {
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-	u8 ctnetlink_has_listener;
 	bool ecache_dwork_pending;
 #endif
 	u8			sysctl_log_invalid; /* Log invalid packets */
-- 
cgit v1.2.3


From 68ba44639537de6f91fe32783766322d41848127 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 22 Feb 2023 12:07:21 -0500
Subject: sctp: add a refcnt in sctp_stream_priorities to avoid a nested loop

With this refcnt added in sctp_stream_priorities, we don't need to
traverse all streams to check if the prio is used by other streams
when freeing one stream's prio in sctp_sched_prio_free_sid(). This
can avoid a nested loop (up to 65535 * 65535), which may cause a
stuck as Ying reported:

    watchdog: BUG: soft lockup - CPU#23 stuck for 26s! [ksoftirqd/23:136]
    Call Trace:
     <TASK>
     sctp_sched_prio_free_sid+0xab/0x100 [sctp]
     sctp_stream_free_ext+0x64/0xa0 [sctp]
     sctp_stream_free+0x31/0x50 [sctp]
     sctp_association_free+0xa5/0x200 [sctp]

Note that it doesn't need to use refcount_t type for this counter,
as its accessing is always protected under the sock lock.

v1->v2:
 - add a check in sctp_sched_prio_set to avoid the possible prio_head
   refcnt overflow.

Fixes: 9ed7bfc79542 ("sctp: fix memory leak in sctp_stream_outq_migrate()")
Reported-by: Ying Xu <yinxu@redhat.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Link: https://lore.kernel.org/r/825eb0c905cb864991eba335f4a2b780e543f06b.1677085641.git.lucien.xin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sctp/structs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index afa3781e3ca2..e1f6e7fc2b11 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1412,6 +1412,7 @@ struct sctp_stream_priorities {
 	/* The next stream in line */
 	struct sctp_stream_out_ext *next;
 	__u16 prio;
+	__u16 users;
 };
 
 struct sctp_stream_out_ext {
-- 
cgit v1.2.3


From 4a02426787bf024dafdb79b362285ee325de3f5e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 3 Mar 2023 10:58:56 +0100
Subject: netfilter: tproxy: fix deadlock due to missing BH disable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The xtables packet traverser performs an unconditional local_bh_disable(),
but the nf_tables evaluation loop does not.

Functions that are called from either xtables or nftables must assume
that they can be called in process context.

inet_twsk_deschedule_put() assumes that no softirq interrupt can occur.
If tproxy is used from nf_tables its possible that we'll deadlock
trying to aquire a lock already held in process context.

Add a small helper that takes care of this and use it.

Link: https://lore.kernel.org/netfilter-devel/401bd6ed-314a-a196-1cdc-e13c720cc8f2@balasys.hu/
Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support")
Reported-and-tested-by: Major Dávid <major.david@balasys.hu>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tproxy.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h
index 82d0e41b76f2..faa108b1ba67 100644
--- a/include/net/netfilter/nf_tproxy.h
+++ b/include/net/netfilter/nf_tproxy.h
@@ -17,6 +17,13 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk)
 	return false;
 }
 
+static inline void nf_tproxy_twsk_deschedule_put(struct inet_timewait_sock *tw)
+{
+	local_bh_disable();
+	inet_twsk_deschedule_put(tw);
+	local_bh_enable();
+}
+
 /* assign a socket to the skb -- consumes sk */
 static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
 {
-- 
cgit v1.2.3