From ed3557c947e1d4164d370cc2d69dd7eb92706f0a Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 3 Jan 2023 17:56:39 +0100 Subject: ieee802154: Add support for user scanning requests The ieee802154 layer should be able to scan a set of channels in order to look for beacons advertizing PANs. Supporting this involves adding two user commands: triggering scans and aborting scans. The user should also be notified when a new beacon is received and also upon scan termination. A scan request structure is created to list the requirements and to be accessed asynchronously when changing channels or receiving beacons. Mac layers may now implement the ->trigger_scan() and ->abort_scan() hooks. Co-developed-by: David Girault Signed-off-by: David Girault Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20230103165644.432209-2-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- include/net/cfg802154.h | 25 +++++++++++++++++++++ include/net/nl802154.h | 58 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index d09c393d229f..76d4f95e9974 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -18,6 +18,7 @@ struct wpan_phy; struct wpan_phy_cca; +struct cfg802154_scan_request; #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL struct ieee802154_llsec_device_key; @@ -67,6 +68,10 @@ struct cfg802154_ops { struct wpan_dev *wpan_dev, bool mode); int (*set_ackreq_default)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, bool ackreq); + int (*trigger_scan)(struct wpan_phy *wpan_phy, + struct cfg802154_scan_request *request); + int (*abort_scan)(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev); #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL void (*get_llsec_table)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, @@ -278,6 +283,26 @@ struct ieee802154_coord_desc { bool gts_permit; }; +/** + 
* struct cfg802154_scan_request - Scan request + * + * @type: type of scan to be performed + * @page: page on which to perform the scan + * @channels: channels in the %page to be scanned + * @duration: time spent on each channel, calculated with: + * aBaseSuperframeDuration * (2 ^ duration + 1) + * @wpan_dev: the wpan device on which to perform the scan + * @wpan_phy: the wpan phy on which to perform the scan + */ +struct cfg802154_scan_request { + enum nl802154_scan_types type; + u8 page; + u32 channels; + u8 duration; + struct wpan_dev *wpan_dev; + struct wpan_phy *wpan_phy; +}; + struct ieee802154_llsec_key_id { u8 mode; u8 id; diff --git a/include/net/nl802154.h b/include/net/nl802154.h index b79a89d5207c..c267fa1c5aac 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -73,6 +73,9 @@ enum nl802154_commands { NL802154_CMD_DEL_SEC_LEVEL, NL802154_CMD_SCAN_EVENT, + NL802154_CMD_TRIGGER_SCAN, + NL802154_CMD_ABORT_SCAN, + NL802154_CMD_SCAN_DONE, /* add new commands above here */ @@ -134,6 +137,13 @@ enum nl802154_attrs { NL802154_ATTR_NETNS_FD, NL802154_ATTR_COORDINATOR, + NL802154_ATTR_SCAN_TYPE, + NL802154_ATTR_SCAN_FLAGS, + NL802154_ATTR_SCAN_CHANNELS, + NL802154_ATTR_SCAN_PREAMBLE_CODES, + NL802154_ATTR_SCAN_MEAN_PRF, + NL802154_ATTR_SCAN_DURATION, + NL802154_ATTR_SCAN_DONE_REASON, /* add attributes here, update the policy in nl802154.c */ @@ -259,6 +269,54 @@ enum nl802154_coord { NL802154_COORD_MAX, }; +/** + * enum nl802154_scan_types - Scan types + * + * @__NL802154_SCAN_INVALID: scan type number 0 is reserved + * @NL802154_SCAN_ED: An ED scan allows a device to obtain a measure of the peak + * energy in each requested channel + * @NL802154_SCAN_ACTIVE: Locate any coordinator transmitting Beacon frames using + * a Beacon Request command + * @NL802154_SCAN_PASSIVE: Locate any coordinator transmitting Beacon frames + * @NL802154_SCAN_ORPHAN: Relocate coordinator following a loss of synchronisation + * @NL802154_SCAN_ENHANCED_ACTIVE: Same as 
Active using Enhanced Beacon Request + * command instead of Beacon Request command + * @NL802154_SCAN_RIT_PASSIVE: Passive scan for RIT Data Request command frames + * instead of Beacon frames + * @NL802154_SCAN_ATTR_MAX: Maximum SCAN attribute number + */ +enum nl802154_scan_types { + __NL802154_SCAN_INVALID, + NL802154_SCAN_ED, + NL802154_SCAN_ACTIVE, + NL802154_SCAN_PASSIVE, + NL802154_SCAN_ORPHAN, + NL802154_SCAN_ENHANCED_ACTIVE, + NL802154_SCAN_RIT_PASSIVE, + + /* keep last */ + NL802154_SCAN_ATTR_MAX, +}; + +/** + * enum nl802154_scan_done_reasons - End of scan reasons + * + * @__NL802154_SCAN_DONE_REASON_INVALID: scan done reason number 0 is reserved. + * @NL802154_SCAN_DONE_REASON_FINISHED: The scan just finished naturally after + * going through all the requested and possible (complex) channels. + * @NL802154_SCAN_DONE_REASON_ABORTED: The scan was aborted upon user request. + * a Beacon Request command + * @NL802154_SCAN_DONE_REASON_MAX: Maximum scan done reason attribute number. + */ +enum nl802154_scan_done_reasons { + __NL802154_SCAN_DONE_REASON_INVALID, + NL802154_SCAN_DONE_REASON_FINISHED, + NL802154_SCAN_DONE_REASON_ABORTED, + + /* keep last */ + NL802154_SCAN_DONE_REASON_MAX, +}; + /** * enum nl802154_cca_modes - cca modes * -- cgit v1.2.3 From 44def58f5835bbfaf81902c88460fd86a551f4b7 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 3 Jan 2023 17:56:40 +0100 Subject: ieee802154: Define a beacon frame header This definition will be used when adding support for scanning and defines the content of a beacon frame header as in the 802.15.4 specification. 
Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20230103165644.432209-3-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- include/net/ieee802154_netdev.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/net') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 4c33a20ea57f..2f2196049a86 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -38,6 +38,42 @@ #include +struct ieee802154_beacon_hdr { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u16 beacon_order:4, + superframe_order:4, + final_cap_slot:4, + battery_life_ext:1, + reserved0:1, + pan_coordinator:1, + assoc_permit:1; + u8 gts_count:3, + gts_reserved:4, + gts_permit:1; + u8 pend_short_addr_count:3, + reserved1:1, + pend_ext_addr_count:3, + reserved2:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + u16 assoc_permit:1, + pan_coordinator:1, + reserved0:1, + battery_life_ext:1, + final_cap_slot:4, + superframe_order:4, + beacon_order:4; + u8 gts_permit:1, + gts_reserved:4, + gts_count:3; + u8 reserved2:1, + pend_ext_addr_count:3, + reserved1:1, + pend_short_addr_count:3; +#else +#error "Please fix <asm/byteorder.h>" +#endif +} __packed; + struct ieee802154_sechdr { #if defined(__LITTLE_ENDIAN_BITFIELD) u8 level:3, -- cgit v1.2.3 From d2aaf2a01792ccf214f933d0b1ca2d41788c7b16 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 3 Jan 2023 17:56:41 +0100 Subject: ieee802154: Introduce a helper to validate a channel This helper for now only checks if the page member and channel member are valid (in the specification range) and supported (by checking the device capabilities). Soon two new parameters will be introduced and having this helper will let us only modify its content rather than modifying the logic everywhere else in the subsystem. There is no functional change. 
Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20230103165644.432209-4-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- include/net/cfg802154.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 76d4f95e9974..1184b543fba7 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -246,6 +246,17 @@ static inline void wpan_phy_net_set(struct wpan_phy *wpan_phy, struct net *net) write_pnet(&wpan_phy->_net, net); } +static inline bool ieee802154_chan_is_valid(struct wpan_phy *phy, + u8 page, u8 channel) +{ + if (page > IEEE802154_MAX_PAGE || + channel > IEEE802154_MAX_CHANNEL || + !(phy->supported.channels[page] & BIT(channel))) + return false; + + return true; +} + /** * struct ieee802154_addr - IEEE802.15.4 device address * @mode: Address mode from frame header. Can be one of: -- cgit v1.2.3 From 5755cd4d9432779027771e43e51d81a2994ed795 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 3 Jan 2023 17:56:42 +0100 Subject: mac802154: Prepare forcing specific symbol duration The scan logic will bypass the whole ->set_channel() logic from the top by calling the driver hook to just switch between channels when required. We can no longer rely on the "current" page/channel settings to set the right symbol duration. Let's add these as new parameters to allow providing the page/channel couple that we want. There is no functional change. 
Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20230103165644.432209-5-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- include/net/cfg802154.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 1184b543fba7..c16ae5d2dc86 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -483,6 +483,7 @@ static inline const char *wpan_phy_name(struct wpan_phy *phy) return dev_name(&phy->dev); } -void ieee802154_configure_durations(struct wpan_phy *phy); +void ieee802154_configure_durations(struct wpan_phy *phy, + unsigned int page, unsigned int channel); #endif /* __NET_CFG802154_H */ -- cgit v1.2.3 From 57588c71177f0bfc08509c2c3a9bfe32850c0786 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 3 Jan 2023 17:56:44 +0100 Subject: mac802154: Handle passive scanning Implement the core hooks in order to provide the softMAC layer support for passive scans. Scans are requested by the user and can be aborted. Changing channels manually is prohibited during scans. The implementation uses a workqueue triggered at a certain interval depending on the symbol duration for the current channel and the duration order provided. More advanced drivers with internal scheduling capabilities might require additional care but there is none mainline yet. Received beacons during a passive scan are processed in a work queue and their result forwarded to the upper layer. Active scanning is not supported yet. 
Co-developed-by: David Girault Signed-off-by: David Girault Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20230103165644.432209-7-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- include/net/cfg802154.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index c16ae5d2dc86..0b0f81a945b6 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -314,6 +314,22 @@ struct cfg802154_scan_request { struct wpan_phy *wpan_phy; }; +/** + * struct cfg802154_mac_pkt - MAC packet descriptor (beacon/command) + * @node: MAC packets to process list member + * @skb: the received sk_buff + * @sdata: the interface on which @skb was received + * @page: page configuration when @skb was received + * @channel: channel configuration when @skb was received + */ +struct cfg802154_mac_pkt { + struct list_head node; + struct sk_buff *skb; + struct ieee802154_sub_if_data *sdata; + u8 page; + u8 channel; +}; + struct ieee802154_llsec_key_id { u8 mode; u8 id; -- cgit v1.2.3 From 57af281e5389b6fefedb3685f86847cbb0055f75 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Oct 2022 21:45:42 +0100 Subject: rxrpc: Tidy up abort generation infrastructure Tidy up the abort generation infrastructure in the following ways: (1) Create an enum and string mapping table to list the reasons an abort might be generated in tracing. (2) Replace the 3-char string with the values from (1) in the places that use that to log the abort source. This gets rid of a memcpy() in the tracepoint. (3) Subsume the rxrpc_rx_eproto tracepoint with the rxrpc_abort tracepoint and use values from (1) to indicate the trace reason. (4) Always make a call to an abort function at the point of the abort rather than stashing the values into variables and using goto to get to a place where it reported. The C optimiser will collapse the calls together as appropriate. 
The abort functions return a value that can be returned directly if appropriate. Note that this extends into afs also at the points where that generates an abort. To aid with this, the afs sources need to #define RXRPC_TRACE_ONLY_DEFINE_ENUMS before including the rxrpc tracing header because they don't have access to the rxrpc internal structures that some of the tracepoints make use of. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- include/net/af_rxrpc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index d5a5ae926380..ba717eac0229 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -15,6 +15,7 @@ struct key; struct sock; struct socket; struct rxrpc_call; +enum rxrpc_abort_reason; enum rxrpc_interruptibility { RXRPC_INTERRUPTIBLE, /* Call is interruptible */ @@ -55,7 +56,7 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, struct iov_iter *, size_t *, bool, u32 *, u16 *); bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, - u32, int, const char *); + u32, int, enum rxrpc_abort_reason); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); -- cgit v1.2.3 From 9053637e0da783efdb37bbfea6a27b856c0228d7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 5 Jan 2023 22:33:58 -0800 Subject: devlink: remove the registration guarantee of references The objective of exposing the devlink instance locks to drivers was to let them use these locks to prevent user space from accessing the device before it's fully initialized. This is difficult because devlink_unregister() waits for all references to be released, meaning that devlink_unregister() can't itself be called under the instance lock. 
To avoid this issue devlink_register() was moved after subobject registration a while ago. Unfortunately the netdev paths get a hold of the devlink instances _before_ they are registered. Ideally netdev should wait for devlink init to finish (synchronizing on the instance lock). This can't work because we don't know if the instance will _ever_ be registered (in case of failures it may not). The other option of returning an error until devlink_register() is called is unappealing (user space would get a notification netdev exist but would have to wait arbitrary amount of time before accessing some of its attributes). Weaken the guarantees of the devlink references. Holding a reference will now only guarantee that the memory of the object is around. Another way of looking at it is that the reference now protects the object not its "registered" status. Use devlink instance lock to synchronize unregistration. This implies that releasing of the "main" reference of the devlink instance moves from devlink_unregister() to devlink_free(). Reviewed-by: Jacob Keller Signed-off-by: Jakub Kicinski Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 6a2e4f21779f..425ecef431b7 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1647,6 +1647,8 @@ static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, return devlink_alloc_ns(ops, priv_size, &init_net, dev); } void devlink_set_features(struct devlink *devlink, u64 features); +int devl_register(struct devlink *devlink); +void devl_unregister(struct devlink *devlink); void devlink_register(struct devlink *devlink); void devlink_unregister(struct devlink *devlink); void devlink_free(struct devlink *devlink); -- cgit v1.2.3 From 4444bc2116aecdcde87dce80373540adc8bd478b Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Fri, 30 Dec 2022 13:18:49 +0100 Subject: wifi: mac80211: Proper mark iTXQs for resumption When a running wake_tx_queue() call is aborted due to a hw queue stop the corresponding iTXQ is not always correctly marked for resumption: wake_tx_push_queue() can stop the queue run without setting @IEEE80211_TXQ_STOP_NETIF_TX. Without the @IEEE80211_TXQ_STOP_NETIF_TX flag __ieee80211_wake_txqs() will not schedule a new queue run and remaining frames in the queue get stuck till another frame is queued to it. Fix the issue for all drivers - also the ones with custom wake_tx_queue callbacks - by moving the logic into ieee80211_tx_dequeue() and drop the redundant @txqs_stopped. @IEEE80211_TXQ_STOP_NETIF_TX is also renamed to @IEEE80211_TXQ_DIRTY to better describe the flag. 
Fixes: c850e31f79f0 ("wifi: mac80211: add internal handler for wake_tx_queue") Signed-off-by: Alexander Wetzel Link: https://lore.kernel.org/r/20221230121850.218810-1-alexander@wetzel-home.de Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 689da327ce2e..e3235b9c02c2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1832,8 +1832,6 @@ struct ieee80211_vif_cfg { * @drv_priv: data area for driver use, will always be aligned to * sizeof(void \*). * @txq: the multicast data TX queue - * @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped, - * protected by fq->lock. * @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see * &enum ieee80211_offload_flags. * @mbssid_tx_vif: Pointer to the transmitting interface if MBSSID is enabled. @@ -1863,8 +1861,6 @@ struct ieee80211_vif { bool probe_req_reg; bool rx_mcast_action_reg; - bool txqs_stopped[IEEE80211_NUM_ACS]; - struct ieee80211_vif *mbssid_tx_vif; /* must be last */ -- cgit v1.2.3 From 952f6c9daf509f7919887c26753884fa530f8622 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 26 Dec 2022 20:16:09 +0100 Subject: wifi: mac80211: Drop stations iterator where the iterator function may sleep This reverts commit acb99b9b2a08f ("mac80211: Add stations iterator where the iterator function may sleep"). A different approach was found for the rtw88 driver where most of the problematic locks were converted to a driver-local mutex. Drop ieee80211_iterate_stations() because there are no users of that function. 
Signed-off-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20221226191609.2934234-1-martin.blumenstingl@googlemail.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 689da327ce2e..b421a1bfc7c5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5888,9 +5888,6 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, * This function iterates over the interfaces associated with a given * hardware that are currently active and calls the callback for them. * This version can only be used while holding the wiphy mutex. - * The driver must not call this with a lock held that it can also take in - * response to callbacks from mac80211, and it must not call this within - * callbacks made by mac80211 - both would result in deadlocks. * * @hw: the hardware struct of which the interfaces should be iterated over * @iter_flags: iteration flags, see &enum ieee80211_interface_iteration_flags @@ -5904,24 +5901,6 @@ void ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw, struct ieee80211_vif *vif), void *data); -/** - * ieee80211_iterate_stations - iterate stations - * - * This function iterates over all stations associated with a given - * hardware that are currently uploaded to the driver and calls the callback - * function for them. - * This function allows the iterator function to sleep, when the iterator - * function is atomic @ieee80211_iterate_stations_atomic can be used. 
- * - * @hw: the hardware struct of which the interfaces should be iterated over - * @iterator: the iterator function to call, cannot sleep - * @data: first argument of the iterator function - */ -void ieee80211_iterate_stations(struct ieee80211_hw *hw, - void (*iterator)(void *data, - struct ieee80211_sta *sta), - void *data); - /** * ieee80211_iterate_stations_atomic - iterate stations * -- cgit v1.2.3 From af6d10345ca76670c1b7c37799f0d5576ccef277 Mon Sep 17 00:00:00 2001 From: Jon Maxwell Date: Thu, 12 Jan 2023 12:25:32 +1100 Subject: ipv6: remove max_size check inline with ipv4 In ip6_dst_gc() replace: if (entries > gc_thresh) With: if (entries > ops->gc_thresh) Sending Ipv6 packets in a loop via a raw socket triggers an issue where a route is cloned by ip6_rt_cache_alloc() for each packet sent. This quickly consumes the Ipv6 max_size threshold which defaults to 4096 resulting in these warnings: [1] 99.187805] dst_alloc: 7728 callbacks suppressed [2] Route cache is full: consider increasing sysctl net.ipv6.route.max_size. . . [300] Route cache is full: consider increasing sysctl net.ipv6.route.max_size. When this happens the packet is dropped and sendto() gets a network is unreachable error: remaining pkt 200557 errno 101 remaining pkt 196462 errno 101 . . remaining pkt 126821 errno 101 Implement David Ahern's suggestion to remove max_size check seeing that Ipv6 has a GC to manage memory usage. Ipv4 already does not check max_size. Here are some memory comparisons for Ipv4 vs Ipv6 with the patch: Test by running 5 instances of a program that sends UDP packets to a raw socket 5000000 times. Compare Ipv4 and Ipv6 performance with a similar program. 
Ipv4: Before test: MemFree: 29427108 kB Slab: 237612 kB ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 2881 3990 192 42 2 : tunables 0 0 0 During test: MemFree: 29417608 kB Slab: 247712 kB ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 44394 44394 192 42 2 : tunables 0 0 0 After test: MemFree: 29422308 kB Slab: 238104 kB ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0 Ipv6 with patch: Errno 101 errors are not observed anymore with the patch. Before test: MemFree: 29422308 kB Slab: 238104 kB ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0 During Test: MemFree: 29431516 kB Slab: 240940 kB ip6_dst_cache 11980 12064 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0 After Test: MemFree: 29441816 kB Slab: 238132 kB ip6_dst_cache 1902 2432 256 32 2 : tunables 0 0 0 xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0 ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0 Tested-by: Andrea Mayer Signed-off-by: Jon Maxwell Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20230112012532.311021-1-jmaxwell37@gmail.com Signed-off-by: Jakub Kicinski --- include/net/dst_ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 88ff7bb2bb9b..632086b2f644 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -16,7 +16,7 @@ struct dst_ops { unsigned short family; unsigned int gc_thresh; - int (*gc)(struct dst_ops *ops); + void (*gc)(struct dst_ops *ops); struct dst_entry * (*check)(struct dst_entry *, __u32 cookie); unsigned int (*default_advmss)(const struct dst_entry *); unsigned int (*mtu)(const struct 
dst_entry *); -- cgit v1.2.3 From 3a415d59c1dbec9d772dbfab2d2520d98360caae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Jan 2023 16:48:49 +0000 Subject: net/sched: sch_taprio: fix possible use-after-free syzbot reported a nasty crash [1] in net_tx_action() which made little sense until we got a repro. This repro installs a taprio qdisc, but providing an invalid TCA_RATE attribute. qdisc_create() has to destroy the just initialized taprio qdisc, and taprio_destroy() is called. However, the hrtimer used by taprio had already fired, therefore advance_sched() called __netif_schedule(). Then net_tx_action was trying to use a destroyed qdisc. We can not undo the __netif_schedule(), so we must wait until one cpu serviced the qdisc before we can proceed. Many thanks to Alexander Potapenko for his help. [1] BUG: KMSAN: uninit-value in queued_spin_trylock include/asm-generic/qspinlock.h:94 [inline] BUG: KMSAN: uninit-value in do_raw_spin_trylock include/linux/spinlock.h:191 [inline] BUG: KMSAN: uninit-value in __raw_spin_trylock include/linux/spinlock_api_smp.h:89 [inline] BUG: KMSAN: uninit-value in _raw_spin_trylock+0x92/0xa0 kernel/locking/spinlock.c:138 queued_spin_trylock include/asm-generic/qspinlock.h:94 [inline] do_raw_spin_trylock include/linux/spinlock.h:191 [inline] __raw_spin_trylock include/linux/spinlock_api_smp.h:89 [inline] _raw_spin_trylock+0x92/0xa0 kernel/locking/spinlock.c:138 spin_trylock include/linux/spinlock.h:359 [inline] qdisc_run_begin include/net/sch_generic.h:187 [inline] qdisc_run+0xee/0x540 include/net/pkt_sched.h:125 net_tx_action+0x77c/0x9a0 net/core/dev.c:5086 __do_softirq+0x1cc/0x7fb kernel/softirq.c:571 run_ksoftirqd+0x2c/0x50 kernel/softirq.c:934 smpboot_thread_fn+0x554/0x9f0 kernel/smpboot.c:164 kthread+0x31b/0x430 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 Uninit was created at: slab_post_alloc_hook mm/slab.h:732 [inline] slab_alloc_node mm/slub.c:3258 [inline] __kmalloc_node_track_caller+0x814/0x1250 mm/slub.c:4970 
kmalloc_reserve net/core/skbuff.c:358 [inline] __alloc_skb+0x346/0xcf0 net/core/skbuff.c:430 alloc_skb include/linux/skbuff.h:1257 [inline] nlmsg_new include/net/netlink.h:953 [inline] netlink_ack+0x5f3/0x12b0 net/netlink/af_netlink.c:2436 netlink_rcv_skb+0x55d/0x6c0 net/netlink/af_netlink.c:2507 rtnetlink_rcv+0x30/0x40 net/core/rtnetlink.c:6108 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0xf3b/0x1270 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x1288/0x1440 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0xabc/0xe90 net/socket.c:2482 ___sys_sendmsg+0x2a1/0x3f0 net/socket.c:2536 __sys_sendmsg net/socket.c:2565 [inline] __do_sys_sendmsg net/socket.c:2574 [inline] __se_sys_sendmsg net/socket.c:2572 [inline] __x64_sys_sendmsg+0x367/0x540 net/socket.c:2572 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd CPU: 0 PID: 13 Comm: ksoftirqd/0 Not tainted 6.0.0-rc2-syzkaller-47461-gac3859c02d7f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/22/2022 Fixes: 5a781ccbd19e ("tc: Add support for configuring the taprio scheduler") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Alexander Potapenko Cc: Vinicius Costa Gomes Signed-off-by: David S. Miller --- include/net/sch_generic.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d5517719af4e..af4aa66aaa4e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1288,4 +1288,11 @@ void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx); int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); +/* Make sure qdisc is no longer in SCHED state. 
*/ +static inline void qdisc_synchronize(const struct Qdisc *q) +{ + while (test_bit(__QDISC_STATE_SCHED, &q->state)) + msleep(1); +} + #endif -- cgit v1.2.3 From 2032e907d8d498fcabfe24b43550c50947817c6d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 3 Jan 2023 13:47:16 +0100 Subject: netfilter: nf_tables: avoid retpoline overhead for objref calls objref expression is builtin, so avoid calls to it for RETPOLINE=y builds. Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables_core.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 3e825381ac5c..bedef373ec21 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -164,4 +164,8 @@ void nft_payload_inner_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt, struct nft_inner_tun_ctx *ctx); +void nft_objref_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +void nft_objref_map_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); #endif /* _NET_NF_TABLES_CORE_H */ -- cgit v1.2.3 From d9e7891476057b24a1acbf10a491e5b9a1c4ae77 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 3 Jan 2023 13:47:17 +0100 Subject: netfilter: nf_tables: avoid retpoline overhead for some ct expression calls nft_ct expression cannot be made builtin to nf_tables without also forcing the conntrack itself to be builtin. However, this can be avoided by splitting retrieval of a few selector keys that only need to access the nf_conn structure, i.e. no function calls to nf_conntrack code. Many rulesets start with something like "ct status established,related accept" With this change, this no longer requires an indirect call, which gives about 1.8% more throughput with a simple conntrack-enabled forwarding test (retpoline thunk used). 
Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables_core.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index bedef373ec21..780a5f6ad4a6 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -61,6 +61,16 @@ struct nft_immediate_expr { extern const struct nft_expr_ops nft_cmp_fast_ops; extern const struct nft_expr_ops nft_cmp16_fast_ops; +struct nft_ct { + enum nft_ct_keys key:8; + enum ip_conntrack_dir dir:8; + u8 len; + union { + u8 dreg; + u8 sreg; + }; +}; + struct nft_payload { enum nft_payload_bases base:8; u8 offset; @@ -140,6 +150,8 @@ void nft_rt_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_ct_get_fast_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); enum { NFT_PAYLOAD_CTX_INNER_TUN = (1 << 0), -- cgit v1.2.3 From 585b6e1304dcc46e65dc1aaca5973b33abd0c48d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 16 Jan 2023 15:24:11 +0100 Subject: wifi: cfg80211: remove support for static WEP This reverts commit b8676221f00d ("cfg80211: Add support for static WEP in the driver") since no driver ever ended up using it. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 03d4f4deadae..1f8f827290a2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1075,7 +1075,6 @@ struct survey_info { s8 noise; }; -#define CFG80211_MAX_WEP_KEYS 4 #define CFG80211_MAX_NUM_AKM_SUITES 10 /** @@ -1099,9 +1098,6 @@ struct survey_info { * port frames over NL80211 instead of the network interface. 
* @control_port_no_preauth: disables pre-auth rx over the nl80211 control * port for mac80211 - * @wep_keys: static WEP keys, if not NULL points to an array of - * CFG80211_MAX_WEP_KEYS WEP keys - * @wep_tx_key: key index (0..3) of the default TX static WEP key * @psk: PSK (for devices supporting 4-way-handshake offload) * @sae_pwd: password for SAE authentication (for devices supporting SAE * offload) @@ -1134,8 +1130,6 @@ struct cfg80211_crypto_settings { bool control_port_no_encrypt; bool control_port_over_nl80211; bool control_port_no_preauth; - struct key_params *wep_keys; - int wep_tx_key; const u8 *psk; const u8 *sae_pwd; u8 sae_pwd_len; @@ -4683,8 +4677,6 @@ struct cfg80211_ops { * @WIPHY_FLAG_SUPPORTS_5_10_MHZ: Device supports 5 MHz and 10 MHz channels. * @WIPHY_FLAG_HAS_CHANNEL_SWITCH: Device supports channel switch in * beaconing mode (AP, IBSS, Mesh, ...). - * @WIPHY_FLAG_HAS_STATIC_WEP: The device supports static WEP key installation - * before connection. * @WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK: The device supports bigger kek and kck keys * @WIPHY_FLAG_SUPPORTS_MLO: This is a temporary flag gating the MLO APIs, * in order to not have them reachable in normal drivers, until we have @@ -4715,7 +4707,6 @@ enum wiphy_flags { WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL = BIT(21), WIPHY_FLAG_SUPPORTS_5_10_MHZ = BIT(22), WIPHY_FLAG_HAS_CHANNEL_SWITCH = BIT(23), - WIPHY_FLAG_HAS_STATIC_WEP = BIT(24), }; /** -- cgit v1.2.3 From 648fba791cb0f5ef6166449d056f82e6639fe268 Mon Sep 17 00:00:00 2001 From: Shivani Baranwal Date: Tue, 6 Dec 2022 20:07:15 +0530 Subject: wifi: cfg80211: Support 32 bytes KCK key in GTK rekey offload Currently, maximum KCK key length supported for GTK rekey offload is 24 bytes but with some newer AKMs the KCK key length can be 32 bytes. e.g., 00-0F-AC:24 AKM suite with SAE finite cyclic group 21. Add support to allow 32 bytes KCK keys in GTK rekey offload. 
Signed-off-by: Shivani Baranwal Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20221206143715.1802987-3-quic_vjakkam@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1f8f827290a2..f96db7ad64f1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4682,6 +4682,7 @@ struct cfg80211_ops { * in order to not have them reachable in normal drivers, until we have * complete feature/interface combinations/etc. advertisement. No driver * should set this flag for now. + * @WIPHY_FLAG_SUPPORTS_EXT_KCK_32: The device supports 32-byte KCK keys. */ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0), @@ -4694,7 +4695,7 @@ enum wiphy_flags { WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), WIPHY_FLAG_IBSS_RSN = BIT(8), WIPHY_FLAG_MESH_AUTH = BIT(10), - /* use hole at 11 */ + WIPHY_FLAG_SUPPORTS_EXT_KCK_32 = BIT(11), /* use hole at 12 */ WIPHY_FLAG_SUPPORTS_FW_ROAM = BIT(13), WIPHY_FLAG_AP_UAPSD = BIT(14), -- cgit v1.2.3 From bfc551679cd63ca3a4b3e7f338aa2bb06ce43e25 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Tue, 6 Dec 2022 13:32:26 +0530 Subject: wifi: cfg80211: Use MLD address to indicate MLD STA disconnection We use station's MLD address to report disconnection of MLD station. Update the documentation in multiple places to indicate this. 
Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20221206080226.1702646-4-quic_vjakkam@quicinc.com [update commit message] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f96db7ad64f1..54a77d906b2d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7775,7 +7775,7 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, /** * cfg80211_del_sta_sinfo - notify userspace about deletion of a station * @dev: the netdev - * @mac_addr: the station's address + * @mac_addr: the station's address. For MLD station, MLD address is used. * @sinfo: the station information/statistics * @gfp: allocation flags */ @@ -7786,7 +7786,7 @@ void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, * cfg80211_del_sta - notify userspace about deletion of a station * * @dev: the netdev - * @mac_addr: the station's address + * @mac_addr: the station's address. For MLD station, MLD address is used. * @gfp: allocation flags */ static inline void cfg80211_del_sta(struct net_device *dev, -- cgit v1.2.3 From 42470fa093248807f668825ff14de9bc623c0d53 Mon Sep 17 00:00:00 2001 From: Muna Sinada Date: Wed, 5 Oct 2022 14:54:45 -0700 Subject: wifi: mac80211: Add VHT MU-MIMO related flags in ieee80211_bss_conf Adding flags for SU Beamformer, SU Beamformee, MU Beamformer and MU Beamformee for VHT. This is utilized to pass MU-MIMO configurations from user space to driver in AP mode. Signed-off-by: Muna Sinada Link: https://lore.kernel.org/r/1665006886-23874-1-git-send-email-quic_msinada@quicinc.com [fixed indentation, removed redundant !!] 
Signed-off-by: Johannes Berg --- include/net/mac80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b5b80f943e82..a0f67d49be05 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -653,6 +653,14 @@ struct ieee80211_fils_discovery { * write-protected by sdata_lock and local->mtx so holding either is fine * for read access. * @color_change_color: the bss color that will be used after the change. + * @vht_su_beamformer: in AP mode, does this BSS support operation as an VHT SU + * beamformer + * @vht_su_beamformee: in AP mode, does this BSS support operation as an VHT SU + * beamformee + * @vht_mu_beamformer: in AP mode, does this BSS support operation as an VHT MU + * beamformer + * @vht_mu_beamformee: in AP mode, does this BSS support operation as an VHT MU + * beamformee */ struct ieee80211_bss_conf { const u8 *bssid; @@ -726,6 +734,11 @@ struct ieee80211_bss_conf { bool color_change_active; u8 color_change_color; + + bool vht_su_beamformer; + bool vht_su_beamformee; + bool vht_mu_beamformer; + bool vht_mu_beamformee; }; /** -- cgit v1.2.3 From b1b3297df7db7065476666ddbca5a61d081347ef Mon Sep 17 00:00:00 2001 From: Muna Sinada Date: Wed, 5 Oct 2022 14:54:46 -0700 Subject: wifi: mac80211: Add HE MU-MIMO related flags in ieee80211_bss_conf Adding flags for SU Beamformer, SU Beamformee, MU Beamformer and Full Bandwidth UL MU-MIMO for HE. This is utilized to pass MU-MIMO configurations from user space to driver in AP mode. Signed-off-by: Muna Sinada Link: https://lore.kernel.org/r/1665006886-23874-2-git-send-email-quic_msinada@quicinc.com [fixed indentation, removed redundant !!] 
Signed-off-by: Johannes Berg --- include/net/mac80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a0f67d49be05..65dd3982391f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -661,6 +661,15 @@ struct ieee80211_fils_discovery { * beamformer * @vht_mu_beamformee: in AP mode, does this BSS support operation as an VHT MU * beamformee + * @he_su_beamformer: in AP-mode, does this BSS support operation as an HE SU + * beamformer + * @he_su_beamformee: in AP-mode, does this BSS support operation as an HE SU + * beamformee + * @he_mu_beamformer: in AP-mode, does this BSS support operation as an HE MU + * beamformer + * @he_full_ul_mumimo: does this BSS support the reception (AP) or transmission + * (non-AP STA) of an HE TB PPDU on an RU that spans the entire PPDU + * bandwidth */ struct ieee80211_bss_conf { const u8 *bssid; @@ -739,6 +748,10 @@ struct ieee80211_bss_conf { bool vht_su_beamformee; bool vht_mu_beamformer; bool vht_mu_beamformee; + bool he_su_beamformer; + bool he_su_beamformee; + bool he_mu_beamformer; + bool he_full_ul_mumimo; }; /** -- cgit v1.2.3 From f66c48af7a110c0d694c4ac4a1257affb272a2ea Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 9 Jan 2023 13:07:21 +0200 Subject: mac80211: support minimal EHT rate reporting on RX Add minimal support for RX EHT rate reporting, not yet adding (modifying) any radiotap headers, just statistics for cfg80211. 
Signed-off-by: Johannes Berg --- include/net/mac80211.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 65dd3982391f..e83cb9519e31 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1462,6 +1462,7 @@ enum mac80211_rx_encoding { RX_ENC_HT, RX_ENC_VHT, RX_ENC_HE, + RX_ENC_EHT, }; /** @@ -1495,7 +1496,7 @@ enum mac80211_rx_encoding { * @antenna: antenna used * @rate_idx: index of data rate into band's supported rates or MCS index if * HT or VHT is used (%RX_FLAG_HT/%RX_FLAG_VHT) - * @nss: number of streams (VHT and HE only) + * @nss: number of streams (VHT, HE and EHT only) * @flag: %RX_FLAG_\* * @encoding: &enum mac80211_rx_encoding * @bw: &enum rate_info_bw @@ -1503,6 +1504,8 @@ enum mac80211_rx_encoding { * @he_ru: HE RU, from &enum nl80211_he_ru_alloc * @he_gi: HE GI, from &enum nl80211_he_gi * @he_dcm: HE DCM value + * @eht.ru: EHT RU, from &enum nl80211_eht_ru_alloc + * @eht.gi: EHT GI, from &enum nl80211_eht_gi * @rx_flags: internal RX flags for mac80211 * @ampdu_reference: A-MPDU reference number, must be a different value for * each A-MPDU but the same for each subframe within one A-MPDU @@ -1524,8 +1527,18 @@ struct ieee80211_rx_status { u32 flag; u16 freq: 13, freq_offset: 1; u8 enc_flags; - u8 encoding:2, bw:3, he_ru:3; - u8 he_gi:2, he_dcm:1; + u8 encoding:3, bw:4; + union { + struct { + u8 he_ru:3; + u8 he_gi:2; + u8 he_dcm:1; + }; + struct { + u8 ru:4; + u8 gi:2; + } eht; + }; u8 rate_idx; u8 nss; u8 rx_flags; -- cgit v1.2.3 From 41ade47c1273ca0e61c36f2cccad37473f0b2422 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Jan 2023 08:52:01 +0100 Subject: wifi: mac80211: add kernel-doc for EHT structure Looks like this is required, even if all of the members are separately described. Add a line to avoid the warning. 
Fixes: f66c48af7a11 ("mac80211: support minimal EHT rate reporting on RX") Reported-by: Stephen Rothwell Signed-off-by: Johannes Berg --- include/net/mac80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e83cb9519e31..a945f1b1b4d8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1504,6 +1504,7 @@ enum mac80211_rx_encoding { * @he_ru: HE RU, from &enum nl80211_he_ru_alloc * @he_gi: HE GI, from &enum nl80211_he_gi * @he_dcm: HE DCM value + * @eht: EHT specific rate information * @eht.ru: EHT RU, from &enum nl80211_eht_ru_alloc * @eht.gi: EHT GI, from &enum nl80211_eht_gi * @rx_flags: internal RX flags for mac80211 -- cgit v1.2.3 From 82253ddaff582147cd3fd0e629c4e65d62b1d015 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Jan 2023 14:57:11 +0100 Subject: wifi: mac80211: drop extra 'e' from ieeee80211... name Somehow an extra 'e' slipped in there without anyone noticing, drop that from ieeee80211_obss_color_collision_notify(). Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a945f1b1b4d8..2635e6de8101 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7210,7 +7210,7 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, struct ieee80211_vif *vif); /** - * ieeee80211_obss_color_collision_notify - notify userland about a BSS color + * ieee80211_obss_color_collision_notify - notify userland about a BSS color * collision. * * @vif: &struct ieee80211_vif pointer from the add_interface callback. 
@@ -7219,8 +7219,8 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, * @gfp: allocation flags */ void -ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif, - u64 color_bitmap, gfp_t gfp); +ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif, + u64 color_bitmap, gfp_t gfp); /** * ieee80211_is_tx_data - check if frame is a data frame -- cgit v1.2.3 From 5cc9049cb9021a46ad5711a946eb3ded47eed0de Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 18 Jan 2023 16:21:04 +0100 Subject: devlink: remove linecards lock Similar to other devlink objects, convert the linecards list to be protected by devlink instance lock. Alongside with that rename the create/destroy() functions to devl_* to indicate the devlink instance lock needs to be held while calling them. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Reviewed-by: Jacob Keller Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 425ecef431b7..d7c9572e5bea 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1687,9 +1687,9 @@ void devl_rate_nodes_destroy(struct devlink *devlink); void devlink_port_linecard_set(struct devlink_port *devlink_port, struct devlink_linecard *linecard); struct devlink_linecard * -devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index, - const struct devlink_linecard_ops *ops, void *priv); -void devlink_linecard_destroy(struct devlink_linecard *linecard); +devl_linecard_create(struct devlink *devlink, unsigned int linecard_index, + const struct devlink_linecard_ops *ops, void *priv); +void devl_linecard_destroy(struct devlink_linecard *linecard); void devlink_linecard_provision_set(struct devlink_linecard *linecard, const char *type); void devlink_linecard_provision_clear(struct devlink_linecard *linecard); -- cgit v1.2.3 From 
dfdfd1305ddecb990566193f2ba8a11bccba4cde Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 18 Jan 2023 16:21:08 +0100 Subject: devlink: protect health reporter operation with instance lock Similar to other devlink objects, protect the reporters list by devlink instance lock. Alongside add unlocked versions of health reporter create/destroy functions and use them in drivers on call paths where the instance lock is held. Signed-off-by: Jiri Pirko Reviewed-by: Jacob Keller Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index d7c9572e5bea..0d64feaef7cb 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1865,18 +1865,34 @@ int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, const void *value, u32 value_len); struct devlink_health_reporter * -devlink_health_reporter_create(struct devlink *devlink, - const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv); +devl_port_health_reporter_create(struct devlink_port *port, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, void *priv); struct devlink_health_reporter * devlink_port_health_reporter_create(struct devlink_port *port, const struct devlink_health_reporter_ops *ops, u64 graceful_period, void *priv); +struct devlink_health_reporter * +devl_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, void *priv); + +struct devlink_health_reporter * +devlink_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, void *priv); + +void +devl_health_reporter_destroy(struct devlink_health_reporter *reporter); + void devlink_health_reporter_destroy(struct devlink_health_reporter *reporter); +void +devl_port_health_reporter_destroy(struct 
devlink_health_reporter *reporter); + void devlink_port_health_reporter_destroy(struct devlink_health_reporter *reporter); -- cgit v1.2.3 From 1dea3b4e4c52f4bed64d1c527d548e82ccaea15a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 18 Jan 2023 16:21:09 +0100 Subject: devlink: remove reporters_lock Similar to other devlink objects, rely on devlink instance lock and remove object specific reporters_lock. Signed-off-by: Jiri Pirko Reviewed-by: Jacob Keller Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 0d64feaef7cb..d9ea76bea36e 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -146,7 +146,6 @@ struct devlink_port { initialized:1; struct delayed_work type_warn_dw; struct list_head reporter_list; - struct mutex reporters_lock; /* Protects reporter_list */ struct devlink_rate *devlink_rate; struct devlink_linecard *linecard; -- cgit v1.2.3 From 9f167327efecc3977deff0c852760e3759b0c2a7 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 18 Jan 2023 16:21:10 +0100 Subject: devlink: remove devl*_port_health_reporter_destroy() Remove port-specific health reporter destroy function as it is currently the same as the instance one so no longer needed. Inline __devlink_health_reporter_destroy() as it is no longer called from multiple places. 
Signed-off-by: Jiri Pirko Reviewed-by: Jacob Keller Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index d9ea76bea36e..608a0c198be8 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1889,12 +1889,6 @@ devl_health_reporter_destroy(struct devlink_health_reporter *reporter); void devlink_health_reporter_destroy(struct devlink_health_reporter *reporter); -void -devl_port_health_reporter_destroy(struct devlink_health_reporter *reporter); - -void -devlink_port_health_reporter_destroy(struct devlink_health_reporter *reporter); - void * devlink_health_reporter_priv(struct devlink_health_reporter *reporter); int devlink_health_report(struct devlink_health_reporter *reporter, -- cgit v1.2.3 From 622f1b2fae2eea28a80b04f130e3bb54227699f8 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Wed, 18 Jan 2023 22:08:27 +0100 Subject: net: dcb: add new rewrite table Add new rewrite table and all the required functions, offload hooks and bookkeeping for maintaining it. The rewrite table reuses the app struct, and the entire set of app selectors. As such, some bookkeeping code can be shared between the rewrite- and the APP table. New functions for getting, setting and deleting entries have been added. Apart from operating on the rewrite list, these functions do not emit a DCB_APP_EVENT when the list is modified. The new dcb_getrewr does a lookup based on selector and priority and returns the protocol, so that mappings from priority to protocol, for a given selector and ifindex are obtained. Also, a new nested attribute has been added, that encapsulates one or more app structs. This attribute is used to distinguish the two tables. The dcb_lock used for the APP table is reused for the rewrite table. Signed-off-by: Daniel Machon Reviewed-by: Petr Machata Signed-off-by: David S. 
Miller --- include/net/dcbnl.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index 8841ab6c2de7..fe7dfb8bcb5b 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -19,6 +19,10 @@ struct dcb_app_type { u8 dcbx; }; +u16 dcb_getrewr(struct net_device *dev, struct dcb_app *app); +int dcb_setrewr(struct net_device *dev, struct dcb_app *app); +int dcb_delrewr(struct net_device *dev, struct dcb_app *app); + int dcb_setapp(struct net_device *, struct dcb_app *); u8 dcb_getapp(struct net_device *, struct dcb_app *); int dcb_ieee_setapp(struct net_device *, struct dcb_app *); @@ -113,6 +117,10 @@ struct dcbnl_rtnl_ops { /* apptrust */ int (*dcbnl_setapptrust)(struct net_device *, u8 *, int); int (*dcbnl_getapptrust)(struct net_device *, u8 *, int *); + + /* rewrite */ + int (*dcbnl_setrewr)(struct net_device *dev, struct dcb_app *app); + int (*dcbnl_delrewr)(struct net_device *dev, struct dcb_app *app); }; #endif /* __NET_DCBNL_H__ */ -- cgit v1.2.3 From 1df99338e6d4e96178b68b3e17bab33e9f1eb628 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Wed, 18 Jan 2023 22:08:28 +0100 Subject: net: dcb: add helper functions to retrieve PCP and DSCP rewrite maps Add two new helper functions to retrieve a mapping of priority to PCP and DSCP bitmasks, where each bitmap contains ones in positions that match a rewrite entry. dcb_getrewr_prio_dscp_mask_map() reuses the dcb_ieee_app_prio_map, as this struct is already used for a similar mapping in the app table. Signed-off-by: Daniel Machon Reviewed-by: Petr Machata Signed-off-by: David S. 
Miller --- include/net/dcbnl.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index fe7dfb8bcb5b..42207fc44660 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -29,12 +29,22 @@ int dcb_ieee_setapp(struct net_device *, struct dcb_app *); int dcb_ieee_delapp(struct net_device *, struct dcb_app *); u8 dcb_ieee_getapp_mask(struct net_device *, struct dcb_app *); +struct dcb_rewr_prio_pcp_map { + u16 map[IEEE_8021QAZ_MAX_TCS]; +}; + +void dcb_getrewr_prio_pcp_mask_map(const struct net_device *dev, + struct dcb_rewr_prio_pcp_map *p_map); + struct dcb_ieee_app_prio_map { u64 map[IEEE_8021QAZ_MAX_TCS]; }; void dcb_ieee_getapp_prio_dscp_mask_map(const struct net_device *dev, struct dcb_ieee_app_prio_map *p_map); +void dcb_getrewr_prio_dscp_mask_map(const struct net_device *dev, + struct dcb_ieee_app_prio_map *p_map); + struct dcb_ieee_app_dscp_map { u8 map[64]; }; -- cgit v1.2.3 From 20e3028c39a5bf882e91e717da96d14f1acec40e Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Thu, 19 Jan 2023 12:59:10 -0800 Subject: net: mana: Fix IRQ name - add PCI and queue number The PCI and queue number info is missing in IRQ names. Add PCI and queue number to IRQ names, to allow CPU affinity tuning scripts to work. 
Cc: stable@vger.kernel.org Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") Signed-off-by: Haiyang Zhang Reviewed-by: Jesse Brandeburg Link: https://lore.kernel.org/r/1674161950-19708-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Jakub Kicinski --- include/net/mana/gdma.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index b3ba04615caa..56189e4252da 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -336,9 +336,12 @@ struct gdma_queue_spec { }; }; +#define MANA_IRQ_NAME_SZ 32 + struct gdma_irq_context { void (*handler)(void *arg); void *arg; + char name[MANA_IRQ_NAME_SZ]; }; struct gdma_context { -- cgit v1.2.3 From 5f6c2d498ad97cf9f85b81c0fbb205abbcdfe3f8 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 19 Jan 2023 14:27:00 +0200 Subject: net: dsa: add plumbing for changing and getting MAC merge layer state The DSA core is in charge of the ethtool_ops of the net devices associated with switch ports, so in case a hardware driver supports the MAC merge layer, DSA must pass the callbacks through to the driver. Add support for precisely that. Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- include/net/dsa.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 96086289aa9b..a15f17a38eca 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -937,6 +937,17 @@ struct dsa_switch_ops { int (*get_ts_info)(struct dsa_switch *ds, int port, struct ethtool_ts_info *ts); + /* + * ethtool MAC merge layer + */ + int (*get_mm)(struct dsa_switch *ds, int port, + struct ethtool_mm_state *state); + int (*set_mm)(struct dsa_switch *ds, int port, + struct ethtool_mm_cfg *cfg, + struct netlink_ext_ack *extack); + void (*get_mm_stats)(struct dsa_switch *ds, int port, + struct ethtool_mm_stats *stats); + /* * DCB ops */ -- cgit v1.2.3 From 3d76a4d3d4e591af3e789698affaad88a5a8e8ab Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 19 Jan 2023 14:15:26 -0800 Subject: bpf: XDP metadata RX kfuncs Define a new kfunc set (xdp_metadata_kfunc_ids) which implements all possible XDP metadata kfuncs. Not all devices have to implement them. If kfunc is not supported by the target device, the default implementation is called instead. The verifier, at load time, replaces a call to the generic kfunc with a call to the per-device one. Per-device kfunc pointers are stored in separate struct xdp_metadata_ops. 
Cc: John Fastabend Cc: David Ahern Cc: Martin KaFai Lau Cc: Jakub Kicinski Cc: Willem de Bruijn Cc: Jesper Dangaard Brouer Cc: Anatoly Burakov Cc: Alexander Lobakin Cc: Magnus Karlsson Cc: Maryam Tahhan Cc: xdp-hints@xdp-project.net Cc: netdev@vger.kernel.org Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230119221536.3349901-8-sdf@google.com Signed-off-by: Martin KaFai Lau --- include/net/xdp.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/net') diff --git a/include/net/xdp.h b/include/net/xdp.h index 55dbc68bfffc..91292aa13bc0 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -409,4 +409,25 @@ void xdp_attachment_setup(struct xdp_attachment_info *info, #define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE +#define XDP_METADATA_KFUNC_xxx \ + XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \ + bpf_xdp_metadata_rx_timestamp) \ + XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \ + bpf_xdp_metadata_rx_hash) \ + +enum { +#define XDP_METADATA_KFUNC(name, _) name, +XDP_METADATA_KFUNC_xxx +#undef XDP_METADATA_KFUNC +MAX_XDP_METADATA_KFUNC, +}; + +#ifdef CONFIG_NET +u32 bpf_xdp_metadata_kfunc_id(int id); +bool bpf_dev_bound_kfunc_id(u32 btf_id); +#else +static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; } +static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; } +#endif + #endif /* __LINUX_NET_XDP_H__ */ -- cgit v1.2.3 From 94ecc5ca4dbf1f01bae6e32f5cd88c0fc5dc3cc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 19 Jan 2023 14:15:33 -0800 Subject: xsk: Add cb area to struct xdp_buff_xsk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an area after the xdp_buff in struct xdp_buff_xsk that drivers can use to stash extra information to use in metadata kfuncs. The maximum size of 24 bytes means the full xdp_buff_xsk structure will take up exactly two cache lines (with the cb field spanning both). 
Also add a macro drivers can use to check their own wrapping structs against the available size. Cc: John Fastabend Cc: David Ahern Cc: Martin KaFai Lau Cc: Jakub Kicinski Cc: Willem de Bruijn Cc: Jesper Dangaard Brouer Cc: Anatoly Burakov Cc: Alexander Lobakin Cc: Magnus Karlsson Cc: Maryam Tahhan Cc: xdp-hints@xdp-project.net Cc: netdev@vger.kernel.org Suggested-by: Jakub Kicinski Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230119221536.3349901-15-sdf@google.com Signed-off-by: Martin KaFai Lau --- include/net/xsk_buff_pool.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index f787c3f524b0..3e952e569418 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -19,8 +19,11 @@ struct xdp_sock; struct device; struct page; +#define XSK_PRIV_MAX 24 + struct xdp_buff_xsk { struct xdp_buff xdp; + u8 cb[XSK_PRIV_MAX]; dma_addr_t dma; dma_addr_t frame_dma; struct xsk_buff_pool *pool; @@ -28,6 +31,8 @@ struct xdp_buff_xsk { struct list_head free_list_node; }; +#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb)) + struct xsk_dma_map { dma_addr_t *dma_pages; struct device *dev; -- cgit v1.2.3 From 90317bcdbd337b9e88f253650f6ab9dfe667be64 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 23 Jan 2023 18:47:09 +0100 Subject: ipv6: Make ip6_route_output_flags_noref() static. This function is only used in net/ipv6/route.c and has no reason to be visible outside of it. 
Signed-off-by: Guillaume Nault Reviewed-by: David Ahern Link: https://lore.kernel.org/r/50706db7f675e40b3594d62011d9363dce32b92e.1674495822.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/ip6_route.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 035d61d50a98..81ee387a1fc4 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -84,10 +84,6 @@ struct dst_entry *ip6_route_input_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags); -struct dst_entry *ip6_route_output_flags_noref(struct net *net, - const struct sock *sk, - struct flowi6 *fl6, int flags); - struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags); -- cgit v1.2.3 From 89e7d2ba61b742a7525ff06ea4d4378c4a5560d0 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 23 Jan 2023 19:17:48 +0100 Subject: net/ism: Add new API for client registration Add a new API that allows other drivers to concurrently access ISM devices. To do so, we introduce a new API that allows other modules to register for ISM device usage. Furthermore, we move the GID to struct ism, where it belongs conceptually, and rename and relocate struct smcd_event to struct ism_event. This is the first part of a bigger overhaul of the interfaces between SMC and ISM. Signed-off-by: Stefan Raspl Signed-off-by: Jan Karcher Signed-off-by: Wenjia Zhang Signed-off-by: David S. 
Miller --- include/net/smc.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/smc.h b/include/net/smc.h index c926d3313e05..98689b16b841 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -15,6 +15,7 @@ #include #include #include +#include "linux/ism.h" struct sock; @@ -48,14 +49,6 @@ struct smcd_dmb { #define ISM_ERROR 0xFFFF -struct smcd_event { - u32 type; - u32 code; - u64 tok; - u64 time; - u64 info; -}; - struct smcd_dev; struct smcd_ops { @@ -100,6 +93,6 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, int smcd_register_dev(struct smcd_dev *smcd); void smcd_unregister_dev(struct smcd_dev *smcd); void smcd_free_dev(struct smcd_dev *smcd); -void smcd_handle_event(struct smcd_dev *dev, struct smcd_event *event); +void smcd_handle_event(struct smcd_dev *dev, struct ism_event *event); void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit, u16 dmbemask); #endif /* _SMC_H */ -- cgit v1.2.3 From 8747716f3942a610efdd12e3655df47269c268ac Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 23 Jan 2023 19:17:49 +0100 Subject: net/smc: Register SMC-D as ISM client Register the smc module with the new ism device driver API. This is the second part of a bigger overhaul of the interfaces between SMC and ISM. Signed-off-by: Stefan Raspl Signed-off-by: Jan Karcher Signed-off-by: Wenjia Zhang Signed-off-by: David S. 
Miller --- include/net/smc.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/smc.h b/include/net/smc.h index 98689b16b841..151aa54d9ad2 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -90,9 +90,6 @@ struct smcd_dev { struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, const struct smcd_ops *ops, int max_dmbs); -int smcd_register_dev(struct smcd_dev *smcd); -void smcd_unregister_dev(struct smcd_dev *smcd); void smcd_free_dev(struct smcd_dev *smcd); -void smcd_handle_event(struct smcd_dev *dev, struct ism_event *event); -void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit, u16 dmbemask); + #endif /* _SMC_H */ -- cgit v1.2.3 From 9de4df7b6be1cfca500f8ba21137d53eec45418a Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 23 Jan 2023 19:17:50 +0100 Subject: net/smc: Separate SMC-D and ISM APIs We separate the code implementing the struct smcd_ops API in the ISM device driver from the functions that may be used by other exploiters of ISM devices. Note: We start out small, and don't offer the whole breadth of the ISM device for public use, as many functions are specific to or likely only ever used in the context of SMC-D. This is the third part of a bigger overhaul of the interfaces between SMC and ISM. Signed-off-by: Stefan Raspl Signed-off-by: Jan Karcher Signed-off-by: Wenjia Zhang Signed-off-by: David S. 
Miller --- include/net/smc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/smc.h b/include/net/smc.h index 151aa54d9ad2..d5f8f18169d7 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -66,14 +66,15 @@ struct smcd_ops { bool sf, unsigned int offset, void *data, unsigned int size); u8* (*get_system_eid)(void); + u64 (*get_local_gid)(struct smcd_dev *dev); u16 (*get_chid)(struct smcd_dev *dev); }; struct smcd_dev { const struct smcd_ops *ops; struct device dev; + struct ism_dev *ism; void *priv; - u64 local_gid; struct list_head list; spinlock_t lock; struct smc_connection **conn; -- cgit v1.2.3 From 820f21009f1bc7a69e28752f6c6d9544401ca526 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 23 Jan 2023 19:17:51 +0100 Subject: s390/ism: Consolidate SMC-D-related code The ism module had SMC-D-specific code sprinkled across the entire module. We are now consolidating the SMC-D-specific parts into the latter parts of the module, so it becomes more clear what code is intended for use with ISM, and which parts are glue code for usage in the context of SMC-D. This is the fourth part of a bigger overhaul of the interfaces between SMC and ISM. Signed-off-by: Stefan Raspl Signed-off-by: Jan Karcher Signed-off-by: Wenjia Zhang Signed-off-by: David S. 
Miller --- include/net/smc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/smc.h b/include/net/smc.h index d5f8f18169d7..556b96c12279 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -50,11 +50,13 @@ struct smcd_dmb { #define ISM_ERROR 0xFFFF struct smcd_dev; +struct ism_client; struct smcd_ops { int (*query_remote_gid)(struct smcd_dev *dev, u64 rgid, u32 vid_valid, u32 vid); - int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb); + int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb, + struct ism_client *client); int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb); int (*add_vlan_id)(struct smcd_dev *dev, u64 vlan_id); int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id); @@ -73,7 +75,6 @@ struct smcd_ops { struct smcd_dev { const struct smcd_ops *ops; struct device dev; - struct ism_dev *ism; void *priv; struct list_head list; spinlock_t lock; -- cgit v1.2.3 From 8c81ba20349daf9f7e58bb05a0c12f4b71813a30 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 23 Jan 2023 19:17:52 +0100 Subject: net/smc: De-tangle ism and smc device initialization The struct device for ISM devices was part of struct smcd_dev. Move to struct ism_dev, provide a new API call in struct smcd_ops, and convert existing SMCD code accordingly. Furthermore, remove struct smcd_dev from struct ism_dev. This is the final part of a bigger overhaul of the interfaces between SMC and ISM. Signed-off-by: Stefan Raspl Signed-off-by: Jan Karcher Signed-off-by: Wenjia Zhang Signed-off-by: David S. 
Miller --- include/net/smc.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/smc.h b/include/net/smc.h index 556b96c12279..597cb9381182 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -70,11 +70,11 @@ struct smcd_ops { u8* (*get_system_eid)(void); u64 (*get_local_gid)(struct smcd_dev *dev); u16 (*get_chid)(struct smcd_dev *dev); + struct device* (*get_dev)(struct smcd_dev *dev); }; struct smcd_dev { const struct smcd_ops *ops; - struct device dev; void *priv; struct list_head list; spinlock_t lock; @@ -90,8 +90,4 @@ struct smcd_dev { u8 going_away : 1; }; -struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, - const struct smcd_ops *ops, int max_dmbs); -void smcd_free_dev(struct smcd_dev *smcd); - #endif /* _SMC_H */ -- cgit v1.2.3 From 91d0b78c5177f3e42a4d8738af8ac19c3a90d002 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 24 Jan 2023 14:36:43 +0100 Subject: inet: Add IP_LOCAL_PORT_RANGE socket option Users who want to share a single public IP address for outgoing connections between several hosts traditionally reach for SNAT. However, SNAT requires state keeping on the node(s) performing the NAT. A stateless alternative exists, where a single IP address used for egress can be shared between several hosts by partitioning the available ephemeral port range. In such a setup: 1. Each host gets assigned a disjoint range of ephemeral ports. 2. Applications open connections from the host-assigned port range. 3. Return traffic gets routed to the host based on both, the destination IP and the destination port. An application which wants to open an outgoing connection (connect) from a given port range today can choose between two solutions: 1. Manually pick the source port by bind()'ing to it before connect()'ing the socket. This approach has a couple of downsides: a) Search for a free port has to be implemented in the user-space. 
If the chosen 4-tuple happens to be busy, the application needs to retry from a different local port number. Detecting if 4-tuple is busy can be either easy (TCP) or hard (UDP). In TCP case, the application simply has to check if connect() returned an error (EADDRNOTAVAIL). That is assuming that the local port sharing was enabled (REUSEADDR) by all the sockets. # Assume desired local port range is 60_000-60_511 s = socket(AF_INET, SOCK_STREAM) s.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1) s.bind(("192.0.2.1", 60_000)) s.connect(("1.1.1.1", 53)) # Fails only if 192.0.2.1:60000 -> 1.1.1.1:53 is busy # Application must retry with another local port In case of UDP, the network stack allows binding more than one socket to the same 4-tuple, when local port sharing is enabled (REUSEADDR). Hence detecting the conflict is much harder and involves querying sock_diag and toggling the REUSEADDR flag [1]. b) For TCP, bind()-ing to a port within the ephemeral port range means that no connecting sockets, that is those which leave it to the network stack to find a free local port at connect() time, can use this port. IOW, the bind hash bucket tb->fastreuse will be 0 or 1, and the port will be skipped during the free port search at connect() time. 2. Isolate the app in a dedicated netns and use the per-netns ip_local_port_range sysctl to adjust the ephemeral port range bounds. The per-netns setting affects all sockets, so this approach can be used only if: - there is just one egress IP address, or - the desired egress port range is the same for all egress IP addresses used by the application. For TCP, this approach avoids the downsides of (1).
Free port search and 4-tuple conflict detection is done by the network stack: system("sysctl -w net.ipv4.ip_local_port_range='60000 60511'") s = socket(AF_INET, SOCK_STREAM) s.setsockopt(SOL_IP, IP_BIND_ADDRESS_NO_PORT, 1) s.bind(("192.0.2.1", 0)) s.connect(("1.1.1.1", 53)) # Fails if all 4-tuples 192.0.2.1:60000-60511 -> 1.1.1.1:53 are busy For UDP this approach has limited applicability. Setting the IP_BIND_ADDRESS_NO_PORT socket option does not result in local source port being shared with other connected UDP sockets. Hence relying on the network stack to find a free source port, limits the number of outgoing UDP flows from a single IP address down to the number of available ephemeral ports. To put it another way, partitioning the ephemeral port range between hosts using the existing Linux networking API is cumbersome. To address this use case, add a new socket option at the SOL_IP level, named IP_LOCAL_PORT_RANGE. The new option can be used to clamp down the ephemeral port range for each socket individually. The option can be used only to narrow down the per-netns local port range. If the per-socket range lies outside of the per-netns range, the latter takes precedence. UAPI-wise, the low and high range bounds are passed to the kernel as a pair of u16 values in host byte order packed into a u32. This avoids pointer passing. PORT_LO = 40_000 PORT_HI = 40_511 s = socket(AF_INET, SOCK_STREAM) v = struct.pack("I", PORT_HI << 16 | PORT_LO) s.setsockopt(SOL_IP, IP_LOCAL_PORT_RANGE, v) s.bind(("127.0.0.1", 0)) s.getsockname() # Local address between ("127.0.0.1", 40_000) and ("127.0.0.1", 40_511), # if there is a free port. EADDRINUSE otherwise. 
[1] https://github.com/cloudflare/cloudflare-blog/blob/232b432c1d57/2022-02-connectx/connectx.py#L116 Reviewed-by: Marek Majkowski Reviewed-by: Kuniyuki Iwashima Signed-off-by: Jakub Sitnicki Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/inet_sock.h | 4 ++++ include/net/ip.h | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index bf5654ce711e..51857117ac09 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -249,6 +249,10 @@ struct inet_sock { __be32 mc_addr; struct ip_mc_socklist __rcu *mc_list; struct inet_cork_full cork; + struct { + __u16 lo; + __u16 hi; + } local_port_range; }; #define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ diff --git a/include/net/ip.h b/include/net/ip.h index 144bdfbb25af..c3fffaa92d6e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -340,7 +340,8 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o } \ } -void inet_get_local_port_range(struct net *net, int *low, int *high); +void inet_get_local_port_range(const struct net *net, int *low, int *high); +void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high); #ifdef CONFIG_SYSCTL static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port) -- cgit v1.2.3 From 68f4eae781dd25aca2eb84ca2279663689db8d19 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Jan 2023 23:14:16 -0800 Subject: net: checksum: drop the linux/uaccess.h include net/checksum.h pulls in linux/uaccess.h which is large. In the x86 header the include seems to not be needed at all. ARM on the other hand does not include uaccess.h, even tho it calls access_ok(). In the generic implementation guard the include of linux/uaccess.h with the same condition as the code that needs it. With this change pre-processed net/checksum.h shrinks on x86 from 30616 lines to just 1193. 
Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/checksum.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/checksum.h b/include/net/checksum.h index 6bc783b7a06c..1338cb92c8e7 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -18,8 +18,10 @@ #include #include #include -#include #include +#if !defined(_HAVE_ARCH_COPY_AND_CSUM_FROM_USER) || !defined(HAVE_CSUM_COPY_USER) +#include +#endif #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER static __always_inline -- cgit v1.2.3 From 21bf73158fe7ae8a3d55618e58edc958f84739a8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Jan 2023 23:14:24 -0800 Subject: net: remove unnecessary includes from net/flow.h This file is included by a lot of other commonly included headers, it doesn't need socket.h or flow_dissector.h. This reduces the size of this file after pre-processing from 28165 to 4663. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/flow.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index 2f0da4f0318b..bb8651a6eaa7 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -8,12 +8,13 @@ #ifndef _NET_FLOW_H #define _NET_FLOW_H -#include #include #include -#include +#include #include +struct flow_keys; + /* * ifindex generation is per-net namespace, and loopback is * always the 1st device in ns (see net_dev_init), thus any -- cgit v1.2.3 From 020dd127a3fef9dfc6c03cd5d1c231d5e55d7632 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 26 Jan 2023 08:58:29 +0100 Subject: devlink: make devlink_param_register/unregister static There is no user outside the devlink code, so remove the export and make the functions static. Move them before callers to avoid forward declarations. Signed-off-by: Jiri Pirko Reviewed-by: Jakub Kicinski Reviewed-by: Jacob Keller Signed-off-by: David S. 
Miller --- include/net/devlink.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 608a0c198be8..cf74b6391896 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1773,10 +1773,6 @@ int devlink_params_register(struct devlink *devlink, void devlink_params_unregister(struct devlink *devlink, const struct devlink_param *params, size_t params_count); -int devlink_param_register(struct devlink *devlink, - const struct devlink_param *param); -void devlink_param_unregister(struct devlink *devlink, - const struct devlink_param *param); int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, union devlink_param_value *init_val); int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, -- cgit v1.2.3 From 85fe0b324c830ac671b811efc70ea80c3dcb2390 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 26 Jan 2023 08:58:33 +0100 Subject: devlink: make devlink_param_driverinit_value_set() return void devlink_param_driverinit_value_set() currently returns int with possible error, but no user is checking it anyway. The only reason for a fail is a driver bug. So convert the function to return void and put WARN_ONs on error paths. Signed-off-by: Jiri Pirko Reviewed-by: Jakub Kicinski Reviewed-by: Jacob Keller Signed-off-by: David S. 
Miller --- include/net/devlink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index cf74b6391896..e0d773dfa637 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1775,8 +1775,8 @@ void devlink_params_unregister(struct devlink *devlink, size_t params_count); int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, union devlink_param_value *init_val); -int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, - union devlink_param_value init_val); +void devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, + union devlink_param_value init_val); void devlink_param_value_changed(struct devlink *devlink, u32 param_id); struct devlink_region *devl_region_create(struct devlink *devlink, const struct devlink_region_ops *ops, -- cgit v1.2.3 From 075935f0ae0fbbe469a911d685f6cc59de892700 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 26 Jan 2023 08:58:35 +0100 Subject: devlink: protect devlink param list by instance lock Commit 1d18bb1a4ddd ("devlink: allow registering parameters after the instance") as the subject implies introduced possibility to register devlink params even for already registered devlink instance. This is a bit problematic, as the consistency or params list was originally secured by the fact it is static during devlink lifetime. So in order to protect the params list, take devlink instance lock during the params operations. Introduce unlocked function variants and use them in drivers in locked context. Put lock assertions to appropriate places. Signed-off-by: Jiri Pirko Reviewed-by: Jakub Kicinski Reviewed-by: Jacob Keller Reviewed-by: Ido Schimmel Reviewed-by: Simon Horman Tested-by: Simon Horman Signed-off-by: David S. 
Miller --- include/net/devlink.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index e0d773dfa637..ab654cf552b8 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1767,17 +1767,23 @@ void devl_resource_occ_get_unregister(struct devlink *devlink, void devlink_resource_occ_get_unregister(struct devlink *devlink, u64 resource_id); +int devl_params_register(struct devlink *devlink, + const struct devlink_param *params, + size_t params_count); int devlink_params_register(struct devlink *devlink, const struct devlink_param *params, size_t params_count); +void devl_params_unregister(struct devlink *devlink, + const struct devlink_param *params, + size_t params_count); void devlink_params_unregister(struct devlink *devlink, const struct devlink_param *params, size_t params_count); -int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, - union devlink_param_value *init_val); -void devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, - union devlink_param_value init_val); -void devlink_param_value_changed(struct devlink *devlink, u32 param_id); +int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id, + union devlink_param_value *init_val); +void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id, + union devlink_param_value init_val); +void devl_param_value_changed(struct devlink *devlink, u32 param_id); struct devlink_region *devl_region_create(struct devlink *devlink, const struct devlink_region_ops *ops, u32 region_max_snapshots, -- cgit v1.2.3 From 70eb3911d80f548a76fb9a40c8a3fd93ac061a42 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Jan 2023 08:45:06 +0100 Subject: net: netlink: recommend policy range validation For large ranges (outside of s16) the documentation currently recommends open-coding the validation, but it's better to use the 
NLA_POLICY_FULL_RANGE() or NLA_POLICY_FULL_RANGE_SIGNED() policy validation instead; recommend that. Signed-off-by: Johannes Berg Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/20230127084506.09f280619d64.I5dece85f06efa8ab0f474ca77df9e26d3553d4ab@changeid Signed-off-by: Jakub Kicinski --- include/net/netlink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index 6e1e670e06bc..b12cd957abb4 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -276,7 +276,8 @@ enum nla_policy_validation { * Note that in the interest of code simplicity and * struct size both limits are s16, so you cannot * enforce a range that doesn't fall within the range - * of s16 - do that as usual in the code instead. + * of s16 - do that using the NLA_POLICY_FULL_RANGE() + * or NLA_POLICY_FULL_RANGE_SIGNED() macros instead. * Use the NLA_POLICY_MIN(), NLA_POLICY_MAX() and * NLA_POLICY_RANGE() macros. * NLA_U8, -- cgit v1.2.3 From 9bc114504b07207d671593f6f6d787d55dcf91bd Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 25 Jan 2023 11:29:22 +0100 Subject: ieee802154: Add support for user beaconing requests Parse user requests for sending beacons, start sending beacons at a regular pace. If needed, the pace can be updated with a new request. The process can also be interrupted at any moment. The page and channel must be changed beforehand if needed. Interval orders above 14 are reserved to tell a device it must answer BEACON_REQ coming from another device as part of an active scan procedure and this is not yet supported. A netlink "beacon request" structure is created to list the requirements. Mac layers may now implement the ->send_beacons() and ->stop_beacons() hooks.
Co-developed-by: David Girault Signed-off-by: David Girault Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20230125102923.135465-2-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- include/net/cfg802154.h | 23 +++++++++++++++++++++++ include/net/nl802154.h | 3 +++ 2 files changed, 26 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 0b0f81a945b6..0c2778a836db 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -19,6 +19,7 @@ struct wpan_phy; struct wpan_phy_cca; struct cfg802154_scan_request; +struct cfg802154_beacon_request; #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL struct ieee802154_llsec_device_key; @@ -72,6 +73,10 @@ struct cfg802154_ops { struct cfg802154_scan_request *request); int (*abort_scan)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev); + int (*send_beacons)(struct wpan_phy *wpan_phy, + struct cfg802154_beacon_request *request); + int (*stop_beacons)(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev); #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL void (*get_llsec_table)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, @@ -314,6 +319,24 @@ struct cfg802154_scan_request { struct wpan_phy *wpan_phy; }; +/** + * struct cfg802154_beacon_request - Beacon request descriptor + * + * @interval: interval n between sendings, in multiple order of the super frame + * duration: aBaseSuperframeDuration * (2^n) unless the interval + * order is greater or equal to 15, in this case beacons won't be + * passively sent out at a fixed rate but instead inform the device + * that it should answer beacon requests as part of active scan + * procedures + * @wpan_dev: the concerned wpan device + * @wpan_phy: the wpan phy this was for + */ +struct cfg802154_beacon_request { + u8 interval; + struct wpan_dev *wpan_dev; + struct wpan_phy *wpan_phy; +}; + /** * struct cfg802154_mac_pkt - MAC packet descriptor (beacon/command) * @node: 
MAC packets to process list member diff --git a/include/net/nl802154.h b/include/net/nl802154.h index c267fa1c5aac..8cd9d141f5af 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -76,6 +76,8 @@ enum nl802154_commands { NL802154_CMD_TRIGGER_SCAN, NL802154_CMD_ABORT_SCAN, NL802154_CMD_SCAN_DONE, + NL802154_CMD_SEND_BEACONS, + NL802154_CMD_STOP_BEACONS, /* add new commands above here */ @@ -144,6 +146,7 @@ enum nl802154_attrs { NL802154_ATTR_SCAN_MEAN_PRF, NL802154_ATTR_SCAN_DURATION, NL802154_ATTR_SCAN_DONE_REASON, + NL802154_ATTR_BEACON_INTERVAL, /* add attributes here, update the policy in nl802154.c */ -- cgit v1.2.3 From 3accf4762734a69ebd03cba989249c78ac7dfc7e Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 25 Jan 2023 11:29:23 +0100 Subject: mac802154: Handle basic beaconing Implement the core hooks in order to provide the softMAC layer support for sending beacons. Coordinators may be requested to send beacons in a beacon enabled PAN in order for the other devices around to self discover the available PANs automatically. Changing the channels is prohibited while a beacon operation is ongoing. The implementation uses a workqueue triggered at a certain interval depending on the symbol duration for the current channel and the interval order provided. Sending beacons in response to a BEACON_REQ frame (ie. answering active scans) is not yet supported. This initial patchset has no security support (llsec). 
Co-developed-by: David Girault Signed-off-by: David Girault Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20230125102923.135465-3-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- include/net/ieee802154_netdev.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/net') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 2f2196049a86..da8a3e648c7a 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -129,6 +129,13 @@ enum ieee802154_frame_version { IEEE802154_MULTIPURPOSE_STD = IEEE802154_2003_STD, }; +enum ieee802154_addressing_mode { + IEEE802154_NO_ADDRESSING, + IEEE802154_RESERVED, + IEEE802154_SHORT_ADDRESSING, + IEEE802154_EXTENDED_ADDRESSING, +}; + struct ieee802154_hdr { struct ieee802154_hdr_fc fc; u8 seq; @@ -137,6 +144,11 @@ struct ieee802154_hdr { struct ieee802154_sechdr sec; }; +struct ieee802154_beacon_frame { + struct ieee802154_hdr mhr; + struct ieee802154_beacon_hdr mac_pl; +}; + /* pushes hdr onto the skb. fields of hdr->fc that can be calculated from * the contents of hdr will be, and the actual value of those bits in * hdr->fc will be ignored. this includes the INTRA_PAN bit and the frame @@ -162,6 +174,10 @@ int ieee802154_hdr_peek_addrs(const struct sk_buff *skb, */ int ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr); +/* pushes a beacon frame into an skb */ +int ieee802154_beacon_push(struct sk_buff *skb, + struct ieee802154_beacon_frame *beacon); + int ieee802154_max_payload(const struct ieee802154_hdr *hdr); static inline int -- cgit v1.2.3 From fb8421a94c5613fee86e192bab0892ecb1d56e4c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 27 Jan 2023 16:50:42 +0100 Subject: devlink: remove devlink features Devlink features were introduced to disallow devlink reload calls of userspace before the devlink was fully initialized. 
The reason for this workaround was the fact that devlink reload was originally called without devlink instance lock held. However, with recent changes that converted devlink reload to be performed under devlink instance lock, this is redundant so remove devlink features entirely. Note that mlx5 used this to enable devlink reload conditionally only when device didn't act as multi port slave. Move the multi port check into mlx5_devlink_reload_down() callback alongside with the other checks preventing the device from reload in certain states. Signed-off-by: Jiri Pirko Reviewed-by: Jacob Keller Signed-off-by: David S. Miller --- include/net/devlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index ab654cf552b8..2e85a5970a32 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1645,7 +1645,7 @@ static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, { return devlink_alloc_ns(ops, priv_size, &init_net, dev); } -void devlink_set_features(struct devlink *devlink, u64 features); + int devl_register(struct devlink *devlink); void devl_unregister(struct devlink *devlink); void devlink_register(struct devlink *devlink); -- cgit v1.2.3 From 058a8f7f73aae1cc22b53fcefec031b9e391b54d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Jan 2023 10:58:30 -0500 Subject: net: add a couple of helpers for iph tot_len This patch adds three APIs to replace the iph->tot_len setting and getting in all places where IPv4 BIG TCP packets may reach, they will be used in the following patches. Note that iph_totlen() will be used when iph is not in linear data of the skb. 
Signed-off-by: Xin Long Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/route.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 6e92dd5bcd61..fe00b0a2e475 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -35,9 +35,6 @@ #include #include -/* IPv4 datagram length is stored into 16bit field (tot_len) */ -#define IP_MAX_MTU 0xFFFFU - #define RTO_ONLINK 0x01 #define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) -- cgit v1.2.3 From a13fbf5ed5b4fc9095f12e955ca3a59b5507ff01 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Jan 2023 10:58:34 -0500 Subject: netfilter: use skb_ip_totlen and iph_totlen There are also quite some places in netfilter that may process IPv4 TCP GSO packets, we need to replace them too. In length_mt(), we have to use u_int32_t/int to accept skb_ip_totlen() return value, otherwise it may overflow and mismatch. This change will also help us add selftest for IPv4 BIG TCP in the following patch. Note that we don't need to replace the one in tcpmss_tg4(), as it will return if there is data after tcphdr in tcpmss_mangle_packet(). The same in mangle_contents() in nf_nat_helper.c, it returns false when skb->len + extra > 65535 in enlarge_skb(). 
Signed-off-by: Xin Long Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/netfilter/nf_tables_ipv4.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index 112708f7a6b4..947973623dc7 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -29,7 +29,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) if (iph->ihl < 5 || iph->version != 4) return -1; - len = ntohs(iph->tot_len); + len = iph_totlen(pkt->skb, iph); thoff = iph->ihl * 4; if (pkt->skb->len < len) return -1; @@ -64,7 +64,7 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt) if (iph->ihl < 5 || iph->version != 4) goto inhdr_error; - len = ntohs(iph->tot_len); + len = iph_totlen(pkt->skb, iph); thoff = iph->ihl * 4; if (pkt->skb->len < len) { __IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INTRUNCATEDPKTS); -- cgit v1.2.3 From 52cf89f78c01bf39973f3e70d366921d70faff7a Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 31 Jan 2023 16:05:11 -0300 Subject: net/sched: transition act_pedit to rcu and percpu stats The software pedit action didn't get the same love as some of the other actions and it's still using spinlocks and shared stats in the datapath. Transition the action to rcu and percpu stats as this improves the action's performance dramatically on multiple cpu deployments. 
Reviewed-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- include/net/tc_act/tc_pedit.h | 81 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index 3e02709a1df6..83fe39931781 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -4,22 +4,29 @@ #include #include +#include struct tcf_pedit_key_ex { enum pedit_header_type htype; enum pedit_cmd cmd; }; -struct tcf_pedit { - struct tc_action common; - unsigned char tcfp_nkeys; - unsigned char tcfp_flags; - u32 tcfp_off_max_hint; +struct tcf_pedit_parms { struct tc_pedit_key *tcfp_keys; struct tcf_pedit_key_ex *tcfp_keys_ex; + u32 tcfp_off_max_hint; + unsigned char tcfp_nkeys; + unsigned char tcfp_flags; + struct rcu_head rcu; +}; + +struct tcf_pedit { + struct tc_action common; + struct tcf_pedit_parms __rcu *parms; }; #define to_pedit(a) ((struct tcf_pedit *)a) +#define to_pedit_parms(a) (rcu_dereference(to_pedit(a)->parms)) static inline bool is_tcf_pedit(const struct tc_action *a) { @@ -32,37 +39,81 @@ static inline bool is_tcf_pedit(const struct tc_action *a) static inline int tcf_pedit_nkeys(const struct tc_action *a) { - return to_pedit(a)->tcfp_nkeys; + struct tcf_pedit_parms *parms; + int nkeys; + + rcu_read_lock(); + parms = to_pedit_parms(a); + nkeys = parms->tcfp_nkeys; + rcu_read_unlock(); + + return nkeys; } static inline u32 tcf_pedit_htype(const struct tc_action *a, int index) { - if (to_pedit(a)->tcfp_keys_ex) - return to_pedit(a)->tcfp_keys_ex[index].htype; + u32 htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK; + struct tcf_pedit_parms *parms; + + rcu_read_lock(); + parms = to_pedit_parms(a); + if (parms->tcfp_keys_ex) + htype = parms->tcfp_keys_ex[index].htype; + rcu_read_unlock(); - return TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK; + return htype; } static inline u32 tcf_pedit_cmd(const 
struct tc_action *a, int index) { - if (to_pedit(a)->tcfp_keys_ex) - return to_pedit(a)->tcfp_keys_ex[index].cmd; + struct tcf_pedit_parms *parms; + u32 cmd = __PEDIT_CMD_MAX; - return __PEDIT_CMD_MAX; + rcu_read_lock(); + parms = to_pedit_parms(a); + if (parms->tcfp_keys_ex) + cmd = parms->tcfp_keys_ex[index].cmd; + rcu_read_unlock(); + + return cmd; } static inline u32 tcf_pedit_mask(const struct tc_action *a, int index) { - return to_pedit(a)->tcfp_keys[index].mask; + struct tcf_pedit_parms *parms; + u32 mask; + + rcu_read_lock(); + parms = to_pedit_parms(a); + mask = parms->tcfp_keys[index].mask; + rcu_read_unlock(); + + return mask; } static inline u32 tcf_pedit_val(const struct tc_action *a, int index) { - return to_pedit(a)->tcfp_keys[index].val; + struct tcf_pedit_parms *parms; + u32 val; + + rcu_read_lock(); + parms = to_pedit_parms(a); + val = parms->tcfp_keys[index].val; + rcu_read_unlock(); + + return val; } static inline u32 tcf_pedit_offset(const struct tc_action *a, int index) { - return to_pedit(a)->tcfp_keys[index].off; + struct tcf_pedit_parms *parms; + u32 off; + + rcu_read_lock(); + parms = to_pedit_parms(a); + off = parms->tcfp_keys[index].off; + rcu_read_unlock(); + + return off; } #endif /* __NET_TC_PED_H */ -- cgit v1.2.3 From e4d0fe71f59dc5137a2793ff7560730d80d1e1f4 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 1 Feb 2023 19:56:53 +0200 Subject: ipvs: avoid kfree_rcu without 2nd arg Avoid possible synchronize_rcu() as part of the kfree_rcu() call when 2nd arg is not provided.
Signed-off-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index c6c61100d244..6d71a5ff52df 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -461,6 +461,7 @@ void ip_vs_stats_free(struct ip_vs_stats *stats); /* Multiple chains processed in same tick */ struct ip_vs_est_tick_data { + struct rcu_head rcu_head; struct hlist_head chains[IPVS_EST_TICK_CHAINS]; DECLARE_BITMAP(present, IPVS_EST_TICK_CHAINS); DECLARE_BITMAP(full, IPVS_EST_TICK_CHAINS); -- cgit v1.2.3 From d3d854fd6a1d97157f790604e07f6386e8df8fe4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 1 Feb 2023 11:24:17 +0100 Subject: netdev-genl: create a simple family for netdev stuff Add a Netlink spec-compatible family for netdevs. This is a very simple implementation without much thought going into it. It allows us to reap all the benefits of Netlink specs, one can use the generic client to issue the commands: $ ./cli.py --spec netdev.yaml --dump dev_get [{'ifindex': 1, 'xdp-features': set()}, {'ifindex': 2, 'xdp-features': {'basic', 'ndo-xmit', 'redirect'}}, {'ifindex': 3, 'xdp-features': {'rx-sg'}}] the generic python library does not have flags-by-name support, yet, but we also don't have to carry strings in the messages, as user space can get the names from the spec. 
Acked-by: Jesper Dangaard Brouer Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Co-developed-by: Kumar Kartikeya Dwivedi Signed-off-by: Kumar Kartikeya Dwivedi Co-developed-by: Marek Majtyka Signed-off-by: Marek Majtyka Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/327ad9c9868becbe1e601b580c962549c8cd81f2.1675245258.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/net/xdp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/xdp.h b/include/net/xdp.h index 91292aa13bc0..8d1c86914f4c 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -7,6 +7,7 @@ #define __LINUX_NET_XDP_H__ #include /* skb_shared_info */ +#include /** * DOC: XDP RX-queue information @@ -43,6 +44,8 @@ enum xdp_mem_type { MEM_TYPE_MAX, }; +typedef u32 xdp_features_t; + /* XDP flags for ndo_xdp_xmit */ #define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */ #define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH -- cgit v1.2.3 From 66c0e13ad236c74ea88c7c1518f3cef7f372e3da Mon Sep 17 00:00:00 2001 From: Marek Majtyka Date: Wed, 1 Feb 2023 11:24:18 +0100 Subject: drivers: net: turn on XDP features A summary of the flags being set for various drivers is given below. Note that XDP_F_REDIRECT_TARGET and XDP_F_FRAG_TARGET are features that can be turned off and on at runtime. This means that these flags may be set and unset under RTNL lock protection by the driver. Hence, READ_ONCE must be used by code loading the flag value. Also, these flags are not used for synchronization against the availability of XDP resources on a device. It is merely a hint, and hence the read may race with the actual teardown of XDP resources on the device. This may change in the future, e.g. operations taking a reference on the XDP resources of the driver, and in turn inhibiting turning off this flag. However, for now, it can only be used as a hint to check whether device supports becoming a redirection target. 
Turn 'hw-offload' feature flag on for: - netronome (nfp) - netdevsim. Turn 'native' and 'zerocopy' features flags on for: - intel (i40e, ice, ixgbe, igc) - mellanox (mlx5). - stmmac - netronome (nfp) Turn 'native' features flags on for: - amazon (ena) - broadcom (bnxt) - freescale (dpaa, dpaa2, enetc) - funeth - intel (igb) - marvell (mvneta, mvpp2, octeontx2) - mellanox (mlx4) - mtk_eth_soc - qlogic (qede) - sfc - socionext (netsec) - ti (cpsw) - tap - tsnep - veth - xen - virtio_net. Turn 'basic' (tx, pass, aborted and drop) features flags on for: - netronome (nfp) - cavium (thunder) - hyperv. Turn 'redirect_target' feature flag on for: - amazon (ena) - broadcom (bnxt) - freescale (dpaa, dpaa2) - intel (i40e, ice, igb, ixgbe) - ti (cpsw) - marvell (mvneta, mvpp2) - sfc - socionext (netsec) - qlogic (qede) - mellanox (mlx5) - tap - veth - virtio_net - xen Reviewed-by: Gerhard Engleder Reviewed-by: Simon Horman Acked-by: Stanislav Fomichev Acked-by: Jakub Kicinski Co-developed-by: Kumar Kartikeya Dwivedi Signed-off-by: Kumar Kartikeya Dwivedi Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: Marek Majtyka Link: https://lore.kernel.org/r/3eca9fafb308462f7edb1f58e451d59209aa07eb.1675245258.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/net/xdp.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/xdp.h b/include/net/xdp.h index 8d1c86914f4c..d517bfac937b 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -428,9 +428,21 @@ MAX_XDP_METADATA_KFUNC, #ifdef CONFIG_NET u32 bpf_xdp_metadata_kfunc_id(int id); bool bpf_dev_bound_kfunc_id(u32 btf_id); +void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg); +void xdp_features_clear_redirect_target(struct net_device *dev); #else static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; } static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; } + +static inline void 
+xdp_features_set_redirect_target(struct net_device *dev, bool support_sg) +{ +} + +static inline void +xdp_features_clear_redirect_target(struct net_device *dev) +{ +} #endif #endif /* __LINUX_NET_XDP_H__ */ -- cgit v1.2.3 From 2798e36dc233a409a5d3f26f73029596dc504020 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Feb 2023 17:43:45 +0000 Subject: tcp: add TCP_MINTTL drop reason In the unlikely case incoming packets are dropped because of IP_MINTTL / IPV6_MINHOPCOUNT constraints... Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230201174345.2708943-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/dropreason.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/dropreason.h b/include/net/dropreason.h index 70539288f995..94bc3d5d8803 100644 --- a/include/net/dropreason.h +++ b/include/net/dropreason.h @@ -71,6 +71,7 @@ FN(DUP_FRAG) \ FN(FRAG_REASM_TIMEOUT) \ FN(FRAG_TOO_FAR) \ + FN(TCP_MINTTL) \ FNe(MAX) /** @@ -312,6 +313,11 @@ enum skb_drop_reason { * (/proc/sys/net/ipv4/ipfrag_max_dist) */ SKB_DROP_REASON_FRAG_TOO_FAR, + /** + * @SKB_DROP_REASON_TCP_MINTTL: ipv4 ttl or ipv6 hoplimit below + * the threshold (IP_MINTTL or IPV6_MINHOPCOUNT). + */ + SKB_DROP_REASON_TCP_MINTTL, /** * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be * used as a real 'reason' -- cgit v1.2.3 From 8f84780b84d645d6e35467f4a6f3236b20d7f4b2 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 1 Feb 2023 17:30:56 +0100 Subject: netfilter: flowtable: allow unidirectional rules Modify flow table offload to support unidirectional connections by extending enum nf_flow_flags with new "NF_FLOW_HW_BIDIRECTIONAL" flag. Only offload reply direction when the flag is set. This infrastructure change is necessary to support offloading UDP NEW connections in original direction in following patches in series. Signed-off-by: Vlad Buslov Signed-off-by: David S. 
Miller --- include/net/netfilter/nf_flow_table.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index cd982f4a0f50..88ab98ab41d9 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -164,6 +164,7 @@ enum nf_flow_flags { NF_FLOW_HW_DYING, NF_FLOW_HW_DEAD, NF_FLOW_HW_PENDING, + NF_FLOW_HW_BIDIRECTIONAL, }; enum flow_offload_type { -- cgit v1.2.3 From 1a441a9b8be8849957a01413a144f84932c324cb Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 1 Feb 2023 17:30:57 +0100 Subject: netfilter: flowtable: cache info of last offload Modify flow table offload to cache the last ct info status that was passed to the driver offload callbacks by extending enum nf_flow_flags with new "NF_FLOW_HW_ESTABLISHED" flag. Set the flag if ctinfo was 'established' during last act_ct meta actions fill call. This infrastructure change is necessary to optimize promoting of UDP connections from 'new' to 'established' in following patches in this series. Signed-off-by: Vlad Buslov Signed-off-by: David S. 
Miller --- include/net/netfilter/nf_flow_table.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 88ab98ab41d9..ebb28ec5b6fa 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -57,7 +57,7 @@ struct nf_flowtable_type { struct net_device *dev, enum flow_block_command cmd); int (*action)(struct net *net, - const struct flow_offload *flow, + struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule); void (*free)(struct nf_flowtable *ft); @@ -165,6 +165,7 @@ enum nf_flow_flags { NF_FLOW_HW_DEAD, NF_FLOW_HW_PENDING, NF_FLOW_HW_BIDIRECTIONAL, + NF_FLOW_HW_ESTABLISHED, }; enum flow_offload_type { @@ -313,10 +314,10 @@ void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable); int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, struct net_device *dev, enum flow_block_command cmd); -int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow, +int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule); -int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, +int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule); -- cgit v1.2.3 From 6579f5bacc2c4cbc5ef6abb45352416939d1f844 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Feb 2023 09:41:00 +0000 Subject: raw: use net_hash_mix() in hash function Some applications seem to rely on RAW sockets. If they use private netns, we can avoid piling all RAW sockets bound to a given protocol into a single bucket. Also place (struct raw_hashinfo).lock into its own cache line to limit false sharing. 
Alternative would be to have per-netns hashtables, but this seems too expensive for most netns where RAW sockets are not used. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/raw.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/raw.h b/include/net/raw.h index 5e665934ebc7..2c004c20ed99 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -15,6 +15,8 @@ #include #include +#include +#include #include extern struct proto raw_prot; @@ -29,13 +31,20 @@ int raw_local_deliver(struct sk_buff *, int); int raw_rcv(struct sock *, struct sk_buff *); -#define RAW_HTABLE_SIZE MAX_INET_PROTOS +#define RAW_HTABLE_LOG 8 +#define RAW_HTABLE_SIZE (1U << RAW_HTABLE_LOG) struct raw_hashinfo { spinlock_t lock; - struct hlist_nulls_head ht[RAW_HTABLE_SIZE]; + + struct hlist_nulls_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned; }; +static inline u32 raw_hashfunc(const struct net *net, u32 proto) +{ + return hash_32(net_hash_mix(net) ^ proto, RAW_HTABLE_LOG); +} + static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo) { int i; -- cgit v1.2.3 From 542bcea4be866b14b3a5c8e90773329066656c43 Mon Sep 17 00:00:00 2001 From: Qingfang DENG Date: Fri, 3 Feb 2023 09:16:11 +0800 Subject: net: page_pool: use in_softirq() instead We use BH context only for synchronization, so we don't care if it's actually serving softirq or not. As a side note, in case of threaded NAPI, in_serving_softirq() will return false because it's in process context with BH off, making page_pool_recycle_in_cache() unreachable. Signed-off-by: Qingfang DENG Tested-by: Felix Fietkau Signed-off-by: David S. 
Miller --- include/net/page_pool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 813c93499f20..34bf531ffc8d 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -386,7 +386,7 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) static inline void page_pool_ring_lock(struct page_pool *pool) __acquires(&pool->ring.producer_lock) { - if (in_serving_softirq()) + if (in_softirq()) spin_lock(&pool->ring.producer_lock); else spin_lock_bh(&pool->ring.producer_lock); @@ -395,7 +395,7 @@ static inline void page_pool_ring_lock(struct page_pool *pool) static inline void page_pool_ring_unlock(struct page_pool *pool) __releases(&pool->ring.producer_lock) { - if (in_serving_softirq()) + if (in_softirq()) spin_unlock(&pool->ring.producer_lock); else spin_unlock_bh(&pool->ring.producer_lock); -- cgit v1.2.3 From 9adafe2b85460be5b9bed9b6f6597526e7d4f7c5 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 4 Feb 2023 15:52:57 +0200 Subject: net/sched: move struct tc_mqprio_qopt_offload from pkt_cls.h to pkt_sched.h Since mqprio is a scheduler and not a classifier, move its offload structure to pkt_sched.h, where struct tc_taprio_qopt_offload also lies. Also update some header inclusions in drivers that access this structure, to the best of my abilities. Cc: Igor Russkikh Cc: Yisen Zhuang Cc: Salil Mehta Cc: Jesse Brandeburg Cc: Tony Nguyen Cc: Thomas Petazzoni Cc: Saeed Mahameed Cc: Leon Romanovsky Cc: Horatiu Vultur Cc: Lars Povlsen Cc: Steen Hegelund Cc: Daniel Machon Cc: UNGLinuxDriver@microchip.com Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 10 ---------- include/net/pkt_sched.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 4cabb32a2ad9..cd410a87517b 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -788,16 +788,6 @@ struct tc_cls_bpf_offload { bool exts_integrated; }; -struct tc_mqprio_qopt_offload { - /* struct tc_mqprio_qopt must always be the first element */ - struct tc_mqprio_qopt qopt; - u16 mode; - u16 shaper; - u32 flags; - u64 min_rate[TC_QOPT_MAX_QUEUE]; - u64 max_rate[TC_QOPT_MAX_QUEUE]; -}; - /* This structure holds cookie structure that is passed from user * to the kernel for actions and classifiers */ diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 38207873eda6..6c5e64e0a0bb 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -160,6 +160,16 @@ struct tc_etf_qopt_offload { s32 queue; }; +struct tc_mqprio_qopt_offload { + /* struct tc_mqprio_qopt must always be the first element */ + struct tc_mqprio_qopt qopt; + u16 mode; + u16 shaper; + u32 flags; + u64 min_rate[TC_QOPT_MAX_QUEUE]; + u64 max_rate[TC_QOPT_MAX_QUEUE]; +}; + struct tc_taprio_caps { bool supports_queue_max_sdu:1; }; -- cgit v1.2.3 From 19278d76915d6b28269e1af1d7b6754c16576572 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 4 Feb 2023 15:52:59 +0200 Subject: net/sched: mqprio: allow offloading drivers to request queue count validation mqprio_parse_opt() proudly has a comment: /* If hardware offload is requested we will leave it to the device * to either populate the queue counts itself or to validate the * provided queue counts. */ Unfortunately some device drivers did not get this memo, and don't validate the queue counts, or populate them. 
In case drivers don't want to populate the queue counts themselves, just act upon the requested configuration, it makes sense to introduce a tc capability, and make mqprio query it, so they don't have to do the validation themselves. Signed-off-by: Vladimir Oltean Reviewed-by: Jacob Keller Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 6c5e64e0a0bb..02e3ccfbc7d1 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -160,6 +160,10 @@ struct tc_etf_qopt_offload { s32 queue; }; +struct tc_mqprio_caps { + bool validate_queue_counts:1; +}; + struct tc_mqprio_qopt_offload { /* struct tc_mqprio_qopt must always be the first element */ struct tc_mqprio_qopt qopt; -- cgit v1.2.3 From 09c794c0a88d959a603ec49b23df8e6bba68e7b7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 4 Feb 2023 15:53:03 +0200 Subject: net/sched: taprio: pass mqprio queue configuration to ndo_setup_tc() The taprio qdisc does not currently pass the mqprio queue configuration down to the offloading device driver. So the driver cannot act upon the TXQ counts/offsets per TC, or upon the prio->tc map. It was probably assumed that the driver only wants to offload num_tc (see TC_MQPRIO_HW_OFFLOAD_TCS), which it can get from netdev_get_num_tc(), but there's clearly more to the mqprio configuration than that. I've considered 2 mechanisms to remedy that. First is to pass a struct tc_mqprio_qopt_offload as part of the tc_taprio_qopt_offload. The second is to make taprio actually call TC_SETUP_QDISC_MQPRIO, *in addition to* TC_SETUP_QDISC_TAPRIO. The difference is that in the first case, existing drivers (offloading or not) all ignore taprio's mqprio portion currently, whereas in the second case, we could control whether to call TC_SETUP_QDISC_MQPRIO, based on a new capability. 
The question is which approach would be better. I'm afraid that calling TC_SETUP_QDISC_MQPRIO unconditionally (not based on a taprio capability bit) would risk introducing regressions. For example, taprio doesn't populate (or validate) qopt->hw, as well as mqprio.flags, mqprio.shaper, mqprio.min_rate, mqprio.max_rate. In comparison, adding a capability is functionally equivalent to just passing the mqprio in a way that drivers can ignore it, except it's slightly more complicated to use it (need to set the capability). Ultimately, what made me go for the "mqprio in taprio" variant was that it's easier for offloading drivers to interpret the mqprio qopt slightly differently when it comes from taprio vs when it comes from mqprio, should that ever become necessary. Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 02e3ccfbc7d1..ace8be520fb0 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -187,6 +187,7 @@ struct tc_taprio_sched_entry { }; struct tc_taprio_qopt_offload { + struct tc_mqprio_qopt_offload mqprio; u8 enable; ktime_t base_time; u64 cycle_time; -- cgit v1.2.3 From 522d15ea831f88717084304f105b1d195104880e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 4 Feb 2023 15:53:04 +0200 Subject: net/sched: taprio: only pass gate mask per TXQ for igc, stmmac, tsnep, am65_cpsw There are 2 classes of in-tree drivers currently: - those who act upon struct tc_taprio_sched_entry :: gate_mask as if it holds a bit mask of TXQs - those who act upon the gate_mask as if it holds a bit mask of TCs When it comes to the standard, IEEE 802.1Q-2018 does say this in the second paragraph of section 8.6.8.4 Enhancements for scheduled traffic: | A gate control list associated with each Port contains an ordered list | of gate operations. 
Each gate operation changes the transmission gate | state for the gate associated with each of the Port's traffic class | queues and allows associated control operations to be scheduled. In typically obtuse language, it refers to a "traffic class queue" rather than a "traffic class" or a "queue". But careful reading of 802.1Q clarifies that "traffic class" and "queue" are in fact synonymous (see 8.6.6 Queuing frames): | A queue in this context is not necessarily a single FIFO data structure. | A queue is a record of all frames of a given traffic class awaiting | transmission on a given Bridge Port. The structure of this record is not | specified. i.o.w. their definition of "queue" isn't the Linux TX queue. The gate_mask really is input into taprio via its UAPI as a mask of traffic classes, but taprio_sched_to_offload() converts it into a TXQ mask. The breakdown of drivers which handle TC_SETUP_QDISC_TAPRIO is: - hellcreek, felix, sja1105: these are DSA switches, it's not even very clear what TXQs correspond to, other than purely software constructs. Only the mqprio configuration with 8 TCs and 1 TXQ per TC makes sense. So it's fine to convert these to a gate mask per TC. - enetc: I have the hardware and can confirm that the gate mask is per TC, and affects all TXQs (BD rings) configured for that priority. - igc: in igc_save_qbv_schedule(), the gate_mask is clearly interpreted to be per-TXQ. - tsnep: Gerhard Engleder clarifies that even though this hardware supports at most 1 TXQ per TC, the TXQ indices may be different from the TC values themselves, and it is the TXQ indices that matter to this hardware. So keep it per-TXQ as well. - stmmac: I have a GMAC datasheet, and in the EST section it does specify that the gate events are per TXQ rather than per TC. 
- lan966x: again, this is a switch, and while not a DSA one, the way in which it implements lan966x_mqprio_add() - by only allowing num_tc == NUM_PRIO_QUEUES (8) - makes it clear to me that TXQs are a purely software construct here as well. They seem to map 1:1 with TCs. - am65_cpsw: from looking at am65_cpsw_est_set_sched_cmds(), I get the impression that the fetch_allow variable is treated like a prio_mask. This definitely sounds closer to a per-TC gate mask rather than a per-TXQ one, and TI documentation does seem to recommend an identity mapping between TCs and TXQs. However, Roger Quadros would like to do some testing before making changes, so I'm leaving this driver to operate as it did before, for now. Link with more details at the end. Based on this breakdown, we have 5 drivers with a gate mask per TC and 4 with a gate mask per TXQ. So let's make the gate mask per TXQ the opt-in and the gate mask per TC the default. Benefit from the TC_QUERY_CAPS feature that Jakub suggested we add, and query the device driver before calling the proper ndo_setup_tc(), and figure out if it expects one or the other format. Link: https://patchwork.kernel.org/project/netdevbpf/patch/20230202003621.2679603-15-vladimir.oltean@nxp.com/#25193204 Cc: Horatiu Vultur Cc: Siddharth Vadapalli Cc: Roger Quadros Signed-off-by: Vladimir Oltean Acked-by: Kurt Kanzenbach # hellcreek Reviewed-by: Gerhard Engleder Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- include/net/pkt_sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index ace8be520fb0..fd889fc4912b 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -176,6 +176,7 @@ struct tc_mqprio_qopt_offload { struct tc_taprio_caps { bool supports_queue_max_sdu:1; + bool gate_mask_per_txq:1; }; struct tc_taprio_sched_entry { -- cgit v1.2.3 From 584f3742890e966d2f0a1f3c418c9ead70b2d99e Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Sat, 4 Feb 2023 17:39:20 +0000 Subject: net: add sock_init_data_uid() Add sock_init_data_uid() to explicitly initialize the socket uid. To initialise the socket uid, sock_init_data() assumes the struct socket* sock is always embedded in a struct socket_alloc, used to access the corresponding inode uid. This may not be true. Examples are sockets created in tun_chr_open() and tap_open(). Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.") Signed-off-by: Pietro Borrello Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index dcd72e6285b2..937e842dc930 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1956,7 +1956,12 @@ void sk_common_release(struct sock *sk); * Default socket callbacks and setup code */ -/* Initialise core socket variables */ +/* Initialise core socket variables using an explicit uid. */ +void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid); + +/* Initialise core socket variables. + * Assumes struct socket *sock is embedded in a struct socket_alloc. 
+ */ void sock_init_data(struct socket *sock, struct sock *sk); /* -- cgit v1.2.3 From 2f530df76c8cb5551d7d9395c77eb02282c3dc68 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 7 Feb 2023 15:54:30 +0200 Subject: net/sched: taprio: give higher priority to higher TCs in software dequeue mode Current taprio software implementation is haunted by the shadow of the igb/igc hardware model. It iterates over child qdiscs in increasing order of TXQ index, therefore giving higher xmit priority to TXQ 0 and lower to TXQ N. According to discussions with Vinicius, that is the default (perhaps even unchangeable) prioritization scheme used for the NICs that taprio was first written for (igb, igc), and we have a case of two bugs canceling out, resulting in a functional setup on igb/igc, but a less sane one on other NICs. To the best of my understanding, taprio should prioritize based on the traffic class, so it should really dequeue starting with the highest traffic class and going down from there. We get to the TXQ using the tc_to_txq[] netdev property. TXQs within the same TC have the same (strict) priority, so we should pick from them as fairly as we can. We can achieve that by implementing something very similar to q->curband from multiq_dequeue(). Since igb/igc really do have TXQ 0 of higher hardware priority than TXQ 1 etc, we need to preserve the behavior for them as well. We really have no choice, because in txtime-assist mode, taprio is essentially a software scheduler towards offloaded child tc-etf qdiscs, so the TXQ selection really does matter (not all igb TXQs support ETF/SO_TXTIME, says Kurt Kanzenbach). To preserve the behavior, we need a capability bit so that taprio can determine if it's running on igb/igc, or on something else. Because igb doesn't offload taprio at all, we can't piggyback on the qdisc_offload_query_caps() call from taprio_enable_offload(), but instead we need a separate call which is also made for software scheduling. 
Introduce two static keys to minimize the performance penalty on systems which only have igb/igc NICs, and on systems which only have other NICs. For mixed systems, taprio will have to dynamically check whether to dequeue using one prioritization algorithm or using the other. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index fd889fc4912b..2016839991a4 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -177,6 +177,11 @@ struct tc_mqprio_qopt_offload { struct tc_taprio_caps { bool supports_queue_max_sdu:1; bool gate_mask_per_txq:1; + /* Device expects lower TXQ numbers to have higher priority over higher + * TXQs, regardless of their TC mapping. DO NOT USE FOR NEW DRIVERS, + * INSTEAD ENFORCE A PROPER TC:TXQ MAPPING COMING FROM USER SPACE. + */ + bool broken_mqprio:1; }; struct tc_taprio_sched_entry { -- cgit v1.2.3 From 969cf3e670b5532dc345b8fafaf96a94278a7e09 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 9 Jan 2023 19:37:52 -0600 Subject: Bluetooth: HCI: Replace zero-length arrays with flexible-array members Zero-length arrays are deprecated[1] and we are moving towards adopting C99 flexible-array members instead. So, replace zero-length arrays in a couple of structures with flex-array members. This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines on memcpy() and help us make progress towards globally enabling -fstrict-flex-arrays=3 [2]. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays [1] Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [2] Link: https://github.com/KSPP/linux/issues/78 Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 8d773b042c85..400f8a7d0c3f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2156,7 +2156,7 @@ struct hci_cp_le_big_create_sync { __u8 mse; __le16 timeout; __u8 num_bis; - __u8 bis[0]; + __u8 bis[]; } __packed; #define HCI_OP_LE_BIG_TERM_SYNC 0x206c @@ -2174,7 +2174,7 @@ struct hci_cp_le_setup_iso_path { __le16 codec_vid; __u8 delay[3]; __u8 codec_cfg_len; - __u8 codec_cfg[0]; + __u8 codec_cfg[]; } __packed; struct hci_rp_le_setup_iso_path { -- cgit v1.2.3 From 2394186a2cefb9a45a029281a55749804dd8c556 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Mon, 30 Jan 2023 20:37:01 +0200 Subject: Bluetooth: MGMT: add CIS feature bits to controller information Userspace needs to know whether the adapter has feature support for Connected Isochronous Stream - Central/Peripheral, so it can set up LE Audio features accordingly. Expose these feature bits as settings in MGMT controller info. 
Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/mgmt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 743f6f59dff8..e18a927669c0 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -109,6 +109,8 @@ struct mgmt_rp_read_index_list { #define MGMT_SETTING_STATIC_ADDRESS 0x00008000 #define MGMT_SETTING_PHY_CONFIGURATION 0x00010000 #define MGMT_SETTING_WIDEBAND_SPEECH 0x00020000 +#define MGMT_SETTING_CIS_CENTRAL 0x00040000 +#define MGMT_SETTING_CIS_PERIPHERAL 0x00080000 #define MGMT_OP_READ_INFO 0x0004 #define MGMT_READ_INFO_SIZE 0 -- cgit v1.2.3 From 67fc5d7ffbd4f9cf52adf166f5bc9a35fef37f24 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Feb 2023 17:52:07 -0500 Subject: net: extract nf_ct_skb_network_trim function to nf_conntrack_ovs There is almost the same code in ovs_skb_network_trim() and tcf_ct_skb_network_trim(), this patch extracts them into a function nf_ct_skb_network_trim() and moves the function to nf_conntrack_ovs. 
Signed-off-by: Xin Long Reviewed-by: Simon Horman Reviewed-by: Aaron Conole Acked-by: Florian Westphal Signed-off-by: Jakub Kicinski --- include/net/netfilter/nf_conntrack.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 6a2019aaa464..a6e89d7212f8 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -362,6 +362,8 @@ static inline struct nf_conntrack_net *nf_ct_pernet(const struct net *net) return net_generic(net, nf_conntrack_net_id); } +int nf_ct_skb_network_trim(struct sk_buff *skb, int family); + #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v)) -- cgit v1.2.3 From 0785407e78d4bce56e04d92a6c961900b3d513dd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Feb 2023 17:52:10 -0500 Subject: net: extract nf_ct_handle_fragments to nf_conntrack_ovs Now handle_fragments() in OVS and TC have similar code, and this patch removes the duplicate code by moving the function to nf_conntrack_ovs. Note that skb_clear_hash(skb) or skb->ignore_df = 1 should be done only when defrag returns 0, as it does in other places in the kernel.
Signed-off-by: Xin Long Reviewed-by: Simon Horman Reviewed-by: Aaron Conole Acked-by: Florian Westphal Signed-off-by: Jakub Kicinski --- include/net/netfilter/nf_conntrack.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index a6e89d7212f8..7bbab8f2b73d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -363,6 +363,8 @@ static inline struct nf_conntrack_net *nf_ct_pernet(const struct net *net) } int nf_ct_skb_network_trim(struct sk_buff *skb, int family); +int nf_ct_handle_fragments(struct net *net, struct sk_buff *skb, + u16 zone, u8 family, u8 *proto, u16 *mru); #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) -- cgit v1.2.3 From ca43ccf41224b023fc290073d5603a755fd12eed Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 9 Feb 2023 16:22:01 -0800 Subject: dccp/tcp: Avoid negative sk_forward_alloc by ipv6_pinfo.pktoptions. Eric Dumazet pointed out [0] that when we call skb_set_owner_r() for ipv6_pinfo.pktoptions, sk_rmem_schedule() has not been called, resulting in a negative sk_forward_alloc. We add a new helper which clones a skb and sets its owner only when sk_rmem_schedule() succeeds. Note that we move skb_set_owner_r() forward in (dccp|tcp)_v6_do_rcv() because tcp_send_synack() can make sk_forward_alloc negative before ipv6_opt_accepted() in the crossed SYN-ACK or self-connect() cases. 
[0]: https://lore.kernel.org/netdev/CANn89iK9oc20Jdi_41jb9URdF210r7d1Y-+uypbMSbOfY6jqrg@mail.gmail.com/ Fixes: 323fbd0edf3f ("net: dccp: Add handling of IPV6_PKTOPTIONS to dccp_v6_do_rcv()") Fixes: 3df80d9320bc ("[DCCP]: Introduce DCCPv6") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/sock.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index dcd72e6285b2..556209727633 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2434,6 +2434,19 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struc return false; } +static inline struct sk_buff *skb_clone_and_charge_r(struct sk_buff *skb, struct sock *sk) +{ + skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); + if (skb) { + if (sk_rmem_schedule(sk, skb, skb->truesize)) { + skb_set_owner_r(skb, sk); + return skb; + } + __kfree_skb(skb); + } + return NULL; +} + static inline void skb_prepare_for_gro(struct sk_buff *skb) { if (skb->destructor != sock_wfree) { -- cgit v1.2.3 From afd888c3e19ceb5247158fe2fabbf7234937a515 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 10 Feb 2023 11:01:26 +0100 Subject: devlink: make sure driver does not read updated driverinit param before reload The driverinit param purpose is to serve the driver during init/reload time to provide a value, either default or set by user. Make sure that driver does not read value updated by user before the reload is performed. Hold the new value in a separate struct and switch it during reload. Note that this is required to be eventually possible to call devl_param_driverinit_value_get() without holding instance lock. Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- include/net/devlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 2e85a5970a32..8ed960345f37 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -489,6 +489,10 @@ struct devlink_param_item { const struct devlink_param *param; union devlink_param_value driverinit_value; bool driverinit_value_valid; + union devlink_param_value driverinit_value_new; /* Not reachable + * until reload. + */ + bool driverinit_value_new_valid; }; enum devlink_param_generic_id { -- cgit v1.2.3 From 94ba1c316b9c0f9b017f7cd7eac84adae693e80f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 10 Feb 2023 11:01:27 +0100 Subject: devlink: fix the name of value arg of devl_param_driverinit_value_get() Probably due to copy-paste error, the name of the arg is "init_val" which is misleading, as the pointer is used to point to struct where to store the current value. Rename it to "val" and change the arg comment a bit on the way. Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Acked-by: Jakub Kicinski Reviewed-by: Jacob Keller Signed-off-by: David S. 
Miller --- include/net/devlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 8ed960345f37..6a942e70e451 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1784,7 +1784,7 @@ void devlink_params_unregister(struct devlink *devlink, const struct devlink_param *params, size_t params_count); int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id, - union devlink_param_value *init_val); + union devlink_param_value *val); void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union devlink_param_value init_val); void devl_param_value_changed(struct devlink *devlink, u32 param_id); -- cgit v1.2.3 From dc68eaf2c29f410fb078fd6da8e56201d3282e0b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Feb 2023 18:47:05 +0000 Subject: net: dropreason: add SKB_DROP_REASON_IPV6_BAD_EXTHDR This drop reason can be used whenever an IPv6 packet has a malformed extension header. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- include/net/dropreason.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/dropreason.h b/include/net/dropreason.h index 94bc3d5d8803..6c41e535175c 100644 --- a/include/net/dropreason.h +++ b/include/net/dropreason.h @@ -72,6 +72,7 @@ FN(FRAG_REASM_TIMEOUT) \ FN(FRAG_TOO_FAR) \ FN(TCP_MINTTL) \ + FN(IPV6_BAD_EXTHDR) \ FNe(MAX) /** @@ -318,6 +319,8 @@ enum skb_drop_reason { * the threshold (IP_MINTTL or IPV6_MINHOPCOUNT). */ SKB_DROP_REASON_TCP_MINTTL, + /** @SKB_DROP_REASON_IPV6_BAD_EXTHDR: Bad IPv6 extension header. 
*/ + SKB_DROP_REASON_IPV6_BAD_EXTHDR, /** * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be * used as a real 'reason' -- cgit v1.2.3 From 30c89bad3ea2ef7a2d4686f9c3cc08420fe627bc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Feb 2023 18:47:07 +0000 Subject: ipv6: icmp6: add drop reason support to icmpv6_notify() Accurately reports what happened in icmpv6_notify() when handling a packet. This makes use of the new IPV6_BAD_EXTHDR drop reason. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 03f3af02a9a6..7332296eca44 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -436,7 +436,8 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl) atomic_dec(&fl->users); } -void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info); +enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type, + u8 code, __be32 info); void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len); -- cgit v1.2.3 From 545dbcd124b02c9dc93c8a5894c71d682effc3e6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Feb 2023 18:47:08 +0000 Subject: ipv6: icmp6: add drop reason support to ndisc_rcv() Creates three new drop reasons: SKB_DROP_REASON_IPV6_NDISC_FRAG: invalid frag (suppress_frag_ndisc). SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT: invalid hop limit. SKB_DROP_REASON_IPV6_NDISC_BAD_CODE: invalid NDISC icmp6 code. 
Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- include/net/dropreason.h | 9 +++++++++ include/net/ndisc.h | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dropreason.h b/include/net/dropreason.h index 6c41e535175c..ef3f65d135d3 100644 --- a/include/net/dropreason.h +++ b/include/net/dropreason.h @@ -73,6 +73,9 @@ FN(FRAG_TOO_FAR) \ FN(TCP_MINTTL) \ FN(IPV6_BAD_EXTHDR) \ + FN(IPV6_NDISC_FRAG) \ + FN(IPV6_NDISC_HOP_LIMIT) \ + FN(IPV6_NDISC_BAD_CODE) \ FNe(MAX) /** @@ -321,6 +324,12 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_MINTTL, /** @SKB_DROP_REASON_IPV6_BAD_EXTHDR: Bad IPv6 extension header. */ SKB_DROP_REASON_IPV6_BAD_EXTHDR, + /** @SKB_DROP_REASON_IPV6_NDISC_FRAG: invalid frag (suppress_frag_ndisc). */ + SKB_DROP_REASON_IPV6_NDISC_FRAG, + /** @SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT: invalid hop limit. */ + SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT, + /** @SKB_DROP_REASON_IPV6_NDISC_BAD_CODE: invalid NDISC icmp6 code. */ + SKB_DROP_REASON_IPV6_NDISC_BAD_CODE, /** * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be * used as a real 'reason' diff --git a/include/net/ndisc.h b/include/net/ndisc.h index da7eec8669ec..07e5168cdaf9 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -445,7 +445,7 @@ int ndisc_late_init(void); void ndisc_late_cleanup(void); void ndisc_cleanup(void); -int ndisc_rcv(struct sk_buff *skb); +enum skb_drop_reason ndisc_rcv(struct sk_buff *skb); struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *saddr, u64 nonce); -- cgit v1.2.3 From ac7d27907d5445d0accaf998e1dc3ea570ed1ba6 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Sun, 12 Feb 2023 15:25:14 +0200 Subject: net/sched: pass flow_stats instead of multiple stats args Instead of passing 6 stats related args, pass the flow_stats. 
Signed-off-by: Oz Shlomo Reviewed-by: Simon Horman Reviewed-by: Marcelo Ricardo Leitner Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- include/net/pkt_cls.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index cd410a87517b..bf50829d9255 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -294,8 +294,7 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) static inline void tcf_exts_hw_stats_update(const struct tcf_exts *exts, - u64 bytes, u64 packets, u64 drops, u64 lastuse, - u8 used_hw_stats, bool used_hw_stats_valid) + struct flow_stats *stats) { #ifdef CONFIG_NET_CLS_ACT int i; @@ -306,12 +305,12 @@ tcf_exts_hw_stats_update(const struct tcf_exts *exts, /* if stats from hw, just skip */ if (tcf_action_update_hw_stats(a)) { preempt_disable(); - tcf_action_stats_update(a, bytes, packets, drops, - lastuse, true); + tcf_action_stats_update(a, stats->bytes, stats->pkts, stats->drops, + stats->lastused, true); preempt_enable(); - a->used_hw_stats = used_hw_stats; - a->used_hw_stats_valid = used_hw_stats_valid; + a->used_hw_stats = stats->used_hw_stats; + a->used_hw_stats_valid = stats->used_hw_stats_valid; } } #endif -- cgit v1.2.3 From d307b2c6f962ad5d83d7a7df71c2e9c9e4106d82 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Sun, 12 Feb 2023 15:25:15 +0200 Subject: net/sched: introduce flow_offload action cookie Currently a hardware action is uniquely identified by the (id, hw_index) tuple. However, the id is set by the flow_act_setup callback and tc core cannot enforce this, and it is possible that a future change could break this. In addition, (id, hw_index) tuples are not unique across network namespaces. Uniquely identify the action by setting an action cookie by the tc core. Use the unique action cookie to query the action's hardware stats.
Signed-off-by: Oz Shlomo Reviewed-by: Simon Horman Reviewed-by: Marcelo Ricardo Leitner Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- include/net/flow_offload.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 0400a0ac8a29..d177bf5f0e1a 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -228,6 +228,7 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie); struct flow_action_entry { enum flow_action_id id; u32 hw_index; + unsigned long act_cookie; enum flow_action_hw_stats hw_stats; action_destr destructor; void *destructor_priv; @@ -610,6 +611,7 @@ struct flow_offload_action { enum offload_act_command command; enum flow_action_id id; u32 index; + unsigned long cookie; struct flow_stats stats; struct flow_action action; }; -- cgit v1.2.3 From 5246c896b805b043a87fa78af32a33cbce00de05 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Sun, 12 Feb 2023 15:25:16 +0200 Subject: net/sched: support per action hw stats There are currently two mechanisms for populating hardware stats: 1. Using flow_offload api to query the flow's statistics. The api assumes that the same stats values apply to all the flow's actions. This assumption breaks when action drops or jumps over following actions. 2. Using hw_action api to query specific action stats via a driver callback method. This api assures the correct action stats for the offloaded action, however, it does not apply to the rest of the actions in the flow's actions array. Extend the flow_offload stats callback to indicate that a per action stats update is required. Use the existing flow_offload_action api to query the action's hw stats. In addition, currently the tc action stats utility only updates hw actions. Reuse the existing action stats cb infrastructure to query any action stats. 
Signed-off-by: Oz Shlomo Reviewed-by: Simon Horman Reviewed-by: Marcelo Ricardo Leitner Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- include/net/flow_offload.h | 1 + include/net/pkt_cls.h | 29 +++++++++++++++++++---------- 2 files changed, 20 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index d177bf5f0e1a..8c05455b1e34 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -594,6 +594,7 @@ struct flow_cls_common_offload { struct flow_cls_offload { struct flow_cls_common_offload common; enum flow_cls_command command; + bool use_act_stats; unsigned long cookie; struct flow_rule *rule; struct flow_stats stats; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index bf50829d9255..ace437c6754b 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -292,9 +292,15 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) #define tcf_act_for_each_action(i, a, actions) \ for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = actions[i]); i++) +static inline bool tc_act_in_hw(struct tc_action *act) +{ + return !!act->in_hw_count; +} + static inline void tcf_exts_hw_stats_update(const struct tcf_exts *exts, - struct flow_stats *stats) + struct flow_stats *stats, + bool use_act_stats) { #ifdef CONFIG_NET_CLS_ACT int i; @@ -302,16 +308,18 @@ tcf_exts_hw_stats_update(const struct tcf_exts *exts, for (i = 0; i < exts->nr_actions; i++) { struct tc_action *a = exts->actions[i]; - /* if stats from hw, just skip */ - if (tcf_action_update_hw_stats(a)) { - preempt_disable(); - tcf_action_stats_update(a, stats->bytes, stats->pkts, stats->drops, - stats->lastused, true); - preempt_enable(); - - a->used_hw_stats = stats->used_hw_stats; - a->used_hw_stats_valid = stats->used_hw_stats_valid; + if (use_act_stats || tc_act_in_hw(a)) { + if (!tcf_action_update_hw_stats(a)) + continue; } + + preempt_disable(); + tcf_action_stats_update(a, stats->bytes, stats->pkts, 
stats->drops, + stats->lastused, true); + preempt_enable(); + + a->used_hw_stats = stats->used_hw_stats; + a->used_hw_stats_valid = stats->used_hw_stats_valid; } #endif } @@ -769,6 +777,7 @@ struct tc_cls_matchall_offload { enum tc_matchall_command command; struct flow_rule *rule; struct flow_stats stats; + bool use_act_stats; unsigned long cookie; }; -- cgit v1.2.3 From 9a47c1ef5a95d1fd229ee5e375985f809a9d8177 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Mon, 16 Jan 2023 18:20:58 +0530 Subject: wifi: cfg80211: Authentication offload to user space for MLO connection in STA mode Currently authentication request event interface doesn't have support to indicate the user space whether it should enable MLO or not during the authentication with the specified AP. But driver needs such capability since the connection is MLO or not decided by the driver in case of SME offload to the driver. Add support for driver to indicate MLD address of the AP in authentication offload request to inform user space to enable MLO during authentication process. Driver shall look at NL80211_ATTR_MLO_SUPPORT flag capability in NL80211_CMD_CONNECT to know whether the user space supports enabling MLO during the authentication offload. User space should enable MLO during the authentication only when it receives the AP MLD address in authentication offload request. User space shouldn't enable MLO if the authentication offload request doesn't indicate the AP MLD address even if the AP is MLO capable. When MLO is enabled, user space should use the MAC address of the interface (on which driver sent request) as self MLD address. User space and driver to use MLD addresses in RA, TA and BSSID fields of the frames between them, and driver translates the MLD addresses to/from link addresses based on the link chosen for the authentication. 
Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20230116125058.1604843-1-quic_vjakkam@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 54a77d906b2d..bcde215475c6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3600,6 +3600,17 @@ struct cfg80211_pmk_conf { * the real status code for failures. Used only for the authentication * response command interface (user space to driver). * @pmkid: The identifier to refer a PMKSA. + * @mld_addr: MLD address of the peer. Used by the authentication request event + * interface. Driver indicates this to enable MLO during the authentication + * offload to user space. Driver shall look at %NL80211_ATTR_MLO_SUPPORT + * flag capability in NL80211_CMD_CONNECT to know whether the user space + * supports enabling MLO during the authentication offload. + * User space should use the address of the interface (on which the + * authentication request event reported) as self MLD address. User space + * and driver should use MLD addresses in RA, TA and BSSID fields of + * authentication frames sent or received via cfg80211. The driver + * translates the MLD addresses to/from link addresses based on the link + * chosen for the authentication. */ struct cfg80211_external_auth_params { enum nl80211_external_auth_action action; @@ -3608,6 +3619,7 @@ struct cfg80211_external_auth_params { unsigned int key_mgmt_suite; u16 status; const u8 *pmkid; + u8 mld_addr[ETH_ALEN] __aligned(2); }; /** -- cgit v1.2.3 From a42e59eb9689e54279227e2af5ed75128d92a82b Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Thu, 26 Jan 2023 20:02:55 +0530 Subject: wifi: cfg80211: Extend cfg80211_new_sta() for MLD AP Add support for drivers to indicate STA connection(MLO/non-MLO) when user space SME (e.g., hostapd) is not used for MLD AP. 
Add new parameters in struct station_info to provide below information in cfg80211_new_sta() call: - MLO link ID of the AP, with which station completed (re)association. This is applicable for both MLO and non-MLO station connections when the AP affiliated with an MLD. - Station's MLD address if the connection is MLO capable. - (Re)Association Response IEs sent to the station. User space needs this to determine rejected and accepted affiliated links information of the connected station if the connection is MLO capable. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20230126143256.960563-2-quic_vjakkam@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bcde215475c6..28529194eb89 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1876,6 +1876,24 @@ struct cfg80211_tid_stats { * received packet with an FCS error matches the peer MAC address. * @airtime_link_metric: mesh airtime link metric. * @connected_to_as: true if mesh STA has a path to authentication server + * @mlo_params_valid: Indicates @assoc_link_id and @mld_addr fields are filled + * by driver. Drivers use this only in cfg80211_new_sta() calls when AP + * MLD's MLME/SME is offload to driver. Drivers won't fill this + * information in cfg80211_del_sta_sinfo(), get_station() and + * dump_station() callbacks. + * @assoc_link_id: Indicates MLO link ID of the AP, with which the station + * completed (re)association. This information filled for both MLO + * and non-MLO STA connections when the AP affiliated with an MLD. + * @mld_addr: For MLO STA connection, filled with MLD address of the station. + * For non-MLO STA connection, filled with all zeros. + * @assoc_resp_ies: IEs from (Re)Association Response. 
+ * This is used only when in AP mode with drivers that do not use user + * space MLME/SME implementation. The information is provided only for the + * cfg80211_new_sta() calls to notify user space of the IEs. Drivers won't + * fill this information in cfg80211_del_sta_sinfo(), get_station() and + * dump_station() callbacks. User space needs this information to determine + * the accepted and rejected affiliated links of the connected station. + * @assoc_resp_ies_len: Length of @assoc_resp_ies buffer in octets. */ struct station_info { u64 filled; @@ -1935,6 +1953,12 @@ struct station_info { u32 airtime_link_metric; u8 connected_to_as; + + bool mlo_params_valid; + u8 assoc_link_id; + u8 mld_addr[ETH_ALEN] __aligned(2); + const u8 *assoc_resp_ies; + size_t assoc_resp_ies_len; }; /** -- cgit v1.2.3 From 8bb588d975019748ebdab9448e9a274b7463c13b Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Thu, 26 Jan 2023 20:02:56 +0530 Subject: wifi: cfg80211: Extend cfg80211_update_owe_info_event() for MLD AP Add support to offload OWE processing to user space for MLD AP when driver's SME in use. Add new parameters in struct cfg80211_update_owe_info to provide below information in cfg80211_update_owe_info_event() call: - MLO link ID of the AP, with which station requested (re)association. This is applicable for both MLO and non-MLO station connections when the AP affiliated with an MLD. - Station's MLD address if the connection is MLO capable. 
Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20230126143256.960563-3-quic_vjakkam@quicinc.com [reformat the trace event macro] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 28529194eb89..70f01ea5ba5c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3898,12 +3898,22 @@ struct cfg80211_pmsr_request { * the IEs of the remote peer in the event from the host driver and * the constructed IEs by the user space in the request interface. * @ie_len: Length of IEs in octets. + * @assoc_link_id: MLO link ID of the AP, with which (re)association requested + * by peer. This will be filled by driver for both MLO and non-MLO station + * connections when the AP affiliated with an MLD. For non-MLD AP mode, it + * will be -1. Used only with OWE update event (driver to user space). + * @peer_mld_addr: For MLO connection, MLD address of the peer. For non-MLO + * connection, it will be all zeros. This is applicable only when + * @assoc_link_id is not -1, i.e., the AP affiliated with an MLD. Used only + * with OWE update event (driver to user space). */ struct cfg80211_update_owe_info { u8 peer[ETH_ALEN] __aligned(2); u16 status; const u8 *ie; size_t ie_len; + int assoc_link_id; + u8 peer_mld_addr[ETH_ALEN] __aligned(2); }; /** -- cgit v1.2.3 From aa87cd8b35736a5183745ab0ec4b82419024dfd7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Jan 2023 12:39:31 +0100 Subject: wifi: mac80211: mlme: handle EHT channel puncturing Handle the Puncturing info received from the AP in the EHT Operation element in beacons. If the info is invalid: - during association: disable EHT connection for the AP - after association: disconnect This commit includes many (internal) bugfixes and spec updates from various people.
Co-developed-by: Miri Korenblit Signed-off-by: Miri Korenblit Link: https://lore.kernel.org/r/20230127123930.4fbc74582331.I3547481d49f958389f59dfeba3fcc75e72b0aa6e@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2635e6de8101..54ffc0cc2918 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -340,7 +340,7 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_FILS_DISCOVERY: FILS discovery status changed. * @BSS_CHANGED_UNSOL_BCAST_PROBE_RESP: Unsolicited broadcast probe response * status changed. - * + * @BSS_CHANGED_EHT_PUNCTURING: The channel puncturing bitmap changed. */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -375,6 +375,7 @@ enum ieee80211_bss_change { BSS_CHANGED_HE_BSS_COLOR = 1<<29, BSS_CHANGED_FILS_DISCOVERY = 1<<30, BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, + BSS_CHANGED_EHT_PUNCTURING = BIT_ULL(32), /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -640,6 +641,7 @@ struct ieee80211_fils_discovery { * @tx_pwr_env_num: number of @tx_pwr_env. * @pwr_reduction: power constraint of BSS. * @eht_support: does this BSS support EHT + * @eht_puncturing: bitmap to indicate which channels are punctured in this BSS * @csa_active: marks whether a channel switch is going on. Internally it is * write-protected by sdata_lock and local->mtx so holding either is fine * for read access. 
@@ -736,6 +738,7 @@ struct ieee80211_bss_conf { u8 tx_pwr_env_num; u8 pwr_reduction; bool eht_support; + u16 eht_puncturing; bool csa_active; bool mu_mimo_owner; -- cgit v1.2.3 From b25413fed3d43e1ed3340df4d928971bb8639f66 Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Mon, 30 Jan 2023 16:12:24 -0800 Subject: wifi: cfg80211: move puncturing bitmap validation from mac80211 - Move ieee80211_valid_disable_subchannel_bitmap() from mlme.c to chan.c, rename it as cfg80211_valid_disable_subchannel_bitmap() and export it. - Modify the prototype to include struct cfg80211_chan_def instead of only bandwidth to support a check which returns false if the primary channel is punctured. Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/20230131001227.25014-2-quic_alokad@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 70f01ea5ba5c..191603f32b37 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8952,4 +8952,16 @@ static inline int cfg80211_color_change_notify(struct net_device *dev) 0, 0); } +/** + * cfg80211_valid_disable_subchannel_bitmap - validate puncturing bitmap + * @bitmap: bitmap to be validated + * @chandef: channel definition + * + * Validate the puncturing bitmap. + * + * Return: %true if the bitmap is valid. %false otherwise. + */ +bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap, + const struct cfg80211_chan_def *chandef); + #endif /* __NET_CFG80211_H */ -- cgit v1.2.3 From d7c1a9a0ed180d8884798ce97afe7283622a484f Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Mon, 30 Jan 2023 16:12:25 -0800 Subject: wifi: nl80211: validate and configure puncturing bitmap - New feature flag, NL80211_EXT_FEATURE_PUNCT, to advertise driver support for preamble puncturing in AP mode. 
- New attribute, NL80211_ATTR_PUNCT_BITMAP, to receive a puncturing bitmap from the userspace during AP bring up (NL80211_CMD_START_AP) and channel switch (NL80211_CMD_CHANNEL_SWITCH) operations. Each bit corresponds to a 20 MHz channel in the operating bandwidth, lowest bit for the lowest channel. Bit set to 1 indicates that the channel is punctured. Higher 16 bits are reserved. - New members added to structures cfg80211_ap_settings and cfg80211_csa_settings to propagate the bitmap to the driver after validation. Signed-off-by: Aloka Dixit Signed-off-by: Muna Sinada Link: https://lore.kernel.org/r/20230131001227.25014-3-quic_alokad@quicinc.com [move validation against 0xffff into policy] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 191603f32b37..fb8b79e6ac36 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1316,6 +1316,9 @@ struct cfg80211_unsol_bcast_probe_resp { * @fils_discovery: FILS discovery transmission parameters * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters * @mbssid_config: AP settings for multiple bssid + * @punct_bitmap: Preamble puncturing bitmap. Each bit represents + * a 20 MHz channel, lowest bit corresponding to the lowest channel. + * Bit set to 1 indicates that the channel is punctured. 
*/ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -1350,6 +1353,7 @@ struct cfg80211_ap_settings { struct cfg80211_fils_discovery fils_discovery; struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; struct cfg80211_mbssid_config mbssid_config; + u16 punct_bitmap; }; /** @@ -1367,6 +1371,9 @@ struct cfg80211_ap_settings { * @radar_required: whether radar detection is required on the new channel * @block_tx: whether transmissions should be blocked while changing * @count: number of beacons until switch + * @punct_bitmap: Preamble puncturing bitmap. Each bit represents + * a 20 MHz channel, lowest bit corresponding to the lowest channel. + * Bit set to 1 indicates that the channel is punctured. */ struct cfg80211_csa_settings { struct cfg80211_chan_def chandef; @@ -1379,6 +1386,7 @@ struct cfg80211_csa_settings { bool radar_required; bool block_tx; u8 count; + u16 punct_bitmap; }; /** -- cgit v1.2.3 From b345f0637c0042f9e6b78378a32256d90f485774 Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Mon, 30 Jan 2023 16:12:26 -0800 Subject: wifi: cfg80211: include puncturing bitmap in channel switch events Add puncturing bitmap in channel switch notifications and corresponding trace functions. 
Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/20230131001227.25014-4-quic_alokad@quicinc.com [fix qtnfmac] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fb8b79e6ac36..e570530191ec 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8326,13 +8326,14 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, * @dev: the device which switched channels * @chandef: the new channel definition * @link_id: the link ID for MLO, must be 0 for non-MLO + * @punct_bitmap: the new puncturing bitmap * * Caller must acquire wdev_lock, therefore must only be called from sleepable * driver context! */ void cfg80211_ch_switch_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, - unsigned int link_id); + unsigned int link_id, u16 punct_bitmap); /* * cfg80211_ch_switch_started_notify - notify channel switch start @@ -8341,6 +8342,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, * @link_id: the link ID for MLO, must be 0 for non-MLO * @count: the number of TBTTs until the channel switch happens * @quiet: whether or not immediate quiet was requested by the AP + * @punct_bitmap: the future puncturing bitmap * * Inform the userspace about the channel switch that has just * started, so that it can take appropriate actions (eg. 
starting @@ -8349,7 +8351,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, void cfg80211_ch_switch_started_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, unsigned int link_id, u8 count, - bool quiet); + bool quiet, u16 punct_bitmap); /** * ieee80211_operating_class_to_band - convert operating class to band -- cgit v1.2.3 From 2cc25e4b2a04cdd90dbb2916678745565cc4aeed Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Mon, 30 Jan 2023 16:12:27 -0800 Subject: wifi: mac80211: configure puncturing bitmap - Configure the bitmap in link_conf and notify the driver. - Modify 'change' in ieee80211_start_ap() from u32 to u64 to support BSS_CHANGED_EHT_PUNCTURING. - Propagate the bitmap in channel switch events to userspace. Signed-off-by: Aloka Dixit Signed-off-by: Muna Sinada Link: https://lore.kernel.org/r/20230131001227.25014-5-quic_alokad@quicinc.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 54ffc0cc2918..219fd15893b0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -645,6 +645,7 @@ struct ieee80211_fils_discovery { * @csa_active: marks whether a channel switch is going on. Internally it is * write-protected by sdata_lock and local->mtx so holding either is fine * for read access. + * @csa_punct_bitmap: new puncturing bitmap for channel switch * @mu_mimo_owner: indicates interface owns MU-MIMO capability * @chanctx_conf: The channel context this interface is assigned to, or %NULL * when it is not assigned. 
This pointer is RCU-protected due to the TX @@ -741,6 +742,8 @@ struct ieee80211_bss_conf { u16 eht_puncturing; bool csa_active; + u16 csa_punct_bitmap; + bool mu_mimo_owner; struct ieee80211_chanctx_conf __rcu *chanctx_conf; -- cgit v1.2.3 From 986e43b19ae9176093da35e0a844e65c8bf9ede7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 13 Feb 2023 11:08:54 +0100 Subject: wifi: mac80211: fix receiving A-MSDU frames on mesh interfaces The current mac80211 mesh A-MSDU receive path fails to parse A-MSDU packets on mesh interfaces, because it assumes that the Mesh Control field is always directly after the 802.11 header. 802.11-2020 9.3.2.2.2 Figure 9-70 shows that the Mesh Control field is actually part of the A-MSDU subframe header. This makes more sense, since it allows packets for multiple different destinations to be included in the same A-MSDU, as long as RA and TID are still the same. Another issue is the fact that the A-MSDU subframe length field was apparently accidentally defined as little-endian in the standard. In order to fix this, the mesh forwarding path needs to happen at a different point in the receive path. ieee80211_data_to_8023_exthdr is changed to ignore the mesh control field and leave it in after the ethernet header. This also affects the source/dest MAC address fields, which now in the case of mesh point to the mesh SA/DA. ieee80211_amsdu_to_8023s is changed to deal with the endian difference and to add the Mesh Control length to the subframe length, since it's not covered by the MSDU length field. With these changes, the mac80211 will get the same packet structure for converted regular data packets and unpacked A-MSDU subframes. The mesh forwarding checks are now only performed after the A-MSDU decap. For locally received packets, the Mesh Control header is stripped away. For forwarded packets, a new 802.11 header gets added.
Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20230213100855.34315-4-nbd@nbd.name [fix fortify build error] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e570530191ec..c506dc128685 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6251,11 +6251,36 @@ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, * @extra_headroom: The hardware extra headroom for SKBs in the @list. * @check_da: DA to check in the inner ethernet header, or NULL * @check_sa: SA to check in the inner ethernet header, or NULL + * @mesh_control: A-MSDU subframe header includes the mesh control field */ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, const unsigned int extra_headroom, - const u8 *check_da, const u8 *check_sa); + const u8 *check_da, const u8 *check_sa, + bool mesh_control); + +/** + * ieee80211_get_8023_tunnel_proto - get RFC1042 or bridge tunnel encap protocol + * + * Check for RFC1042 or bridge tunnel header and fetch the encapsulated + * protocol. + * + * @hdr: pointer to the MSDU payload + * @proto: destination pointer to store the protocol + * Return: true if encapsulation was found + */ +bool ieee80211_get_8023_tunnel_proto(const void *hdr, __be16 *proto); + +/** + * ieee80211_strip_8023_mesh_hdr - strip mesh header from converted 802.3 frames + * + * Strip the mesh header, which was left in by ieee80211_data_to_8023 as part + * of the MSDU data. Also move any source/destination addresses from the mesh + * header to the ethernet header (if present). 
+ * + * @skb: The 802.3 frame with embedded mesh header + */ +int ieee80211_strip_8023_mesh_hdr(struct sk_buff *skb); /** * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame -- cgit v1.2.3 From 6e4c0d0460bd32ca9244dff3ba2d2da27235de11 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 13 Feb 2023 11:08:55 +0100 Subject: wifi: mac80211: add a workaround for receiving non-standard mesh A-MSDU At least ath10k and ath11k supported hardware (maybe more) does not implement mesh A-MSDU aggregation in a standard compliant way. 802.11-2020 9.3.2.2.2 declares that the Mesh Control field is part of the A-MSDU header (and little-endian). As such, its length must not be included in the subframe length field. Hardware affected by this bug treats the mesh control field as part of the MSDU data and sets the length accordingly. In order to avoid packet loss, keep track of which stations are affected by this and take it into account when converting A-MSDU to 802.3 + mesh control packets. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20230213100855.34315-5-nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c506dc128685..9b015bb877db 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6236,6 +6236,19 @@ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0, false); } +/** + * ieee80211_is_valid_amsdu - check if subframe lengths of an A-MSDU are valid + * + * This is used to detect non-standard A-MSDU frames, e.g. the ones generated + * by ath10k and ath11k, where the subframe length includes the length of the + * mesh control field. + * + * @skb: The input A-MSDU frame without any headers. 
+ * @mesh_hdr: use standard compliant mesh A-MSDU subframe header + * Returns: true if subframe header lengths are valid for the @mesh_hdr mode + */ +bool ieee80211_is_valid_amsdu(struct sk_buff *skb, bool mesh_hdr); + /** * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame * -- cgit v1.2.3 From 935ef47b16cc5bc15fcd2b3dbc61abb0b7ea671a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 1 Feb 2023 13:48:30 +0100 Subject: wifi: cfg80211: get rid of gfp in cfg80211_bss_color_notify Since cfg80211_bss_color_notify() is now always run in non-atomic context, get rid of gfp_t flags in the routine signature and always use GFP_KERNEL for netlink message allocation. Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/c687724e7b53556f7a2d9cbe3d11cdcf065cb687.1675255390.git.lorenzo@kernel.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9b015bb877db..c65f17d74191 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8936,12 +8936,11 @@ void cfg80211_bss_flush(struct wiphy *wiphy); /** * cfg80211_bss_color_notify - notify about bss color event * @dev: network device - * @gfp: allocation flags * @cmd: the actual event we want to notify * @count: the number of TBTTs until the color change happens * @color_bitmap: representations of the colors that the local BSS is aware of */ -int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp, +int cfg80211_bss_color_notify(struct net_device *dev, enum nl80211_commands cmd, u8 count, u64 color_bitmap); @@ -8952,10 +8951,9 @@ int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp, * @gfp: allocation flags */ static inline int cfg80211_obss_color_collision_notify(struct net_device *dev, - u64 color_bitmap, gfp_t gfp) + u64 color_bitmap) { - return cfg80211_bss_color_notify(dev, gfp, - 
NL80211_CMD_OBSS_COLOR_COLLISION, + return cfg80211_bss_color_notify(dev, NL80211_CMD_OBSS_COLOR_COLLISION, 0, color_bitmap); } @@ -8969,8 +8967,7 @@ static inline int cfg80211_obss_color_collision_notify(struct net_device *dev, static inline int cfg80211_color_change_started_notify(struct net_device *dev, u8 count) { - return cfg80211_bss_color_notify(dev, GFP_KERNEL, - NL80211_CMD_COLOR_CHANGE_STARTED, + return cfg80211_bss_color_notify(dev, NL80211_CMD_COLOR_CHANGE_STARTED, count, 0); } @@ -8982,8 +8979,7 @@ static inline int cfg80211_color_change_started_notify(struct net_device *dev, */ static inline int cfg80211_color_change_aborted_notify(struct net_device *dev) { - return cfg80211_bss_color_notify(dev, GFP_KERNEL, - NL80211_CMD_COLOR_CHANGE_ABORTED, + return cfg80211_bss_color_notify(dev, NL80211_CMD_COLOR_CHANGE_ABORTED, 0, 0); } @@ -8995,7 +8991,7 @@ static inline int cfg80211_color_change_aborted_notify(struct net_device *dev) */ static inline int cfg80211_color_change_notify(struct net_device *dev) { - return cfg80211_bss_color_notify(dev, GFP_KERNEL, + return cfg80211_bss_color_notify(dev, NL80211_CMD_COLOR_CHANGE_COMPLETED, 0, 0); } -- cgit v1.2.3 From d99975c4953eb79e389d4630e848435c700e2dfc Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Wed, 1 Feb 2023 01:53:13 -0500 Subject: wifi: cfg80211: call reg_notifier for self managed wiphy from driver hint Currently the regulatory driver does not call the regulatory callback reg_notifier for self managed wiphys. Sometimes driver needs cfg80211 to calculate the info of ieee80211_channel such as flags and power, and driver needs to get the info of ieee80211_channel after hint of driver, but driver does not know when calculation of the info of ieee80211_channel become finished, so add notify to driver in reg_process_self_managed_hint() from cfg80211 is a good way, then driver could get the correct info in callback of reg_notifier. 
Signed-off-by: Wen Gong Link: https://lore.kernel.org/r/20230201065313.27203-1-quic_wgong@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c65f17d74191..15fb019ce28d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4737,6 +4737,8 @@ struct cfg80211_ops { * complete feature/interface combinations/etc. advertisement. No driver * should set this flag for now. * @WIPHY_FLAG_SUPPORTS_EXT_KCK_32: The device supports 32-byte KCK keys. + * @WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER: The device could handle reg notify for + * NL80211_REGDOM_SET_BY_DRIVER. */ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0), @@ -4762,6 +4764,7 @@ enum wiphy_flags { WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL = BIT(21), WIPHY_FLAG_SUPPORTS_5_10_MHZ = BIT(22), WIPHY_FLAG_HAS_CHANNEL_SWITCH = BIT(23), + WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER = BIT(24), }; /** -- cgit v1.2.3 From fe33311c3e371855c4f4c0ab8a5fce5b9a9fdafd Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Tue, 14 Feb 2023 12:14:10 +0800 Subject: net: no longer support SOCK_REFCNT_DEBUG feature Commit e48c414ee61f ("[INET]: Generalise the TCP sock ID lookup routines") commented out the definition of SOCK_REFCNT_DEBUG in 2005 and later another commit 463c84b97f24 ("[NET]: Introduce inet_connection_sock") removed it. Since we could track all of them through bpf and kprobe related tools and the feature could print loads of information which might not be that helpful even under a little bit pressure, the whole feature which has been inactive for many years is no longer supported. Link: https://lore.kernel.org/lkml/20230211065153.54116-1-kerneljasonxing@gmail.com/ Suggested-by: Kuniyuki Iwashima Signed-off-by: Jason Xing Reviewed-by: Kuniyuki Iwashima Acked-by: Wenjia Zhang Reviewed-by: Eric Dumazet Acked-by: Matthieu Baerts Signed-off-by: David S. 
Miller --- include/net/sock.h | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 937e842dc930..2cb258fde072 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1349,9 +1349,6 @@ struct proto { char name[32]; struct list_head node; -#ifdef SOCK_REFCNT_DEBUG - atomic_t socks; -#endif int (*diag_destroy)(struct sock *sk, int err); } __randomize_layout; @@ -1359,31 +1356,6 @@ int proto_register(struct proto *prot, int alloc_slab); void proto_unregister(struct proto *prot); int sock_load_diag_module(int family, int protocol); -#ifdef SOCK_REFCNT_DEBUG -static inline void sk_refcnt_debug_inc(struct sock *sk) -{ - atomic_inc(&sk->sk_prot->socks); -} - -static inline void sk_refcnt_debug_dec(struct sock *sk) -{ - atomic_dec(&sk->sk_prot->socks); - printk(KERN_DEBUG "%s socket %p released, %d are still alive\n", - sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks)); -} - -static inline void sk_refcnt_debug_release(const struct sock *sk) -{ - if (refcount_read(&sk->sk_refcnt) != 1) - printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", - sk->sk_prot->name, sk, refcount_read(&sk->sk_refcnt)); -} -#else /* SOCK_REFCNT_DEBUG */ -#define sk_refcnt_debug_inc(sk) do { } while (0) -#define sk_refcnt_debug_dec(sk) do { } while (0) -#define sk_refcnt_debug_release(sk) do { } while (0) -#endif /* SOCK_REFCNT_DEBUG */ - INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake)); static inline int sk_forward_alloc_get(const struct sock *sk) -- cgit v1.2.3 From 4048a6a7380c8be2f06c1c386dd63d1bd3cdb0a0 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 15 Feb 2023 10:32:07 +0100 Subject: wifi: cfg80211: remove gfp parameter from cfg80211_obss_color_collision_notify description Get rid of gfp parameter from cfg80211_obss_color_collision_notify routine description. 
Fixes: 935ef47b16cc ("wifi: cfg80211: get rid of gfp in cfg80211_bss_color_notify") Reported-by: Stephen Rothwell Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/2da652e2cd5c7903191091ae9757718f1be802a1.1676453359.git.lorenzo@kernel.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 15fb019ce28d..f115b2550309 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8951,7 +8951,6 @@ int cfg80211_bss_color_notify(struct net_device *dev, * cfg80211_obss_color_collision_notify - notify about bss color collision * @dev: network device * @color_bitmap: representations of the colors that the local BSS is aware of - * @gfp: allocation flags */ static inline int cfg80211_obss_color_collision_notify(struct net_device *dev, u64 color_bitmap) -- cgit v1.2.3 From 8c710f75256bb3cf05ac7b1672c82b92c43f3d28 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 14 Feb 2023 08:49:14 -0500 Subject: net/sched: Retire tcindex classifier The tcindex classifier has served us well for about a quarter of a century but has not been getting much TLC due to lack of known users. Most recently it has become easy prey to syzkaller. For this reason, we are retiring it. 
Signed-off-by: Jamal Hadi Salim Acked-by: Jiri Pirko Signed-off-by: Paolo Abeni --- include/net/tc_wrapper.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/net') diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h index d323fffb839a..8ba241760d0a 100644 --- a/include/net/tc_wrapper.h +++ b/include/net/tc_wrapper.h @@ -154,7 +154,6 @@ TC_INDIRECT_FILTER_DECLARE(mall_classify); TC_INDIRECT_FILTER_DECLARE(route4_classify); TC_INDIRECT_FILTER_DECLARE(rsvp_classify); TC_INDIRECT_FILTER_DECLARE(rsvp6_classify); -TC_INDIRECT_FILTER_DECLARE(tcindex_classify); TC_INDIRECT_FILTER_DECLARE(u32_classify); static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -207,10 +206,6 @@ static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (tp->classify == rsvp6_classify) return rsvp6_classify(skb, tp, res); #endif -#if IS_BUILTIN(CONFIG_NET_CLS_TCINDEX) - if (tp->classify == tcindex_classify) - return tcindex_classify(skb, tp, res); -#endif skip: return tp->classify(skb, tp, res); -- cgit v1.2.3 From 265b4da82dbf5df04bee5a5d46b7474b1aaf326a Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 14 Feb 2023 08:49:15 -0500 Subject: net/sched: Retire rsvp classifier The rsvp classifier has served us well for about a quarter of a century but has not been getting much maintenance attention due to lack of known users.
Signed-off-by: Jamal Hadi Salim Acked-by: Jiri Pirko Signed-off-by: Paolo Abeni --- include/net/tc_wrapper.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/net') diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h index 8ba241760d0a..a6d481b5bcbc 100644 --- a/include/net/tc_wrapper.h +++ b/include/net/tc_wrapper.h @@ -152,8 +152,6 @@ TC_INDIRECT_FILTER_DECLARE(flow_classify); TC_INDIRECT_FILTER_DECLARE(fw_classify); TC_INDIRECT_FILTER_DECLARE(mall_classify); TC_INDIRECT_FILTER_DECLARE(route4_classify); -TC_INDIRECT_FILTER_DECLARE(rsvp_classify); -TC_INDIRECT_FILTER_DECLARE(rsvp6_classify); TC_INDIRECT_FILTER_DECLARE(u32_classify); static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -198,14 +196,6 @@ static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (tp->classify == route4_classify) return route4_classify(skb, tp, res); #endif -#if IS_BUILTIN(CONFIG_NET_CLS_RSVP) - if (tp->classify == rsvp_classify) - return rsvp_classify(skb, tp, res); -#endif -#if IS_BUILTIN(CONFIG_NET_CLS_RSVP6) - if (tp->classify == rsvp6_classify) - return rsvp6_classify(skb, tp, res); -#endif skip: return tp->classify(skb, tp, res); -- cgit v1.2.3 From 7d12057b45fbc1e1315d327dca13e8d6b5019113 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 14 Feb 2023 18:15:31 -0300 Subject: net/sched: act_nat: transition to percpu stats and rcu The tc action act_nat was using shared stats and taking the per action lock in the datapath. Improve it by using percpu stats and rcu. 
perf before: - 10.48% tcf_nat_act - 81.83% _raw_spin_lock 81.08% native_queued_spin_lock_slowpath perf after: - 0.48% tcf_nat_act tdc results: 1..27 ok 1 7565 - Add nat action on ingress with default control action ok 2 fd79 - Add nat action on ingress with pipe control action ok 3 eab9 - Add nat action on ingress with continue control action ok 4 c53a - Add nat action on ingress with reclassify control action ok 5 76c9 - Add nat action on ingress with jump control action ok 6 24c6 - Add nat action on ingress with drop control action ok 7 2120 - Add nat action on ingress with maximum index value ok 8 3e9d - Add nat action on ingress with invalid index value ok 9 f6c9 - Add nat action on ingress with invalid IP address ok 10 be25 - Add nat action on ingress with invalid argument ok 11 a7bd - Add nat action on ingress with DEFAULT IP address ok 12 ee1e - Add nat action on ingress with ANY IP address ok 13 1de8 - Add nat action on ingress with ALL IP address ok 14 8dba - Add nat action on egress with default control action ok 15 19a7 - Add nat action on egress with pipe control action ok 16 f1d9 - Add nat action on egress with continue control action ok 17 6d4a - Add nat action on egress with reclassify control action ok 18 b313 - Add nat action on egress with jump control action ok 19 d9fc - Add nat action on egress with drop control action ok 20 a895 - Add nat action on egress with DEFAULT IP address ok 21 2572 - Add nat action on egress with ANY IP address ok 22 37f3 - Add nat action on egress with ALL IP address ok 23 6054 - Add nat action on egress with cookie ok 24 79d6 - Add nat action on ingress with cookie ok 25 4b12 - Replace nat action with invalid goto chain control ok 26 b811 - Delete nat action with valid index ok 27 a521 - Delete nat action with invalid index Reviewed-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Signed-off-by: Paolo Abeni --- include/net/tc_act/tc_nat.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 
'include/net') diff --git a/include/net/tc_act/tc_nat.h b/include/net/tc_act/tc_nat.h index c14407160812..c869274ac529 100644 --- a/include/net/tc_act/tc_nat.h +++ b/include/net/tc_act/tc_nat.h @@ -5,13 +5,17 @@ #include #include -struct tcf_nat { - struct tc_action common; - +struct tcf_nat_parms { __be32 old_addr; __be32 new_addr; __be32 mask; u32 flags; + struct rcu_head rcu; +}; + +struct tcf_nat { + struct tc_action common; + struct tcf_nat_parms __rcu *parms; }; #define to_tcf_nat(a) ((struct tcf_nat *)a) -- cgit v1.2.3 From 288864effe33885988d53faf7830b35cb9a84c7a Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 14 Feb 2023 18:15:32 -0300 Subject: net/sched: act_connmark: transition to percpu stats and rcu The tc action act_connmark was using shared stats and taking the per action lock in the datapath. Improve it by using percpu stats and rcu. perf before: - 13.55% tcf_connmark_act - 81.18% _raw_spin_lock 80.46% native_queued_spin_lock_slowpath perf after: - 2.85% tcf_connmark_act tdc results: 1..15 ok 1 2002 - Add valid connmark action with defaults ok 2 56a5 - Add valid connmark action with control pass ok 3 7c66 - Add valid connmark action with control drop ok 4 a913 - Add valid connmark action with control pipe ok 5 bdd8 - Add valid connmark action with control reclassify ok 6 b8be - Add valid connmark action with control continue ok 7 d8a6 - Add valid connmark action with control jump ok 8 aae8 - Add valid connmark action with zone argument ok 9 2f0b - Add valid connmark action with invalid zone argument ok 10 9305 - Add connmark action with unsupported argument ok 11 71ca - Add valid connmark action and replace it ok 12 5f8f - Add valid connmark action with cookie ok 13 c506 - Replace connmark with invalid goto chain control ok 14 6571 - Delete connmark action with valid index ok 15 3426 - Delete connmark action with invalid index Reviewed-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Signed-off-by: Paolo Abeni --- 
include/net/tc_act/tc_connmark.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/tc_act/tc_connmark.h b/include/net/tc_act/tc_connmark.h index 1f4cb477bb5d..e8dd77a96748 100644 --- a/include/net/tc_act/tc_connmark.h +++ b/include/net/tc_act/tc_connmark.h @@ -4,10 +4,15 @@ #include -struct tcf_connmark_info { - struct tc_action common; +struct tcf_connmark_parms { struct net *net; u16 zone; + struct rcu_head rcu; +}; + +struct tcf_connmark_info { + struct tc_action common; + struct tcf_connmark_parms __rcu *parms; }; #define to_connmark(a) ((struct tcf_connmark_info *)a) -- cgit v1.2.3 From 2954fe60e33da0f4de4d81a4c95c7dddb517d00c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 1 Feb 2023 14:45:22 +0100 Subject: netfilter: let reset rules clean out conntrack entries iptables/nftables support responding to tcp packets with tcp resets. The generated tcp reset packet passes through both output and postrouting netfilter hooks, but conntrack will never see them because the generated skb has its ->nfct pointer copied over from the packet that triggered the reset rule. If the reset rule is used for established connections, this may result in the conntrack entry to be around for a very long time (default timeout is 5 days). One way to avoid this would be to not copy the nf_conn pointer so that the reset packet passes through conntrack too. Problem is that output rules might not have the same conntrack zone setup as the prerouting ones, so it's possible that the reset skb won't find the correct entry. Generating a template entry for the skb seems error prone as well. Add an explicit "closing" function that switches a confirmed conntrack entry to closed state and wire this up for tcp. If the entry isn't confirmed, no action is needed because the conntrack entry will never be committed to the table.
Reported-by: Russel King Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 6a2019aaa464..3dbf947285be 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -125,6 +125,12 @@ struct nf_conn { union nf_conntrack_proto proto; }; +static inline struct nf_conn * +nf_ct_to_nf_conn(const struct nf_conntrack *nfct) +{ + return container_of(nfct, struct nf_conn, ct_general); +} + static inline struct nf_conn * nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash) { @@ -175,6 +181,8 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) void nf_ct_destroy(struct nf_conntrack *nfct); +void nf_conntrack_tcp_set_closing(struct nf_conn *ct); + /* decrement reference count on a conntrack */ static inline void nf_ct_put(struct nf_conn *ct) { -- cgit v1.2.3 From 784d4477f07b930df73bc77e842e03f1dacb83aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Feb 2023 16:28:40 +0000 Subject: ipv6: icmp6: add SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS This is a generic drop reason for any error detected in ndisc_parse_options(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dropreason.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/dropreason.h b/include/net/dropreason.h index ef3f65d135d3..239a5c0ea83e 100644 --- a/include/net/dropreason.h +++ b/include/net/dropreason.h @@ -76,6 +76,7 @@ FN(IPV6_NDISC_FRAG) \ FN(IPV6_NDISC_HOP_LIMIT) \ FN(IPV6_NDISC_BAD_CODE) \ + FN(IPV6_NDISC_BAD_OPTIONS) \ FNe(MAX) /** @@ -330,6 +331,8 @@ enum skb_drop_reason { SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT, /** @SKB_DROP_REASON_IPV6_NDISC_BAD_CODE: invalid NDISC icmp6 code. 
*/ SKB_DROP_REASON_IPV6_NDISC_BAD_CODE, + /** @SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS: invalid NDISC options. */ + SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS, /** * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be * used as a real 'reason' -- cgit v1.2.3 From c34b8bb11ebc135e970653bd6fc8e3f863fb6a81 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Feb 2023 16:28:41 +0000 Subject: ipv6: icmp6: add SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST Hosts can often receive neighbour discovery messages that are not for them. Use a dedicated drop reason to make clear the packet is dropped for this normal case. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dropreason.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/dropreason.h b/include/net/dropreason.h index 239a5c0ea83e..c0a3ea806cd5 100644 --- a/include/net/dropreason.h +++ b/include/net/dropreason.h @@ -77,6 +77,7 @@ FN(IPV6_NDISC_HOP_LIMIT) \ FN(IPV6_NDISC_BAD_CODE) \ FN(IPV6_NDISC_BAD_OPTIONS) \ + FN(IPV6_NDISC_NS_OTHERHOST) \ FNe(MAX) /** @@ -333,6 +334,10 @@ enum skb_drop_reason { SKB_DROP_REASON_IPV6_NDISC_BAD_CODE, /** @SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS: invalid NDISC options. */ SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS, + /** @SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST: NEIGHBOUR SOLICITATION + * for another host. + */ + SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST, /** * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be * used as a real 'reason' -- cgit v1.2.3 From 50bcfe8df7c73ce51762f65d218b4ef0cc5da3ee Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 17 Feb 2023 13:28:49 +0100 Subject: net: make default_rps_mask a per netns attribute That really was meant to be a per netns attribute from the beginning. The idea is that once proper isolation is in place in the main namespace, additional demux in the child namespaces will be redundant. Let's make child netns default rps mask empty by default. 
To avoid bloating the netns with a possibly large cpumask, allocate it on-demand during the first write operation. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/netns/core.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 8249060cf5d0..a91ef9f8de60 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -6,6 +6,7 @@ struct ctl_table_header; struct prot_inuse; +struct cpumask; struct netns_core { /* core sysctls */ @@ -17,6 +18,10 @@ struct netns_core { #ifdef CONFIG_PROC_FS struct prot_inuse __percpu *prot_inuse; #endif + +#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL) + struct cpumask *rps_default_mask; +#endif }; #endif -- cgit v1.2.3 From db4b49025c0c7116f1d2dfe8d5bbfc983ac054de Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sat, 18 Feb 2023 00:36:13 +0200 Subject: net/sched: Rename user cookie and act cookie struct tc_action->act_cookie is a user defined cookie, and the related struct flow_action_entry->act_cookie is used as a handle similar to struct flow_cls_offload->cookie. Rename tc_action->act_cookie to user_cookie, and flow_action_entry->act_cookie to cookie so their names would better fit their usage.
Signed-off-by: Paul Blakey Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- include/net/act_api.h | 2 +- include/net/flow_offload.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 2a6f443f0ef6..4ae0580b63ca 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -39,7 +39,7 @@ struct tc_action { struct gnet_stats_basic_sync __percpu *cpu_bstats; struct gnet_stats_basic_sync __percpu *cpu_bstats_hw; struct gnet_stats_queue __percpu *cpu_qstats; - struct tc_cookie __rcu *act_cookie; + struct tc_cookie __rcu *user_cookie; struct tcf_chain __rcu *goto_chain; u32 tcfa_flags; u8 hw_stats; diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 8c05455b1e34..9c5cb12f8a90 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -228,7 +228,7 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie); struct flow_action_entry { enum flow_action_id id; u32 hw_index; - unsigned long act_cookie; + unsigned long cookie; enum flow_action_hw_stats hw_stats; action_destr destructor; void *destructor_priv; @@ -321,7 +321,7 @@ struct flow_action_entry { u16 sid; } pppoe; }; - struct flow_action_cookie *cookie; /* user defined action cookie */ + struct flow_action_cookie *user_cookie; /* user defined action cookie */ }; struct flow_action { -- cgit v1.2.3 From 80cd22c35c9001fe72bf614d29439de41933deca Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sat, 18 Feb 2023 00:36:14 +0200 Subject: net/sched: cls_api: Support hardware miss to tc action For drivers to support partial offload of a filter's action list, add support for action miss to specify an action instance to continue from in sw. CT action in particular can't be fully offloaded, as new connections need to be handled in software. 
This imposes other limitations on the actions that can be offloaded together with the CT action, such as packet modifications. Assign each action on a filter's action list a unique miss_cookie which drivers can then use to fill action_miss part of the tc skb extension. On getting back this miss_cookie, find the action instance with relevant cookie and continue classifying from there. Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Reviewed-by: Simon Horman Reviewed-by: Marcelo Ricardo Leitner Acked-by: Jamal Hadi Salim Signed-off-by: Jakub Kicinski --- include/net/flow_offload.h | 1 + include/net/pkt_cls.h | 34 ++++++++++++++++++++-------------- include/net/sch_generic.h | 2 ++ 3 files changed, 23 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 9c5cb12f8a90..118082eae48c 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -229,6 +229,7 @@ struct flow_action_entry { enum flow_action_id id; u32 hw_index; unsigned long cookie; + u64 miss_cookie; enum flow_action_hw_stats hw_stats; action_destr destructor; void *destructor_priv; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ace437c6754b..b3b5b0b62f16 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -59,6 +59,8 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, void tcf_block_put(struct tcf_block *block); void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei); +int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action, + int police, struct tcf_proto *tp, u32 handle, bool used_action_miss); static inline bool tcf_block_shared(struct tcf_block *block) { @@ -229,6 +231,7 @@ struct tcf_exts { struct tc_action **actions; struct net *net; netns_tracker ns_tracker; + struct tcf_exts_miss_cookie_node *miss_cookie_node; #endif /* Map to export classifier specific extension TLV types to the * generic extensions 
API. Unsupported extensions must be set to 0. @@ -240,21 +243,11 @@ struct tcf_exts { static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net, int action, int police) { -#ifdef CONFIG_NET_CLS_ACT - exts->type = 0; - exts->nr_actions = 0; - /* Note: we do not own yet a reference on net. - * This reference might be taken later from tcf_exts_get_net(). - */ - exts->net = net; - exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), - GFP_KERNEL); - if (!exts->actions) - return -ENOMEM; +#ifdef CONFIG_NET_CLS + return tcf_exts_init_ex(exts, net, action, police, NULL, 0, false); +#else + return -EOPNOTSUPP; #endif - exts->action = action; - exts->police = police; - return 0; } /* Return false if the netns is being destroyed in cleanup_net(). Callers @@ -360,6 +353,18 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, return TC_ACT_OK; } +static inline int +tcf_exts_exec_ex(struct sk_buff *skb, struct tcf_exts *exts, int act_index, + struct tcf_result *res) +{ +#ifdef CONFIG_NET_CLS_ACT + return tcf_action_exec(skb, exts->actions + act_index, + exts->nr_actions - act_index, res); +#else + return TC_ACT_OK; +#endif +} + int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, u32 flags, @@ -584,6 +589,7 @@ int tc_setup_offload_action(struct flow_action *flow_action, void tc_cleanup_offload_action(struct flow_action *flow_action); int tc_setup_action(struct flow_action *flow_action, struct tc_action *actions[], + u32 miss_cookie_base, struct netlink_ext_ack *extack); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index af4aa66aaa4e..fab5ba3e61b7 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -369,6 +369,8 @@ struct tcf_proto_ops { struct nlattr **tca, struct netlink_ext_ack *extack); void (*tmplt_destroy)(void *tmplt_priv); + struct tcf_exts * 
(*get_exts)(const struct tcf_proto *tp, + u32 handle); /* rtnetlink specific */ int (*dump)(struct net*, struct tcf_proto*, void *, -- cgit v1.2.3 From 4d4266e3fd321fadb628ce02de641b129522c39c Mon Sep 17 00:00:00 2001 From: Ilias Apalodimas Date: Sat, 18 Feb 2023 00:21:30 +0200 Subject: page_pool: add a comment explaining the fragment counter usage When reading the page_pool code the first impression is that keeping two separate counters, one being the page refcnt and the other being fragment pp_frag_count, is counter-intuitive. However without that fragment counter we don't know when to reliably destroy or sync the outstanding DMA mappings. So let's add a comment explaining this part. Reviewed-by: Alexander Duyck Signed-off-by: Ilias Apalodimas Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/20230217222130.85205-1-ilias.apalodimas@linaro.org Signed-off-by: Jakub Kicinski --- include/net/page_pool.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 34bf531ffc8d..ddfa0b328677 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -277,6 +277,16 @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, unsigned int dma_sync_size, bool allow_direct); +/* pp_frag_count represents the number of writers who can update the page + * either by updating skb->data or via DMA mappings for the device. + * We can't rely on the page refcnt for that as we don't know who might be + * holding page references and we can't reliably destroy or sync DMA mappings + * of the fragments. + * + * When pp_frag_count reaches 0 we can either recycle the page if the page + * refcnt is 1 or return it back to the memory allocator and destroy any + * mappings we have. 
+ */ static inline void page_pool_fragment_page(struct page *page, long nr) { atomic_long_set(&page->pp_frag_count, nr); -- cgit v1.2.3 From fdf6491193e411087ae77bcbc6468e3e1cff99ed Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Feb 2023 17:24:00 +0100 Subject: netfilter: ctnetlink: make event listener tracking global pernet tracking doesn't work correctly because other netns might have set NETLINK_LISTEN_ALL_NSID on its event socket. In this case it's expected that events originating in other net namespaces are also received. Making pernet-tracking work while also honoring NETLINK_LISTEN_ALL_NSID requires much more intrusive changes both in netlink and nfnetlink, e.g. adding a 'setsockopt' callback that lets nfnetlink know that the event socket entered (or left) ALL_NSID mode. Move to global tracking instead: if there is an event socket anywhere on the system, all net namespaces which have conntrack enabled and use autobind mode will allocate the ecache extension. netlink_has_listeners() returns false only if the given group has no subscribers in any net namespace, the 'net' argument passed to nfnetlink_has_listeners is only used to derive the protocol (nfnetlink), it has no other effect. For proper NETLINK_LISTEN_ALL_NSID-aware pernet tracking of event listeners a new netlink_has_net_listeners() is also needed.
Fixes: 90d1daa45849 ("netfilter: conntrack: add nf_conntrack_events autodetect mode") Reported-by: Bryce Kahle Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index e1290c159184..1f463b3957c7 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -95,7 +95,6 @@ struct nf_ip_net { struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS - u8 ctnetlink_has_listener; bool ecache_dwork_pending; #endif u8 sysctl_log_invalid; /* Log invalid packets */ -- cgit v1.2.3 From 68ba44639537de6f91fe32783766322d41848127 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 22 Feb 2023 12:07:21 -0500 Subject: sctp: add a refcnt in sctp_stream_priorities to avoid a nested loop With this refcnt added in sctp_stream_priorities, we don't need to traverse all streams to check if the prio is used by other streams when freeing one stream's prio in sctp_sched_prio_free_sid(). This can avoid a nested loop (up to 65535 * 65535), which may cause a soft lockup as Ying reported: watchdog: BUG: soft lockup - CPU#23 stuck for 26s! [ksoftirqd/23:136] Call Trace: sctp_sched_prio_free_sid+0xab/0x100 [sctp] sctp_stream_free_ext+0x64/0xa0 [sctp] sctp_stream_free+0x31/0x50 [sctp] sctp_association_free+0xa5/0x200 [sctp] Note that it doesn't need to use refcount_t type for this counter, as access to it is always protected by the sock lock. v1->v2: - add a check in sctp_sched_prio_set to avoid the possible prio_head refcnt overflow.
Fixes: 9ed7bfc79542 ("sctp: fix memory leak in sctp_stream_outq_migrate()") Reported-by: Ying Xu Acked-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Link: https://lore.kernel.org/r/825eb0c905cb864991eba335f4a2b780e543f06b.1677085641.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- include/net/sctp/structs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index afa3781e3ca2..e1f6e7fc2b11 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1412,6 +1412,7 @@ struct sctp_stream_priorities { /* The next stream in line */ struct sctp_stream_out_ext *next; __u16 prio; + __u16 users; }; struct sctp_stream_out_ext { -- cgit v1.2.3 From 4a02426787bf024dafdb79b362285ee325de3f5e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 3 Mar 2023 10:58:56 +0100 Subject: netfilter: tproxy: fix deadlock due to missing BH disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xtables packet traverser performs an unconditional local_bh_disable(), but the nf_tables evaluation loop does not. Functions that are called from either xtables or nftables must assume that they can be called in process context. inet_twsk_deschedule_put() assumes that no softirq interrupt can occur. If tproxy is used from nf_tables it's possible that we'll deadlock trying to acquire a lock already held in process context. Add a small helper that takes care of this and use it.
Link: https://lore.kernel.org/netfilter-devel/401bd6ed-314a-a196-1cdc-e13c720cc8f2@balasys.hu/ Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support") Reported-and-tested-by: Major Dávid Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tproxy.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h index 82d0e41b76f2..faa108b1ba67 100644 --- a/include/net/netfilter/nf_tproxy.h +++ b/include/net/netfilter/nf_tproxy.h @@ -17,6 +17,13 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk) return false; } +static inline void nf_tproxy_twsk_deschedule_put(struct inet_timewait_sock *tw) +{ + local_bh_disable(); + inet_twsk_deschedule_put(tw); + local_bh_enable(); +} + /* assign a socket to the skb -- consumes sk */ static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) { -- cgit v1.2.3