From 2d52c9e43a48387a323ab6a4fed755d55040e625 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 29 Jul 2025 17:29:21 +0200 Subject: wifi: rt2800: select CONFIG_RT2X00_LIB as needed The rt2800 specific code requires the more general library code: ERROR: modpost: "rt2x00queue_get_entry" [drivers/net/wireless/ralink/rt2x00/rt2x00mmio.ko] undefined! ERROR: modpost: "rt2x00lib_dmastart" [drivers/net/wireless/ralink/rt2x00/rt2x00mmio.ko] undefined! ERROR: modpost: "rt2x00lib_dmadone" [drivers/net/wireless/ralink/rt2x00/rt2x00mmio.ko] undefined! ERROR: modpost: "rt2x00lib_rxdone" [drivers/net/wireless/ralink/rt2x00/rt2x00mmio.ko] undefined! ERROR: modpost: "rt2x00lib_txdone_nomatch" [drivers/net/wireless/ralink/rt2x00/rt2800lib.ko] undefined! ERROR: modpost: "rt2x00lib_txdone" [drivers/net/wireless/ralink/rt2x00/rt2800lib.ko] undefined! ERROR: modpost: "rt2x00queue_get_entry" [drivers/net/wireless/ralink/rt2x00/rt2800lib.ko] undefined! ERROR: modpost: "rt2x00lib_get_bssidx" [drivers/net/wireless/ralink/rt2x00/rt2800lib.ko] undefined! ERROR: modpost: "rt2x00mac_conf_tx" [drivers/net/wireless/ralink/rt2x00/rt2800lib.ko] undefined! ERROR: modpost: "rt2x00lib_txdone_noinfo" [drivers/net/wireless/ralink/rt2x00/rt2800lib.ko] undefined! Select the symbol to avoid this build failure. Fixes: 7f6109086c9e ("wifi: rt2800: move 2x00soc to 2800soc") Signed-off-by: Arnd Bergmann Acked-by: Stanislaw Gruszka Reviewed-by: Sergio Paracuellos Link: https://patch.msgid.link/20250729152924.2462423-1-arnd@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/ralink/rt2x00/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ralink/rt2x00/Kconfig b/drivers/net/wireless/ralink/rt2x00/Kconfig index 4d98b7723c56..d66fc839c3ce 100644 --- a/drivers/net/wireless/ralink/rt2x00/Kconfig +++ b/drivers/net/wireless/ralink/rt2x00/Kconfig @@ -225,6 +225,7 @@ config RT2800_LIB_MMIO config RT2X00_LIB_MMIO tristate + select RT2X00_LIB config RT2X00_LIB_PCI tristate -- cgit v1.2.3 From f64768bec0d57988782d26d1ea7ae21f959309dd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 31 Jul 2025 09:58:33 +0200 Subject: wifi: rt2x00: fix CRC_CCITT dependency Compile-testing this driver on Arm platforms shows a link failure when the CRC functions are not part of the kernel: x86_64-linux-ld: drivers/net/wireless/ralink/rt2x00/rt2800lib.o: in function `rt2800_check_firmware': rt2800lib.c:(.text+0x20e5): undefined reference to `crc_ccitt' Move the select statement to the correct Kconfig symbol to match the call site. Fixes: 311b05e235cf ("wifi: rt2x00: add COMPILE_TEST") Signed-off-by: Arnd Bergmann Acked-by: Stanislaw Gruszka Reviewed-by: Sergio Paracuellos Link: https://patch.msgid.link/20250731075837.1969136-1-arnd@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/ralink/rt2x00/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/Kconfig b/drivers/net/wireless/ralink/rt2x00/Kconfig index d66fc839c3ce..17f063fc0b57 100644 --- a/drivers/net/wireless/ralink/rt2x00/Kconfig +++ b/drivers/net/wireless/ralink/rt2x00/Kconfig @@ -66,7 +66,6 @@ config RT2800PCI select RT2X00_LIB_PCI select RT2X00_LIB_FIRMWARE select RT2X00_LIB_CRYPTO - select CRC_CCITT select EEPROM_93CX6 help This adds support for rt27xx/rt28xx/rt30xx wireless chipset family. @@ -142,7 +141,6 @@ config RT2800USB select RT2X00_LIB_USB select RT2X00_LIB_FIRMWARE select RT2X00_LIB_CRYPTO - select CRC_CCITT help This adds support for rt27xx/rt28xx/rt30xx wireless chipset family. Supported chips: RT2770, RT2870 & RT3070, RT3071 & RT3072 @@ -217,6 +215,7 @@ config RT2800SOC config RT2800_LIB tristate + select CRC_CCITT config RT2800_LIB_MMIO tristate -- cgit v1.2.3 From 26e84445f02ce6b2fe5f3e0e28ff7add77f35e08 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Wed, 13 Aug 2025 16:52:36 +0300 Subject: wifi: cfg80211: fix use-after-free in cmp_bss() Following bss_free() quirk introduced in commit 776b3580178f ("cfg80211: track hidden SSID networks properly"), adjust cfg80211_update_known_bss() to free the last beacon frame elements only if they're not shared via the corresponding 'hidden_beacon_bss' pointer. Reported-by: syzbot+30754ca335e6fb7e3092@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=30754ca335e6fb7e3092 Fixes: 3ab8227d3e7d ("cfg80211: refactor cfg80211_bss_update") Signed-off-by: Dmitry Antipov Link: https://patch.msgid.link/20250813135236.799384-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg --- net/wireless/scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index a8339ed52404..6c7b7c3828a4 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1916,7 +1916,8 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, */ f = rcu_access_pointer(new->pub.beacon_ies); - kfree_rcu((struct cfg80211_bss_ies *)f, rcu_head); + if (!new->pub.hidden_beacon_bss) + kfree_rcu((struct cfg80211_bss_ies *)f, rcu_head); return false; } -- cgit v1.2.3 From 9cb83d4be0b9b697eae93d321e0da999f9cdfcfc Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 22 Aug 2025 13:08:39 +0800 Subject: wifi: brcmfmac: fix use-after-free when rescheduling brcmf_btcoex_info work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The brcmf_btcoex_detach() only shuts down the btcoex timer, if the flag timer_on is false. However, the brcmf_btcoex_timerfunc(), which runs as timer handler, sets timer_on to false. This creates critical race conditions: 1.If brcmf_btcoex_detach() is called while brcmf_btcoex_timerfunc() is executing, it may observe timer_on as false and skip the call to timer_shutdown_sync(). 2.The brcmf_btcoex_timerfunc() may then reschedule the brcmf_btcoex_info worker after the cancel_work_sync() has been executed, resulting in use-after-free bugs. The use-after-free bugs occur in two distinct scenarios, depending on the timing of when the brcmf_btcoex_info struct is freed relative to the execution of its worker thread. Scenario 1: Freed before the worker is scheduled The brcmf_btcoex_info is deallocated before the worker is scheduled. A race condition can occur when schedule_work(&bt_local->work) is called after the target memory has been freed. The sequence of events is detailed below: CPU0 | CPU1 brcmf_btcoex_detach | brcmf_btcoex_timerfunc | bt_local->timer_on = false; if (cfg->btcoex->timer_on) | ... | cancel_work_sync(); | ... | kfree(cfg->btcoex); // FREE | | schedule_work(&bt_local->work); // USE Scenario 2: Freed after the worker is scheduled The brcmf_btcoex_info is freed after the worker has been scheduled but before or during its execution. In this case, statements within the brcmf_btcoex_handler() — such as the container_of macro and subsequent dereferences of the brcmf_btcoex_info object will cause a use-after-free access. The following timeline illustrates this scenario: CPU0 | CPU1 brcmf_btcoex_detach | brcmf_btcoex_timerfunc | bt_local->timer_on = false; if (cfg->btcoex->timer_on) | ... | cancel_work_sync(); | ... | schedule_work(); // Reschedule | kfree(cfg->btcoex); // FREE | brcmf_btcoex_handler() // Worker /* | btci = container_of(....); // USE The kfree() above could | ... also occur at any point | btci-> // USE during the worker's execution| */ | To resolve the race conditions, drop the conditional check and call timer_shutdown_sync() directly. It can deactivate the timer reliably, regardless of its current state. Once stopped, the timer_on state is then set to false. Fixes: 61730d4dfffc ("brcmfmac: support critical protocol API for DHCP") Acked-by: Arend van Spriel Signed-off-by: Duoming Zhou Link: https://patch.msgid.link/20250822050839.4413-1-duoming@zju.edu.cn Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c index 69ef8cf203d2..67c0c5a92f99 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c @@ -393,10 +393,8 @@ void brcmf_btcoex_detach(struct brcmf_cfg80211_info *cfg) if (!cfg->btcoex) return; - if (cfg->btcoex->timer_on) { - cfg->btcoex->timer_on = false; - timer_shutdown_sync(&cfg->btcoex->timer); - } + timer_shutdown_sync(&cfg->btcoex->timer); + cfg->btcoex->timer_on = false; cancel_work_sync(&cfg->btcoex->work); -- cgit v1.2.3 From a33b375ab5b3a9897a0ab76be8258d9f6b748628 Mon Sep 17 00:00:00 2001 From: Liao Yuanhong Date: Mon, 25 Aug 2025 10:29:11 +0800 Subject: wifi: mac80211: fix incorrect type for ret The variable ret is declared as a u32 type, but it is assigned a value of -EOPNOTSUPP. Since unsigned types cannot correctly represent negative values, the type of ret should be changed to int. Signed-off-by: Liao Yuanhong Link: https://patch.msgid.link/20250825022911.139377-1-liaoyuanhong@vivo.com Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 181bcb34b795..55105d238d6b 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1416,7 +1416,7 @@ drv_get_ftm_responder_stats(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct cfg80211_ftm_responder_stats *ftm_stats) { - u32 ret = -EOPNOTSUPP; + int ret = -EOPNOTSUPP; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); -- cgit v1.2.3 From 7e2f3213e85eba00acb4cfe6d71647892d63c3a1 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Tue, 26 Aug 2025 18:54:37 +1000 Subject: wifi: mac80211: increase scan_ies_len for S1G Currently the S1G capability element is not taken into account for the scan_ies_len, which leads to a buffer length validation failure in ieee80211_prep_hw_scan() and subsequent WARN in __ieee80211_start_scan(). This prevents hw scanning from functioning. To fix ensure we accommodate for the S1G capability length. Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250826085437.3493-1-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- net/mac80211/main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 9c8f18b258a6..3ae6104e5cb2 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1111,7 +1111,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) int result, i; enum nl80211_band band; int channels, max_bitrates; - bool supp_ht, supp_vht, supp_he, supp_eht; + bool supp_ht, supp_vht, supp_he, supp_eht, supp_s1g; struct cfg80211_chan_def dflt_chandef = {}; if (ieee80211_hw_check(hw, QUEUE_CONTROL) && @@ -1227,6 +1227,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) supp_vht = false; supp_he = false; supp_eht = false; + supp_s1g = false; for (band = 0; band < NUM_NL80211_BANDS; band++) { const struct ieee80211_sband_iftype_data *iftd; struct ieee80211_supported_band *sband; @@ -1274,6 +1275,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) max_bitrates = sband->n_bitrates; supp_ht = supp_ht || sband->ht_cap.ht_supported; supp_vht = supp_vht || sband->vht_cap.vht_supported; + supp_s1g = supp_s1g || sband->s1g_cap.s1g; for_each_sband_iftype_data(sband, i, iftd) { u8 he_40_mhz_cap; @@ -1406,6 +1408,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->scan_ies_len += 2 + sizeof(struct ieee80211_vht_cap); + if (supp_s1g) + local->scan_ies_len += 2 + sizeof(struct ieee80211_s1g_cap); + /* * HE cap element is variable in size - set len to allow max size */ if (supp_he) { -- cgit v1.2.3 From 87b07a1fbc6b5c23d3b3584ab4288bc9106d3274 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 15 Jul 2025 15:33:25 -0700 Subject: wifi: mt76: mt7996: Initialize hdr before passing to skb_put_data() A new warning in clang [1] points out a couple of places where a hdr variable is not initialized then passed along to skb_put_data(). drivers/net/wireless/mediatek/mt76/mt7996/mcu.c:1894:21: warning: variable 'hdr' is uninitialized when passed as a const pointer argument here [-Wuninitialized-const-pointer] 1894 | skb_put_data(skb, &hdr, sizeof(hdr)); | ^~~ drivers/net/wireless/mediatek/mt76/mt7996/mcu.c:3386:21: warning: variable 'hdr' is uninitialized when passed as a const pointer argument here [-Wuninitialized-const-pointer] 3386 | skb_put_data(skb, &hdr, sizeof(hdr)); | ^~~ Zero initialize these headers as done in other places in the driver when there is nothing stored in the header. Cc: stable@vger.kernel.org Fixes: 98686cd21624 ("wifi: mt76: mt7996: add driver for MediaTek Wi-Fi 7 (802.11be) devices") Link: https://github.com/llvm/llvm-project/commit/00dacf8c22f065cb52efb14cd091d441f19b319e [1] Closes: https://github.com/ClangBuiltLinux/linux/issues/2104 Signed-off-by: Nathan Chancellor Link: https://patch.msgid.link/20250715-mt7996-fix-uninit-const-pointer-v1-1-b5d8d11d7b78@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 3593fd40c51b..fe1b34386de2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -1879,8 +1879,8 @@ mt7996_mcu_get_mmps_mode(enum ieee80211_smps_mode smps) int mt7996_mcu_set_fixed_rate_ctrl(struct mt7996_dev *dev, void *data, u16 version) { + struct uni_header hdr = {}; struct ra_fixed_rate *req; - struct uni_header hdr; struct sk_buff *skb; struct tlv *tlv; int len; @@ -3372,7 +3372,7 @@ int mt7996_mcu_set_hdr_trans(struct mt7996_dev *dev, bool hdr_trans) { struct { u8 __rsv[4]; - } __packed hdr; + } __packed hdr = {}; struct hdr_trans_blacklist *req_blacklist; struct hdr_trans_en *req_en; struct sk_buff *skb; -- cgit v1.2.3 From 87f38519d27a514c9909f84b8f1334125df9778e Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Wed, 16 Jul 2025 18:54:01 +0200 Subject: wifi: mt76: mt7921: don't disconnect when CSA to DFS chan When station mode, don't disconnect when we get channel switch from AP to DFS channel. Most APs send CSA request after pass background CAC. In other case we should disconnect after detect beacon miss. Without patch when we get CSA to DFS channel get: "kernel: wlo1: preparing for channel switch failed, disconnecting" Fixes: 8aa2f59260eb ("wifi: mt76: mt7921: introduce CSA support") Signed-off-by: Janusz Dziedzic Link: https://patch.msgid.link/20250716165443.28354-1-janusz.dziedzic@gmail.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7921/main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 40954e64c7fc..5881040ac195 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -1459,11 +1459,8 @@ static int mt7921_pre_channel_switch(struct ieee80211_hw *hw, if (vif->type != NL80211_IFTYPE_STATION || !vif->cfg.assoc) return -EOPNOTSUPP; - /* Avoid beacon loss due to the CAC(Channel Availability Check) time - * of the AP. - */ if (!cfg80211_chandef_usable(hw->wiphy, &chsw->chandef, - IEEE80211_CHAN_RADAR)) + IEEE80211_CHAN_DISABLED)) return -EOPNOTSUPP; return 0; -- cgit v1.2.3 From 9f15701370ec15fbf1f6a1cbbf584b0018d036b5 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Sun, 27 Jul 2025 07:04:13 -0700 Subject: wifi: mt76: mt7925: fix locking in mt7925_change_vif_links() &dev->mt76.mutex lock is taken using mt792x_mutex_acquire(dev) but not released in one of the error paths, add the unlock to fix it. Fixes: 5cd0bd815c8a ("wifi: mt76: mt7925: fix NULL deref check in mt7925_change_vif_links") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202503031055.3ZRqxhAl-lkp@intel.com/ Signed-off-by: Harshit Mogalapalli Link: https://patch.msgid.link/20250727140416.1153406-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index a8d25b7d47d0..103909307518 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -2069,8 +2069,10 @@ mt7925_change_vif_links(struct ieee80211_hw *hw, struct ieee80211_vif *vif, GFP_KERNEL); mlink = devm_kzalloc(dev->mt76.dev, sizeof(*mlink), GFP_KERNEL); - if (!mconf || !mlink) + if (!mconf || !mlink) { + mt792x_mutex_release(dev); return -ENOMEM; + } } mconfs[link_id] = mconf; -- cgit v1.2.3 From 55424e7b9eeb141d9c8d8a8740ee131c28490425 Mon Sep 17 00:00:00 2001 From: Ming Yen Hsieh Date: Mon, 28 Jul 2025 13:26:12 +0800 Subject: wifi: mt76: mt7925: fix the wrong bss cleanup for SAP When in SAP mode, if a STA disconnect, the SAP's BSS should not be cleared. Fixes: 0ebb60da8416 ("wifi: mt76: mt7925: adjust rm BSS flow to prevent next connection failure") Cc: stable@vger.kernel.org Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20250728052612.39751-1-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 103909307518..b0e053b15227 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1191,6 +1191,9 @@ mt7925_mac_sta_remove_links(struct mt792x_dev *dev, struct ieee80211_vif *vif, struct mt792x_bss_conf *mconf; struct mt792x_link_sta *mlink; + if (vif->type == NL80211_IFTYPE_AP) + break; + link_sta = mt792x_sta_to_link_sta(vif, sta, link_id); if (!link_sta) continue; -- cgit v1.2.3 From c22769de25095c6777e8acb68a1349a3257fc955 Mon Sep 17 00:00:00 2001 From: Ming Yen Hsieh Date: Mon, 18 Aug 2025 10:02:03 +0800 Subject: wifi: mt76: mt7925u: use connac3 tx aggr check in tx complete MT7925 is a connac3 device; using the connac2 helper mis-parses TXWI and breaks AMPDU/BA accounting. Use the connac3-specific helper mt7925_tx_check_aggr() instead, Cc: stable@vger.kernel.org Fixes: c948b5da6bbe ("wifi: mt76: mt7925: add Mediatek Wi-Fi7 driver for mt7925 chips") Reported-by: Nick Morrow Tested-by: Nick Morrow Tested-on: Netgear A9000 USB WiFi adapter Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20250818020203.992338-1-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c index 75823c9fd3a1..b581ab9427f2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c @@ -1449,7 +1449,7 @@ void mt7925_usb_sdio_tx_complete_skb(struct mt76_dev *mdev, sta = wcid_to_sta(wcid); if (sta && likely(e->skb->protocol != cpu_to_be16(ETH_P_PAE))) - mt76_connac2_tx_check_aggr(sta, txwi); + mt7925_tx_check_aggr(sta, e->skb, wcid); skb_pull(e->skb, headroom); mt76_tx_complete_skb(mdev, e->wcid, e->skb); -- cgit v1.2.3 From dd6e89cad9951acef3723f3f21b2e892a23b371b Mon Sep 17 00:00:00 2001 From: Ming Yen Hsieh Date: Mon, 18 Aug 2025 11:02:01 +0800 Subject: wifi: mt76: mt7925: skip EHT MLD TLV on non-MLD and pass conn_state for sta_cmd Return early in mt7925_mcu_sta_eht_mld_tlv() for non-MLD vifs to avoid bogus MLD TLVs, and pass the proper connection state to sta_basic TLV. Cc: stable@vger.kernel.org Fixes: cb1353ef3473 ("wifi: mt76: mt7925: integrate *mlo_sta_cmd and *sta_cmd") Reported-by: Tal Inbar Tested-by: Tal Inbar Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20250818030201.997940-1-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index 300c863f0e3e..cd457be26523 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -1834,13 +1834,13 @@ mt7925_mcu_sta_eht_mld_tlv(struct sk_buff *skb, struct tlv *tlv; u16 eml_cap; + if (!ieee80211_vif_is_mld(vif)) + return; + tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_EHT_MLD, sizeof(*eht_mld)); eht_mld = (struct sta_rec_eht_mld *)tlv; eht_mld->mld_type = 0xff; - if (!ieee80211_vif_is_mld(vif)) - return; - ext_capa = cfg80211_get_iftype_ext_capa(wiphy, ieee80211_vif_type_p2p(vif)); if (!ext_capa) @@ -1912,6 +1912,7 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, struct mt76_dev *dev = phy->dev; struct mt792x_bss_conf *mconf; struct sk_buff *skb; + int conn_state; mconf = mt792x_vif_to_link(mvif, info->wcid->link_id); @@ -1920,10 +1921,13 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, if (IS_ERR(skb)) return PTR_ERR(skb); + conn_state = info->enable ? CONN_STATE_PORT_SECURE : + CONN_STATE_DISCONNECT; + if (info->enable && info->link_sta) { mt76_connac_mcu_sta_basic_tlv(dev, skb, info->link_conf, info->link_sta, - info->enable, info->newly); + conn_state, info->newly); mt7925_mcu_sta_phy_tlv(skb, info->vif, info->link_sta); mt7925_mcu_sta_ht_tlv(skb, info->link_sta); mt7925_mcu_sta_vht_tlv(skb, info->link_sta); -- cgit v1.2.3 From 4c2334587b0a13b8f4eda1336ae657297fcd743b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 13 Aug 2025 14:11:06 +0200 Subject: wifi: mt76: prevent non-offchannel mgmt tx during scan/roc Only put probe request packets in the offchannel queue if IEEE80211_TX_CTRL_DONT_USE_RATE_MASK is set and IEEE80211_TX_CTL_TX_OFFCHAN is unset. Fixes: 0b3be9d1d34e ("wifi: mt76: add separate tx scheduling queue for off-channel tx") Reported-by: Chad Monroe Link: https://patch.msgid.link/20250813121106.81559-2-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/tx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index e6cf16706667..03b042fdf997 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -332,6 +332,7 @@ mt76_tx(struct mt76_phy *phy, struct ieee80211_sta *sta, struct mt76_wcid *wcid, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *)skb->data; struct sk_buff_head *head; if (mt76_testmode_enabled(phy)) { @@ -349,7 +350,8 @@ mt76_tx(struct mt76_phy *phy, struct ieee80211_sta *sta, info->hw_queue |= FIELD_PREP(MT_TX_HW_QUEUE_PHY, phy->band_idx); if ((info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) || - (info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK)) + ((info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK) && + ieee80211_is_probe_req(hdr->frame_control))) head = &wcid->tx_offchannel; else head = &wcid->tx_pending; -- cgit v1.2.3 From f30906c55a400a9b7fc677e3f4c614b9069bd4a8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 13 Aug 2025 14:11:05 +0200 Subject: wifi: mt76: mt7996: disable beacons when going offchannel Avoid leaking beacons on unrelated channels during scanning/roc Fixes: c56d6edebc1f ("wifi: mt76: mt7996: use emulated hardware scan support") Reported-by: Chad Monroe Link: https://patch.msgid.link/20250813121106.81559-1-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 46 +++++++++++++--------- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 5 +++ drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 11 ++++-- drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h | 1 + 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 226534490792..a9f7e5626dcd 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -1694,43 +1694,53 @@ mt7996_wait_reset_state(struct mt7996_dev *dev, u32 state) static void mt7996_update_vif_beacon(void *priv, u8 *mac, struct ieee80211_vif *vif) { - struct ieee80211_hw *hw = priv; + struct ieee80211_bss_conf *link_conf; + struct mt7996_phy *phy = priv; + struct mt7996_dev *dev = phy->dev; + unsigned int link_id; + switch (vif->type) { case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_AP: - mt7996_mcu_add_beacon(hw, vif, &vif->bss_conf); break; default: - break; + return; + } + + for_each_vif_active_link(vif, link_conf, link_id) { + struct mt7996_vif_link *link; + + link = mt7996_vif_link(dev, vif, link_id); + if (!link || link->phy != phy) + continue; + + mt7996_mcu_add_beacon(dev->mt76.hw, vif, link_conf); } } +void mt7996_mac_update_beacons(struct mt7996_phy *phy) +{ + ieee80211_iterate_active_interfaces(phy->mt76->hw, + IEEE80211_IFACE_ITER_RESUME_ALL, + mt7996_update_vif_beacon, phy); +} + static void mt7996_update_beacons(struct mt7996_dev *dev) { struct mt76_phy *phy2, *phy3; - ieee80211_iterate_active_interfaces(dev->mt76.hw, - IEEE80211_IFACE_ITER_RESUME_ALL, - mt7996_update_vif_beacon, dev->mt76.hw); + mt7996_mac_update_beacons(&dev->phy); phy2 = dev->mt76.phys[MT_BAND1]; - if (!phy2) - return; - - ieee80211_iterate_active_interfaces(phy2->hw, - IEEE80211_IFACE_ITER_RESUME_ALL, - mt7996_update_vif_beacon, phy2->hw); + if (phy2) + mt7996_mac_update_beacons(phy2->priv); phy3 = dev->mt76.phys[MT_BAND2]; - if (!phy3) - return; - - ieee80211_iterate_active_interfaces(phy3->hw, - IEEE80211_IFACE_ITER_RESUME_ALL, - mt7996_update_vif_beacon, phy3->hw); + if (phy3) + mt7996_mac_update_beacons(phy3->priv); } void mt7996_tx_token_put(struct mt7996_dev *dev) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 92b57bcce749..84f731b387d2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -516,6 +516,9 @@ int mt7996_set_channel(struct mt76_phy *mphy) struct mt7996_phy *phy = mphy->priv; int ret; + if (mphy->offchannel) + mt7996_mac_update_beacons(phy); + ret = mt7996_mcu_set_chan_info(phy, UNI_CHANNEL_SWITCH); if (ret) goto out; @@ -533,6 +536,8 @@ int mt7996_set_channel(struct mt76_phy *mphy) mt7996_mac_reset_counters(phy); phy->noise = 0; + if (!mphy->offchannel) + mt7996_mac_update_beacons(phy); out: ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work, diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index fe1b34386de2..0be03eb3cf46 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -2755,13 +2755,15 @@ int mt7996_mcu_add_beacon(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf) { struct mt7996_dev *dev = mt7996_hw_dev(hw); - struct mt76_vif_link *mlink = mt76_vif_conf_link(&dev->mt76, vif, link_conf); + struct mt7996_vif_link *link = mt7996_vif_conf_link(dev, vif, link_conf); + struct mt76_vif_link *mlink = link ? &link->mt76 : NULL; struct ieee80211_mutable_offsets offs; struct ieee80211_tx_info *info; struct sk_buff *skb, *rskb; struct tlv *tlv; struct bss_bcn_content_tlv *bcn; int len, extra_len = 0; + bool enabled = link_conf->enable_beacon; if (link_conf->nontransmitted) return 0; @@ -2769,13 +2771,16 @@ int mt7996_mcu_add_beacon(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (!mlink) return -EINVAL; + if (link->phy && link->phy->mt76->offchannel) + enabled = false; + rskb = __mt7996_mcu_alloc_bss_req(&dev->mt76, mlink, MT7996_MAX_BSS_OFFLOAD_SIZE); if (IS_ERR(rskb)) return PTR_ERR(rskb); skb = ieee80211_beacon_get_template(hw, vif, &offs, link_conf->link_id); - if (link_conf->enable_beacon && !skb) { + if (enabled && !skb) { dev_kfree_skb(rskb); return -EINVAL; } @@ -2794,7 +2799,7 @@ int mt7996_mcu_add_beacon(struct ieee80211_hw *hw, struct ieee80211_vif *vif, len = ALIGN(sizeof(*bcn) + MT_TXD_SIZE + extra_len, 4); tlv = mt7996_mcu_add_uni_tlv(rskb, UNI_BSS_INFO_BCN_CONTENT, len); bcn = (struct bss_bcn_content_tlv *)tlv; - bcn->enable = link_conf->enable_beacon; + bcn->enable = enabled; if (!bcn->enable) goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index 33ac16b64ef1..8509d508e1e1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -732,6 +732,7 @@ void mt7996_mac_write_txwi(struct mt7996_dev *dev, __le32 *txwi, struct sk_buff *skb, struct mt76_wcid *wcid, struct ieee80211_key_conf *key, int pid, enum mt76_txq_id qid, u32 changed); +void mt7996_mac_update_beacons(struct mt7996_phy *phy); void mt7996_mac_set_coverage_class(struct mt7996_phy *phy); void mt7996_mac_work(struct work_struct *work); void mt7996_mac_reset_work(struct work_struct *work); -- cgit v1.2.3 From 4be3b46ec5190dc79cd38e3750480b2c66a791ad Mon Sep 17 00:00:00 2001 From: Chad Monroe Date: Fri, 8 Aug 2025 13:29:48 +0000 Subject: wifi: mt76: mt7996: use the correct vif link for scanning/roc restore fix which was dropped during MLO rework Fixes: f0b0b239b8f3 ("wifi: mt76: mt7996: rework mt7996_mac_write_txwi() for MLO support") Signed-off-by: Chad Monroe Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/180fffd409aa57f535a3d2c1951e41ae398ce09e.1754659732.git.chad@monroe.io Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index a9f7e5626dcd..d6531b74be1f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -903,8 +903,12 @@ void mt7996_mac_write_txwi(struct mt7996_dev *dev, __le32 *txwi, IEEE80211_TX_CTRL_MLO_LINK); mvif = vif ? (struct mt7996_vif *)vif->drv_priv : NULL; - if (mvif) - mlink = rcu_dereference(mvif->mt76.link[link_id]); + if (mvif) { + if (wcid->offchannel) + mlink = rcu_dereference(mvif->mt76.offchannel_link); + if (!mlink) + mlink = rcu_dereference(mvif->mt76.link[link_id]); + } if (mlink) { omac_idx = mlink->omac_idx; -- cgit v1.2.3 From 0300545b8a113e96ee260a7c142be846d391a620 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Aug 2025 10:53:47 +0200 Subject: wifi: mt76: mt7996: fix crash on some tx status reports When a wcid can't be found, link_sta can be stale from a previous batch. The code currently assumes that if link_sta is set, wcid is also non-zero. Fix wcid NULL pointer dereference by resetting link_sta when a wcid entry can't be found. Fixes: 62da647a2b20 ("wifi: mt76: mt7996: Add MLO support to mt7996_tx_check_aggr()") Link: https://patch.msgid.link/20250827085352.51636-1-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index d6531b74be1f..837deb41ae13 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -1247,8 +1247,10 @@ mt7996_mac_tx_free(struct mt7996_dev *dev, void *data, int len) idx = FIELD_GET(MT_TXFREE_INFO_WLAN_ID, info); wcid = mt76_wcid_ptr(dev, idx); sta = wcid_to_sta(wcid); - if (!sta) + if (!sta) { + link_sta = NULL; goto next; + } link_sta = rcu_dereference(sta->link[wcid->link_id]); if (!link_sta) -- cgit v1.2.3 From a3c99ef88a084e1c2b99dd56bbfa7f89c9be3e92 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Aug 2025 10:53:48 +0200 Subject: wifi: mt76: do not add non-sta wcid entries to the poll list Polling and airtime reporting is valid for station entries only Link: https://patch.msgid.link/20250827085352.51636-2-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mac80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 3afe4c4cd7bb..6b2641a9ae9a 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -1690,7 +1690,7 @@ EXPORT_SYMBOL_GPL(mt76_wcid_cleanup); void mt76_wcid_add_poll(struct mt76_dev *dev, struct mt76_wcid *wcid) { - if (test_bit(MT76_MCU_RESET, &dev->phy.state)) + if (test_bit(MT76_MCU_RESET, &dev->phy.state) || !wcid->sta) return; spin_lock_bh(&dev->sta_poll_lock); -- cgit v1.2.3 From 4a522b01e368eec58d182ecc47d24f49a39e440d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Aug 2025 10:53:49 +0200 Subject: wifi: mt76: mt7996: add missing check for rx wcid entries Non-station wcid entries must not be passed to the rx functions. In case of the global wcid entry, it could even lead to corruption in the wcid array due to pointer being casted to struct mt7996_sta_link using container_of. Fixes: 7464b12b7d92 ("wifi: mt76: mt7996: rework mt7996_rx_get_wcid to support MLO") Link: https://patch.msgid.link/20250827085352.51636-3-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 837deb41ae13..b3fcca9bbb95 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -62,7 +62,7 @@ static struct mt76_wcid *mt7996_rx_get_wcid(struct mt7996_dev *dev, int i; wcid = mt76_wcid_ptr(dev, idx); - if (!wcid) + if (!wcid || !wcid->sta) return NULL; if (!mt7996_band_valid(dev, band_idx)) -- cgit v1.2.3 From 065c79df595af21d6d1b27d642860faa1d938774 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Aug 2025 10:53:50 +0200 Subject: wifi: mt76: mt7915: fix list corruption after hardware restart Since stations are recreated from scratch, all lists that wcids are added to must be cleared before calling ieee80211_restart_hw. Set wcid->sta = 0 for each wcid entry in order to ensure that they are not added again before they are ready. Fixes: 8a55712d124f ("wifi: mt76: mt7915: enable full system reset support") Link: https://patch.msgid.link/20250827085352.51636-4-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mac80211.c | 37 +++++++++++++++++++++++++ drivers/net/wireless/mediatek/mt76/mt76.h | 1 + drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 12 ++++---- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 6b2641a9ae9a..0e0d7b3bfe42 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -818,6 +818,43 @@ void mt76_free_device(struct mt76_dev *dev) } EXPORT_SYMBOL_GPL(mt76_free_device); +static void mt76_reset_phy(struct mt76_phy *phy) +{ + if (!phy) + return; + + INIT_LIST_HEAD(&phy->tx_list); +} + +void mt76_reset_device(struct mt76_dev *dev) +{ + int i; + + rcu_read_lock(); + for (i = 0; i < ARRAY_SIZE(dev->wcid); i++) { + struct mt76_wcid *wcid; + + wcid = rcu_dereference(dev->wcid[i]); + if (!wcid) + continue; + + wcid->sta = 0; + mt76_wcid_cleanup(dev, wcid); + rcu_assign_pointer(dev->wcid[i], NULL); + } + rcu_read_unlock(); + + INIT_LIST_HEAD(&dev->wcid_list); + INIT_LIST_HEAD(&dev->sta_poll_list); + dev->vif_mask = 0; + memset(dev->wcid_mask, 0, sizeof(dev->wcid_mask)); + + mt76_reset_phy(&dev->phy); + for (i = 0; i < ARRAY_SIZE(dev->phys); i++) + mt76_reset_phy(dev->phys[i]); +} +EXPORT_SYMBOL_GPL(mt76_reset_device); + struct mt76_phy *mt76_vif_phy(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 8dd5c29fb75b..127637454c82 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -1243,6 +1243,7 @@ int mt76_register_device(struct mt76_dev *dev, bool vht, struct ieee80211_rate *rates, int n_rates); void mt76_unregister_device(struct mt76_dev *dev); void mt76_free_device(struct mt76_dev *dev); +void mt76_reset_device(struct mt76_dev *dev); void mt76_unregister_phy(struct mt76_phy *phy); struct mt76_phy *mt76_alloc_radio_phy(struct mt76_dev *dev, unsigned int size, diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index 6639976afcee..1c0d310146d6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -1460,17 +1460,15 @@ mt7915_mac_full_reset(struct mt7915_dev *dev) if (i == 10) dev_err(dev->mt76.dev, "chip full reset failed\n"); - spin_lock_bh(&dev->mt76.sta_poll_lock); - while (!list_empty(&dev->mt76.sta_poll_list)) - list_del_init(dev->mt76.sta_poll_list.next); - spin_unlock_bh(&dev->mt76.sta_poll_lock); - - memset(dev->mt76.wcid_mask, 0, sizeof(dev->mt76.wcid_mask)); - dev->mt76.vif_mask = 0; dev->phy.omac_mask = 0; if (phy2) phy2->omac_mask = 0; + mt76_reset_device(&dev->mt76); + + INIT_LIST_HEAD(&dev->sta_rc_list); + INIT_LIST_HEAD(&dev->twt_list); + i = mt76_wcid_alloc(dev->mt76.wcid_mask, MT7915_WTBL_STA); dev->mt76.global_wcid.idx = i; dev->recovery.hw_full_reset = false; -- cgit v1.2.3 From bdeac7815629c1a32b8784922368742e183747ea Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Aug 2025 10:53:51 +0200 Subject: wifi: mt76: free pending offchannel tx frames on wcid cleanup Avoid leaking them or keeping the wcid on the tx list Fixes: 0b3be9d1d34e ("wifi: mt76: add separate tx scheduling queue for off-channel tx") Link: https://patch.msgid.link/20250827085352.51636-5-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mac80211.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 0e0d7b3bfe42..59adf3312617 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -1716,6 +1716,10 @@ void mt76_wcid_cleanup(struct mt76_dev *dev, struct mt76_wcid *wcid) skb_queue_splice_tail_init(&wcid->tx_pending, &list); spin_unlock(&wcid->tx_pending.lock); + spin_lock(&wcid->tx_offchannel.lock); + skb_queue_splice_tail_init(&wcid->tx_offchannel, &list); + spin_unlock(&wcid->tx_offchannel.lock); + spin_unlock_bh(&phy->tx_lock); while ((skb = __skb_dequeue(&list)) != NULL) { -- cgit v1.2.3 From 49fba87205bec14a0f6bd997635bf3968408161e Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Aug 2025 10:53:52 +0200 Subject: wifi: mt76: fix linked list corruption Never leave scheduled wcid entries on the temporary on-stack list Fixes: 0b3be9d1d34e ("wifi: mt76: add separate tx scheduling queue for off-channel tx") Link: https://patch.msgid.link/20250827085352.51636-6-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/tx.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 03b042fdf997..8ab5840fee57 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -646,6 +646,7 @@ mt76_txq_schedule_pending_wcid(struct mt76_phy *phy, struct mt76_wcid *wcid, static void mt76_txq_schedule_pending(struct mt76_phy *phy) { LIST_HEAD(tx_list); + int ret = 0; if (list_empty(&phy->tx_list)) return; @@ -657,13 +658,13 @@ static void mt76_txq_schedule_pending(struct mt76_phy *phy) list_splice_init(&phy->tx_list, &tx_list); while (!list_empty(&tx_list)) { struct mt76_wcid *wcid; - int ret; wcid = list_first_entry(&tx_list, struct mt76_wcid, tx_list); list_del_init(&wcid->tx_list); spin_unlock(&phy->tx_lock); - ret = mt76_txq_schedule_pending_wcid(phy, wcid, &wcid->tx_offchannel); + if (ret >= 0) + ret = mt76_txq_schedule_pending_wcid(phy, wcid, &wcid->tx_offchannel); if (ret >= 0 && !phy->offchannel) ret = mt76_txq_schedule_pending_wcid(phy, wcid, &wcid->tx_pending); spin_lock(&phy->tx_lock); @@ -672,9 +673,6 @@ static void mt76_txq_schedule_pending(struct mt76_phy *phy) !skb_queue_empty(&wcid->tx_offchannel) && list_empty(&wcid->tx_list)) list_add_tail(&wcid->tx_list, &phy->tx_list); - - if (ret < 0) - break; } spin_unlock(&phy->tx_lock); -- cgit v1.2.3 From 479a54ab92087318514c82428a87af2d7af1a576 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Fri, 22 Aug 2025 11:52:19 +0800 Subject: netfilter: br_netfilter: do not check confirmed bit in br_nf_local_in() after confirm When send a broadcast packet to a tap device, which was added to a bridge, br_nf_local_in() is called to confirm the conntrack. If another conntrack with the same hash value is added to the hash table, which can be triggered by a normal packet to a non-bridge device, the below warning may happen. ------------[ cut here ]------------ WARNING: CPU: 1 PID: 96 at net/bridge/br_netfilter_hooks.c:632 br_nf_local_in+0x168/0x200 CPU: 1 UID: 0 PID: 96 Comm: tap_send Not tainted 6.17.0-rc2-dirty #44 PREEMPT(voluntary) RIP: 0010:br_nf_local_in+0x168/0x200 Call Trace: nf_hook_slow+0x3e/0xf0 br_pass_frame_up+0x103/0x180 br_handle_frame_finish+0x2de/0x5b0 br_nf_hook_thresh+0xc0/0x120 br_nf_pre_routing_finish+0x168/0x3a0 br_nf_pre_routing+0x237/0x5e0 br_handle_frame+0x1ec/0x3c0 __netif_receive_skb_core+0x225/0x1210 __netif_receive_skb_one_core+0x37/0xa0 netif_receive_skb+0x36/0x160 tun_get_user+0xa54/0x10c0 tun_chr_write_iter+0x65/0xb0 vfs_write+0x305/0x410 ksys_write+0x60/0xd0 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x77/0x7f ---[ end trace 0000000000000000 ]--- To solve the hash conflict, nf_ct_resolve_clash() try to merge the conntracks, and update skb->_nfct. However, br_nf_local_in() still use the old ct from local variable 'nfct' after confirm(), which leads to this warning. If confirm() does not insert the conntrack entry and return NF_DROP, the warning may also occur. There is no need to reserve the WARN_ON_ONCE, just remove it. Link: https://lore.kernel.org/netdev/20250820043329.2902014-1-wangliang74@huawei.com/ Fixes: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack") Suggested-by: Florian Westphal Signed-off-by: Wang Liang Signed-off-by: Florian Westphal --- net/bridge/br_netfilter_hooks.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 94cbe967d1c1..083e2fe96441 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -626,9 +626,6 @@ static unsigned int br_nf_local_in(void *priv, break; } - ct = container_of(nfct, struct nf_conn, ct_general); - WARN_ON_ONCE(!nf_ct_is_confirmed(ct)); - return ret; } #endif -- cgit v1.2.3 From 54416fd76770bd04fc3c501810e8d673550bab26 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Mon, 18 Aug 2025 13:22:20 +0200 Subject: netfilter: conntrack: helper: Replace -EEXIST by -EBUSY The helper registration return value is passed-through by module_init callbacks which modprobe confuses with the harmless -EEXIST returned when trying to load an already loaded module. Make sure modprobe fails so users notice their helper has not been registered and won't work. Suggested-by: Christophe Leroy Fixes: 12f7a505331e ("netfilter: add user-space connection tracking helper infrastructure") Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 4ed5878cb25b..ceb48c3ca0a4 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -368,7 +368,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) (cur->tuple.src.l3num == NFPROTO_UNSPEC || cur->tuple.src.l3num == me->tuple.src.l3num) && cur->tuple.dst.protonum == me->tuple.dst.protonum) { - ret = -EEXIST; + ret = -EBUSY; goto out; } } @@ -379,7 +379,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) { - ret = -EEXIST; + ret = -EBUSY; goto out; } } -- cgit v1.2.3 From 224476613c8499f00ce4de975dd65749c5ca498c Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 28 Aug 2025 09:55:26 +0300 Subject: wifi: iwlwifi: if scratch is ~0U, consider it a failure We want to see bits being set in the scratch register upon resume, but if all the bits are set, it means that we were kicked out of the PCI bus and that clearly doesn't mean we can assume the firmware is still alive after the suspend / resume cycle. Fixes: cb347bd29d0d ("wifi: iwlwifi: mvm: fix hibernation") Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250828095500.0f203e559242.I59eff718cb5fda575db41081a1a389f7af488717@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index b7add05f7a85..46d8c1922292 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1222,11 +1222,15 @@ static int _iwl_pci_resume(struct device *device, bool restore) * Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan, * but not bits [15:8]. So if we have bits set in lower word, assume * the device is alive. + * Alternatively, if the scratch value is 0xFFFFFFFF, then we no longer + * have access to the device and consider it powered off. * For older devices, just try silently to grab the NIC. */ if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) { - if (!(iwl_read32(trans, CSR_FUNC_SCRATCH) & - CSR_FUNC_SCRATCH_POWER_OFF_MASK)) + u32 scratch = iwl_read32(trans, CSR_FUNC_SCRATCH); + + if (!(scratch & CSR_FUNC_SCRATCH_POWER_OFF_MASK) || + scratch == ~0U) device_was_powered_off = true; } else { /* -- cgit v1.2.3 From 7bf2dfccc2dd70821104d15cbab7b6fca21872be Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Aug 2025 09:55:27 +0300 Subject: wifi: iwlwifi: acpi: check DSM func validity The DSM func 0 (DSM_FUNC_QUERY) returns a bitmap of which other functions contain valid data, query and check it before returning other functions data. Fixes: 9db93491f29e ("iwlwifi: acpi: support device specific method (DSM)") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220085 Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250828095500.881e17ff8f6a.Ic6d92997d9d5fad127919d6e1b830cd3fe944468@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 25 ++++++++++++++++++++++++- drivers/net/wireless/intel/iwlwifi/fw/runtime.h | 8 ++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index bee7d92293b8..7ec22738b5d6 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -169,7 +169,7 @@ int iwl_acpi_get_dsm(struct iwl_fw_runtime *fwrt, BUILD_BUG_ON(ARRAY_SIZE(acpi_dsm_size) != DSM_FUNC_NUM_FUNCS); - if (WARN_ON(func >= ARRAY_SIZE(acpi_dsm_size))) + if (WARN_ON(func >= ARRAY_SIZE(acpi_dsm_size) || !func)) return -EINVAL; expected_size = acpi_dsm_size[func]; @@ -178,6 +178,29 @@ int iwl_acpi_get_dsm(struct iwl_fw_runtime *fwrt, if (expected_size != sizeof(u8) && expected_size != sizeof(u32)) return -EOPNOTSUPP; + if (!fwrt->acpi_dsm_funcs_valid) { + ret = iwl_acpi_get_dsm_integer(fwrt->dev, ACPI_DSM_REV, + DSM_FUNC_QUERY, + &iwl_guid, &tmp, + acpi_dsm_size[DSM_FUNC_QUERY]); + if (ret) { + /* always indicate BIT(0) to avoid re-reading */ + fwrt->acpi_dsm_funcs_valid = BIT(0); + return ret; + } + + IWL_DEBUG_RADIO(fwrt, "ACPI DSM validity bitmap 0x%x\n", + (u32)tmp); + /* always indicate BIT(0) to avoid re-reading */ + fwrt->acpi_dsm_funcs_valid = tmp | BIT(0); + } + + if (!(fwrt->acpi_dsm_funcs_valid & BIT(func))) { + IWL_DEBUG_RADIO(fwrt, "ACPI DSM %d not indicated as valid\n", + func); + return -ENODATA; + } + ret = iwl_acpi_get_dsm_integer(fwrt->dev, ACPI_DSM_REV, func, &iwl_guid, &tmp, expected_size); if (ret) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h index 0444a736c2b2..bd3bc2846cfa 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h @@ -113,6 +113,10 @@ struct iwl_txf_iter_data { * @phy_filters: specific phy filters as read from WPFC BIOS table * @ppag_bios_rev: PPAG BIOS revision * @ppag_bios_source: see &enum bios_source + * @acpi_dsm_funcs_valid: bitmap indicating which DSM values are valid, + * zero (default initialization) means it hasn't been read yet, + * and BIT(0) is set when it has since function 0 also has this + * bitmap and is always supported */ struct iwl_fw_runtime { struct iwl_trans *trans; @@ -189,6 +193,10 @@ struct iwl_fw_runtime { bool uats_valid; u8 uefi_tables_lock_status; struct iwl_phy_specific_cfg phy_filters; + +#ifdef CONFIG_ACPI + u32 acpi_dsm_funcs_valid; +#endif }; void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans, -- cgit v1.2.3 From 1d33694462fa7da451846c39d653585b61375992 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Aug 2025 09:55:28 +0300 Subject: wifi: iwlwifi: uefi: check DSM item validity The first array index is a bitmap indicating which of the other values are valid. Check that bitmap before returning a value. Fixes: fc7214c3c986 ("wifi: iwlwifi: read DSM functions from UEFI") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220085 Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250828095500.59ec52ff865e.I9e11f497a029eb38f481b2c90c43c0935285216d@changeid --- drivers/net/wireless/intel/iwlwifi/fw/uefi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c index 48126ec6b94b..99a17b9323e9 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c @@ -747,6 +747,12 @@ int iwl_uefi_get_dsm(struct iwl_fw_runtime *fwrt, enum iwl_dsm_funcs func, goto out; } + if (!(data->functions[DSM_FUNC_QUERY] & BIT(func))) { + IWL_DEBUG_RADIO(fwrt, "DSM func %d not in 0x%x\n", + func, data->functions[DSM_FUNC_QUERY]); + goto out; + } + *value = data->functions[func]; IWL_DEBUG_RADIO(fwrt, -- cgit v1.2.3 From 75575e2d252afb29fdbcbeec4d67e042007add52 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 26 Aug 2025 20:26:01 +0300 Subject: wifi: mac80211: do not permit 40 MHz EHT operation on 5/6 GHz The EHT PHY requirements state that 80 MHz must be supported on the 5 and 6 GHz bands unless the STA is 20 MHz only. So if the channel width is limited to 40 MHz on a band other than 2.4 GHz, then disable EHT and downgrade to HE. The primary case where this can happen is if the hardware disables puncturing using IEEE80211_HW_DISALLOW_PUNCTURING. Signed-off-by: Benjamin Berg Cc: stable@vger.kernel.org Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250826202553.a6582f3abf57.Ic670429dc7127f68c818b4290d950ebfb5a0b9e1@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 8 ++++++++ net/mac80211/tests/chan-mode.c | 30 +++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1008eb8e9b13..dd650a127a31 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1189,6 +1189,14 @@ again: "required MCSes not supported, disabling EHT\n"); } + if (conn->mode >= IEEE80211_CONN_MODE_EHT && + channel->band != NL80211_BAND_2GHZ && + conn->bw_limit == IEEE80211_CONN_BW_LIMIT_40) { + conn->mode = IEEE80211_CONN_MODE_HE; + link_id_info(sdata, link_id, + "required bandwidth not supported, disabling EHT\n"); + } + /* the mode can only decrease, so this must terminate */ if (ap_mode != conn->mode) { kfree(elems); diff --git a/net/mac80211/tests/chan-mode.c b/net/mac80211/tests/chan-mode.c index 96c7b3ab2744..adc069065e73 100644 --- a/net/mac80211/tests/chan-mode.c +++ b/net/mac80211/tests/chan-mode.c @@ -2,7 +2,7 @@ /* * KUnit tests for channel mode functions * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation */ #include #include @@ -28,6 +28,10 @@ static const struct determine_chan_mode_case { u8 vht_basic_mcs_1_4, vht_basic_mcs_5_8; u8 he_basic_mcs_1_4, he_basic_mcs_5_8; u8 eht_mcs7_min_nss; + u16 eht_disabled_subchannels; + u8 eht_bw; + enum ieee80211_conn_bw_limit conn_bw_limit; + enum ieee80211_conn_bw_limit expected_bw_limit; int error; } determine_chan_mode_cases[] = { { @@ -128,6 +132,14 @@ static const struct determine_chan_mode_case { .conn_mode = IEEE80211_CONN_MODE_EHT, .eht_mcs7_min_nss = 0x15, .error = EINVAL, + }, { + .desc = "80 MHz EHT is downgraded to 40 MHz HE due to puncturing", + .conn_mode = IEEE80211_CONN_MODE_EHT, + .expected_mode = IEEE80211_CONN_MODE_HE, + .conn_bw_limit = IEEE80211_CONN_BW_LIMIT_80, + .expected_bw_limit = IEEE80211_CONN_BW_LIMIT_40, + .eht_disabled_subchannels = 0x08, + .eht_bw = IEEE80211_EHT_OPER_CHAN_WIDTH_80MHZ, } }; KUNIT_ARRAY_PARAM_DESC(determine_chan_mode, determine_chan_mode_cases, desc) @@ -138,7 +150,7 @@ static void test_determine_chan_mode(struct kunit *test) struct t_sdata *t_sdata = T_SDATA(test); struct ieee80211_conn_settings conn = { .mode = params->conn_mode, - .bw_limit = IEEE80211_CONN_BW_LIMIT_20, + .bw_limit = params->conn_bw_limit, }; struct cfg80211_bss cbss = { .channel = &t_sdata->band_5ghz.channels[0], @@ -191,14 +203,21 @@ static void test_determine_chan_mode(struct kunit *test) 0x7f, 0x01, 0x00, 0x88, 0x88, 0x88, 0x00, 0x00, 0x00, /* EHT Operation */ - WLAN_EID_EXTENSION, 0x09, WLAN_EID_EXT_EHT_OPERATION, - 0x01, params->eht_mcs7_min_nss ? params->eht_mcs7_min_nss : 0x11, - 0x00, 0x00, 0x00, 0x00, 0x24, 0x00, + WLAN_EID_EXTENSION, 0x0b, WLAN_EID_EXT_EHT_OPERATION, + 0x03, params->eht_mcs7_min_nss ? params->eht_mcs7_min_nss : 0x11, + 0x00, 0x00, 0x00, params->eht_bw, + params->eht_bw == IEEE80211_EHT_OPER_CHAN_WIDTH_80MHZ ? 42 : 36, + 0x00, + u16_get_bits(params->eht_disabled_subchannels, 0xff), + u16_get_bits(params->eht_disabled_subchannels, 0xff00), }; struct ieee80211_chan_req chanreq = {}; struct cfg80211_chan_def ap_chandef = {}; struct ieee802_11_elems *elems; + /* To force EHT downgrade to HE on punctured 80 MHz downgraded to 40 MHz */ + set_bit(IEEE80211_HW_DISALLOW_PUNCTURING, t_sdata->local.hw.flags); + if (params->strict) set_bit(IEEE80211_HW_STRICT, t_sdata->local.hw.flags); else @@ -237,6 +256,7 @@ static void test_determine_chan_mode(struct kunit *test) } else { KUNIT_ASSERT_NOT_ERR_OR_NULL(test, elems); KUNIT_ASSERT_EQ(test, conn.mode, params->expected_mode); + KUNIT_ASSERT_EQ(test, conn.bw_limit, params->expected_bw_limit); } } -- cgit v1.2.3 From 0e20450829ca3c1dbc2db536391537c57a40fe0b Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Fri, 15 Aug 2025 10:30:50 +0800 Subject: wifi: mwifiex: Initialize the chan_stats array to zero The adapter->chan_stats[] array is initialized in mwifiex_init_channel_scan_gap() with vmalloc(), which doesn't zero out memory. The array is filled in mwifiex_update_chan_statistics() and then the user can query the data in mwifiex_cfg80211_dump_survey(). There are two potential issues here. What if the user calls mwifiex_cfg80211_dump_survey() before the data has been filled in. Also the mwifiex_update_chan_statistics() function doesn't necessarily initialize the whole array. Since the array was not initialized at the start that could result in an information leak. Also this array is pretty small. It's a maximum of 900 bytes so it's more appropriate to use kcalloc() instead vmalloc(). Cc: stable@vger.kernel.org Fixes: bf35443314ac ("mwifiex: channel statistics support for mwifiex") Suggested-by: Dan Carpenter Signed-off-by: Qianfeng Rong Reviewed-by: Dan Carpenter Link: https://patch.msgid.link/20250815023055.477719-1-rongqianfeng@vivo.com Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 5 +++-- drivers/net/wireless/marvell/mwifiex/main.c | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 3498743d5ec0..4c8c7a5fdf23 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -4673,8 +4673,9 @@ int mwifiex_init_channel_scan_gap(struct mwifiex_adapter *adapter) * additional active scan request for hidden SSIDs on passive channels. */ adapter->num_in_chan_stats = 2 * (n_channels_bg + n_channels_a); - adapter->chan_stats = vmalloc(array_size(sizeof(*adapter->chan_stats), - adapter->num_in_chan_stats)); + adapter->chan_stats = kcalloc(adapter->num_in_chan_stats, + sizeof(*adapter->chan_stats), + GFP_KERNEL); if (!adapter->chan_stats) return -ENOMEM; diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 7b50a88a18e5..1ec069bc8ea1 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -642,7 +642,7 @@ static int _mwifiex_fw_dpc(const struct firmware *firmware, void *context) goto done; err_add_intf: - vfree(adapter->chan_stats); + kfree(adapter->chan_stats); err_init_chan_scan: wiphy_unregister(adapter->wiphy); wiphy_free(adapter->wiphy); @@ -1485,7 +1485,7 @@ static void mwifiex_uninit_sw(struct mwifiex_adapter *adapter) wiphy_free(adapter->wiphy); adapter->wiphy = NULL; - vfree(adapter->chan_stats); + kfree(adapter->chan_stats); mwifiex_free_cmd_buffers(adapter); } -- cgit v1.2.3 From 22e6bdb129ec64e640f5cccef9686f7c1a7d559b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Aug 2025 09:55:29 +0300 Subject: wifi: iwlwifi: cfg: restore some 1000 series configs In the fixed commit, I inadvertently removed two configurations while combining the 0x0083/0x0084 device IDs. Replace the fixed matches for the BG versions by a masked match and add the BGN version back with a similar masked match. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220477 Fixes: 1fb053d9876f ("wifi: iwlwifi: cfg: remove unnecessary configs") Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20250828095500.fabb99c2df9e.If0ad87bf9ab360da5f613e879fd416c17c544733@changeid Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 46d8c1922292..f5e72c90dd57 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -729,10 +729,10 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { DEVICE(0x0083), SUBDEV_MASKED(0x5, 0xF)), IWL_DEV_INFO(iwl1000_bg_cfg, iwl1000_bg_name, DEVICE(0x0083), SUBDEV_MASKED(0x6, 0xF)), + IWL_DEV_INFO(iwl1000_bgn_cfg, iwl1000_bgn_name, + DEVICE(0x0084), SUBDEV_MASKED(0x5, 0xF)), IWL_DEV_INFO(iwl1000_bg_cfg, iwl1000_bg_name, - DEVICE(0x0084), SUBDEV(0x1216)), - IWL_DEV_INFO(iwl1000_bg_cfg, iwl1000_bg_name, - DEVICE(0x0084), SUBDEV(0x1316)), + DEVICE(0x0084), SUBDEV_MASKED(0x6, 0xF)), /* 100 Series WiFi */ IWL_DEV_INFO(iwl100_bgn_cfg, iwl100_bgn_name, -- cgit v1.2.3 From 586e3cb33ba6890054b95aa0ade0a165890efabd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Aug 2025 09:55:30 +0300 Subject: wifi: iwlwifi: fix byte count table for old devices For devices handled by iwldvm, bc_table_dword was never set, but I missed that during the removal thereof. Change the logic to not treat the byte count table as dwords for devices older than 9000 series to fix that. Fixes: 6570ea227826 ("wifi: iwlwifi: remove bc_table_dword transport config") Signed-off-by: Johannes Berg Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250828095500.eccd7d3939f1.Ibaffa06d0b3aa5f35a9451d94af34de208b8a2bc@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c index 84a05cc1c27a..d912e709a92c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c @@ -2092,7 +2092,8 @@ static void iwl_txq_gen1_update_byte_cnt_tbl(struct iwl_trans *trans, break; } - if (trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210) + if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_9000 && + trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210) len = DIV_ROUND_UP(len, 4); if (WARN_ON(len > 0xFFF || write_ptr >= TFD_QUEUE_SIZE_MAX)) -- cgit v1.2.3 From 019f71a6760a6f89d388c3cd45622d1aae7d3641 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Aug 2025 09:55:31 +0300 Subject: wifi: iwlwifi: cfg: add back more lost PCI IDs Add back a few more PCI IDs to the config match table that evidently I lost during the cleanups. Fixes: 1fb053d9876f ("wifi: iwlwifi: cfg: remove unnecessary configs") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250828095500.46fee422651e.I8f6c3e9eea9523bb1658f5690b715eb443740e07@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index f5e72c90dd57..f9e2095d6490 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -673,6 +673,8 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { IWL_DEV_INFO(iwl6005_n_cfg, iwl6005_2agn_sff_name, DEVICE(0x0082), SUBDEV_MASKED(0xC000, 0xF000)), + IWL_DEV_INFO(iwl6005_n_cfg, iwl6005_2agn_sff_name, + DEVICE(0x0085), SUBDEV_MASKED(0xC000, 0xF000)), IWL_DEV_INFO(iwl6005_n_cfg, iwl6005_2agn_d_name, DEVICE(0x0082), SUBDEV(0x4820)), IWL_DEV_INFO(iwl6005_n_cfg, iwl6005_2agn_mow1_name, @@ -964,6 +966,12 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { DEVICE(0x24F3), SUBDEV(0x0004)), IWL_DEV_INFO(iwl8260_cfg, iwl8260_2n_name, DEVICE(0x24F3), SUBDEV(0x0044)), + IWL_DEV_INFO(iwl8260_cfg, iwl8260_2ac_name, + DEVICE(0x24F4)), + IWL_DEV_INFO(iwl8260_cfg, iwl4165_2ac_name, + DEVICE(0x24F5)), + IWL_DEV_INFO(iwl8260_cfg, iwl4165_2ac_name, + DEVICE(0x24F6)), IWL_DEV_INFO(iwl8265_cfg, iwl8265_2ac_name, DEVICE(0x24FD)), IWL_DEV_INFO(iwl8265_cfg, iwl8275_2ac_name, -- cgit v1.2.3 From 28010791193a4503f054e8d69a950ef815deb539 Mon Sep 17 00:00:00 2001 From: Ivan Pravdin Date: Wed, 27 Aug 2025 10:53:25 -0400 Subject: Bluetooth: vhci: Prevent use-after-free by removing debugfs files early Move the creation of debugfs files into a dedicated function, and ensure they are explicitly removed during vhci_release(), before associated data structures are freed. Previously, debugfs files such as "force_suspend", "force_wakeup", and others were created under hdev->debugfs but not removed in vhci_release(). Since vhci_release() frees the backing vhci_data structure, any access to these files after release would result in use-after-free errors. Although hdev->debugfs is later freed in hci_release_dev(), user can access files after vhci_data is freed but before hdev->debugfs is released. Fixes: ab4e4380d4e1 ("Bluetooth: Add vhci devcoredump support") Signed-off-by: Ivan Pravdin Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_vhci.c | 57 +++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index f7d8c3c00655..2fef08254d78 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -380,6 +380,28 @@ static const struct file_operations force_devcoredump_fops = { .write = force_devcd_write, }; +static void vhci_debugfs_init(struct vhci_data *data) +{ + struct hci_dev *hdev = data->hdev; + + debugfs_create_file("force_suspend", 0644, hdev->debugfs, data, + &force_suspend_fops); + + debugfs_create_file("force_wakeup", 0644, hdev->debugfs, data, + &force_wakeup_fops); + + if (IS_ENABLED(CONFIG_BT_MSFTEXT)) + debugfs_create_file("msft_opcode", 0644, hdev->debugfs, data, + &msft_opcode_fops); + + if (IS_ENABLED(CONFIG_BT_AOSPEXT)) + debugfs_create_file("aosp_capable", 0644, hdev->debugfs, data, + &aosp_capable_fops); + + debugfs_create_file("force_devcoredump", 0644, hdev->debugfs, data, + &force_devcoredump_fops); +} + static int __vhci_create_device(struct vhci_data *data, __u8 opcode) { struct hci_dev *hdev; @@ -434,22 +456,8 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) return -EBUSY; } - debugfs_create_file("force_suspend", 0644, hdev->debugfs, data, - &force_suspend_fops); - - debugfs_create_file("force_wakeup", 0644, hdev->debugfs, data, - &force_wakeup_fops); - - if (IS_ENABLED(CONFIG_BT_MSFTEXT)) - debugfs_create_file("msft_opcode", 0644, hdev->debugfs, data, - &msft_opcode_fops); - - if (IS_ENABLED(CONFIG_BT_AOSPEXT)) - debugfs_create_file("aosp_capable", 0644, hdev->debugfs, data, - &aosp_capable_fops); - - debugfs_create_file("force_devcoredump", 0644, hdev->debugfs, data, - &force_devcoredump_fops); + if (!IS_ERR_OR_NULL(hdev->debugfs)) + vhci_debugfs_init(data); hci_skb_pkt_type(skb) = HCI_VENDOR_PKT; @@ -651,6 +659,21 @@ static int vhci_open(struct inode *inode, struct file *file) return 0; } +static void vhci_debugfs_remove(struct hci_dev *hdev) +{ + debugfs_lookup_and_remove("force_suspend", hdev->debugfs); + + debugfs_lookup_and_remove("force_wakeup", hdev->debugfs); + + if (IS_ENABLED(CONFIG_BT_MSFTEXT)) + debugfs_lookup_and_remove("msft_opcode", hdev->debugfs); + + if (IS_ENABLED(CONFIG_BT_AOSPEXT)) + debugfs_lookup_and_remove("aosp_capable", hdev->debugfs); + + debugfs_lookup_and_remove("force_devcoredump", hdev->debugfs); +} + static int vhci_release(struct inode *inode, struct file *file) { struct vhci_data *data = file->private_data; @@ -662,6 +685,8 @@ static int vhci_release(struct inode *inode, struct file *file) hdev = data->hdev; if (hdev) { + if (!IS_ERR_OR_NULL(hdev->debugfs)) + vhci_debugfs_remove(hdev); hci_unregister_dev(hdev); hci_free_dev(hdev); } -- cgit v1.2.3 From 862c628108562d8c7a516a900034823b381d3cba Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 27 Aug 2025 20:40:14 +0000 Subject: Bluetooth: Fix use-after-free in l2cap_sock_cleanup_listen() syzbot reported the splat below without a repro. In the splat, a single thread calling bt_accept_dequeue() freed sk and touched it after that. The root cause would be the racy l2cap_sock_cleanup_listen() call added by the cited commit. bt_accept_dequeue() is called under lock_sock() except for l2cap_sock_release(). Two threads could see the same socket during the list iteration in bt_accept_dequeue(): CPU1 CPU2 (close()) ---- ---- sock_hold(sk) sock_hold(sk); lock_sock(sk) <-- block close() sock_put(sk) bt_accept_unlink(sk) sock_put(sk) <-- refcnt by bt_accept_enqueue() release_sock(sk) lock_sock(sk) sock_put(sk) bt_accept_unlink(sk) sock_put(sk) <-- last refcnt bt_accept_unlink(sk) <-- UAF Depending on the timing, the other thread could show up in the "Freed by task" part. Let's call l2cap_sock_cleanup_listen() under lock_sock() in l2cap_sock_release(). [0]: BUG: KASAN: slab-use-after-free in debug_spin_lock_before kernel/locking/spinlock_debug.c:86 [inline] BUG: KASAN: slab-use-after-free in do_raw_spin_lock+0x26f/0x2b0 kernel/locking/spinlock_debug.c:115 Read of size 4 at addr ffff88803b7eb1c4 by task syz.5.3276/16995 CPU: 3 UID: 0 PID: 16995 Comm: syz.5.3276 Not tainted syzkaller #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xcd/0x630 mm/kasan/report.c:482 kasan_report+0xe0/0x110 mm/kasan/report.c:595 debug_spin_lock_before kernel/locking/spinlock_debug.c:86 [inline] do_raw_spin_lock+0x26f/0x2b0 kernel/locking/spinlock_debug.c:115 spin_lock_bh include/linux/spinlock.h:356 [inline] release_sock+0x21/0x220 net/core/sock.c:3746 bt_accept_dequeue+0x505/0x600 net/bluetooth/af_bluetooth.c:312 l2cap_sock_cleanup_listen+0x5c/0x2a0 net/bluetooth/l2cap_sock.c:1451 l2cap_sock_release+0x5c/0x210 net/bluetooth/l2cap_sock.c:1425 __sock_release+0xb3/0x270 net/socket.c:649 sock_close+0x1c/0x30 net/socket.c:1439 __fput+0x3ff/0xb70 fs/file_table.c:468 task_work_run+0x14d/0x240 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop+0xeb/0x110 kernel/entry/common.c:43 exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline] do_syscall_64+0x3f6/0x4c0 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f2accf8ebe9 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffdb6cb1378 EFLAGS: 00000246 ORIG_RAX: 00000000000001b4 RAX: 0000000000000000 RBX: 00000000000426fb RCX: 00007f2accf8ebe9 RDX: 0000000000000000 RSI: 000000000000001e RDI: 0000000000000003 RBP: 00007f2acd1b7da0 R08: 0000000000000001 R09: 00000012b6cb166f R10: 0000001b30e20000 R11: 0000000000000246 R12: 00007f2acd1b609c R13: 00007f2acd1b6090 R14: ffffffffffffffff R15: 00007ffdb6cb1490 Allocated by task 5326: kasan_save_stack+0x33/0x60 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:388 [inline] __kasan_kmalloc+0xaa/0xb0 mm/kasan/common.c:405 kasan_kmalloc include/linux/kasan.h:260 [inline] __do_kmalloc_node mm/slub.c:4365 [inline] __kmalloc_noprof+0x223/0x510 mm/slub.c:4377 kmalloc_noprof include/linux/slab.h:909 [inline] sk_prot_alloc+0x1a8/0x2a0 net/core/sock.c:2239 sk_alloc+0x36/0xc20 net/core/sock.c:2295 bt_sock_alloc+0x3b/0x3a0 net/bluetooth/af_bluetooth.c:151 l2cap_sock_alloc.constprop.0+0x33/0x1d0 net/bluetooth/l2cap_sock.c:1894 l2cap_sock_new_connection_cb+0x101/0x240 net/bluetooth/l2cap_sock.c:1482 l2cap_connect_cfm+0x4c4/0xf80 net/bluetooth/l2cap_core.c:7287 hci_connect_cfm include/net/bluetooth/hci_core.h:2050 [inline] hci_remote_features_evt+0x4dd/0x970 net/bluetooth/hci_event.c:3712 hci_event_func net/bluetooth/hci_event.c:7519 [inline] hci_event_packet+0xa0d/0x11c0 net/bluetooth/hci_event.c:7573 hci_rx_work+0x2c5/0x16b0 net/bluetooth/hci_core.c:4071 process_one_work+0x9cf/0x1b70 kernel/workqueue.c:3236 process_scheduled_works kernel/workqueue.c:3319 [inline] worker_thread+0x6c8/0xf10 kernel/workqueue.c:3400 kthread+0x3c2/0x780 kernel/kthread.c:463 ret_from_fork+0x5d7/0x6f0 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Freed by task 16995: kasan_save_stack+0x33/0x60 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 kasan_save_free_info+0x3b/0x60 mm/kasan/generic.c:576 poison_slab_object mm/kasan/common.c:243 [inline] __kasan_slab_free+0x60/0x70 mm/kasan/common.c:275 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2417 [inline] slab_free mm/slub.c:4680 [inline] kfree+0x2b4/0x4d0 mm/slub.c:4879 sk_prot_free net/core/sock.c:2278 [inline] __sk_destruct+0x75f/0x9a0 net/core/sock.c:2373 sk_destruct+0xc2/0xf0 net/core/sock.c:2401 __sk_free+0xf4/0x3e0 net/core/sock.c:2412 sk_free+0x6a/0x90 net/core/sock.c:2423 sock_put include/net/sock.h:1960 [inline] bt_accept_unlink+0x245/0x2e0 net/bluetooth/af_bluetooth.c:262 bt_accept_dequeue+0x517/0x600 net/bluetooth/af_bluetooth.c:308 l2cap_sock_cleanup_listen+0x5c/0x2a0 net/bluetooth/l2cap_sock.c:1451 l2cap_sock_release+0x5c/0x210 net/bluetooth/l2cap_sock.c:1425 __sock_release+0xb3/0x270 net/socket.c:649 sock_close+0x1c/0x30 net/socket.c:1439 __fput+0x3ff/0xb70 fs/file_table.c:468 task_work_run+0x14d/0x240 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop+0xeb/0x110 kernel/entry/common.c:43 exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline] do_syscall_64+0x3f6/0x4c0 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 1728137b33c0 ("Bluetooth: L2CAP: Fix use-after-free in l2cap_sock_ready_cb") Reported-by: syzbot+e5e64cdf8e92046dd3e1@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-bluetooth/68af6b9d.a70a0220.3cafd4.0032.GAE@google.com/ Signed-off-by: Kuniyuki Iwashima Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_sock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index f4257c4d3052..814fb8610ac4 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1422,7 +1422,10 @@ static int l2cap_sock_release(struct socket *sock) if (!sk) return 0; + lock_sock_nested(sk, L2CAP_NESTING_PARENT); l2cap_sock_cleanup_listen(sk); + release_sock(sk); + bt_sock_unlink(&l2cap_sk_list, sk); err = l2cap_sock_shutdown(sock, SHUT_RDWR); -- cgit v1.2.3 From d6a367ec6c96fc8e61b4d67e69df03565ec69fb7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 28 Aug 2025 23:49:18 +0200 Subject: netfilter: nft_flowtable.sh: re-run with random mtu sizes Jakub says: nft_flowtable.sh is one of the most flake-atious test for netdev CI currently :( The root cause is two-fold: 1. the failing part of the test is supposed to make sure that ip fragments are forwarded for offloaded flows. (flowtable has to pass them to classic forward path). path mtu discovery for these subtests is disabled. 2. nft_flowtable.sh has two passes. One with fixed mtus/file size and one where link mtus and file sizes are random. The CI failures all have same pattern: re-run with random mtus and file size: -o 27663 -l 4117 -r 10089 -s 54384840 [..] PASS: dscp_egress: dscp packet counters match FAIL: file mismatch for ns1 -> ns2 In some cases this error triggers a bit ealier, sometimes in a later subtest: re-run with random mtus and file size: -o 20201 -l 4555 -r 12657 -s 9405856 [..] PASS: dscp_egress: dscp packet counters match PASS: dscp_fwd: dscp packet counters match 2025/08/17 20:37:52 socat[18954] E write(7, 0x560716b96000, 8192): Broken pipe FAIL: file mismatch for ns1 -> ns2 -rw------- 1 root root 9405856 Aug 17 20:36 /tmp/tmp.2n63vlTrQe But all logs I saw show same scenario: 1. Failing tests have pmtu discovery off (i.e., ip fragmentation) 2. The test file is much larger than first-pass default (2M Byte) 3. peers have much larger MTUs compared to the 'network'. These errors are very reproducible when re-running the test with the same commandline arguments. The timeout became much more prominent with 1d2fbaad7cd8 ("tcp: stronger sk_rcvbuf checks"): reassembled packets typically have a skb->truesize more than double the skb length. As that commit is intentional and pmtud-off with large-tcp-packets-as-fragments is not normal adjust the test to use a smaller file for the pmtu-off subtests. While at it, add more information to pass/fail messages and also run the dscp alteration subtest with pmtu discovery enabled. Link: https://netdev.bots.linux.dev/contest.html?test=nft-flowtable-sh Fixes: f84ab634904c ("selftests: netfilter: nft_flowtable.sh: re-run with random mtu sizes") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20250822071330.4168f0db@kernel.org/ Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20250828214918.3385-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- .../selftests/net/netfilter/nft_flowtable.sh | 113 ++++++++++++++------- 1 file changed, 76 insertions(+), 37 deletions(-) diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh index a4ee5496f2a1..45832df98295 100755 --- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh @@ -20,6 +20,7 @@ ret=0 SOCAT_TIMEOUT=60 nsin="" +nsin_small="" ns1out="" ns2out="" @@ -36,7 +37,7 @@ cleanup() { cleanup_all_ns - rm -f "$nsin" "$ns1out" "$ns2out" + rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out" [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns" } @@ -72,6 +73,7 @@ lmtu=1500 rmtu=2000 filesize=$((2 * 1024 * 1024)) +filesize_small=$((filesize / 16)) usage(){ echo "nft_flowtable.sh [OPTIONS]" @@ -89,7 +91,10 @@ do o) omtu=$OPTARG;; l) lmtu=$OPTARG;; r) rmtu=$OPTARG;; - s) filesize=$OPTARG;; + s) + filesize=$OPTARG + filesize_small=$((OPTARG / 16)) + ;; *) usage;; esac done @@ -215,6 +220,7 @@ if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then fi nsin=$(mktemp) +nsin_small=$(mktemp) ns1out=$(mktemp) ns2out=$(mktemp) @@ -265,6 +271,7 @@ check_counters() check_dscp() { local what=$1 + local pmtud="$2" local ok=1 local counter @@ -277,37 +284,39 @@ check_dscp() local pc4z=${counter%*bytes*} local pc4z=${pc4z#*packets} + local failmsg="FAIL: pmtu $pmtu: $what counters do not match, expected" + case "$what" in "dscp_none") if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_fwd") if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_ingress") if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_egress") if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; *) - echo "FAIL: Unknown DSCP check" 1>&2 + echo "$failmsg: Unknown DSCP check" 1>&2 ret=1 ok=0 esac @@ -319,9 +328,9 @@ check_dscp() check_transfer() { - in=$1 - out=$2 - what=$3 + local in=$1 + local out=$2 + local what=$3 if ! cmp "$in" "$out" > /dev/null 2>&1; then echo "FAIL: file mismatch for $what" 1>&2 @@ -342,25 +351,39 @@ test_tcp_forwarding_ip() { local nsa=$1 local nsb=$2 - local dstip=$3 - local dstport=$4 + local pmtu=$3 + local dstip=$4 + local dstport=$5 local lret=0 + local socatc + local socatl + local infile="$nsin" + + if [ $pmtu -eq 0 ]; then + infile="$nsin_small" + fi - timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" & + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" & lpid=$! busywait 1000 listener_ready - timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out" + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$infile" > "$ns1out" + socatc=$? wait $lpid + socatl=$? - if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then + if [ $socatl -ne 0 ] || [ $socatc -ne 0 ];then + rc=1 + fi + + if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then lret=1 ret=1 fi - if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then + if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then lret=1 ret=1 fi @@ -370,14 +393,16 @@ test_tcp_forwarding_ip() test_tcp_forwarding() { - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + local pmtu="$3" + + test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345 return $? } test_tcp_forwarding_set_dscp() { - check_dscp "dscp_none" + local pmtu="$3" ip netns exec "$nsr1" nft -f - <&2 @@ -489,8 +519,9 @@ table ip nat { } EOF +check_dscp "dscp_none" "0" if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then - echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2 + echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2 exit 0 fi @@ -512,6 +543,14 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null # are lower than file size and packets were forwarded via flowtable layer. # For earlier tests (large mtus), packets cannot be handled via flowtable # (except pure acks and other small packets). +ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null +ip netns exec "$ns2" nft reset counters table inet filter >/dev/null + +if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 ""; then + echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2 + exit 0 +fi + ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then @@ -644,7 +683,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1 ip -net "$ns2" route add default via 10.0.2.1 ip -net "$ns2" route add default via dead:2::1 -if test_tcp_forwarding "$ns1" "$ns2"; then +if test_tcp_forwarding "$ns1" "$ns2" 1; then check_counters "ipsec tunnel mode for ns1/ns2" else echo "FAIL: ipsec tunnel mode for ns1/ns2" @@ -668,7 +707,7 @@ if [ "$1" = "" ]; then fi echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize" - $0 -o "$o" -l "$l" -r "$r" -s "$filesize" + $0 -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1 fi exit $ret -- cgit v1.2.3 From 9f74c0ea9b26d1505d55b61e36b1623dd347e1d1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Aug 2025 16:23:52 +0000 Subject: net_sched: gen_estimator: fix est_timer() vs CONFIG_PREEMPT_RT=y syzbot reported a WARNING in est_timer() [1] Problem here is that with CONFIG_PREEMPT_RT=y, timer callbacks can be preempted. Adopt preempt_disable_nested()/preempt_enable_nested() to fix this. [1] WARNING: CPU: 0 PID: 16 at ./include/linux/seqlock.h:221 __seqprop_assert include/linux/seqlock.h:221 [inline] WARNING: CPU: 0 PID: 16 at ./include/linux/seqlock.h:221 est_timer+0x6dc/0x9f0 net/core/gen_estimator.c:93 Modules linked in: CPU: 0 UID: 0 PID: 16 Comm: ktimers/0 Not tainted syzkaller #0 PREEMPT_{RT,(full)} Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 RIP: 0010:__seqprop_assert include/linux/seqlock.h:221 [inline] RIP: 0010:est_timer+0x6dc/0x9f0 net/core/gen_estimator.c:93 Call Trace: call_timer_fn+0x17e/0x5f0 kernel/time/timer.c:1747 expire_timers kernel/time/timer.c:1798 [inline] __run_timers kernel/time/timer.c:2372 [inline] __run_timer_base+0x648/0x970 kernel/time/timer.c:2384 run_timer_base kernel/time/timer.c:2393 [inline] run_timer_softirq+0xb7/0x180 kernel/time/timer.c:2403 handle_softirqs+0x22c/0x710 kernel/softirq.c:579 __do_softirq kernel/softirq.c:613 [inline] run_ktimerd+0xcf/0x190 kernel/softirq.c:1043 smpboot_thread_fn+0x53f/0xa60 kernel/smpboot.c:160 kthread+0x70e/0x8a0 kernel/kthread.c:463 ret_from_fork+0x3fc/0x770 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Fixes: d2d6422f8bd1 ("x86: Allow to enable PREEMPT_RT.") Reported-by: syzbot+72db9ee39db57c3fecc5@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68adf6fa.a70a0220.3cafd4.0000.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250827162352.3960779-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/gen_estimator.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 7d426a8e29f3..f112156db587 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -90,10 +90,12 @@ static void est_timer(struct timer_list *t) rate = (b_packets - est->last_packets) << (10 - est->intvl_log); rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log); + preempt_disable_nested(); write_seqcount_begin(&est->seq); est->avbps += brate; est->avpps += rate; write_seqcount_end(&est->seq); + preempt_enable_nested(); est->last_bytes = b_bytes; est->last_packets = b_packets; -- cgit v1.2.3 From b79e498080b170fd94fc83bca2471f450811549b Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Wed, 27 Aug 2025 12:26:43 -0700 Subject: xirc2ps_cs: fix register access when enabling FullDuplex The current code incorrectly passes (XIRCREG1_ECR | FullDuplex) as the register address to GetByte(), instead of fetching the register value and OR-ing it with FullDuplex. This results in an invalid register access. Fix it by reading XIRCREG1_ECR first, then or-ing with FullDuplex before writing it back. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Alok Tiwari Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250827192645.658496-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xircom/xirc2ps_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index a31d5d5e6593..97e88886253f 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -1576,7 +1576,7 @@ do_reset(struct net_device *dev, int full) msleep(40); /* wait 40 msec to let it complete */ } if (full_duplex) - PutByte(XIRCREG1_ECR, GetByte(XIRCREG1_ECR | FullDuplex)); + PutByte(XIRCREG1_ECR, GetByte(XIRCREG1_ECR) | FullDuplex); } else { /* No MII */ SelectPage(0); value = GetByte(XIRCREG_ESR); /* read the ESR */ -- cgit v1.2.3 From 8bf935cf789872350b04c1a6468b0a509f67afb2 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 28 Aug 2025 16:29:49 +0800 Subject: ptp: ocp: fix use-after-free bugs causing by ptp_ocp_watchdog The ptp_ocp_detach() only shuts down the watchdog timer if it is pending. However, if the timer handler is already running, the timer_delete_sync() is not called. This leads to race conditions where the devlink that contains the ptp_ocp is deallocated while the timer handler is still accessing it, resulting in use-after-free bugs. The following details one of the race scenarios. (thread 1) | (thread 2) ptp_ocp_remove() | ptp_ocp_detach() | ptp_ocp_watchdog() if (timer_pending(&bp->watchdog))| bp = timer_container_of() timer_delete_sync() | | devlink_free(devlink) //free | | bp-> //use Resolve this by unconditionally calling timer_delete_sync() to ensure the timer is reliably deactivated, preventing any access after free. Fixes: 773bda964921 ("ptp: ocp: Expose various resources on the timecard.") Signed-off-by: Duoming Zhou Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250828082949.28189-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index d39073dc4072..4e1286ce05c9 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -4557,8 +4557,7 @@ ptp_ocp_detach(struct ptp_ocp *bp) ptp_ocp_debugfs_remove_device(bp); ptp_ocp_detach_sysfs(bp); ptp_ocp_attr_group_del(bp); - if (timer_pending(&bp->watchdog)) - timer_delete_sync(&bp->watchdog); + timer_delete_sync(&bp->watchdog); if (bp->ts0) ptp_ocp_unregister_ext(bp->ts0); if (bp->ts1) -- cgit v1.2.3 From 0704a3da7ce50f972e898bbda88d2692a22922d9 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 28 Aug 2025 16:14:57 +0800 Subject: mISDN: Fix memory leak in dsp_hwec_enable() dsp_hwec_enable() allocates dup pointer by kstrdup(arg), but then it updates dup variable by strsep(&dup, ","). As a result when it calls kfree(dup), the dup variable may be a modified pointer that no longer points to the original allocated memory, causing a memory leak. The issue is the same pattern as fixed in commit c6a502c22999 ("mISDN: Fix memory leak in dsp_pipeline_build()"). Fixes: 9a4381618262 ("mISDN: Remove VLAs") Signed-off-by: Miaoqian Lin Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250828081457.36061-1-linmq006@gmail.com Signed-off-by: Jakub Kicinski --- drivers/isdn/mISDN/dsp_hwec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/isdn/mISDN/dsp_hwec.c b/drivers/isdn/mISDN/dsp_hwec.c index 0b3f29195330..0cd216e28f00 100644 --- a/drivers/isdn/mISDN/dsp_hwec.c +++ b/drivers/isdn/mISDN/dsp_hwec.c @@ -51,14 +51,14 @@ void dsp_hwec_enable(struct dsp *dsp, const char *arg) goto _do; { - char *dup, *tok, *name, *val; + char *dup, *next, *tok, *name, *val; int tmp; - dup = kstrdup(arg, GFP_ATOMIC); + dup = next = kstrdup(arg, GFP_ATOMIC); if (!dup) return; - while ((tok = strsep(&dup, ","))) { + while ((tok = strsep(&next, ","))) { if (!strlen(tok)) continue; name = strsep(&tok, "="); -- cgit v1.2.3 From b3852ae3105ec1048535707545d23c1e519c190f Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 27 Aug 2025 13:53:39 +0200 Subject: net: ethernet: oa_tc6: Handle failure of spi_setup There is no guarantee that spi_setup succeed, so properly handle the error case. Fixes: aa58bec064ab ("net: ethernet: oa_tc6: implement register write operation") Signed-off-by: Stefan Wahren Cc: stable@kernel.org Reviewed-by: Andrew Lunn Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250827115341.34608-2-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/oa_tc6.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/oa_tc6.c b/drivers/net/ethernet/oa_tc6.c index db200e4ec284..91a906a7918a 100644 --- a/drivers/net/ethernet/oa_tc6.c +++ b/drivers/net/ethernet/oa_tc6.c @@ -1249,7 +1249,8 @@ struct oa_tc6 *oa_tc6_init(struct spi_device *spi, struct net_device *netdev) /* Set the SPI controller to pump at realtime priority */ tc6->spi->rt = true; - spi_setup(tc6->spi); + if (spi_setup(tc6->spi) < 0) + return NULL; tc6->spi_ctrl_tx_buf = devm_kzalloc(&tc6->spi->dev, OA_TC6_CTRL_SPI_BUF_SIZE, -- cgit v1.2.3 From c7217963eb779be0a7627dd2121152fa6786ecf7 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 27 Aug 2025 13:53:40 +0200 Subject: microchip: lan865x: Fix module autoloading Add MODULE_DEVICE_TABLE(), so modules could be properly autoloaded based on the alias from spi_device_id table. While at this, fix the misleading variable name (spidev is unrelated to this driver). Fixes: 5cd2340cb6a3 ("microchip: lan865x: add driver support for Microchip's LAN865X MAC-PHY") Signed-off-by: Stefan Wahren Cc: stable@kernel.org Reviewed-by: Andrew Lunn Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250827115341.34608-3-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan865x/lan865x.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan865x/lan865x.c b/drivers/net/ethernet/microchip/lan865x/lan865x.c index 84c41f193561..9d94c8fb8b91 100644 --- a/drivers/net/ethernet/microchip/lan865x/lan865x.c +++ b/drivers/net/ethernet/microchip/lan865x/lan865x.c @@ -423,10 +423,11 @@ static void lan865x_remove(struct spi_device *spi) free_netdev(priv->netdev); } -static const struct spi_device_id spidev_spi_ids[] = { +static const struct spi_device_id lan865x_ids[] = { { .name = "lan8650" }, {}, }; +MODULE_DEVICE_TABLE(spi, lan865x_ids); static const struct of_device_id lan865x_dt_ids[] = { { .compatible = "microchip,lan8650" }, @@ -441,7 +442,7 @@ static struct spi_driver lan865x_driver = { }, .probe = lan865x_probe, .remove = lan865x_remove, - .id_table = spidev_spi_ids, + .id_table = lan865x_ids, }; module_spi_driver(lan865x_driver); -- cgit v1.2.3 From ca47c44d36a9ad3268d17f89789104a471c07f81 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 27 Aug 2025 13:53:41 +0200 Subject: microchip: lan865x: Fix LAN8651 autoloading Add missing IDs for LAN8651 devices, which are also defined in the DT bindings. Fixes: 5cd2340cb6a3 ("microchip: lan865x: add driver support for Microchip's LAN865X MAC-PHY") Signed-off-by: Stefan Wahren Cc: stable@kernel.org Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250827115341.34608-4-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan865x/lan865x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan865x/lan865x.c b/drivers/net/ethernet/microchip/lan865x/lan865x.c index 9d94c8fb8b91..79b800d2b72c 100644 --- a/drivers/net/ethernet/microchip/lan865x/lan865x.c +++ b/drivers/net/ethernet/microchip/lan865x/lan865x.c @@ -425,12 +425,14 @@ static void lan865x_remove(struct spi_device *spi) static const struct spi_device_id lan865x_ids[] = { { .name = "lan8650" }, + { .name = "lan8651" }, {}, }; MODULE_DEVICE_TABLE(spi, lan865x_ids); static const struct of_device_id lan865x_dt_ids[] = { { .compatible = "microchip,lan8650" }, + { .compatible = "microchip,lan8651" }, { /* Sentinel */ } }; MODULE_DEVICE_TABLE(of, lan865x_dt_ids); -- cgit v1.2.3 From d77b6ff0ce35a6d0b0b7b9581bc3f76d041d4087 Mon Sep 17 00:00:00 2001 From: Stanislav Fort Date: Sun, 31 Aug 2025 16:56:23 +0200 Subject: batman-adv: fix OOB read/write in network-coding decode batadv_nc_skb_decode_packet() trusts coded_len and checks only against skb->len. XOR starts at sizeof(struct batadv_unicast_packet), reducing payload headroom, and the source skb length is not verified, allowing an out-of-bounds read and a small out-of-bounds write. Validate that coded_len fits within the payload area of both destination and source sk_buffs before XORing. Fixes: 2df5278b0267 ("batman-adv: network coding - receive coded packets and decode them") Cc: stable@vger.kernel.org Reported-by: Stanislav Fort Signed-off-by: Stanislav Fort Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/network-coding.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 9f56308779cc..af97d077369f 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1687,7 +1687,12 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, coding_len = ntohs(coded_packet_tmp.coded_len); - if (coding_len > skb->len) + /* ensure dst buffer is large enough (payload only) */ + if (coding_len + h_size > skb->len) + return NULL; + + /* ensure src buffer is large enough (payload only) */ + if (coding_len + h_size > nc_packet->skb->len) return NULL; /* Here the magic is reversed: -- cgit v1.2.3 From 49c2502b5946ebf454d7e16fd0189769a82b6117 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 30 Aug 2025 11:38:42 -0700 Subject: selftests: drv-net: csum: fix interface name for remote host Use cfg.remote_ifname for arguments of remote command. Without this UDP tests fail in NIPA where local interface is called enp1s0 and remote enp0s4. Fixes: 1d0dc857b5d8 ("selftests: drv-net: add checksum tests") Reviewed-by: Willem de Bruijn Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250830183842.688935-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/csum.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py index cd23af875317..3e3a89a34afe 100755 --- a/tools/testing/selftests/drivers/net/hw/csum.py +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -17,7 +17,7 @@ def test_receive(cfg, ipver="6", extra_args=None): ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}" rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}" - tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}" + tx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -n 100 {ip_args} -r 1 -T {extra_args}" with bkg(rx_cmd, exit_wait=True): wait_port_listen(34000, proto="udp") @@ -37,7 +37,7 @@ def test_transmit(cfg, ipver="6", extra_args=None): if extra_args != "-U -Z": extra_args += " -r 1" - rx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -L 1 -n 100 {ip_args} -R {extra_args}" + rx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -L 1 -n 100 {ip_args} -R {extra_args}" tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}" with bkg(rx_cmd, host=cfg.remote, exit_wait=True): -- cgit v1.2.3 From 7000f4fa9b24ae2511b07babd0d49e888db5d265 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 29 Aug 2025 23:23:27 -0700 Subject: bnxt_en: fix incorrect page count in RX aggr ring log The warning in bnxt_alloc_one_rx_ring_netmem() reports the number of pages allocated for the RX aggregation ring. However, it mistakenly used bp->rx_ring_size instead of bp->rx_agg_ring_size, leading to confusing or misleading log output. Use the correct bp->rx_agg_ring_size value to fix this. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Alok Tiwari Reviewed-by: Jacob Keller Reviewed-by: Michael Chan Reviewed-by: Somnath Kotur Link: https://patch.msgid.link/20250830062331.783783-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 31e3d825b4bc..0daa08cecaf2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4397,7 +4397,7 @@ static void bnxt_alloc_one_rx_ring_netmem(struct bnxt *bp, for (i = 0; i < bp->rx_agg_ring_size; i++) { if (bnxt_alloc_rx_netmem(bp, rxr, prod, GFP_KERNEL)) { netdev_warn(bp->dev, "init'ed rx ring %d with %d/%d pages only\n", - ring_nr, i, bp->rx_ring_size); + ring_nr, i, bp->rx_agg_ring_size); break; } prod = NEXT_RX_AGG(prod); -- cgit v1.2.3 From c6dd1aa2cbb72b33e0569f3e71d95792beab5042 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Bl=C3=A4se?= Date: Thu, 28 Aug 2025 11:14:35 +0200 Subject: icmp: fix icmp_ndo_send address translation for reply direction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The icmp_ndo_send function was originally introduced to ensure proper rate limiting when icmp_send is called by a network device driver, where the packet's source address may have already been transformed by SNAT. However, the original implementation only considers the IP_CT_DIR_ORIGINAL direction for SNAT and always replaced the packet's source address with that of the original-direction tuple. This causes two problems: 1. For SNAT: Reply-direction packets were incorrectly translated using the source address of the CT original direction, even though no translation is required. 2. For DNAT: Reply-direction packets were not handled at all. In DNAT, the original direction's destination is translated. Therefore, in the reply direction the source address must be set to the reply-direction source, so rate limiting works as intended. Fix this by using the connection direction to select the correct tuple for source address translation, and adjust the pre-checks to handle reply-direction packets in case of DNAT. Additionally, wrap the `ct->status` access in READ_ONCE(). This avoids possible KCSAN reports about concurrent updates to `ct->status`. Fixes: 0b41713b6066 ("icmp: introduce helper for nat'd source address in network device context") Signed-off-by: Fabian Bläse Cc: Jason A. Donenfeld Reviewed-by: Florian Westphal Signed-off-by: Jakub Kicinski --- net/ipv4/icmp.c | 6 ++++-- net/ipv6/ip6_icmp.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2ffe73ea644f..c48c572f024d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -799,11 +799,12 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct sk_buff *cloned_skb = NULL; struct ip_options opts = { 0 }; enum ip_conntrack_info ctinfo; + enum ip_conntrack_dir dir; struct nf_conn *ct; __be32 orig_ip; ct = nf_ct_get(skb_in, &ctinfo); - if (!ct || !(ct->status & IPS_SRC_NAT)) { + if (!ct || !(READ_ONCE(ct->status) & IPS_NAT_MASK)) { __icmp_send(skb_in, type, code, info, &opts); return; } @@ -818,7 +819,8 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) goto out; orig_ip = ip_hdr(skb_in)->saddr; - ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip; + dir = CTINFO2DIR(ctinfo); + ip_hdr(skb_in)->saddr = ct->tuplehash[dir].tuple.src.u3.ip; __icmp_send(skb_in, type, code, info, &opts); ip_hdr(skb_in)->saddr = orig_ip; out: diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c index 9e3574880cb0..233914b63bdb 100644 --- a/net/ipv6/ip6_icmp.c +++ b/net/ipv6/ip6_icmp.c @@ -54,11 +54,12 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) struct inet6_skb_parm parm = { 0 }; struct sk_buff *cloned_skb = NULL; enum ip_conntrack_info ctinfo; + enum ip_conntrack_dir dir; struct in6_addr orig_ip; struct nf_conn *ct; ct = nf_ct_get(skb_in, &ctinfo); - if (!ct || !(ct->status & IPS_SRC_NAT)) { + if (!ct || !(READ_ONCE(ct->status) & IPS_NAT_MASK)) { __icmpv6_send(skb_in, type, code, info, &parm); return; } @@ -73,7 +74,8 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) goto out; orig_ip = ipv6_hdr(skb_in)->saddr; - ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6; + dir = CTINFO2DIR(ctinfo); + ipv6_hdr(skb_in)->saddr = ct->tuplehash[dir].tuple.src.u3.in6; __icmpv6_send(skb_in, type, code, info, &parm); ipv6_hdr(skb_in)->saddr = orig_ip; out: -- cgit v1.2.3 From e580beaf43d563aaf457f1c7f934002355ebfe7b Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 28 Aug 2025 20:18:58 +0800 Subject: eth: mlx4: Fix IS_ERR() vs NULL check bug in mlx4_en_create_rx_ring Replace NULL check with IS_ERR() check after calling page_pool_create() since this function returns error pointers (ERR_PTR). Using NULL check could lead to invalid pointer dereference. Fixes: 8533b14b3d65 ("eth: mlx4: create a page pool for Rx") Signed-off-by: Miaoqian Lin Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250828121858.67639-1-linmq006@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 92a16ddb7d86..13666d50b90f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -267,8 +267,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, pp.dma_dir = priv->dma_dir; ring->pp = page_pool_create(&pp); - if (!ring->pp) + if (IS_ERR(ring->pp)) { + err = PTR_ERR(ring->pp); goto err_ring; + } if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0) goto err_pp; -- cgit v1.2.3 From b434a3772dca1c90a40e8ec69230caa55c18ef84 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Fri, 29 Aug 2025 15:48:42 +0900 Subject: docs: remove obsolete description about threaded NAPI Commit 2677010e7793 ("Add support to set NAPI threaded for individual NAPI") introduced threaded NAPI configuration per individual NAPI instance, however obsolete description that threaded NAPI is per device has remained. Remove the old description and clarify that only NAPI instances running in threaded mode spawn kernel threads by changing "Each NAPI instance" to "Each threaded NAPI instance". Signed-off-by: Kohei Enju Reviewed-by: Samiullah Khawaja Link: https://patch.msgid.link/20250829064857.51503-1-enjuk@amazon.com Signed-off-by: Jakub Kicinski --- Documentation/networking/napi.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/napi.rst b/Documentation/networking/napi.rst index a15754adb041..7dd60366f4ff 100644 --- a/Documentation/networking/napi.rst +++ b/Documentation/networking/napi.rst @@ -433,9 +433,8 @@ Threaded NAPI Threaded NAPI is an operating mode that uses dedicated kernel threads rather than software IRQ context for NAPI processing. -The configuration is per netdevice and will affect all -NAPI instances of that device. Each NAPI instance will spawn a separate -thread (called ``napi/${ifc-name}-${napi-id}``). +Each threaded NAPI instance will spawn a separate thread +(called ``napi/${ifc-name}-${napi-id}``). It is recommended to pin each kernel thread to a single CPU, the same CPU as the CPU which services the interrupt. Note that the mapping -- cgit v1.2.3 From 6bc8a5098bf4a365c4086a4a4130bfab10a58260 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 29 Aug 2025 10:35:21 -0400 Subject: net: macb: Fix tx_ptr_lock locking macb_start_xmit and macb_tx_poll can be called with bottom-halves disabled (e.g. from softirq) as well as with interrupts disabled (with netpoll). Because of this, all other functions taking tx_ptr_lock must use spin_lock_irqsave. Fixes: 138badbc21a0 ("net: macb: use NAPI for TX completion path") Reported-by: Mike Galbraith Signed-off-by: Sean Anderson Link: https://patch.msgid.link/20250829143521.1686062-1-sean.anderson@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 16d28a8b3b56..c769b7dbd3ba 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1223,12 +1223,13 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) { struct macb *bp = queue->bp; u16 queue_index = queue - bp->queues; + unsigned long flags; unsigned int tail; unsigned int head; int packets = 0; u32 bytes = 0; - spin_lock(&queue->tx_ptr_lock); + spin_lock_irqsave(&queue->tx_ptr_lock, flags); head = queue->tx_head; for (tail = queue->tx_tail; tail != head && packets < budget; tail++) { struct macb_tx_skb *tx_skb; @@ -1291,7 +1292,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) CIRC_CNT(queue->tx_head, queue->tx_tail, bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp)) netif_wake_subqueue(bp->dev, queue_index); - spin_unlock(&queue->tx_ptr_lock); + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); return packets; } @@ -1707,8 +1708,9 @@ static void macb_tx_restart(struct macb_queue *queue) { struct macb *bp = queue->bp; unsigned int head_idx, tbqp; + unsigned long flags; - spin_lock(&queue->tx_ptr_lock); + spin_lock_irqsave(&queue->tx_ptr_lock, flags); if (queue->tx_head == queue->tx_tail) goto out_tx_ptr_unlock; @@ -1720,19 +1722,20 @@ static void macb_tx_restart(struct macb_queue *queue) if (tbqp == head_idx) goto out_tx_ptr_unlock; - spin_lock_irq(&bp->lock); + spin_lock(&bp->lock); macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); - spin_unlock_irq(&bp->lock); + spin_unlock(&bp->lock); out_tx_ptr_unlock: - spin_unlock(&queue->tx_ptr_lock); + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); } static bool macb_tx_complete_pending(struct macb_queue *queue) { bool retval = false; + unsigned long flags; - spin_lock(&queue->tx_ptr_lock); + spin_lock_irqsave(&queue->tx_ptr_lock, flags); if (queue->tx_head != queue->tx_tail) { /* Make hw descriptor updates visible to CPU */ rmb(); @@ -1740,7 +1743,7 @@ static bool macb_tx_complete_pending(struct macb_queue *queue) if (macb_tx_desc(queue, queue->tx_tail)->ctrl & MACB_BIT(TX_USED)) retval = true; } - spin_unlock(&queue->tx_ptr_lock); + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); return retval; } @@ -2308,6 +2311,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) struct macb_queue *queue = &bp->queues[queue_index]; unsigned int desc_cnt, nr_frags, frag_size, f; unsigned int hdrlen; + unsigned long flags; bool is_lso; netdev_tx_t ret = NETDEV_TX_OK; @@ -2368,7 +2372,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) desc_cnt += DIV_ROUND_UP(frag_size, bp->max_tx_length); } - spin_lock_bh(&queue->tx_ptr_lock); + spin_lock_irqsave(&queue->tx_ptr_lock, flags); /* This is a hard error, log it. */ if (CIRC_SPACE(queue->tx_head, queue->tx_tail, @@ -2392,15 +2396,15 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_sent_queue(netdev_get_tx_queue(bp->dev, queue_index), skb->len); - spin_lock_irq(&bp->lock); + spin_lock(&bp->lock); macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); - spin_unlock_irq(&bp->lock); + spin_unlock(&bp->lock); if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1) netif_stop_subqueue(dev, queue_index); unlock: - spin_unlock_bh(&queue->tx_ptr_lock); + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); return ret; } -- cgit v1.2.3 From 030e1c45666629f72d0fc1d040f9d2915680de8e Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 29 Aug 2025 20:55:40 +0200 Subject: macsec: read MACSEC_SA_ATTR_PN with nla_get_uint The code currently reads both U32 attributes and U64 attributes as U64, so when a U32 attribute is provided by userspace (ie, when not using XPN), on big endian systems, we'll load that value into the upper 32bits of the next_pn field instead of the lower 32bits. This means that the value that userspace provided is ignored (we only care about the lower 32bits for non-XPN), and we'll start using PNs from 0. Switch to nla_get_uint, which will read the value correctly on all arches, whether it's 32b or 64b. Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://patch.msgid.link/1c1df1661b89238caf5beefb84a10ebfd56c66ea.1756459839.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 4c75d1fea552..01329fe7451a 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1844,7 +1844,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) if (tb_sa[MACSEC_SA_ATTR_PN]) { spin_lock_bh(&rx_sa->lock); - rx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); + rx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&rx_sa->lock); } @@ -2086,7 +2086,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) } spin_lock_bh(&tx_sa->lock); - tx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); + tx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&tx_sa->lock); if (tb_sa[MACSEC_SA_ATTR_ACTIVE]) @@ -2398,7 +2398,7 @@ static int macsec_upd_txsa(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&tx_sa->lock); prev_pn = tx_sa->next_pn_halves; - tx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); + tx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&tx_sa->lock); } @@ -2496,7 +2496,7 @@ static int macsec_upd_rxsa(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&rx_sa->lock); prev_pn = rx_sa->next_pn_halves; - rx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); + rx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&rx_sa->lock); } -- cgit v1.2.3 From ba1e9421cf1a8369d25c3832439702a015d6b5f9 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Thu, 28 Aug 2025 20:41:17 +0800 Subject: net/smc: fix one NULL pointer dereference in smc_ib_is_sg_need_sync() BUG: kernel NULL pointer dereference, address: 00000000000002ec PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP PTI CPU: 28 UID: 0 PID: 343 Comm: kworker/28:1 Kdump: loaded Tainted: G OE 6.17.0-rc2+ #9 NONE Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 Workqueue: smc_hs_wq smc_listen_work [smc] RIP: 0010:smc_ib_is_sg_need_sync+0x9e/0xd0 [smc] ... Call Trace: smcr_buf_map_link+0x211/0x2a0 [smc] __smc_buf_create+0x522/0x970 [smc] smc_buf_create+0x3a/0x110 [smc] smc_find_rdma_v2_device_serv+0x18f/0x240 [smc] ? smc_vlan_by_tcpsk+0x7e/0xe0 [smc] smc_listen_find_device+0x1dd/0x2b0 [smc] smc_listen_work+0x30f/0x580 [smc] process_one_work+0x18c/0x340 worker_thread+0x242/0x360 kthread+0xe7/0x220 ret_from_fork+0x13a/0x160 ret_from_fork_asm+0x1a/0x30 If the software RoCE device is used, ibdev->dma_device is a null pointer. As a result, the problem occurs. Null pointer detection is added to prevent problems. Fixes: 0ef69e788411c ("net/smc: optimize for smc_sndbuf_sync_sg_for_device and smc_rmb_sync_sg_for_cpu") Signed-off-by: Liu Jian Reviewed-by: Guangguan Wang Reviewed-by: Zhu Yanjun Reviewed-by: D. Wythe Link: https://patch.msgid.link/20250828124117.2622624-1-liujian56@huawei.com Signed-off-by: Paolo Abeni --- net/smc/smc_ib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 53828833a3f7..a42ef3f77b96 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -742,6 +742,9 @@ bool smc_ib_is_sg_need_sync(struct smc_link *lnk, unsigned int i; bool ret = false; + if (!lnk->smcibdev->ibdev->dma_device) + return ret; + /* for now there is just one DMA address */ for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg, buf_slot->sgt[lnk->link_idx].nents, i) { -- cgit v1.2.3 From 773b27a8a2f00ce3134e92e50ea4794a98ba2b76 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 29 Aug 2025 15:28:26 +0800 Subject: net: mctp: mctp_fraq_queue should take ownership of passed skb As of commit f5d83cf0eeb9 ("net: mctp: unshare packets when reassembling"), we skb_unshare() in mctp_frag_queue(). The unshare may invalidate the original skb pointer, so we need to treat the skb as entirely owned by the fraq queue, even on failure. Fixes: f5d83cf0eeb9 ("net: mctp: unshare packets when reassembling") Signed-off-by: Jeremy Kerr Link: https://patch.msgid.link/20250829-mctp-skb-unshare-v1-1-1c28fe10235a@codeconstruct.com.au Signed-off-by: Paolo Abeni --- net/mctp/route.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index 2b2b958ef6a3..4d314e062ba9 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -378,6 +378,7 @@ static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {} static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) {} #endif +/* takes ownership of skb, both in success and failure cases */ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) { struct mctp_hdr *hdr = mctp_hdr(skb); @@ -387,8 +388,10 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) & MCTP_HDR_SEQ_MASK; if (!key->reasm_head) { - /* Since we're manipulating the shared frag_list, ensure it isn't - * shared with any other SKBs. + /* Since we're manipulating the shared frag_list, ensure it + * isn't shared with any other SKBs. In the cloned case, + * this will free the skb; callers can no longer access it + * safely. */ key->reasm_head = skb_unshare(skb, GFP_ATOMIC); if (!key->reasm_head) @@ -402,10 +405,10 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK; if (this_seq != exp_seq) - return -EINVAL; + goto err_free; if (key->reasm_head->len + skb->len > mctp_message_maxlen) - return -EINVAL; + goto err_free; skb->next = NULL; skb->sk = NULL; @@ -419,6 +422,10 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) key->reasm_head->truesize += skb->truesize; return 0; + +err_free: + kfree_skb(skb); + return -EINVAL; } static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb) @@ -532,18 +539,16 @@ static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb) * key isn't observable yet */ mctp_frag_queue(key, skb); + skb = NULL; /* if the key_add fails, we've raced with another * SOM packet with the same src, dest and tag. There's * no way to distinguish future packets, so all we - * can do is drop; we'll free the skb on exit from - * this function. + * can do is drop. */ rc = mctp_key_add(key, msk); - if (!rc) { + if (!rc) trace_mctp_key_acquire(key); - skb = NULL; - } /* we don't need to release key->lock on exit, so * clean up here and suppress the unlock via @@ -561,8 +566,7 @@ static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb) key = NULL; } else { rc = mctp_frag_queue(key, skb); - if (!rc) - skb = NULL; + skb = NULL; } } @@ -572,17 +576,16 @@ static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb) */ /* we need to be continuing an existing reassembly... */ - if (!key->reasm_head) + if (!key->reasm_head) { rc = -EINVAL; - else + } else { rc = mctp_frag_queue(key, skb); + skb = NULL; + } if (rc) goto out_unlock; - /* we've queued; the queue owns the skb now */ - skb = NULL; - /* end of message? deliver to socket, and we're done with * the reassembly/response key */ -- cgit v1.2.3 From e27e34bc99413a29cafae02ad572ea3c9beba2ce Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 29 Aug 2025 15:40:23 +0800 Subject: net: mctp: usb: initialise mac header in RX path We're not currently setting skb->mac_header on ingress, and the netdev core rx path expects it. Without it, we'll hit a warning on DEBUG_NETDEV from commit 1e4033b53db4 ("net: skb_reset_mac_len() must check if mac_header was set") Initialise the mac_header to refer to the USB transport header. Fixes: 0791c0327a6e ("net: mctp: Add MCTP USB transport driver") Signed-off-by: Jeremy Kerr Link: https://patch.msgid.link/20250829-mctp-usb-mac-header-v1-1-338ad725e183@codeconstruct.com.au Signed-off-by: Paolo Abeni --- drivers/net/mctp/mctp-usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/mctp/mctp-usb.c b/drivers/net/mctp/mctp-usb.c index 775a386d0aca..36ccc53b1797 100644 --- a/drivers/net/mctp/mctp-usb.c +++ b/drivers/net/mctp/mctp-usb.c @@ -183,6 +183,7 @@ static void mctp_usb_in_complete(struct urb *urb) struct mctp_usb_hdr *hdr; u8 pkt_len; /* length of MCTP packet, no USB header */ + skb_reset_mac_header(skb); hdr = skb_pull_data(skb, sizeof(*hdr)); if (!hdr) break; -- cgit v1.2.3 From a6099f263e1f408bcc7913c9df24b0677164fc5d Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 29 Aug 2025 17:40:51 +0530 Subject: net: ethernet: ti: am65-cpsw-nuss: Fix null pointer dereference for ndev In the TX completion packet stage of TI SoCs with CPSW2G instance, which has single external ethernet port, ndev is accessed without being initialized if no TX packets have been processed. It results into null pointer dereference, causing kernel to crash. Fix this by having a check on the number of TX packets which have been processed. Fixes: 9a369ae3d143 ("net: ethernet: ti: am65-cpsw: remove am65_cpsw_nuss_tx_compl_packets_2g()") Signed-off-by: Nishanth Menon Signed-off-by: Chintan Vankar Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250829121051.2031832-1-c-vankar@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index ecd6ecac87bb..8b2364f5f731 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1522,7 +1522,7 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, } } - if (single_port) { + if (single_port && num_tx) { netif_txq = netdev_get_tx_queue(ndev, chn); netdev_tx_completed_queue(netif_txq, num_tx, total_bytes); am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq); -- cgit v1.2.3 From 403bf043d9340196e06769065169df7444b91f7a Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 7 Aug 2025 10:35:26 -0700 Subject: ice: fix NULL access of tx->in_use in ice_ptp_ts_irq The E810 device has support for a "low latency" firmware interface to access and read the Tx timestamps. This interface does not use the standard Tx timestamp logic, due to the latency overhead of proxying sideband command requests over the firmware AdminQ. The logic still makes use of the Tx timestamp tracking structure, ice_ptp_tx, as it uses the same "ready" bitmap to track which Tx timestamps complete. Unfortunately, the ice_ptp_ts_irq() function does not check if the tracker is initialized before its first access. This results in NULL dereference or use-after-free bugs similar to the following: [245977.278756] BUG: kernel NULL pointer dereference, address: 0000000000000000 [245977.278774] RIP: 0010:_find_first_bit+0x19/0x40 [245977.278796] Call Trace: [245977.278809] ? ice_misc_intr+0x364/0x380 [ice] This can occur if a Tx timestamp interrupt races with the driver reset logic. Fix this by only checking the in_use bitmap (and other fields) if the tracker is marked as initialized. The reset flow will clear the init field under lock before it tears the tracker down, thus preventing any use-after-free or NULL access. Fixes: f9472aaabd1f ("ice: Process TSYN IRQ in a separate function") Signed-off-by: Jacob Keller Reviewed-by: Paul Menzel Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index e358eb1d719f..fb0f6365a6d6 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -2701,16 +2701,19 @@ irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf) */ if (hw->dev_caps.ts_dev_info.ts_ll_int_read) { struct ice_ptp_tx *tx = &pf->ptp.port.tx; - u8 idx; + u8 idx, last; if (!ice_pf_state_is_nominal(pf)) return IRQ_HANDLED; spin_lock(&tx->lock); - idx = find_next_bit_wrap(tx->in_use, tx->len, - tx->last_ll_ts_idx_read + 1); - if (idx != tx->len) - ice_ptp_req_tx_single_tstamp(tx, idx); + if (tx->init) { + last = tx->last_ll_ts_idx_read + 1; + idx = find_next_bit_wrap(tx->in_use, tx->len, + last); + if (idx != tx->len) + ice_ptp_req_tx_single_tstamp(tx, idx); + } spin_unlock(&tx->lock); return IRQ_HANDLED; -- cgit v1.2.3 From f6486338fde3f04ed0ec59fe67a69a208c32734f Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 7 Aug 2025 10:35:27 -0700 Subject: ice: fix NULL access of tx->in_use in ice_ll_ts_intr Recent versions of the E810 firmware have support for an extra interrupt to handle report of the "low latency" Tx timestamps coming from the specialized low latency firmware interface. Instead of polling the registers, software can wait until the low latency interrupt is fired. This logic makes use of the Tx timestamp tracking structure, ice_ptp_tx, as it uses the same "ready" bitmap to track which Tx timestamps complete. Unfortunately, the ice_ll_ts_intr() function does not check if the tracker is initialized before its first access. This results in NULL dereference or use-after-free bugs similar to the issues fixed in the ice_ptp_ts_irq() function. Fix this by only checking the in_use bitmap (and other fields) if the tracker is marked as initialized. The reset flow will clear the init field under lock before it tears the tracker down, thus preventing any use-after-free or NULL access. Fixes: 82e71b226e0e ("ice: Enable SW interrupt from FW for LL TS") Signed-off-by: Jacob Keller Reviewed-by: Paul Menzel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index cae992d8f03c..77781277aa8e 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3176,12 +3176,14 @@ static irqreturn_t ice_ll_ts_intr(int __always_unused irq, void *data) hw = &pf->hw; tx = &pf->ptp.port.tx; spin_lock_irqsave(&tx->lock, flags); - ice_ptp_complete_tx_single_tstamp(tx); + if (tx->init) { + ice_ptp_complete_tx_single_tstamp(tx); - idx = find_next_bit_wrap(tx->in_use, tx->len, - tx->last_ll_ts_idx_read + 1); - if (idx != tx->len) - ice_ptp_req_tx_single_tstamp(tx, idx); + idx = find_next_bit_wrap(tx->in_use, tx->len, + tx->last_ll_ts_idx_read + 1); + if (idx != tx->len) + ice_ptp_req_tx_single_tstamp(tx, idx); + } spin_unlock_irqrestore(&tx->lock, flags); val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | -- cgit v1.2.3 From 65637c3a181184ae25bd10d37bc83f8bb97708b5 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Mon, 11 Aug 2025 17:19:21 -0700 Subject: idpf: fix UAF in RDMA core aux dev deinitialization Free the adev->id before auxiliary_device_uninit. The call to uninit triggers the release callback, which frees the iadev memory containing the adev. The previous flow results in a UAF during rmmod due to the adev->id access. [264939.604077] ================================================================== [264939.604093] BUG: KASAN: slab-use-after-free in idpf_idc_deinit_core_aux_device+0xe4/0x100 [idpf] [264939.604134] Read of size 4 at addr ff1100109eb6eaf8 by task rmmod/17842 ... [264939.604635] Allocated by task 17597: [264939.604643] kasan_save_stack+0x20/0x40 [264939.604654] kasan_save_track+0x14/0x30 [264939.604663] __kasan_kmalloc+0x8f/0xa0 [264939.604672] idpf_idc_init_aux_core_dev+0x4bd/0xb60 [idpf] [264939.604700] idpf_idc_init+0x55/0xd0 [idpf] [264939.604726] process_one_work+0x658/0xfe0 [264939.604742] worker_thread+0x6e1/0xf10 [264939.604750] kthread+0x382/0x740 [264939.604762] ret_from_fork+0x23a/0x310 [264939.604772] ret_from_fork_asm+0x1a/0x30 [264939.604785] Freed by task 17842: [264939.604790] kasan_save_stack+0x20/0x40 [264939.604799] kasan_save_track+0x14/0x30 [264939.604808] kasan_save_free_info+0x3b/0x60 [264939.604820] __kasan_slab_free+0x37/0x50 [264939.604830] kfree+0xf1/0x420 [264939.604840] device_release+0x9c/0x210 [264939.604850] kobject_put+0x17c/0x4b0 [264939.604860] idpf_idc_deinit_core_aux_device+0x4f/0x100 [idpf] [264939.604886] idpf_vc_core_deinit+0xba/0x3a0 [idpf] [264939.604915] idpf_remove+0xb0/0x7c0 [idpf] [264939.604944] pci_device_remove+0xab/0x1e0 [264939.604955] device_release_driver_internal+0x371/0x530 [264939.604969] driver_detach+0xbf/0x180 [264939.604981] bus_remove_driver+0x11b/0x2a0 [264939.604991] pci_unregister_driver+0x2a/0x250 [264939.605005] __do_sys_delete_module.constprop.0+0x2eb/0x540 [264939.605014] do_syscall_64+0x64/0x2c0 [264939.605024] entry_SYSCALL_64_after_hwframe+0x76/0x7e Fixes: f4312e6bfa2a ("idpf: implement core RDMA auxiliary dev create, init, and destroy") Signed-off-by: Joshua Hay Reviewed-by: Aleksandr Loktionov Reviewed-by: Vadim Fedorenko Tested-by: Samuel Salin Reviewed-by: Jacob Keller Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_idc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c index 4d2905103215..7e20a07e98e5 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_idc.c +++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c @@ -247,10 +247,10 @@ static void idpf_unplug_aux_dev(struct auxiliary_device *adev) if (!adev) return; + ida_free(&idpf_idc_ida, adev->id); + auxiliary_device_delete(adev); auxiliary_device_uninit(adev); - - ida_free(&idpf_idc_ida, adev->id); } /** -- cgit v1.2.3 From acf3a5c8be80fe238c1a7629db1c21c74a1f9dd4 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 14 Aug 2025 16:43:00 -0700 Subject: idpf: set mac type when adding and removing MAC filters On control planes that allow changing the MAC address of the interface, the driver must provide a MAC type to avoid errors such as: idpf 0000:0a:00.0: Transaction failed (op 535) idpf 0000:0a:00.0: Received invalid MAC filter payload (op 535) (len 0) idpf 0000:0a:00.0: Transaction failed (op 536) These errors occur during driver load or when changing the MAC via: ip link set address Add logic to set the MAC type when sending ADD/DEL (opcodes 535/536) to the control plane. Since only one primary MAC is supported per vport, the driver only needs to send an ADD opcode when setting it. Remove the old address by calling __idpf_del_mac_filter(), which skips the message and just clears the entry from the internal list. This avoids an error on DEL as it attempts to remove an address already cleared by the preceding ADD opcode. Fixes: ce1b75d0635c ("idpf: add ptypes and MAC filter support") Reported-by: Jian Liu Signed-off-by: Emil Tantilov Reviewed-by: Aleksandr Loktionov Reviewed-by: Paul Menzel Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 9 ++++++--- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 12 ++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 2c2a3e85d693..513032cb5f08 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -2344,6 +2344,7 @@ static int idpf_set_mac(struct net_device *netdev, void *p) struct idpf_netdev_priv *np = netdev_priv(netdev); struct idpf_vport_config *vport_config; struct sockaddr *addr = p; + u8 old_mac_addr[ETH_ALEN]; struct idpf_vport *vport; int err = 0; @@ -2367,17 +2368,19 @@ static int idpf_set_mac(struct net_device *netdev, void *p) if (ether_addr_equal(netdev->dev_addr, addr->sa_data)) goto unlock_mutex; + ether_addr_copy(old_mac_addr, vport->default_mac_addr); + ether_addr_copy(vport->default_mac_addr, addr->sa_data); vport_config = vport->adapter->vport_config[vport->idx]; err = idpf_add_mac_filter(vport, np, addr->sa_data, false); if (err) { __idpf_del_mac_filter(vport_config, addr->sa_data); + ether_addr_copy(vport->default_mac_addr, netdev->dev_addr); goto unlock_mutex; } - if (is_valid_ether_addr(vport->default_mac_addr)) - idpf_del_mac_filter(vport, np, vport->default_mac_addr, false); + if (is_valid_ether_addr(old_mac_addr)) + __idpf_del_mac_filter(vport_config, old_mac_addr); - ether_addr_copy(vport->default_mac_addr, addr->sa_data); eth_hw_addr_set(netdev, addr->sa_data); unlock_mutex: diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index a028c69f7fdc..6330d4a0ae07 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -3765,6 +3765,16 @@ u32 idpf_get_vport_id(struct idpf_vport *vport) return le32_to_cpu(vport_msg->vport_id); } +static void idpf_set_mac_type(struct idpf_vport *vport, + struct virtchnl2_mac_addr *mac_addr) +{ + bool is_primary; + + is_primary = ether_addr_equal(vport->default_mac_addr, mac_addr->addr); + mac_addr->type = is_primary ? VIRTCHNL2_MAC_ADDR_PRIMARY : + VIRTCHNL2_MAC_ADDR_EXTRA; +} + /** * idpf_mac_filter_async_handler - Async callback for mac filters * @adapter: private data struct @@ -3894,6 +3904,7 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport, list) { if (add && f->add) { ether_addr_copy(mac_addr[i].addr, f->macaddr); + idpf_set_mac_type(vport, &mac_addr[i]); i++; f->add = false; if (i == total_filters) @@ -3901,6 +3912,7 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport, } if (!add && f->remove) { ether_addr_copy(mac_addr[i].addr, f->macaddr); + idpf_set_mac_type(vport, &mac_addr[i]); i++; f->remove = false; if (i == total_filters) -- cgit v1.2.3 From 9fcdb1c3c4ba134434694c001dbff343f1ffa319 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 22 Jul 2025 17:14:37 -0700 Subject: i40e: remove read access to debugfs files The 'command' and 'netdev_ops' debugfs files are a legacy debugging interface supported by the i40e driver since its early days by commit 02e9c290814c ("i40e: debugfs interface"). Both of these debugfs files provide a read handler which is mostly useless, and which is implemented with questionable logic. They both use a static 256 byte buffer which is initialized to the empty string. In the case of the 'command' file this buffer is literally never used and simply wastes space. In the case of the 'netdev_ops' file, the last command written is saved here. On read, the files contents are presented as the name of the device followed by a colon and then the contents of their respective static buffer. For 'command' this will always be ": ". For 'netdev_ops', this will be ": ". But note the buffer is shared between all devices operated by this module. At best, it is mostly meaningless information, and at worse it could be accessed simultaneously as there doesn't appear to be any locking mechanism. We have also recently received multiple reports for both read functions about their use of snprintf and potential overflow that could result in reading arbitrary kernel memory. For the 'command' file, this is definitely impossible, since the static buffer is always zero and never written to. For the 'netdev_ops' file, it does appear to be possible, if the user carefully crafts the command input, it will be copied into the buffer, which could be large enough to cause snprintf to truncate, which then causes the copy_to_user to read beyond the length of the buffer allocated by kzalloc. A minimal fix would be to replace snprintf() with scnprintf() which would cap the return to the number of bytes written, preventing an overflow. A more involved fix would be to drop the mostly useless static buffers, saving 512 bytes and modifying the read functions to stop needing those as input. Instead, lets just completely drop the read access to these files. These are debug interfaces exposed as part of debugfs, and I don't believe that dropping read access will break any script, as the provided output is pretty useless. You can find the netdev name through other more standard interfaces, and the 'netdev_ops' interface can easily result in garbage if you issue simultaneous writes to multiple devices at once. In order to properly remove the i40e_dbg_netdev_ops_buf, we need to refactor its write function to avoid using the static buffer. Instead, use the same logic as the i40e_dbg_command_write, with an allocated buffer. Update the code to use this instead of the static buffer, and ensure we free the buffer on exit. This fixes simultaneous writes to 'netdev_ops' on multiple devices, and allows us to remove the now unused static buffer along with removing the read access. Fixes: 02e9c290814c ("i40e: debugfs interface") Reported-by: Kunwu Chan Closes: https://lore.kernel.org/intel-wired-lan/20231208031950.47410-1-chentao@kylinos.cn/ Reported-by: Wang Haoran Closes: https://lore.kernel.org/all/CANZ3JQRRiOdtfQJoP9QM=6LS1Jto8PGBGw6y7-TL=BcnzHQn1Q@mail.gmail.com/ Reported-by: Amir Mohammad Jahangirzad Closes: https://lore.kernel.org/all/20250722115017.206969-1-a.jahangirzad@gmail.com/ Signed-off-by: Jacob Keller Reviewed-by: Dawid Osuchowski Reviewed-by: Aleksandr Loktionov Reviewed-by: Simon Horman Reviewed-by: Kunwu Chan Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 123 ++++--------------------- 1 file changed, 19 insertions(+), 104 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 6cd6f23d42a6..c17b5d290f0a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -40,48 +40,6 @@ static struct i40e_vsi *i40e_dbg_find_vsi(struct i40e_pf *pf, int seid) * setup, adding or removing filters, or other things. Many of * these will be useful for some forms of unit testing. **************************************************************/ -static char i40e_dbg_command_buf[256] = ""; - -/** - * i40e_dbg_command_read - read for command datum - * @filp: the opened file - * @buffer: where to write the data for the user to read - * @count: the size of the user's buffer - * @ppos: file position offset - **/ -static ssize_t i40e_dbg_command_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - struct i40e_pf *pf = filp->private_data; - struct i40e_vsi *main_vsi; - int bytes_not_copied; - int buf_size = 256; - char *buf; - int len; - - /* don't allow partial reads */ - if (*ppos != 0) - return 0; - if (count < buf_size) - return -ENOSPC; - - buf = kzalloc(buf_size, GFP_KERNEL); - if (!buf) - return -ENOSPC; - - main_vsi = i40e_pf_get_main_vsi(pf); - len = snprintf(buf, buf_size, "%s: %s\n", main_vsi->netdev->name, - i40e_dbg_command_buf); - - bytes_not_copied = copy_to_user(buffer, buf, len); - kfree(buf); - - if (bytes_not_copied) - return -EFAULT; - - *ppos = len; - return len; -} static char *i40e_filter_state_string[] = { "INVALID", @@ -1621,7 +1579,6 @@ command_write_done: static const struct file_operations i40e_dbg_command_fops = { .owner = THIS_MODULE, .open = simple_open, - .read = i40e_dbg_command_read, .write = i40e_dbg_command_write, }; @@ -1630,48 +1587,6 @@ static const struct file_operations i40e_dbg_command_fops = { * The netdev_ops entry in debugfs is for giving the driver commands * to be executed from the netdev operations. **************************************************************/ -static char i40e_dbg_netdev_ops_buf[256] = ""; - -/** - * i40e_dbg_netdev_ops_read - read for netdev_ops datum - * @filp: the opened file - * @buffer: where to write the data for the user to read - * @count: the size of the user's buffer - * @ppos: file position offset - **/ -static ssize_t i40e_dbg_netdev_ops_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - struct i40e_pf *pf = filp->private_data; - struct i40e_vsi *main_vsi; - int bytes_not_copied; - int buf_size = 256; - char *buf; - int len; - - /* don't allow partal reads */ - if (*ppos != 0) - return 0; - if (count < buf_size) - return -ENOSPC; - - buf = kzalloc(buf_size, GFP_KERNEL); - if (!buf) - return -ENOSPC; - - main_vsi = i40e_pf_get_main_vsi(pf); - len = snprintf(buf, buf_size, "%s: %s\n", main_vsi->netdev->name, - i40e_dbg_netdev_ops_buf); - - bytes_not_copied = copy_to_user(buffer, buf, len); - kfree(buf); - - if (bytes_not_copied) - return -EFAULT; - - *ppos = len; - return len; -} /** * i40e_dbg_netdev_ops_write - write into netdev_ops datum @@ -1685,35 +1600,36 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, size_t count, loff_t *ppos) { struct i40e_pf *pf = filp->private_data; + char *cmd_buf, *buf_tmp; int bytes_not_copied; struct i40e_vsi *vsi; - char *buf_tmp; int vsi_seid; int i, cnt; /* don't allow partial writes */ if (*ppos != 0) return 0; - if (count >= sizeof(i40e_dbg_netdev_ops_buf)) - return -ENOSPC; - memset(i40e_dbg_netdev_ops_buf, 0, sizeof(i40e_dbg_netdev_ops_buf)); - bytes_not_copied = copy_from_user(i40e_dbg_netdev_ops_buf, - buffer, count); - if (bytes_not_copied) + cmd_buf = kzalloc(count + 1, GFP_KERNEL); + if (!cmd_buf) + return count; + bytes_not_copied = copy_from_user(cmd_buf, buffer, count); + if (bytes_not_copied) { + kfree(cmd_buf); return -EFAULT; - i40e_dbg_netdev_ops_buf[count] = '\0'; + } + cmd_buf[count] = '\0'; - buf_tmp = strchr(i40e_dbg_netdev_ops_buf, '\n'); + buf_tmp = strchr(cmd_buf, '\n'); if (buf_tmp) { *buf_tmp = '\0'; - count = buf_tmp - i40e_dbg_netdev_ops_buf + 1; + count = buf_tmp - cmd_buf + 1; } - if (strncmp(i40e_dbg_netdev_ops_buf, "change_mtu", 10) == 0) { + if (strncmp(cmd_buf, "change_mtu", 10) == 0) { int mtu; - cnt = sscanf(&i40e_dbg_netdev_ops_buf[11], "%i %i", + cnt = sscanf(&cmd_buf[11], "%i %i", &vsi_seid, &mtu); if (cnt != 2) { dev_info(&pf->pdev->dev, "change_mtu \n"); @@ -1735,8 +1651,8 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, dev_info(&pf->pdev->dev, "Could not acquire RTNL - please try again\n"); } - } else if (strncmp(i40e_dbg_netdev_ops_buf, "set_rx_mode", 11) == 0) { - cnt = sscanf(&i40e_dbg_netdev_ops_buf[11], "%i", &vsi_seid); + } else if (strncmp(cmd_buf, "set_rx_mode", 11) == 0) { + cnt = sscanf(&cmd_buf[11], "%i", &vsi_seid); if (cnt != 1) { dev_info(&pf->pdev->dev, "set_rx_mode \n"); goto netdev_ops_write_done; @@ -1756,8 +1672,8 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, dev_info(&pf->pdev->dev, "Could not acquire RTNL - please try again\n"); } - } else if (strncmp(i40e_dbg_netdev_ops_buf, "napi", 4) == 0) { - cnt = sscanf(&i40e_dbg_netdev_ops_buf[4], "%i", &vsi_seid); + } else if (strncmp(cmd_buf, "napi", 4) == 0) { + cnt = sscanf(&cmd_buf[4], "%i", &vsi_seid); if (cnt != 1) { dev_info(&pf->pdev->dev, "napi \n"); goto netdev_ops_write_done; @@ -1775,21 +1691,20 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, dev_info(&pf->pdev->dev, "napi called\n"); } } else { - dev_info(&pf->pdev->dev, "unknown command '%s'\n", - i40e_dbg_netdev_ops_buf); + dev_info(&pf->pdev->dev, "unknown command '%s'\n", cmd_buf); dev_info(&pf->pdev->dev, "available commands\n"); dev_info(&pf->pdev->dev, " change_mtu \n"); dev_info(&pf->pdev->dev, " set_rx_mode \n"); dev_info(&pf->pdev->dev, " napi \n"); } netdev_ops_write_done: + kfree(cmd_buf); return count; } static const struct file_operations i40e_dbg_netdev_ops_fops = { .owner = THIS_MODULE, .open = simple_open, - .read = i40e_dbg_netdev_ops_read, .write = i40e_dbg_netdev_ops_write, }; -- cgit v1.2.3 From a556f06338e1d5a85af0e32ecb46e365547f92b9 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Wed, 27 Aug 2025 19:56:31 +0800 Subject: i40e: Fix potential invalid access when MAC list is empty list_first_entry() never returns NULL - if the list is empty, it still returns a pointer to an invalid object, leading to potential invalid memory access when dereferenced. Fix this by using list_first_entry_or_null instead of list_first_entry. Fixes: e3219ce6a775 ("i40e: Add support for client interface for IWARP driver") Signed-off-by: Zhen Ni Reviewed-by: Paul Menzel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 5f1a405cbbf8..518bc738ea3b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -359,8 +359,8 @@ static void i40e_client_add_instance(struct i40e_pf *pf) if (i40e_client_get_params(vsi, &cdev->lan_info.params)) goto free_cdev; - mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list, - struct netdev_hw_addr, list); + mac = list_first_entry_or_null(&cdev->lan_info.netdev->dev_addrs.list, + struct netdev_hw_addr, list); if (mac) ether_addr_copy(cdev->lan_info.lanmac, mac->addr); else -- cgit v1.2.3 From b7e5c3e3bfa9dc8af75ff6d8633ad7070e1985e4 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Sun, 10 Aug 2025 10:01:14 -0700 Subject: ixgbe: fix incorrect map used in eee linkmode incorrectly used ixgbe_lp_map in loops intended to populate the supported and advertised EEE linkmode bitmaps based on ixgbe_ls_map. This results in incorrect bit setting and potential out-of-bounds access, since ixgbe_lp_map and ixgbe_ls_map have different sizes and purposes. ixgbe_lp_map[i] -> ixgbe_ls_map[i] Use ixgbe_ls_map for supported and advertised linkmodes, and keep ixgbe_lp_map usage only for link partner (lp_advertised) mapping. Fixes: 9356b6db9d05 ("net: ethernet: ixgbe: Convert EEE to use linkmodes") Signed-off-by: Alok Tiwari Reviewed-by: Aleksandr Loktionov Reviewed-by: Paul Menzel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 25c3a09ad7f1..1a2f1bdb91aa 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3571,13 +3571,13 @@ ixgbe_get_eee_fw(struct ixgbe_adapter *adapter, struct ethtool_keee *edata) for (i = 0; i < ARRAY_SIZE(ixgbe_ls_map); ++i) { if (hw->phy.eee_speeds_supported & ixgbe_ls_map[i].mac_speed) - linkmode_set_bit(ixgbe_lp_map[i].link_mode, + linkmode_set_bit(ixgbe_ls_map[i].link_mode, edata->supported); } for (i = 0; i < ARRAY_SIZE(ixgbe_ls_map); ++i) { if (hw->phy.eee_speeds_advertised & ixgbe_ls_map[i].mac_speed) - linkmode_set_bit(ixgbe_lp_map[i].link_mode, + linkmode_set_bit(ixgbe_ls_map[i].link_mode, edata->advertised); } -- cgit v1.2.3 From 90fb7db49c6dbac961c6b8ebfd741141ffbc8545 Mon Sep 17 00:00:00 2001 From: Vitaly Lifshits Date: Sun, 17 Aug 2025 12:25:47 +0300 Subject: e1000e: fix heap overflow in e1000_set_eeprom Fix a possible heap overflow in e1000_set_eeprom function by adding input validation for the requested length of the change in the EEPROM. In addition, change the variable type from int to size_t for better code practices and rearrange declarations to RCT. Cc: stable@vger.kernel.org Fixes: bc7f75fa9788 ("[E1000E]: New pci-express e1000 driver (currently for ICH9 devices only)") Co-developed-by: Mikael Wessel Signed-off-by: Mikael Wessel Signed-off-by: Vitaly Lifshits Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/ethtool.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index c0bbb12eed2e..cf01a108a5bb 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -549,12 +549,12 @@ static int e1000_set_eeprom(struct net_device *netdev, { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + size_t total_len, max_len; u16 *eeprom_buff; - void *ptr; - int max_len; + int ret_val = 0; int first_word; int last_word; - int ret_val = 0; + void *ptr; u16 i; if (eeprom->len == 0) @@ -569,6 +569,10 @@ static int e1000_set_eeprom(struct net_device *netdev, max_len = hw->nvm.word_size * 2; + if (check_add_overflow(eeprom->offset, eeprom->len, &total_len) || + total_len > max_len) + return -EFBIG; + first_word = eeprom->offset >> 1; last_word = (eeprom->offset + eeprom->len - 1) >> 1; eeprom_buff = kmalloc(max_len, GFP_KERNEL); -- cgit v1.2.3 From ddbf0e78a8b20ec18d314d31336a0230fdc9b394 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Sun, 31 Aug 2025 12:59:07 +0200 Subject: net: sfp: add quirk for FLYPRO copper SFP+ module Add quirk for a copper SFP that identifies itself as "FLYPRO" "SFP-10GT-CS-30M". It uses RollBall protocol to talk to the PHY. Signed-off-by: Aleksander Jan Bajkowski Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20250831105910.3174-1-olek2@wp.pl Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 5347c95d1e77..4cd1d6c51dc2 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -492,6 +492,9 @@ static const struct sfp_quirk sfp_quirks[] = { SFP_QUIRK("ALCATELLUCENT", "3FE46541AA", sfp_quirk_2500basex, sfp_fixup_nokia), + // FLYPRO SFP-10GT-CS-30M uses Rollball protocol to talk to the PHY. + SFP_QUIRK_F("FLYPRO", "SFP-10GT-CS-30M", sfp_fixup_rollball), + // Fiberstore SFP-10G-T doesn't identify as copper, uses the Rollball // protocol to talk to the PHY and needs 4 sec wait before probing the // PHY. -- cgit v1.2.3 From 22c55fb9eb92395d999b8404d73e58540d11bdd8 Mon Sep 17 00:00:00 2001 From: Ramya Gnanasekar Date: Fri, 1 Aug 2025 16:19:20 +0530 Subject: wifi: ath12k: Set EMLSR support flag in MLO flags for EML-capable stations Currently, when updating EMLSR capabilities of a multi-link (ML) station, only the EMLSR parameters (e.g., padding delay, transition delay, and timeout) are sent to firmware. However, firmware also requires the EMLSR support flag to be set in the MLO flags of the peer assoc WMI command to properly handle EML operating mode notification frames. Set the ATH12K_WMI_FLAG_MLO_EMLSR_SUPPORT flag in the peer assoc WMI command when the ML station is EMLSR-capable, so that the firmware can respond to EHT EML action frames from associated stations. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Fixes: 4bcf9525bc49 ("wifi: ath12k: update EMLSR capabilities of ML Station") Signed-off-by: Ramya Gnanasekar Signed-off-by: Rameshkumar Sundaram Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250801104920.3326352-1-rameshkumar.sundaram@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index da85c28ec355..742ffeb48bce 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -2423,6 +2423,7 @@ int ath12k_wmi_send_peer_assoc_cmd(struct ath12k *ar, eml_cap = arg->ml.eml_cap; if (u16_get_bits(eml_cap, IEEE80211_EML_CAP_EMLSR_SUPP)) { + ml_params->flags |= cpu_to_le32(ATH12K_WMI_FLAG_MLO_EMLSR_SUPPORT); /* Padding delay */ eml_pad_delay = ieee80211_emlsr_pad_delay_in_us(eml_cap); ml_params->emlsr_padding_delay_us = cpu_to_le32(eml_pad_delay); -- cgit v1.2.3 From 97acb0259cc9cbfbd7ab689e25684f3d8ce10e26 Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Sun, 10 Aug 2025 22:30:18 +0530 Subject: wifi: ath11k: fix group data packet drops during rekey MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During GTK rekey, mac80211 issues a clear key (if the old key exists) followed by an install key operation in the same context. This causes ath11k to send two WMI commands in quick succession: one to clear the old key and another to install the new key in the same slot. Under certain conditions—especially under high load or time sensitive scenarios, firmware may process these commands asynchronously in a way that firmware assumes the key is cleared whereas hardware has a valid key. This inconsistency between hardware and firmware leads to group addressed packet drops. Only setting the same key again can restore a valid key in firmware and allow packets to be transmitted. This issue remained latent because the host's clear key commands were not effective in firmware until commit 436a4e886598 ("ath11k: clear the keys properly via DISABLE_KEY"). That commit enabled the host to explicitly clear group keys, which inadvertently exposed the race. To mitigate this, restrict group key clearing across all modes (AP, STA, MESH). During rekey, the new key can simply be set on top of the previous one, avoiding the need for a clear followed by a set. However, in AP mode specifically, permit group key clearing when no stations are associated. This exception supports transitions from secure modes (e.g., WPA2/WPA3) to open mode, during which all associated peers are removed and the group key is cleared as part of the transition. Add a per-BSS station counter to track the presence of stations during set key operations. Also add a reset_group_keys flag to track the key re-installation state and avoid repeated installation of the same key when the number of connected stations transitions to non-zero within a rekey period. Additionally, for AP and Mesh modes, when the first station associates, reinstall the same group key that was last set. This ensures that the firmware recovers from any race that may have occurred during a previous key clear when no stations were associated. This change ensures that key clearing is permitted only when no clients are connected, avoiding packet loss while enabling dynamic security mode transitions. Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.9.0.1-02146-QCAHKSWPL_SILICONZ-1 Tested-on: WCN6855 hw2.1 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.41 Reported-by: Steffen Moser Closes: https://lore.kernel.org/linux-wireless/c6366409-9928-4dd7-bf7b-ba7fcf20eabf@steffen-moser.de Fixes: 436a4e886598 ("ath11k: clear the keys properly via DISABLE_KEY") Signed-off-by: Rameshkumar Sundaram Tested-by: Nicolas Escande Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250810170018.1124014-1-rameshkumar.sundaram@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/core.h | 2 + drivers/net/wireless/ath/ath11k/mac.c | 111 ++++++++++++++++++++++++++++++--- 2 files changed, 104 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 220d69a7a429..e8780b05ce11 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -411,6 +411,8 @@ struct ath11k_vif { bool do_not_send_tmpl; struct ath11k_arp_ns_offload arp_ns_offload; struct ath11k_rekey_data rekey_data; + u32 num_stations; + bool reinstall_group_keys; struct ath11k_reg_tpc_power_info reg_tpc_info; diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 1fadf5faafb8..106e2530b64e 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -4317,6 +4317,40 @@ static int ath11k_clear_peer_keys(struct ath11k_vif *arvif, return first_errno; } +static int ath11k_set_group_keys(struct ath11k_vif *arvif) +{ + struct ath11k *ar = arvif->ar; + struct ath11k_base *ab = ar->ab; + const u8 *addr = arvif->bssid; + int i, ret, first_errno = 0; + struct ath11k_peer *peer; + + spin_lock_bh(&ab->base_lock); + peer = ath11k_peer_find(ab, arvif->vdev_id, addr); + spin_unlock_bh(&ab->base_lock); + + if (!peer) + return -ENOENT; + + for (i = 0; i < ARRAY_SIZE(peer->keys); i++) { + struct ieee80211_key_conf *key = peer->keys[i]; + + if (!key || (key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) + continue; + + ret = ath11k_install_key(arvif, key, SET_KEY, addr, + WMI_KEY_GROUP); + if (ret < 0 && first_errno == 0) + first_errno = ret; + + if (ret < 0) + ath11k_warn(ab, "failed to set group key of idx %d for vdev %d: %d\n", + i, arvif->vdev_id, ret); + } + + return first_errno; +} + static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *key) @@ -4326,6 +4360,7 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif); struct ath11k_peer *peer; struct ath11k_sta *arsta; + bool is_ap_with_no_sta; const u8 *peer_addr; int ret = 0; u32 flags = 0; @@ -4386,16 +4421,57 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, else flags |= WMI_KEY_GROUP; - ret = ath11k_install_key(arvif, key, cmd, peer_addr, flags); - if (ret) { - ath11k_warn(ab, "ath11k_install_key failed (%d)\n", ret); - goto exit; - } + ath11k_dbg(ar->ab, ATH11K_DBG_MAC, + "%s for peer %pM on vdev %d flags 0x%X, type = %d, num_sta %d\n", + cmd == SET_KEY ? "SET_KEY" : "DEL_KEY", peer_addr, arvif->vdev_id, + flags, arvif->vdev_type, arvif->num_stations); + + /* Allow group key clearing only in AP mode when no stations are + * associated. There is a known race condition in firmware where + * group addressed packets may be dropped if the key is cleared + * and immediately set again during rekey. + * + * During GTK rekey, mac80211 issues a clear key (if the old key + * exists) followed by an install key operation for same key + * index. This causes ath11k to send two WMI commands in quick + * succession: one to clear the old key and another to install the + * new key in the same slot. + * + * Under certain conditions—especially under high load or time + * sensitive scenarios, firmware may process these commands + * asynchronously in a way that firmware assumes the key is + * cleared whereas hardware has a valid key. This inconsistency + * between hardware and firmware leads to group addressed packet + * drops after rekey. + * Only setting the same key again can restore a valid key in + * firmware and allow packets to be transmitted. + * + * There is a use case where an AP can transition from Secure mode + * to open mode without a vdev restart by just deleting all + * associated peers and clearing key, Hence allow clear key for + * that case alone. Mark arvif->reinstall_group_keys in such cases + * and reinstall the same key when the first peer is added, + * allowing firmware to recover from the race if it had occurred. + */ - ret = ath11k_dp_peer_rx_pn_replay_config(arvif, peer_addr, cmd, key); - if (ret) { - ath11k_warn(ab, "failed to offload PN replay detection %d\n", ret); - goto exit; + is_ap_with_no_sta = (vif->type == NL80211_IFTYPE_AP && + !arvif->num_stations); + if ((flags & WMI_KEY_PAIRWISE) || cmd == SET_KEY || is_ap_with_no_sta) { + ret = ath11k_install_key(arvif, key, cmd, peer_addr, flags); + if (ret) { + ath11k_warn(ab, "ath11k_install_key failed (%d)\n", ret); + goto exit; + } + + ret = ath11k_dp_peer_rx_pn_replay_config(arvif, peer_addr, cmd, key); + if (ret) { + ath11k_warn(ab, "failed to offload PN replay detection %d\n", + ret); + goto exit; + } + + if ((flags & WMI_KEY_GROUP) && cmd == SET_KEY && is_ap_with_no_sta) + arvif->reinstall_group_keys = true; } spin_lock_bh(&ab->base_lock); @@ -4994,6 +5070,7 @@ static int ath11k_mac_inc_num_stations(struct ath11k_vif *arvif, return -ENOBUFS; ar->num_stations++; + arvif->num_stations++; return 0; } @@ -5009,6 +5086,7 @@ static void ath11k_mac_dec_num_stations(struct ath11k_vif *arvif, return; ar->num_stations--; + arvif->num_stations--; } static u32 ath11k_mac_ieee80211_sta_bw_to_wmi(struct ath11k *ar, @@ -9540,6 +9618,21 @@ static int ath11k_mac_station_add(struct ath11k *ar, goto exit; } + /* Driver allows the DEL KEY followed by SET KEY sequence for + * group keys for only when there is no clients associated, if at + * all firmware has entered the race during that window, + * reinstalling the same key when the first sta connects will allow + * firmware to recover from the race. + */ + if (arvif->num_stations == 1 && arvif->reinstall_group_keys) { + ath11k_dbg(ab, ATH11K_DBG_MAC, "set group keys on 1st station add for vdev %d\n", + arvif->vdev_id); + ret = ath11k_set_group_keys(arvif); + if (ret) + goto dec_num_station; + arvif->reinstall_group_keys = false; + } + arsta->rx_stats = kzalloc(sizeof(*arsta->rx_stats), GFP_KERNEL); if (!arsta->rx_stats) { ret = -ENOMEM; -- cgit v1.2.3 From 8b3332c1331c7c260bdff89bfdfd24ea263be764 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 1 Sep 2025 14:08:17 -0700 Subject: Revert "eth: remove the DLink/Sundance (ST201) driver" This reverts commit 8401a108a63302a5a198c7075d857895ca624851. I got a report from an (anonymous) Sundance user: Ethernet controller: Sundance Technology Inc / IC Plus Corp IC Plus IP100A Integrated 10/100 Ethernet MAC + PHY (rev 31) Revert the driver back in. Make following changes: - update Denis's email address in MAINTAINERS - adjust to timer API renames: - del_timer_sync() -> timer_delete_sync() - from_timer() -> timer_container_of() Fixes: 8401a108a633 ("eth: remove the DLink/Sundance (ST201) driver") Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250901210818.1025316-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 + arch/mips/configs/mtx1_defconfig | 1 + arch/powerpc/configs/ppc6xx_defconfig | 1 + drivers/net/ethernet/dlink/Kconfig | 20 + drivers/net/ethernet/dlink/Makefile | 1 + drivers/net/ethernet/dlink/sundance.c | 1985 +++++++++++++++++++++++++++++++++ 6 files changed, 2014 insertions(+) create mode 100644 drivers/net/ethernet/dlink/sundance.c diff --git a/MAINTAINERS b/MAINTAINERS index 09b34bbd0c48..1819c477eee3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24252,6 +24252,12 @@ S: Maintained F: Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml F: drivers/input/keyboard/sun4i-lradc-keys.c +SUNDANCE NETWORK DRIVER +M: Denis Kirjanov +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/dlink/sundance.c + SUNPLUS ETHERNET DRIVER M: Wells Lu L: netdev@vger.kernel.org diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index e4bcdb64df6c..2707ab134639 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -273,6 +273,7 @@ CONFIG_DM9102=m CONFIG_ULI526X=m CONFIG_PCMCIA_XIRCOM=m CONFIG_DL2K=m +CONFIG_SUNDANCE=m CONFIG_PCMCIA_FMVJ18X=m CONFIG_E100=m CONFIG_E1000=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index bb359643ddc1..b082c1fae13c 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -433,6 +433,7 @@ CONFIG_DM9102=m CONFIG_ULI526X=m CONFIG_PCMCIA_XIRCOM=m CONFIG_DL2K=m +CONFIG_SUNDANCE=m CONFIG_S2IO=m CONFIG_FEC_MPC52xx=m CONFIG_GIANFAR=m diff --git a/drivers/net/ethernet/dlink/Kconfig b/drivers/net/ethernet/dlink/Kconfig index e9e13654812c..0d77f84c8e7b 100644 --- a/drivers/net/ethernet/dlink/Kconfig +++ b/drivers/net/ethernet/dlink/Kconfig @@ -32,4 +32,24 @@ config DL2K To compile this driver as a module, choose M here: the module will be called dl2k. +config SUNDANCE + tristate "Sundance Alta support" + depends on PCI + select CRC32 + select MII + help + This driver is for the Sundance "Alta" chip. + More specific information and updates are available from + . + +config SUNDANCE_MMIO + bool "Use MMIO instead of PIO" + depends on SUNDANCE + help + Enable memory-mapped I/O for interaction with Sundance NIC registers. + Do NOT enable this by default, PIO (enabled when MMIO is disabled) + is known to solve bugs on certain chips. + + If unsure, say N. + endif # NET_VENDOR_DLINK diff --git a/drivers/net/ethernet/dlink/Makefile b/drivers/net/ethernet/dlink/Makefile index 38c236eb6007..3ff503c747db 100644 --- a/drivers/net/ethernet/dlink/Makefile +++ b/drivers/net/ethernet/dlink/Makefile @@ -4,3 +4,4 @@ # obj-$(CONFIG_DL2K) += dl2k.o +obj-$(CONFIG_SUNDANCE) += sundance.o diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c new file mode 100644 index 000000000000..29d59c42dfa3 --- /dev/null +++ b/drivers/net/ethernet/dlink/sundance.c @@ -0,0 +1,1985 @@ +/* sundance.c: A Linux device driver for the Sundance ST201 "Alta". */ +/* + Written 1999-2000 by Donald Becker. + + This software may be used and distributed according to the terms of + the GNU General Public License (GPL), incorporated herein by reference. + Drivers based on or derived from this code fall under the GPL and must + retain the authorship, copyright and license notice. This file is not + a complete program and may only be used when the entire operating + system is licensed under the GPL. + + The author may be reached as becker@scyld.com, or C/O + Scyld Computing Corporation + 410 Severn Ave., Suite 210 + Annapolis MD 21403 + + Support and updates available at + http://www.scyld.com/network/sundance.html + [link no longer provides useful info -jgarzik] + Archives of the mailing list are still available at + https://www.beowulf.org/pipermail/netdrivers/ + +*/ + +#define DRV_NAME "sundance" + +/* The user-configurable values. + These may be modified when a driver module is loaded.*/ +static int debug = 1; /* 1 normal messages, 0 quiet .. 7 verbose. */ +/* Maximum number of multicast addresses to filter (vs. rx-all-multicast). + Typical is a 64 element hash table based on the Ethernet CRC. */ +static const int multicast_filter_limit = 32; + +/* Set the copy breakpoint for the copy-only-tiny-frames scheme. + Setting to > 1518 effectively disables this feature. + This chip can receive into offset buffers, so the Alpha does not + need a copy-align. */ +static int rx_copybreak; +static int flowctrl=1; + +/* media[] specifies the media type the NIC operates at. + autosense Autosensing active media. + 10mbps_hd 10Mbps half duplex. + 10mbps_fd 10Mbps full duplex. + 100mbps_hd 100Mbps half duplex. + 100mbps_fd 100Mbps full duplex. + 0 Autosensing active media. + 1 10Mbps half duplex. + 2 10Mbps full duplex. + 3 100Mbps half duplex. + 4 100Mbps full duplex. +*/ +#define MAX_UNITS 8 +static char *media[MAX_UNITS]; + + +/* Operational parameters that are set at compile time. */ + +/* Keep the ring sizes a power of two for compile efficiency. + The compiler will convert '%'<2^N> into a bit mask. + Making the Tx ring too large decreases the effectiveness of channel + bonding and packet priority, and more than 128 requires modifying the + Tx error recovery. + Large receive rings merely waste memory. */ +#define TX_RING_SIZE 32 +#define TX_QUEUE_LEN (TX_RING_SIZE - 1) /* Limit ring entries actually used. */ +#define RX_RING_SIZE 64 +#define RX_BUDGET 32 +#define TX_TOTAL_SIZE TX_RING_SIZE*sizeof(struct netdev_desc) +#define RX_TOTAL_SIZE RX_RING_SIZE*sizeof(struct netdev_desc) + +/* Operational parameters that usually are not changed. */ +/* Time in jiffies before concluding the transmitter is hung. */ +#define TX_TIMEOUT (4*HZ) +#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/ + +/* Include files, designed to support most kernel versions 2.0.0 and later. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* Processor type for cache alignment. */ +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Donald Becker "); +MODULE_DESCRIPTION("Sundance Alta Ethernet driver"); +MODULE_LICENSE("GPL"); + +module_param(debug, int, 0); +module_param(rx_copybreak, int, 0); +module_param_array(media, charp, NULL, 0); +module_param(flowctrl, int, 0); +MODULE_PARM_DESC(debug, "Sundance Alta debug level (0-5)"); +MODULE_PARM_DESC(rx_copybreak, "Sundance Alta copy breakpoint for copy-only-tiny-frames"); +MODULE_PARM_DESC(flowctrl, "Sundance Alta flow control [0|1]"); + +/* + Theory of Operation + +I. Board Compatibility + +This driver is designed for the Sundance Technologies "Alta" ST201 chip. + +II. Board-specific settings + +III. Driver operation + +IIIa. Ring buffers + +This driver uses two statically allocated fixed-size descriptor lists +formed into rings by a branch from the final descriptor to the beginning of +the list. The ring sizes are set at compile time by RX/TX_RING_SIZE. +Some chips explicitly use only 2^N sized rings, while others use a +'next descriptor' pointer that the driver forms into rings. + +IIIb/c. Transmit/Receive Structure + +This driver uses a zero-copy receive and transmit scheme. +The driver allocates full frame size skbuffs for the Rx ring buffers at +open() time and passes the skb->data field to the chip as receive data +buffers. When an incoming frame is less than RX_COPYBREAK bytes long, +a fresh skbuff is allocated and the frame is copied to the new skbuff. +When the incoming frame is larger, the skbuff is passed directly up the +protocol stack. Buffers consumed this way are replaced by newly allocated +skbuffs in a later phase of receives. + +The RX_COPYBREAK value is chosen to trade-off the memory wasted by +using a full-sized skbuff for small frames vs. the copying costs of larger +frames. New boards are typically used in generously configured machines +and the underfilled buffers have negligible impact compared to the benefit of +a single allocation size, so the default value of zero results in never +copying packets. When copying is done, the cost is usually mitigated by using +a combined copy/checksum routine. Copying also preloads the cache, which is +most useful with small frames. + +A subtle aspect of the operation is that the IP header at offset 14 in an +ethernet frame isn't longword aligned for further processing. +Unaligned buffers are permitted by the Sundance hardware, so +frames are received into the skbuff at an offset of "+2", 16-byte aligning +the IP header. + +IIId. Synchronization + +The driver runs as two independent, single-threaded flows of control. One +is the send-packet routine, which enforces single-threaded use by the +dev->tbusy flag. The other thread is the interrupt handler, which is single +threaded by the hardware and interrupt handling software. + +The send packet thread has partial control over the Tx ring and 'dev->tbusy' +flag. It sets the tbusy flag whenever it's queuing a Tx packet. If the next +queue slot is empty, it clears the tbusy flag when finished otherwise it sets +the 'lp->tx_full' flag. + +The interrupt handler has exclusive control over the Rx ring and records stats +from the Tx ring. After reaping the stats, it marks the Tx queue entry as +empty by incrementing the dirty_tx mark. Iff the 'lp->tx_full' flag is set, it +clears both the tx_full and tbusy flags. + +IV. Notes + +IVb. References + +The Sundance ST201 datasheet, preliminary version. +The Kendin KS8723 datasheet, preliminary version. +The ICplus IP100 datasheet, preliminary version. +http://www.scyld.com/expert/100mbps.html +http://www.scyld.com/expert/NWay.html + +IVc. Errata + +*/ + +/* Work-around for Kendin chip bugs. */ +#ifndef CONFIG_SUNDANCE_MMIO +#define USE_IO_OPS 1 +#endif + +static const struct pci_device_id sundance_pci_tbl[] = { + { 0x1186, 0x1002, 0x1186, 0x1002, 0, 0, 0 }, + { 0x1186, 0x1002, 0x1186, 0x1003, 0, 0, 1 }, + { 0x1186, 0x1002, 0x1186, 0x1012, 0, 0, 2 }, + { 0x1186, 0x1002, 0x1186, 0x1040, 0, 0, 3 }, + { 0x1186, 0x1002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4 }, + { 0x13F0, 0x0201, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 5 }, + { 0x13F0, 0x0200, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 6 }, + { } +}; +MODULE_DEVICE_TABLE(pci, sundance_pci_tbl); + +enum { + netdev_io_size = 128 +}; + +struct pci_id_info { + const char *name; +}; +static const struct pci_id_info pci_id_tbl[] = { + {"D-Link DFE-550TX FAST Ethernet Adapter"}, + {"D-Link DFE-550FX 100Mbps Fiber-optics Adapter"}, + {"D-Link DFE-580TX 4 port Server Adapter"}, + {"D-Link DFE-530TXS FAST Ethernet Adapter"}, + {"D-Link DL10050-based FAST Ethernet Adapter"}, + {"Sundance Technology Alta"}, + {"IC Plus Corporation IP100A FAST Ethernet Adapter"}, + { } /* terminate list. */ +}; + +/* This driver was written to use PCI memory space, however x86-oriented + hardware often uses I/O space accesses. */ + +/* Offsets to the device registers. + Unlike software-only systems, device drivers interact with complex hardware. + It's not useful to define symbolic names for every register bit in the + device. The name can only partially document the semantics and make + the driver longer and more difficult to read. + In general, only the important configuration values or bits changed + multiple times should be defined symbolically. +*/ +enum alta_offsets { + DMACtrl = 0x00, + TxListPtr = 0x04, + TxDMABurstThresh = 0x08, + TxDMAUrgentThresh = 0x09, + TxDMAPollPeriod = 0x0a, + RxDMAStatus = 0x0c, + RxListPtr = 0x10, + DebugCtrl0 = 0x1a, + DebugCtrl1 = 0x1c, + RxDMABurstThresh = 0x14, + RxDMAUrgentThresh = 0x15, + RxDMAPollPeriod = 0x16, + LEDCtrl = 0x1a, + ASICCtrl = 0x30, + EEData = 0x34, + EECtrl = 0x36, + FlashAddr = 0x40, + FlashData = 0x44, + WakeEvent = 0x45, + TxStatus = 0x46, + TxFrameId = 0x47, + DownCounter = 0x18, + IntrClear = 0x4a, + IntrEnable = 0x4c, + IntrStatus = 0x4e, + MACCtrl0 = 0x50, + MACCtrl1 = 0x52, + StationAddr = 0x54, + MaxFrameSize = 0x5A, + RxMode = 0x5c, + MIICtrl = 0x5e, + MulticastFilter0 = 0x60, + MulticastFilter1 = 0x64, + RxOctetsLow = 0x68, + RxOctetsHigh = 0x6a, + TxOctetsLow = 0x6c, + TxOctetsHigh = 0x6e, + TxFramesOK = 0x70, + RxFramesOK = 0x72, + StatsCarrierError = 0x74, + StatsLateColl = 0x75, + StatsMultiColl = 0x76, + StatsOneColl = 0x77, + StatsTxDefer = 0x78, + RxMissed = 0x79, + StatsTxXSDefer = 0x7a, + StatsTxAbort = 0x7b, + StatsBcastTx = 0x7c, + StatsBcastRx = 0x7d, + StatsMcastTx = 0x7e, + StatsMcastRx = 0x7f, + /* Aliased and bogus values! */ + RxStatus = 0x0c, +}; + +#define ASIC_HI_WORD(x) ((x) + 2) + +enum ASICCtrl_HiWord_bit { + GlobalReset = 0x0001, + RxReset = 0x0002, + TxReset = 0x0004, + DMAReset = 0x0008, + FIFOReset = 0x0010, + NetworkReset = 0x0020, + HostReset = 0x0040, + ResetBusy = 0x0400, +}; + +/* Bits in the interrupt status/mask registers. */ +enum intr_status_bits { + IntrSummary=0x0001, IntrPCIErr=0x0002, IntrMACCtrl=0x0008, + IntrTxDone=0x0004, IntrRxDone=0x0010, IntrRxStart=0x0020, + IntrDrvRqst=0x0040, + StatsMax=0x0080, LinkChange=0x0100, + IntrTxDMADone=0x0200, IntrRxDMADone=0x0400, +}; + +/* Bits in the RxMode register. */ +enum rx_mode_bits { + AcceptAllIPMulti=0x20, AcceptMultiHash=0x10, AcceptAll=0x08, + AcceptBroadcast=0x04, AcceptMulticast=0x02, AcceptMyPhys=0x01, +}; +/* Bits in MACCtrl. */ +enum mac_ctrl0_bits { + EnbFullDuplex=0x20, EnbRcvLargeFrame=0x40, + EnbFlowCtrl=0x100, EnbPassRxCRC=0x200, +}; +enum mac_ctrl1_bits { + StatsEnable=0x0020, StatsDisable=0x0040, StatsEnabled=0x0080, + TxEnable=0x0100, TxDisable=0x0200, TxEnabled=0x0400, + RxEnable=0x0800, RxDisable=0x1000, RxEnabled=0x2000, +}; + +/* Bits in WakeEvent register. */ +enum wake_event_bits { + WakePktEnable = 0x01, + MagicPktEnable = 0x02, + LinkEventEnable = 0x04, + WolEnable = 0x80, +}; + +/* The Rx and Tx buffer descriptors. */ +/* Note that using only 32 bit fields simplifies conversion to big-endian + architectures. */ +struct netdev_desc { + __le32 next_desc; + __le32 status; + struct desc_frag { __le32 addr, length; } frag; +}; + +/* Bits in netdev_desc.status */ +enum desc_status_bits { + DescOwn=0x8000, + DescEndPacket=0x4000, + DescEndRing=0x2000, + LastFrag=0x80000000, + DescIntrOnTx=0x8000, + DescIntrOnDMADone=0x80000000, + DisableAlign = 0x00000001, +}; + +#define PRIV_ALIGN 15 /* Required alignment mask */ +/* Use __attribute__((aligned (L1_CACHE_BYTES))) to maintain alignment + within the structure. */ +#define MII_CNT 4 +struct netdev_private { + /* Descriptor rings first for alignment. */ + struct netdev_desc *rx_ring; + struct netdev_desc *tx_ring; + struct sk_buff* rx_skbuff[RX_RING_SIZE]; + struct sk_buff* tx_skbuff[TX_RING_SIZE]; + dma_addr_t tx_ring_dma; + dma_addr_t rx_ring_dma; + struct timer_list timer; /* Media monitoring timer. */ + struct net_device *ndev; /* backpointer */ + /* ethtool extra stats */ + struct { + u64 tx_multiple_collisions; + u64 tx_single_collisions; + u64 tx_late_collisions; + u64 tx_deferred; + u64 tx_deferred_excessive; + u64 tx_aborted; + u64 tx_bcasts; + u64 rx_bcasts; + u64 tx_mcasts; + u64 rx_mcasts; + } xstats; + /* Frequently used values: keep some adjacent for cache effect. */ + spinlock_t lock; + int msg_enable; + int chip_id; + unsigned int cur_rx, dirty_rx; /* Producer/consumer ring indices */ + unsigned int rx_buf_sz; /* Based on MTU+slack. */ + struct netdev_desc *last_tx; /* Last Tx descriptor used. */ + unsigned int cur_tx, dirty_tx; + /* These values are keep track of the transceiver/media in use. */ + unsigned int flowctrl:1; + unsigned int default_port:4; /* Last dev->if_port value. */ + unsigned int an_enable:1; + unsigned int speed; + unsigned int wol_enabled:1; /* Wake on LAN enabled */ + struct tasklet_struct rx_tasklet; + struct tasklet_struct tx_tasklet; + int budget; + int cur_task; + /* Multicast and receive mode. */ + spinlock_t mcastlock; /* SMP lock multicast updates. */ + u16 mcast_filter[4]; + /* MII transceiver section. */ + struct mii_if_info mii_if; + int mii_preamble_required; + unsigned char phys[MII_CNT]; /* MII device addresses, only first one used. */ + struct pci_dev *pci_dev; + void __iomem *base; + spinlock_t statlock; +}; + +/* The station address location in the EEPROM. */ +#define EEPROM_SA_OFFSET 0x10 +#define DEFAULT_INTR (IntrRxDMADone | IntrPCIErr | \ + IntrDrvRqst | IntrTxDone | StatsMax | \ + LinkChange) + +static int change_mtu(struct net_device *dev, int new_mtu); +static int eeprom_read(void __iomem *ioaddr, int location); +static int mdio_read(struct net_device *dev, int phy_id, int location); +static void mdio_write(struct net_device *dev, int phy_id, int location, int value); +static int mdio_wait_link(struct net_device *dev, int wait); +static int netdev_open(struct net_device *dev); +static void check_duplex(struct net_device *dev); +static void netdev_timer(struct timer_list *t); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); +static void init_ring(struct net_device *dev); +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); +static int reset_tx (struct net_device *dev); +static irqreturn_t intr_handler(int irq, void *dev_instance); +static void rx_poll(struct tasklet_struct *t); +static void tx_poll(struct tasklet_struct *t); +static void refill_rx (struct net_device *dev); +static void netdev_error(struct net_device *dev, int intr_status); +static void netdev_error(struct net_device *dev, int intr_status); +static void set_rx_mode(struct net_device *dev); +static int __set_mac_addr(struct net_device *dev); +static int sundance_set_mac_addr(struct net_device *dev, void *data); +static struct net_device_stats *get_stats(struct net_device *dev); +static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +static int netdev_close(struct net_device *dev); +static const struct ethtool_ops ethtool_ops; + +static void sundance_reset(struct net_device *dev, unsigned long reset_cmd) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base + ASICCtrl; + int countdown; + + /* ST201 documentation states ASICCtrl is a 32bit register */ + iowrite32 (reset_cmd | ioread32 (ioaddr), ioaddr); + /* ST201 documentation states reset can take up to 1 ms */ + countdown = 10 + 1; + while (ioread32 (ioaddr) & (ResetBusy << 16)) { + if (--countdown == 0) { + printk(KERN_WARNING "%s : reset not completed !!\n", dev->name); + break; + } + udelay(100); + } +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void sundance_poll_controller(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + + disable_irq(np->pci_dev->irq); + intr_handler(np->pci_dev->irq, dev); + enable_irq(np->pci_dev->irq); +} +#endif + +static const struct net_device_ops netdev_ops = { + .ndo_open = netdev_open, + .ndo_stop = netdev_close, + .ndo_start_xmit = start_tx, + .ndo_get_stats = get_stats, + .ndo_set_rx_mode = set_rx_mode, + .ndo_eth_ioctl = netdev_ioctl, + .ndo_tx_timeout = tx_timeout, + .ndo_change_mtu = change_mtu, + .ndo_set_mac_address = sundance_set_mac_addr, + .ndo_validate_addr = eth_validate_addr, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = sundance_poll_controller, +#endif +}; + +static int sundance_probe1(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct net_device *dev; + struct netdev_private *np; + static int card_idx; + int chip_idx = ent->driver_data; + int irq; + int i; + void __iomem *ioaddr; + u16 mii_ctl; + void *ring_space; + dma_addr_t ring_dma; +#ifdef USE_IO_OPS + int bar = 0; +#else + int bar = 1; +#endif + int phy, phy_end, phy_idx = 0; + __le16 addr[ETH_ALEN / 2]; + + if (pci_enable_device(pdev)) + return -EIO; + pci_set_master(pdev); + + irq = pdev->irq; + + dev = alloc_etherdev(sizeof(*np)); + if (!dev) + return -ENOMEM; + SET_NETDEV_DEV(dev, &pdev->dev); + + if (pci_request_regions(pdev, DRV_NAME)) + goto err_out_netdev; + + ioaddr = pci_iomap(pdev, bar, netdev_io_size); + if (!ioaddr) + goto err_out_res; + + for (i = 0; i < 3; i++) + addr[i] = + cpu_to_le16(eeprom_read(ioaddr, i + EEPROM_SA_OFFSET)); + eth_hw_addr_set(dev, (u8 *)addr); + + np = netdev_priv(dev); + np->ndev = dev; + np->base = ioaddr; + np->pci_dev = pdev; + np->chip_id = chip_idx; + np->msg_enable = (1 << debug) - 1; + spin_lock_init(&np->lock); + spin_lock_init(&np->statlock); + tasklet_setup(&np->rx_tasklet, rx_poll); + tasklet_setup(&np->tx_tasklet, tx_poll); + + ring_space = dma_alloc_coherent(&pdev->dev, TX_TOTAL_SIZE, + &ring_dma, GFP_KERNEL); + if (!ring_space) + goto err_out_cleardev; + np->tx_ring = (struct netdev_desc *)ring_space; + np->tx_ring_dma = ring_dma; + + ring_space = dma_alloc_coherent(&pdev->dev, RX_TOTAL_SIZE, + &ring_dma, GFP_KERNEL); + if (!ring_space) + goto err_out_unmap_tx; + np->rx_ring = (struct netdev_desc *)ring_space; + np->rx_ring_dma = ring_dma; + + np->mii_if.dev = dev; + np->mii_if.mdio_read = mdio_read; + np->mii_if.mdio_write = mdio_write; + np->mii_if.phy_id_mask = 0x1f; + np->mii_if.reg_num_mask = 0x1f; + + /* The chip-specific entries in the device structure. */ + dev->netdev_ops = &netdev_ops; + dev->ethtool_ops = ðtool_ops; + dev->watchdog_timeo = TX_TIMEOUT; + + /* MTU range: 68 - 8191 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = 8191; + + pci_set_drvdata(pdev, dev); + + i = register_netdev(dev); + if (i) + goto err_out_unmap_rx; + + printk(KERN_INFO "%s: %s at %p, %pM, IRQ %d.\n", + dev->name, pci_id_tbl[chip_idx].name, ioaddr, + dev->dev_addr, irq); + + np->phys[0] = 1; /* Default setting */ + np->mii_preamble_required++; + + /* + * It seems some phys doesn't deal well with address 0 being accessed + * first + */ + if (sundance_pci_tbl[np->chip_id].device == 0x0200) { + phy = 0; + phy_end = 31; + } else { + phy = 1; + phy_end = 32; /* wraps to zero, due to 'phy & 0x1f' */ + } + for (; phy <= phy_end && phy_idx < MII_CNT; phy++) { + int phyx = phy & 0x1f; + int mii_status = mdio_read(dev, phyx, MII_BMSR); + if (mii_status != 0xffff && mii_status != 0x0000) { + np->phys[phy_idx++] = phyx; + np->mii_if.advertising = mdio_read(dev, phyx, MII_ADVERTISE); + if ((mii_status & 0x0040) == 0) + np->mii_preamble_required++; + printk(KERN_INFO "%s: MII PHY found at address %d, status " + "0x%4.4x advertising %4.4x.\n", + dev->name, phyx, mii_status, np->mii_if.advertising); + } + } + np->mii_preamble_required--; + + if (phy_idx == 0) { + printk(KERN_INFO "%s: No MII transceiver found, aborting. ASIC status %x\n", + dev->name, ioread32(ioaddr + ASICCtrl)); + goto err_out_unregister; + } + + np->mii_if.phy_id = np->phys[0]; + + /* Parse override configuration */ + np->an_enable = 1; + if (card_idx < MAX_UNITS) { + if (media[card_idx] != NULL) { + np->an_enable = 0; + if (strcmp (media[card_idx], "100mbps_fd") == 0 || + strcmp (media[card_idx], "4") == 0) { + np->speed = 100; + np->mii_if.full_duplex = 1; + } else if (strcmp (media[card_idx], "100mbps_hd") == 0 || + strcmp (media[card_idx], "3") == 0) { + np->speed = 100; + np->mii_if.full_duplex = 0; + } else if (strcmp (media[card_idx], "10mbps_fd") == 0 || + strcmp (media[card_idx], "2") == 0) { + np->speed = 10; + np->mii_if.full_duplex = 1; + } else if (strcmp (media[card_idx], "10mbps_hd") == 0 || + strcmp (media[card_idx], "1") == 0) { + np->speed = 10; + np->mii_if.full_duplex = 0; + } else { + np->an_enable = 1; + } + } + if (flowctrl == 1) + np->flowctrl = 1; + } + + /* Fibre PHY? */ + if (ioread32 (ioaddr + ASICCtrl) & 0x80) { + /* Default 100Mbps Full */ + if (np->an_enable) { + np->speed = 100; + np->mii_if.full_duplex = 1; + np->an_enable = 0; + } + } + /* Reset PHY */ + mdio_write (dev, np->phys[0], MII_BMCR, BMCR_RESET); + mdelay (300); + /* If flow control enabled, we need to advertise it.*/ + if (np->flowctrl) + mdio_write (dev, np->phys[0], MII_ADVERTISE, np->mii_if.advertising | 0x0400); + mdio_write (dev, np->phys[0], MII_BMCR, BMCR_ANENABLE|BMCR_ANRESTART); + /* Force media type */ + if (!np->an_enable) { + mii_ctl = 0; + mii_ctl |= (np->speed == 100) ? BMCR_SPEED100 : 0; + mii_ctl |= (np->mii_if.full_duplex) ? BMCR_FULLDPLX : 0; + mdio_write (dev, np->phys[0], MII_BMCR, mii_ctl); + printk (KERN_INFO "Override speed=%d, %s duplex\n", + np->speed, np->mii_if.full_duplex ? "Full" : "Half"); + + } + + /* Perhaps move the reset here? */ + /* Reset the chip to erase previous misconfiguration. */ + if (netif_msg_hw(np)) + printk("ASIC Control is %x.\n", ioread32(ioaddr + ASICCtrl)); + sundance_reset(dev, 0x00ff << 16); + if (netif_msg_hw(np)) + printk("ASIC Control is now %x.\n", ioread32(ioaddr + ASICCtrl)); + + card_idx++; + return 0; + +err_out_unregister: + unregister_netdev(dev); +err_out_unmap_rx: + dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, + np->rx_ring, np->rx_ring_dma); +err_out_unmap_tx: + dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, + np->tx_ring, np->tx_ring_dma); +err_out_cleardev: + pci_iounmap(pdev, ioaddr); +err_out_res: + pci_release_regions(pdev); +err_out_netdev: + free_netdev (dev); + return -ENODEV; +} + +static int change_mtu(struct net_device *dev, int new_mtu) +{ + if (netif_running(dev)) + return -EBUSY; + WRITE_ONCE(dev->mtu, new_mtu); + return 0; +} + +#define eeprom_delay(ee_addr) ioread32(ee_addr) +/* Read the EEPROM and MII Management Data I/O (MDIO) interfaces. */ +static int eeprom_read(void __iomem *ioaddr, int location) +{ + int boguscnt = 10000; /* Typical 1900 ticks. */ + iowrite16(0x0200 | (location & 0xff), ioaddr + EECtrl); + do { + eeprom_delay(ioaddr + EECtrl); + if (! (ioread16(ioaddr + EECtrl) & 0x8000)) { + return ioread16(ioaddr + EEData); + } + } while (--boguscnt > 0); + return 0; +} + +/* MII transceiver control section. + Read and write the MII registers using software-generated serial + MDIO protocol. See the MII specifications or DP83840A data sheet + for details. + + The maximum data clock rate is 2.5 Mhz. The minimum timing is usually + met by back-to-back 33Mhz PCI cycles. */ +#define mdio_delay() ioread8(mdio_addr) + +enum mii_reg_bits { + MDIO_ShiftClk=0x0001, MDIO_Data=0x0002, MDIO_EnbOutput=0x0004, +}; +#define MDIO_EnbIn (0) +#define MDIO_WRITE0 (MDIO_EnbOutput) +#define MDIO_WRITE1 (MDIO_Data | MDIO_EnbOutput) + +/* Generate the preamble required for initial synchronization and + a few older transceivers. */ +static void mdio_sync(void __iomem *mdio_addr) +{ + int bits = 32; + + /* Establish sync by sending at least 32 logic ones. */ + while (--bits >= 0) { + iowrite8(MDIO_WRITE1, mdio_addr); + mdio_delay(); + iowrite8(MDIO_WRITE1 | MDIO_ShiftClk, mdio_addr); + mdio_delay(); + } +} + +static int mdio_read(struct net_device *dev, int phy_id, int location) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *mdio_addr = np->base + MIICtrl; + int mii_cmd = (0xf6 << 10) | (phy_id << 5) | location; + int i, retval = 0; + + if (np->mii_preamble_required) + mdio_sync(mdio_addr); + + /* Shift the read command bits out. */ + for (i = 15; i >= 0; i--) { + int dataval = (mii_cmd & (1 << i)) ? MDIO_WRITE1 : MDIO_WRITE0; + + iowrite8(dataval, mdio_addr); + mdio_delay(); + iowrite8(dataval | MDIO_ShiftClk, mdio_addr); + mdio_delay(); + } + /* Read the two transition, 16 data, and wire-idle bits. */ + for (i = 19; i > 0; i--) { + iowrite8(MDIO_EnbIn, mdio_addr); + mdio_delay(); + retval = (retval << 1) | ((ioread8(mdio_addr) & MDIO_Data) ? 1 : 0); + iowrite8(MDIO_EnbIn | MDIO_ShiftClk, mdio_addr); + mdio_delay(); + } + return (retval>>1) & 0xffff; +} + +static void mdio_write(struct net_device *dev, int phy_id, int location, int value) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *mdio_addr = np->base + MIICtrl; + int mii_cmd = (0x5002 << 16) | (phy_id << 23) | (location<<18) | value; + int i; + + if (np->mii_preamble_required) + mdio_sync(mdio_addr); + + /* Shift the command bits out. */ + for (i = 31; i >= 0; i--) { + int dataval = (mii_cmd & (1 << i)) ? MDIO_WRITE1 : MDIO_WRITE0; + + iowrite8(dataval, mdio_addr); + mdio_delay(); + iowrite8(dataval | MDIO_ShiftClk, mdio_addr); + mdio_delay(); + } + /* Clear out extra bits. */ + for (i = 2; i > 0; i--) { + iowrite8(MDIO_EnbIn, mdio_addr); + mdio_delay(); + iowrite8(MDIO_EnbIn | MDIO_ShiftClk, mdio_addr); + mdio_delay(); + } +} + +static int mdio_wait_link(struct net_device *dev, int wait) +{ + int bmsr; + int phy_id; + struct netdev_private *np; + + np = netdev_priv(dev); + phy_id = np->phys[0]; + + do { + bmsr = mdio_read(dev, phy_id, MII_BMSR); + if (bmsr & 0x0004) + return 0; + mdelay(1); + } while (--wait > 0); + return -1; +} + +static int netdev_open(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + const int irq = np->pci_dev->irq; + unsigned long flags; + int i; + + sundance_reset(dev, 0x00ff << 16); + + i = request_irq(irq, intr_handler, IRQF_SHARED, dev->name, dev); + if (i) + return i; + + if (netif_msg_ifup(np)) + printk(KERN_DEBUG "%s: netdev_open() irq %d\n", dev->name, irq); + + init_ring(dev); + + iowrite32(np->rx_ring_dma, ioaddr + RxListPtr); + /* The Tx list pointer is written as packets are queued. */ + + /* Initialize other registers. */ + __set_mac_addr(dev); +#if IS_ENABLED(CONFIG_VLAN_8021Q) + iowrite16(dev->mtu + 18, ioaddr + MaxFrameSize); +#else + iowrite16(dev->mtu + 14, ioaddr + MaxFrameSize); +#endif + if (dev->mtu > 2047) + iowrite32(ioread32(ioaddr + ASICCtrl) | 0x0C, ioaddr + ASICCtrl); + + /* Configure the PCI bus bursts and FIFO thresholds. */ + + if (dev->if_port == 0) + dev->if_port = np->default_port; + + spin_lock_init(&np->mcastlock); + + set_rx_mode(dev); + iowrite16(0, ioaddr + IntrEnable); + iowrite16(0, ioaddr + DownCounter); + /* Set the chip to poll every N*320nsec. */ + iowrite8(100, ioaddr + RxDMAPollPeriod); + iowrite8(127, ioaddr + TxDMAPollPeriod); + /* Fix DFE-580TX packet drop issue */ + if (np->pci_dev->revision >= 0x14) + iowrite8(0x01, ioaddr + DebugCtrl1); + netif_start_queue(dev); + + spin_lock_irqsave(&np->lock, flags); + reset_tx(dev); + spin_unlock_irqrestore(&np->lock, flags); + + iowrite16 (StatsEnable | RxEnable | TxEnable, ioaddr + MACCtrl1); + + /* Disable Wol */ + iowrite8(ioread8(ioaddr + WakeEvent) | 0x00, ioaddr + WakeEvent); + np->wol_enabled = 0; + + if (netif_msg_ifup(np)) + printk(KERN_DEBUG "%s: Done netdev_open(), status: Rx %x Tx %x " + "MAC Control %x, %4.4x %4.4x.\n", + dev->name, ioread32(ioaddr + RxStatus), ioread8(ioaddr + TxStatus), + ioread32(ioaddr + MACCtrl0), + ioread16(ioaddr + MACCtrl1), ioread16(ioaddr + MACCtrl0)); + + /* Set the timer to check for link beat. */ + timer_setup(&np->timer, netdev_timer, 0); + np->timer.expires = jiffies + 3*HZ; + add_timer(&np->timer); + + /* Enable interrupts by setting the interrupt mask. */ + iowrite16(DEFAULT_INTR, ioaddr + IntrEnable); + + return 0; +} + +static void check_duplex(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + int mii_lpa = mdio_read(dev, np->phys[0], MII_LPA); + int negotiated = mii_lpa & np->mii_if.advertising; + int duplex; + + /* Force media */ + if (!np->an_enable || mii_lpa == 0xffff) { + if (np->mii_if.full_duplex) + iowrite16 (ioread16 (ioaddr + MACCtrl0) | EnbFullDuplex, + ioaddr + MACCtrl0); + return; + } + + /* Autonegotiation */ + duplex = (negotiated & 0x0100) || (negotiated & 0x01C0) == 0x0040; + if (np->mii_if.full_duplex != duplex) { + np->mii_if.full_duplex = duplex; + if (netif_msg_link(np)) + printk(KERN_INFO "%s: Setting %s-duplex based on MII #%d " + "negotiated capability %4.4x.\n", dev->name, + duplex ? "full" : "half", np->phys[0], negotiated); + iowrite16(ioread16(ioaddr + MACCtrl0) | (duplex ? 0x20 : 0), ioaddr + MACCtrl0); + } +} + +static void netdev_timer(struct timer_list *t) +{ + struct netdev_private *np = timer_container_of(np, t, timer); + struct net_device *dev = np->mii_if.dev; + void __iomem *ioaddr = np->base; + int next_tick = 10*HZ; + + if (netif_msg_timer(np)) { + printk(KERN_DEBUG "%s: Media selection timer tick, intr status %4.4x, " + "Tx %x Rx %x.\n", + dev->name, ioread16(ioaddr + IntrEnable), + ioread8(ioaddr + TxStatus), ioread32(ioaddr + RxStatus)); + } + check_duplex(dev); + np->timer.expires = jiffies + next_tick; + add_timer(&np->timer); +} + +static void tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + unsigned long flag; + + netif_stop_queue(dev); + tasklet_disable_in_atomic(&np->tx_tasklet); + iowrite16(0, ioaddr + IntrEnable); + printk(KERN_WARNING "%s: Transmit timed out, TxStatus %2.2x " + "TxFrameId %2.2x," + " resetting...\n", dev->name, ioread8(ioaddr + TxStatus), + ioread8(ioaddr + TxFrameId)); + + { + int i; + for (i=0; itx_ring_dma + i*sizeof(*np->tx_ring)), + le32_to_cpu(np->tx_ring[i].next_desc), + le32_to_cpu(np->tx_ring[i].status), + (le32_to_cpu(np->tx_ring[i].status) >> 2) & 0xff, + le32_to_cpu(np->tx_ring[i].frag.addr), + le32_to_cpu(np->tx_ring[i].frag.length)); + } + printk(KERN_DEBUG "TxListPtr=%08x netif_queue_stopped=%d\n", + ioread32(np->base + TxListPtr), + netif_queue_stopped(dev)); + printk(KERN_DEBUG "cur_tx=%d(%02x) dirty_tx=%d(%02x)\n", + np->cur_tx, np->cur_tx % TX_RING_SIZE, + np->dirty_tx, np->dirty_tx % TX_RING_SIZE); + printk(KERN_DEBUG "cur_rx=%d dirty_rx=%d\n", np->cur_rx, np->dirty_rx); + printk(KERN_DEBUG "cur_task=%d\n", np->cur_task); + } + spin_lock_irqsave(&np->lock, flag); + + /* Stop and restart the chip's Tx processes . */ + reset_tx(dev); + spin_unlock_irqrestore(&np->lock, flag); + + dev->if_port = 0; + + netif_trans_update(dev); /* prevent tx timeout */ + dev->stats.tx_errors++; + if (np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 4) { + netif_wake_queue(dev); + } + iowrite16(DEFAULT_INTR, ioaddr + IntrEnable); + tasklet_enable(&np->tx_tasklet); +} + + +/* Initialize the Rx and Tx rings, along with various 'dev' bits. */ +static void init_ring(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + int i; + + np->cur_rx = np->cur_tx = 0; + np->dirty_rx = np->dirty_tx = 0; + np->cur_task = 0; + + np->rx_buf_sz = (dev->mtu <= 1520 ? PKT_BUF_SZ : dev->mtu + 16); + + /* Initialize all Rx descriptors. */ + for (i = 0; i < RX_RING_SIZE; i++) { + np->rx_ring[i].next_desc = cpu_to_le32(np->rx_ring_dma + + ((i+1)%RX_RING_SIZE)*sizeof(*np->rx_ring)); + np->rx_ring[i].status = 0; + np->rx_ring[i].frag.length = 0; + np->rx_skbuff[i] = NULL; + } + + /* Fill in the Rx buffers. Handle allocation failure gracefully. */ + for (i = 0; i < RX_RING_SIZE; i++) { + struct sk_buff *skb = + netdev_alloc_skb(dev, np->rx_buf_sz + 2); + np->rx_skbuff[i] = skb; + if (skb == NULL) + break; + skb_reserve(skb, 2); /* 16 byte align the IP header. */ + np->rx_ring[i].frag.addr = cpu_to_le32( + dma_map_single(&np->pci_dev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE)); + if (dma_mapping_error(&np->pci_dev->dev, + np->rx_ring[i].frag.addr)) { + dev_kfree_skb(skb); + np->rx_skbuff[i] = NULL; + break; + } + np->rx_ring[i].frag.length = cpu_to_le32(np->rx_buf_sz | LastFrag); + } + np->dirty_rx = (unsigned int)(i - RX_RING_SIZE); + + for (i = 0; i < TX_RING_SIZE; i++) { + np->tx_skbuff[i] = NULL; + np->tx_ring[i].status = 0; + } +} + +static void tx_poll(struct tasklet_struct *t) +{ + struct netdev_private *np = from_tasklet(np, t, tx_tasklet); + unsigned head = np->cur_task % TX_RING_SIZE; + struct netdev_desc *txdesc = + &np->tx_ring[(np->cur_tx - 1) % TX_RING_SIZE]; + + /* Chain the next pointer */ + for (; np->cur_tx - np->cur_task > 0; np->cur_task++) { + int entry = np->cur_task % TX_RING_SIZE; + txdesc = &np->tx_ring[entry]; + if (np->last_tx) { + np->last_tx->next_desc = cpu_to_le32(np->tx_ring_dma + + entry*sizeof(struct netdev_desc)); + } + np->last_tx = txdesc; + } + /* Indicate the latest descriptor of tx ring */ + txdesc->status |= cpu_to_le32(DescIntrOnTx); + + if (ioread32 (np->base + TxListPtr) == 0) + iowrite32 (np->tx_ring_dma + head * sizeof(struct netdev_desc), + np->base + TxListPtr); +} + +static netdev_tx_t +start_tx (struct sk_buff *skb, struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + struct netdev_desc *txdesc; + unsigned entry; + + /* Calculate the next Tx descriptor entry. */ + entry = np->cur_tx % TX_RING_SIZE; + np->tx_skbuff[entry] = skb; + txdesc = &np->tx_ring[entry]; + + txdesc->next_desc = 0; + txdesc->status = cpu_to_le32 ((entry << 2) | DisableAlign); + txdesc->frag.addr = cpu_to_le32(dma_map_single(&np->pci_dev->dev, + skb->data, skb->len, DMA_TO_DEVICE)); + if (dma_mapping_error(&np->pci_dev->dev, + txdesc->frag.addr)) + goto drop_frame; + txdesc->frag.length = cpu_to_le32 (skb->len | LastFrag); + + /* Increment cur_tx before tasklet_schedule() */ + np->cur_tx++; + mb(); + /* Schedule a tx_poll() task */ + tasklet_schedule(&np->tx_tasklet); + + /* On some architectures: explicitly flush cache lines here. */ + if (np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 1 && + !netif_queue_stopped(dev)) { + /* do nothing */ + } else { + netif_stop_queue (dev); + } + if (netif_msg_tx_queued(np)) { + printk (KERN_DEBUG + "%s: Transmit frame #%d queued in slot %d.\n", + dev->name, np->cur_tx, entry); + } + return NETDEV_TX_OK; + +drop_frame: + dev_kfree_skb_any(skb); + np->tx_skbuff[entry] = NULL; + dev->stats.tx_dropped++; + return NETDEV_TX_OK; +} + +/* Reset hardware tx and free all of tx buffers */ +static int +reset_tx (struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + struct sk_buff *skb; + int i; + + /* Reset tx logic, TxListPtr will be cleaned */ + iowrite16 (TxDisable, ioaddr + MACCtrl1); + sundance_reset(dev, (NetworkReset|FIFOReset|DMAReset|TxReset) << 16); + + /* free all tx skbuff */ + for (i = 0; i < TX_RING_SIZE; i++) { + np->tx_ring[i].next_desc = 0; + + skb = np->tx_skbuff[i]; + if (skb) { + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(np->tx_ring[i].frag.addr), + skb->len, DMA_TO_DEVICE); + dev_kfree_skb_any(skb); + np->tx_skbuff[i] = NULL; + dev->stats.tx_dropped++; + } + } + np->cur_tx = np->dirty_tx = 0; + np->cur_task = 0; + + np->last_tx = NULL; + iowrite8(127, ioaddr + TxDMAPollPeriod); + + iowrite16 (StatsEnable | RxEnable | TxEnable, ioaddr + MACCtrl1); + return 0; +} + +/* The interrupt handler cleans up after the Tx thread, + and schedule a Rx thread work */ +static irqreturn_t intr_handler(int irq, void *dev_instance) +{ + struct net_device *dev = (struct net_device *)dev_instance; + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + int hw_frame_id; + int tx_cnt; + int tx_status; + int handled = 0; + int i; + + do { + int intr_status = ioread16(ioaddr + IntrStatus); + iowrite16(intr_status, ioaddr + IntrStatus); + + if (netif_msg_intr(np)) + printk(KERN_DEBUG "%s: Interrupt, status %4.4x.\n", + dev->name, intr_status); + + if (!(intr_status & DEFAULT_INTR)) + break; + + handled = 1; + + if (intr_status & (IntrRxDMADone)) { + iowrite16(DEFAULT_INTR & ~(IntrRxDone|IntrRxDMADone), + ioaddr + IntrEnable); + if (np->budget < 0) + np->budget = RX_BUDGET; + tasklet_schedule(&np->rx_tasklet); + } + if (intr_status & (IntrTxDone | IntrDrvRqst)) { + tx_status = ioread16 (ioaddr + TxStatus); + for (tx_cnt=32; tx_status & 0x80; --tx_cnt) { + if (netif_msg_tx_done(np)) + printk + ("%s: Transmit status is %2.2x.\n", + dev->name, tx_status); + if (tx_status & 0x1e) { + if (netif_msg_tx_err(np)) + printk("%s: Transmit error status %4.4x.\n", + dev->name, tx_status); + dev->stats.tx_errors++; + if (tx_status & 0x10) + dev->stats.tx_fifo_errors++; + if (tx_status & 0x08) + dev->stats.collisions++; + if (tx_status & 0x04) + dev->stats.tx_fifo_errors++; + if (tx_status & 0x02) + dev->stats.tx_window_errors++; + + /* + ** This reset has been verified on + ** DFE-580TX boards ! phdm@macqel.be. + */ + if (tx_status & 0x10) { /* TxUnderrun */ + /* Restart Tx FIFO and transmitter */ + sundance_reset(dev, (NetworkReset|FIFOReset|TxReset) << 16); + /* No need to reset the Tx pointer here */ + } + /* Restart the Tx. Need to make sure tx enabled */ + i = 10; + do { + iowrite16(ioread16(ioaddr + MACCtrl1) | TxEnable, ioaddr + MACCtrl1); + if (ioread16(ioaddr + MACCtrl1) & TxEnabled) + break; + mdelay(1); + } while (--i); + } + /* Yup, this is a documentation bug. It cost me *hours*. */ + iowrite16 (0, ioaddr + TxStatus); + if (tx_cnt < 0) { + iowrite32(5000, ioaddr + DownCounter); + break; + } + tx_status = ioread16 (ioaddr + TxStatus); + } + hw_frame_id = (tx_status >> 8) & 0xff; + } else { + hw_frame_id = ioread8(ioaddr + TxFrameId); + } + + if (np->pci_dev->revision >= 0x14) { + spin_lock(&np->lock); + for (; np->cur_tx - np->dirty_tx > 0; np->dirty_tx++) { + int entry = np->dirty_tx % TX_RING_SIZE; + struct sk_buff *skb; + int sw_frame_id; + sw_frame_id = (le32_to_cpu( + np->tx_ring[entry].status) >> 2) & 0xff; + if (sw_frame_id == hw_frame_id && + !(le32_to_cpu(np->tx_ring[entry].status) + & 0x00010000)) + break; + if (sw_frame_id == (hw_frame_id + 1) % + TX_RING_SIZE) + break; + skb = np->tx_skbuff[entry]; + /* Free the original skb. */ + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(np->tx_ring[entry].frag.addr), + skb->len, DMA_TO_DEVICE); + dev_consume_skb_irq(np->tx_skbuff[entry]); + np->tx_skbuff[entry] = NULL; + np->tx_ring[entry].frag.addr = 0; + np->tx_ring[entry].frag.length = 0; + } + spin_unlock(&np->lock); + } else { + spin_lock(&np->lock); + for (; np->cur_tx - np->dirty_tx > 0; np->dirty_tx++) { + int entry = np->dirty_tx % TX_RING_SIZE; + struct sk_buff *skb; + if (!(le32_to_cpu(np->tx_ring[entry].status) + & 0x00010000)) + break; + skb = np->tx_skbuff[entry]; + /* Free the original skb. */ + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(np->tx_ring[entry].frag.addr), + skb->len, DMA_TO_DEVICE); + dev_consume_skb_irq(np->tx_skbuff[entry]); + np->tx_skbuff[entry] = NULL; + np->tx_ring[entry].frag.addr = 0; + np->tx_ring[entry].frag.length = 0; + } + spin_unlock(&np->lock); + } + + if (netif_queue_stopped(dev) && + np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 4) { + /* The ring is no longer full, clear busy flag. */ + netif_wake_queue (dev); + } + /* Abnormal error summary/uncommon events handlers. */ + if (intr_status & (IntrPCIErr | LinkChange | StatsMax)) + netdev_error(dev, intr_status); + } while (0); + if (netif_msg_intr(np)) + printk(KERN_DEBUG "%s: exiting interrupt, status=%#4.4x.\n", + dev->name, ioread16(ioaddr + IntrStatus)); + return IRQ_RETVAL(handled); +} + +static void rx_poll(struct tasklet_struct *t) +{ + struct netdev_private *np = from_tasklet(np, t, rx_tasklet); + struct net_device *dev = np->ndev; + int entry = np->cur_rx % RX_RING_SIZE; + int boguscnt = np->budget; + void __iomem *ioaddr = np->base; + int received = 0; + + /* If EOP is set on the next entry, it's a new packet. Send it up. */ + while (1) { + struct netdev_desc *desc = &(np->rx_ring[entry]); + u32 frame_status = le32_to_cpu(desc->status); + int pkt_len; + + if (--boguscnt < 0) { + goto not_done; + } + if (!(frame_status & DescOwn)) + break; + pkt_len = frame_status & 0x1fff; /* Chip omits the CRC. */ + if (netif_msg_rx_status(np)) + printk(KERN_DEBUG " netdev_rx() status was %8.8x.\n", + frame_status); + if (frame_status & 0x001f4000) { + /* There was a error. */ + if (netif_msg_rx_err(np)) + printk(KERN_DEBUG " netdev_rx() Rx error was %8.8x.\n", + frame_status); + dev->stats.rx_errors++; + if (frame_status & 0x00100000) + dev->stats.rx_length_errors++; + if (frame_status & 0x00010000) + dev->stats.rx_fifo_errors++; + if (frame_status & 0x00060000) + dev->stats.rx_frame_errors++; + if (frame_status & 0x00080000) + dev->stats.rx_crc_errors++; + if (frame_status & 0x00100000) { + printk(KERN_WARNING "%s: Oversized Ethernet frame," + " status %8.8x.\n", + dev->name, frame_status); + } + } else { + struct sk_buff *skb; +#ifndef final_version + if (netif_msg_rx_status(np)) + printk(KERN_DEBUG " netdev_rx() normal Rx pkt length %d" + ", bogus_cnt %d.\n", + pkt_len, boguscnt); +#endif + /* Check if the packet is long enough to accept without copying + to a minimally-sized skbuff. */ + if (pkt_len < rx_copybreak && + (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) { + skb_reserve(skb, 2); /* 16 byte align the IP header */ + dma_sync_single_for_cpu(&np->pci_dev->dev, + le32_to_cpu(desc->frag.addr), + np->rx_buf_sz, DMA_FROM_DEVICE); + skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len); + dma_sync_single_for_device(&np->pci_dev->dev, + le32_to_cpu(desc->frag.addr), + np->rx_buf_sz, DMA_FROM_DEVICE); + skb_put(skb, pkt_len); + } else { + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(desc->frag.addr), + np->rx_buf_sz, DMA_FROM_DEVICE); + skb_put(skb = np->rx_skbuff[entry], pkt_len); + np->rx_skbuff[entry] = NULL; + } + skb->protocol = eth_type_trans(skb, dev); + /* Note: checksum -> skb->ip_summed = CHECKSUM_UNNECESSARY; */ + netif_rx(skb); + } + entry = (entry + 1) % RX_RING_SIZE; + received++; + } + np->cur_rx = entry; + refill_rx (dev); + np->budget -= received; + iowrite16(DEFAULT_INTR, ioaddr + IntrEnable); + return; + +not_done: + np->cur_rx = entry; + refill_rx (dev); + if (!received) + received = 1; + np->budget -= received; + if (np->budget <= 0) + np->budget = RX_BUDGET; + tasklet_schedule(&np->rx_tasklet); +} + +static void refill_rx (struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + int entry; + + /* Refill the Rx ring buffers. */ + for (;(np->cur_rx - np->dirty_rx + RX_RING_SIZE) % RX_RING_SIZE > 0; + np->dirty_rx = (np->dirty_rx + 1) % RX_RING_SIZE) { + struct sk_buff *skb; + entry = np->dirty_rx % RX_RING_SIZE; + if (np->rx_skbuff[entry] == NULL) { + skb = netdev_alloc_skb(dev, np->rx_buf_sz + 2); + np->rx_skbuff[entry] = skb; + if (skb == NULL) + break; /* Better luck next round. */ + skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ + np->rx_ring[entry].frag.addr = cpu_to_le32( + dma_map_single(&np->pci_dev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE)); + if (dma_mapping_error(&np->pci_dev->dev, + np->rx_ring[entry].frag.addr)) { + dev_kfree_skb_irq(skb); + np->rx_skbuff[entry] = NULL; + break; + } + } + /* Perhaps we need not reset this field. */ + np->rx_ring[entry].frag.length = + cpu_to_le32(np->rx_buf_sz | LastFrag); + np->rx_ring[entry].status = 0; + } +} +static void netdev_error(struct net_device *dev, int intr_status) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + u16 mii_ctl, mii_advertise, mii_lpa; + int speed; + + if (intr_status & LinkChange) { + if (mdio_wait_link(dev, 10) == 0) { + printk(KERN_INFO "%s: Link up\n", dev->name); + if (np->an_enable) { + mii_advertise = mdio_read(dev, np->phys[0], + MII_ADVERTISE); + mii_lpa = mdio_read(dev, np->phys[0], MII_LPA); + mii_advertise &= mii_lpa; + printk(KERN_INFO "%s: Link changed: ", + dev->name); + if (mii_advertise & ADVERTISE_100FULL) { + np->speed = 100; + printk("100Mbps, full duplex\n"); + } else if (mii_advertise & ADVERTISE_100HALF) { + np->speed = 100; + printk("100Mbps, half duplex\n"); + } else if (mii_advertise & ADVERTISE_10FULL) { + np->speed = 10; + printk("10Mbps, full duplex\n"); + } else if (mii_advertise & ADVERTISE_10HALF) { + np->speed = 10; + printk("10Mbps, half duplex\n"); + } else + printk("\n"); + + } else { + mii_ctl = mdio_read(dev, np->phys[0], MII_BMCR); + speed = (mii_ctl & BMCR_SPEED100) ? 100 : 10; + np->speed = speed; + printk(KERN_INFO "%s: Link changed: %dMbps ,", + dev->name, speed); + printk("%s duplex.\n", + (mii_ctl & BMCR_FULLDPLX) ? + "full" : "half"); + } + check_duplex(dev); + if (np->flowctrl && np->mii_if.full_duplex) { + iowrite16(ioread16(ioaddr + MulticastFilter1+2) | 0x0200, + ioaddr + MulticastFilter1+2); + iowrite16(ioread16(ioaddr + MACCtrl0) | EnbFlowCtrl, + ioaddr + MACCtrl0); + } + netif_carrier_on(dev); + } else { + printk(KERN_INFO "%s: Link down\n", dev->name); + netif_carrier_off(dev); + } + } + if (intr_status & StatsMax) { + get_stats(dev); + } + if (intr_status & IntrPCIErr) { + printk(KERN_ERR "%s: Something Wicked happened! %4.4x.\n", + dev->name, intr_status); + /* We must do a global reset of DMA to continue. */ + } +} + +static struct net_device_stats *get_stats(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + unsigned long flags; + u8 late_coll, single_coll, mult_coll; + + spin_lock_irqsave(&np->statlock, flags); + /* The chip only need report frame silently dropped. */ + dev->stats.rx_missed_errors += ioread8(ioaddr + RxMissed); + dev->stats.tx_packets += ioread16(ioaddr + TxFramesOK); + dev->stats.rx_packets += ioread16(ioaddr + RxFramesOK); + dev->stats.tx_carrier_errors += ioread8(ioaddr + StatsCarrierError); + + mult_coll = ioread8(ioaddr + StatsMultiColl); + np->xstats.tx_multiple_collisions += mult_coll; + single_coll = ioread8(ioaddr + StatsOneColl); + np->xstats.tx_single_collisions += single_coll; + late_coll = ioread8(ioaddr + StatsLateColl); + np->xstats.tx_late_collisions += late_coll; + dev->stats.collisions += mult_coll + + single_coll + + late_coll; + + np->xstats.tx_deferred += ioread8(ioaddr + StatsTxDefer); + np->xstats.tx_deferred_excessive += ioread8(ioaddr + StatsTxXSDefer); + np->xstats.tx_aborted += ioread8(ioaddr + StatsTxAbort); + np->xstats.tx_bcasts += ioread8(ioaddr + StatsBcastTx); + np->xstats.rx_bcasts += ioread8(ioaddr + StatsBcastRx); + np->xstats.tx_mcasts += ioread8(ioaddr + StatsMcastTx); + np->xstats.rx_mcasts += ioread8(ioaddr + StatsMcastRx); + + dev->stats.tx_bytes += ioread16(ioaddr + TxOctetsLow); + dev->stats.tx_bytes += ioread16(ioaddr + TxOctetsHigh) << 16; + dev->stats.rx_bytes += ioread16(ioaddr + RxOctetsLow); + dev->stats.rx_bytes += ioread16(ioaddr + RxOctetsHigh) << 16; + + spin_unlock_irqrestore(&np->statlock, flags); + + return &dev->stats; +} + +static void set_rx_mode(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + u16 mc_filter[4]; /* Multicast hash filter */ + u32 rx_mode; + int i; + + if (dev->flags & IFF_PROMISC) { /* Set promiscuous. */ + memset(mc_filter, 0xff, sizeof(mc_filter)); + rx_mode = AcceptBroadcast | AcceptMulticast | AcceptAll | AcceptMyPhys; + } else if ((netdev_mc_count(dev) > multicast_filter_limit) || + (dev->flags & IFF_ALLMULTI)) { + /* Too many to match, or accept all multicasts. */ + memset(mc_filter, 0xff, sizeof(mc_filter)); + rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys; + } else if (!netdev_mc_empty(dev)) { + struct netdev_hw_addr *ha; + int bit; + int index; + int crc; + memset (mc_filter, 0, sizeof (mc_filter)); + netdev_for_each_mc_addr(ha, dev) { + crc = ether_crc_le(ETH_ALEN, ha->addr); + for (index=0, bit=0; bit < 6; bit++, crc <<= 1) + if (crc & 0x80000000) index |= 1 << bit; + mc_filter[index/16] |= (1 << (index % 16)); + } + rx_mode = AcceptBroadcast | AcceptMultiHash | AcceptMyPhys; + } else { + iowrite8(AcceptBroadcast | AcceptMyPhys, ioaddr + RxMode); + return; + } + if (np->mii_if.full_duplex && np->flowctrl) + mc_filter[3] |= 0x0200; + + for (i = 0; i < 4; i++) + iowrite16(mc_filter[i], ioaddr + MulticastFilter0 + i*2); + iowrite8(rx_mode, ioaddr + RxMode); +} + +static int __set_mac_addr(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + u16 addr16; + + addr16 = (dev->dev_addr[0] | (dev->dev_addr[1] << 8)); + iowrite16(addr16, np->base + StationAddr); + addr16 = (dev->dev_addr[2] | (dev->dev_addr[3] << 8)); + iowrite16(addr16, np->base + StationAddr+2); + addr16 = (dev->dev_addr[4] | (dev->dev_addr[5] << 8)); + iowrite16(addr16, np->base + StationAddr+4); + return 0; +} + +/* Invoked with rtnl_lock held */ +static int sundance_set_mac_addr(struct net_device *dev, void *data) +{ + const struct sockaddr *addr = data; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + eth_hw_addr_set(dev, addr->sa_data); + __set_mac_addr(dev); + + return 0; +} + +static const struct { + const char name[ETH_GSTRING_LEN]; +} sundance_stats[] = { + { "tx_multiple_collisions" }, + { "tx_single_collisions" }, + { "tx_late_collisions" }, + { "tx_deferred" }, + { "tx_deferred_excessive" }, + { "tx_aborted" }, + { "tx_bcasts" }, + { "rx_bcasts" }, + { "tx_mcasts" }, + { "rx_mcasts" }, +}; + +static int check_if_running(struct net_device *dev) +{ + if (!netif_running(dev)) + return -EINVAL; + return 0; +} + +static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + struct netdev_private *np = netdev_priv(dev); + strscpy(info->driver, DRV_NAME, sizeof(info->driver)); + strscpy(info->bus_info, pci_name(np->pci_dev), sizeof(info->bus_info)); +} + +static int get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + struct netdev_private *np = netdev_priv(dev); + spin_lock_irq(&np->lock); + mii_ethtool_get_link_ksettings(&np->mii_if, cmd); + spin_unlock_irq(&np->lock); + return 0; +} + +static int set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) +{ + struct netdev_private *np = netdev_priv(dev); + int res; + spin_lock_irq(&np->lock); + res = mii_ethtool_set_link_ksettings(&np->mii_if, cmd); + spin_unlock_irq(&np->lock); + return res; +} + +static int nway_reset(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + return mii_nway_restart(&np->mii_if); +} + +static u32 get_link(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + return mii_link_ok(&np->mii_if); +} + +static u32 get_msglevel(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + return np->msg_enable; +} + +static void set_msglevel(struct net_device *dev, u32 val) +{ + struct netdev_private *np = netdev_priv(dev); + np->msg_enable = val; +} + +static void get_strings(struct net_device *dev, u32 stringset, + u8 *data) +{ + if (stringset == ETH_SS_STATS) + memcpy(data, sundance_stats, sizeof(sundance_stats)); +} + +static int get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(sundance_stats); + default: + return -EOPNOTSUPP; + } +} + +static void get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct netdev_private *np = netdev_priv(dev); + int i = 0; + + get_stats(dev); + data[i++] = np->xstats.tx_multiple_collisions; + data[i++] = np->xstats.tx_single_collisions; + data[i++] = np->xstats.tx_late_collisions; + data[i++] = np->xstats.tx_deferred; + data[i++] = np->xstats.tx_deferred_excessive; + data[i++] = np->xstats.tx_aborted; + data[i++] = np->xstats.tx_bcasts; + data[i++] = np->xstats.rx_bcasts; + data[i++] = np->xstats.tx_mcasts; + data[i++] = np->xstats.rx_mcasts; +} + +#ifdef CONFIG_PM + +static void sundance_get_wol(struct net_device *dev, + struct ethtool_wolinfo *wol) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + u8 wol_bits; + + wol->wolopts = 0; + + wol->supported = (WAKE_PHY | WAKE_MAGIC); + if (!np->wol_enabled) + return; + + wol_bits = ioread8(ioaddr + WakeEvent); + if (wol_bits & MagicPktEnable) + wol->wolopts |= WAKE_MAGIC; + if (wol_bits & LinkEventEnable) + wol->wolopts |= WAKE_PHY; +} + +static int sundance_set_wol(struct net_device *dev, + struct ethtool_wolinfo *wol) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + u8 wol_bits; + + if (!device_can_wakeup(&np->pci_dev->dev)) + return -EOPNOTSUPP; + + np->wol_enabled = !!(wol->wolopts); + wol_bits = ioread8(ioaddr + WakeEvent); + wol_bits &= ~(WakePktEnable | MagicPktEnable | + LinkEventEnable | WolEnable); + + if (np->wol_enabled) { + if (wol->wolopts & WAKE_MAGIC) + wol_bits |= (MagicPktEnable | WolEnable); + if (wol->wolopts & WAKE_PHY) + wol_bits |= (LinkEventEnable | WolEnable); + } + iowrite8(wol_bits, ioaddr + WakeEvent); + + device_set_wakeup_enable(&np->pci_dev->dev, np->wol_enabled); + + return 0; +} +#else +#define sundance_get_wol NULL +#define sundance_set_wol NULL +#endif /* CONFIG_PM */ + +static const struct ethtool_ops ethtool_ops = { + .begin = check_if_running, + .get_drvinfo = get_drvinfo, + .nway_reset = nway_reset, + .get_link = get_link, + .get_wol = sundance_get_wol, + .set_wol = sundance_set_wol, + .get_msglevel = get_msglevel, + .set_msglevel = set_msglevel, + .get_strings = get_strings, + .get_sset_count = get_sset_count, + .get_ethtool_stats = get_ethtool_stats, + .get_link_ksettings = get_link_ksettings, + .set_link_ksettings = set_link_ksettings, +}; + +static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct netdev_private *np = netdev_priv(dev); + int rc; + + if (!netif_running(dev)) + return -EINVAL; + + spin_lock_irq(&np->lock); + rc = generic_mii_ioctl(&np->mii_if, if_mii(rq), cmd, NULL); + spin_unlock_irq(&np->lock); + + return rc; +} + +static int netdev_close(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + struct sk_buff *skb; + int i; + + /* Wait and kill tasklet */ + tasklet_kill(&np->rx_tasklet); + tasklet_kill(&np->tx_tasklet); + np->cur_tx = 0; + np->dirty_tx = 0; + np->cur_task = 0; + np->last_tx = NULL; + + netif_stop_queue(dev); + + if (netif_msg_ifdown(np)) { + printk(KERN_DEBUG "%s: Shutting down ethercard, status was Tx %2.2x " + "Rx %4.4x Int %2.2x.\n", + dev->name, ioread8(ioaddr + TxStatus), + ioread32(ioaddr + RxStatus), ioread16(ioaddr + IntrStatus)); + printk(KERN_DEBUG "%s: Queue pointers were Tx %d / %d, Rx %d / %d.\n", + dev->name, np->cur_tx, np->dirty_tx, np->cur_rx, np->dirty_rx); + } + + /* Disable interrupts by clearing the interrupt mask. */ + iowrite16(0x0000, ioaddr + IntrEnable); + + /* Disable Rx and Tx DMA for safely release resource */ + iowrite32(0x500, ioaddr + DMACtrl); + + /* Stop the chip's Tx and Rx processes. */ + iowrite16(TxDisable | RxDisable | StatsDisable, ioaddr + MACCtrl1); + + for (i = 2000; i > 0; i--) { + if ((ioread32(ioaddr + DMACtrl) & 0xc000) == 0) + break; + mdelay(1); + } + + iowrite16(GlobalReset | DMAReset | FIFOReset | NetworkReset, + ioaddr + ASIC_HI_WORD(ASICCtrl)); + + for (i = 2000; i > 0; i--) { + if ((ioread16(ioaddr + ASIC_HI_WORD(ASICCtrl)) & ResetBusy) == 0) + break; + mdelay(1); + } + +#ifdef __i386__ + if (netif_msg_hw(np)) { + printk(KERN_DEBUG " Tx ring at %8.8x:\n", + (int)(np->tx_ring_dma)); + for (i = 0; i < TX_RING_SIZE; i++) + printk(KERN_DEBUG " #%d desc. %4.4x %8.8x %8.8x.\n", + i, np->tx_ring[i].status, np->tx_ring[i].frag.addr, + np->tx_ring[i].frag.length); + printk(KERN_DEBUG " Rx ring %8.8x:\n", + (int)(np->rx_ring_dma)); + for (i = 0; i < /*RX_RING_SIZE*/4 ; i++) { + printk(KERN_DEBUG " #%d desc. %4.4x %4.4x %8.8x\n", + i, np->rx_ring[i].status, np->rx_ring[i].frag.addr, + np->rx_ring[i].frag.length); + } + } +#endif /* __i386__ debugging only */ + + free_irq(np->pci_dev->irq, dev); + + timer_delete_sync(&np->timer); + + /* Free all the skbuffs in the Rx queue. */ + for (i = 0; i < RX_RING_SIZE; i++) { + np->rx_ring[i].status = 0; + skb = np->rx_skbuff[i]; + if (skb) { + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(np->rx_ring[i].frag.addr), + np->rx_buf_sz, DMA_FROM_DEVICE); + dev_kfree_skb(skb); + np->rx_skbuff[i] = NULL; + } + np->rx_ring[i].frag.addr = cpu_to_le32(0xBADF00D0); /* poison */ + } + for (i = 0; i < TX_RING_SIZE; i++) { + np->tx_ring[i].next_desc = 0; + skb = np->tx_skbuff[i]; + if (skb) { + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(np->tx_ring[i].frag.addr), + skb->len, DMA_TO_DEVICE); + dev_kfree_skb(skb); + np->tx_skbuff[i] = NULL; + } + } + + return 0; +} + +static void sundance_remove1(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + + if (dev) { + struct netdev_private *np = netdev_priv(dev); + unregister_netdev(dev); + dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, + np->rx_ring, np->rx_ring_dma); + dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, + np->tx_ring, np->tx_ring_dma); + pci_iounmap(pdev, np->base); + pci_release_regions(pdev); + free_netdev(dev); + } +} + +static int __maybe_unused sundance_suspend(struct device *dev_d) +{ + struct net_device *dev = dev_get_drvdata(dev_d); + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + + if (!netif_running(dev)) + return 0; + + netdev_close(dev); + netif_device_detach(dev); + + if (np->wol_enabled) { + iowrite8(AcceptBroadcast | AcceptMyPhys, ioaddr + RxMode); + iowrite16(RxEnable, ioaddr + MACCtrl1); + } + + device_set_wakeup_enable(dev_d, np->wol_enabled); + + return 0; +} + +static int __maybe_unused sundance_resume(struct device *dev_d) +{ + struct net_device *dev = dev_get_drvdata(dev_d); + int err = 0; + + if (!netif_running(dev)) + return 0; + + err = netdev_open(dev); + if (err) { + printk(KERN_ERR "%s: Can't resume interface!\n", + dev->name); + goto out; + } + + netif_device_attach(dev); + +out: + return err; +} + +static SIMPLE_DEV_PM_OPS(sundance_pm_ops, sundance_suspend, sundance_resume); + +static struct pci_driver sundance_driver = { + .name = DRV_NAME, + .id_table = sundance_pci_tbl, + .probe = sundance_probe1, + .remove = sundance_remove1, + .driver.pm = &sundance_pm_ops, +}; + +module_pci_driver(sundance_driver); -- cgit v1.2.3 From d2644cbc736f737142a7595fa9346f63e6fc9b33 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 1 Sep 2025 14:08:18 -0700 Subject: eth: sundance: fix endian issues Fix sparse warnings about endianness. Store DMA addr to a variable of correct type and then only convert it when writing to the descriptor. Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250901210818.1025316-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/dlink/sundance.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 29d59c42dfa3..277c50ef773f 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -1033,21 +1033,22 @@ static void init_ring(struct net_device *dev) /* Fill in the Rx buffers. Handle allocation failure gracefully. */ for (i = 0; i < RX_RING_SIZE; i++) { + dma_addr_t addr; + struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz + 2); np->rx_skbuff[i] = skb; if (skb == NULL) break; skb_reserve(skb, 2); /* 16 byte align the IP header. */ - np->rx_ring[i].frag.addr = cpu_to_le32( - dma_map_single(&np->pci_dev->dev, skb->data, - np->rx_buf_sz, DMA_FROM_DEVICE)); - if (dma_mapping_error(&np->pci_dev->dev, - np->rx_ring[i].frag.addr)) { + addr = dma_map_single(&np->pci_dev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, addr)) { dev_kfree_skb(skb); np->rx_skbuff[i] = NULL; break; } + np->rx_ring[i].frag.addr = cpu_to_le32(addr); np->rx_ring[i].frag.length = cpu_to_le32(np->rx_buf_sz | LastFrag); } np->dirty_rx = (unsigned int)(i - RX_RING_SIZE); @@ -1088,6 +1089,7 @@ start_tx (struct sk_buff *skb, struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); struct netdev_desc *txdesc; + dma_addr_t addr; unsigned entry; /* Calculate the next Tx descriptor entry. */ @@ -1095,13 +1097,14 @@ start_tx (struct sk_buff *skb, struct net_device *dev) np->tx_skbuff[entry] = skb; txdesc = &np->tx_ring[entry]; + addr = dma_map_single(&np->pci_dev->dev, skb->data, skb->len, + DMA_TO_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, addr)) + goto drop_frame; + txdesc->next_desc = 0; txdesc->status = cpu_to_le32 ((entry << 2) | DisableAlign); - txdesc->frag.addr = cpu_to_le32(dma_map_single(&np->pci_dev->dev, - skb->data, skb->len, DMA_TO_DEVICE)); - if (dma_mapping_error(&np->pci_dev->dev, - txdesc->frag.addr)) - goto drop_frame; + txdesc->frag.addr = cpu_to_le32(addr); txdesc->frag.length = cpu_to_le32 (skb->len | LastFrag); /* Increment cur_tx before tasklet_schedule() */ @@ -1419,6 +1422,8 @@ static void refill_rx (struct net_device *dev) for (;(np->cur_rx - np->dirty_rx + RX_RING_SIZE) % RX_RING_SIZE > 0; np->dirty_rx = (np->dirty_rx + 1) % RX_RING_SIZE) { struct sk_buff *skb; + dma_addr_t addr; + entry = np->dirty_rx % RX_RING_SIZE; if (np->rx_skbuff[entry] == NULL) { skb = netdev_alloc_skb(dev, np->rx_buf_sz + 2); @@ -1426,15 +1431,15 @@ static void refill_rx (struct net_device *dev) if (skb == NULL) break; /* Better luck next round. */ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ - np->rx_ring[entry].frag.addr = cpu_to_le32( - dma_map_single(&np->pci_dev->dev, skb->data, - np->rx_buf_sz, DMA_FROM_DEVICE)); - if (dma_mapping_error(&np->pci_dev->dev, - np->rx_ring[entry].frag.addr)) { + addr = dma_map_single(&np->pci_dev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, addr)) { dev_kfree_skb_irq(skb); np->rx_skbuff[entry] = NULL; break; } + + np->rx_ring[entry].frag.addr = cpu_to_le32(addr); } /* Perhaps we need not reset this field. */ np->rx_ring[entry].frag.length = -- cgit v1.2.3 From fa390321aba0a54d0f7ae95ee4ecde1358bb9234 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Sat, 30 Aug 2025 15:55:38 -0700 Subject: net/tcp: Fix socket memory leak in TCP-AO failure handling for IPv6 When tcp_ao_copy_all_matching() fails in tcp_v6_syn_recv_sock() it just exits the function. This ends up causing a memory-leak: unreferenced object 0xffff0000281a8200 (size 2496): comm "softirq", pid 0, jiffies 4295174684 hex dump (first 32 bytes): 7f 00 00 06 7f 00 00 06 00 00 00 00 cb a8 88 13 ................ 0a 00 03 61 00 00 00 00 00 00 00 00 00 00 00 00 ...a............ backtrace (crc 5ebdbe15): kmemleak_alloc+0x44/0xe0 kmem_cache_alloc_noprof+0x248/0x470 sk_prot_alloc+0x48/0x120 sk_clone_lock+0x38/0x3b0 inet_csk_clone_lock+0x34/0x150 tcp_create_openreq_child+0x3c/0x4a8 tcp_v6_syn_recv_sock+0x1c0/0x620 tcp_check_req+0x588/0x790 tcp_v6_rcv+0x5d0/0xc18 ip6_protocol_deliver_rcu+0x2d8/0x4c0 ip6_input_finish+0x74/0x148 ip6_input+0x50/0x118 ip6_sublist_rcv+0x2fc/0x3b0 ipv6_list_rcv+0x114/0x170 __netif_receive_skb_list_core+0x16c/0x200 netif_receive_skb_list_internal+0x1f0/0x2d0 This is because in tcp_v6_syn_recv_sock (and the IPv4 counterpart), when exiting upon error, inet_csk_prepare_forced_close() and tcp_done() need to be called. They make sure the newsk will end up being correctly free'd. tcp_v4_syn_recv_sock() makes this very clear by having the put_and_exit label that takes care of things. So, this patch here makes sure tcp_v4_syn_recv_sock and tcp_v6_syn_recv_sock have similar error-handling and thus fixes the leak for TCP-AO. Fixes: 06b22ef29591 ("net/tcp: Wire TCP-AO to request sockets") Signed-off-by: Christoph Paasch Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://patch.msgid.link/20250830-tcpao_leak-v1-1-e5878c2c3173@openai.com Signed-off-by: Jakub Kicinski --- net/ipv6/tcp_ipv6.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7577e7eb2c97..e885629312a4 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1431,17 +1431,17 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * ireq = inet_rsk(req); if (sk_acceptq_is_full(sk)) - goto out_overflow; + goto exit_overflow; if (!dst) { dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); if (!dst) - goto out; + goto exit; } newsk = tcp_create_openreq_child(sk, req, skb); if (!newsk) - goto out_nonewsk; + goto exit_nonewsk; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks @@ -1525,25 +1525,19 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * const union tcp_md5_addr *addr; addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; - if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) { - inet_csk_prepare_forced_close(newsk); - tcp_done(newsk); - goto out; - } + if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) + goto put_and_exit; } } #endif #ifdef CONFIG_TCP_AO /* Copy over tcp_ao_info if any */ if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6)) - goto out; /* OOM */ + goto put_and_exit; /* OOM */ #endif - if (__inet_inherit_port(sk, newsk) < 0) { - inet_csk_prepare_forced_close(newsk); - tcp_done(newsk); - goto out; - } + if (__inet_inherit_port(sk, newsk) < 0) + goto put_and_exit; *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), &found_dup_sk); if (*own_req) { @@ -1570,13 +1564,17 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * return newsk; -out_overflow: +exit_overflow: __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); -out_nonewsk: +exit_nonewsk: dst_release(dst); -out: +exit: tcp_listendrop(sk); return NULL; +put_and_exit: + inet_csk_prepare_forced_close(newsk); + tcp_done(newsk); + goto exit; } INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, -- cgit v1.2.3 From 4beb44a2d62dddfe450f310aa1a950901731cb3a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 31 Aug 2025 18:34:33 +0100 Subject: net: phy: add phy_interface_weight() Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1uslwn-00000001SOx-0a7H@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/phy.h b/include/linux/phy.h index 4c2b8b6e7187..bb45787d8684 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -169,6 +169,11 @@ static inline bool phy_interface_empty(const unsigned long *intf) return bitmap_empty(intf, PHY_INTERFACE_MODE_MAX); } +static inline unsigned int phy_interface_weight(const unsigned long *intf) +{ + return bitmap_weight(intf, PHY_INTERFACE_MODE_MAX); +} + static inline void phy_interface_and(unsigned long *dst, const unsigned long *a, const unsigned long *b) { -- cgit v1.2.3 From 1bd905dfea9897eafef532000702e63a66849f54 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 31 Aug 2025 18:34:38 +0100 Subject: net: phylink: provide phylink_get_inband_type() Provide a function to get the type of the inband signalling used for a PHY interface type. This will be used in the subsequent patch to address problems with 10G optical modules. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1uslws-00000001SP5-1R2R@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 79 ++++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index c7f867b361dd..8283416ccf5d 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1016,6 +1016,42 @@ static void phylink_pcs_an_restart(struct phylink *pl) pl->pcs->ops->pcs_an_restart(pl->pcs); } +enum inband_type { + INBAND_NONE, + INBAND_CISCO_SGMII, + INBAND_BASEX, +}; + +static enum inband_type phylink_get_inband_type(phy_interface_t interface) +{ + switch (interface) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_QSGMII: + case PHY_INTERFACE_MODE_QUSGMII: + case PHY_INTERFACE_MODE_USXGMII: + case PHY_INTERFACE_MODE_10G_QXGMII: + /* These protocols are designed for use with a PHY which + * communicates its negotiation result back to the MAC via + * inband communication. Note: there exist PHYs that run + * with SGMII but do not send the inband data. + */ + return INBAND_CISCO_SGMII; + + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + /* 1000base-X is designed for use media-side for Fibre + * connections, and thus the Autoneg bit needs to be + * taken into account. We also do this for 2500base-X + * as well, but drivers may not support this, so may + * need to override this. + */ + return INBAND_BASEX; + + default: + return INBAND_NONE; + } +} + /** * phylink_pcs_neg_mode() - helper to determine PCS inband mode * @pl: a pointer to a &struct phylink returned from phylink_create() @@ -1043,46 +1079,19 @@ static void phylink_pcs_neg_mode(struct phylink *pl, struct phylink_pcs *pcs, unsigned int pcs_ib_caps = 0; unsigned int phy_ib_caps = 0; unsigned int neg_mode, mode; - enum { - INBAND_CISCO_SGMII, - INBAND_BASEX, - } type; - - mode = pl->req_link_an_mode; + enum inband_type type; - pl->phy_ib_mode = 0; - - switch (interface) { - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_QSGMII: - case PHY_INTERFACE_MODE_QUSGMII: - case PHY_INTERFACE_MODE_USXGMII: - case PHY_INTERFACE_MODE_10G_QXGMII: - /* These protocols are designed for use with a PHY which - * communicates its negotiation result back to the MAC via - * inband communication. Note: there exist PHYs that run - * with SGMII but do not send the inband data. - */ - type = INBAND_CISCO_SGMII; - break; - - case PHY_INTERFACE_MODE_1000BASEX: - case PHY_INTERFACE_MODE_2500BASEX: - /* 1000base-X is designed for use media-side for Fibre - * connections, and thus the Autoneg bit needs to be - * taken into account. We also do this for 2500base-X - * as well, but drivers may not support this, so may - * need to override this. - */ - type = INBAND_BASEX; - break; - - default: + type = phylink_get_inband_type(interface); + if (type == INBAND_NONE) { pl->pcs_neg_mode = PHYLINK_PCS_NEG_NONE; - pl->act_link_an_mode = mode; + pl->act_link_an_mode = pl->req_link_an_mode; return; } + mode = pl->req_link_an_mode; + + pl->phy_ib_mode = 0; + if (pcs) pcs_ib_caps = phylink_pcs_inband_caps(pcs, interface); -- cgit v1.2.3 From a21202743f9ce4063e86b99cccaef48ef9813379 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 31 Aug 2025 18:34:43 +0100 Subject: net: phylink: disable autoneg for interfaces that have no inband Mathew reports that as a result of commit 6561f0e547be ("net: pcs: pcs-lynx: implement pcs_inband_caps() method"), 10G SFP modules no longer work with the Lynx PCS. This problem is not specific to the Lynx PCS, but is caused by commit df874f9e52c3 ("net: phylink: add pcs_inband_caps() method") which added validation of the autoneg state to the optical SFP configuration path. Fix this by handling interface modes that fundamentally have no inband negotiation more correctly - if we only have a single interface mode, clear the Autoneg support bit and the advertising mask. If the module can operate with several different interface modes, autoneg may be supported for other modes, so leave the support mask alone and just clear the Autoneg bit in the advertising mask. This restores 10G optical module functionality with PCS that supply their inband support, and makes ethtool output look sane. Reported-by: Mathew McBride Closes: https://lore.kernel.org/r/025c0ebe-5537-4fa3-b05a-8b835e5ad317@app.fastmail.com Fixes: df874f9e52c3 ("net: phylink: add pcs_inband_caps() method") Signed-off-by: Russell King (Oracle) Tested-by: Vladimir Oltean Link: https://patch.msgid.link/E1uslwx-00000001SPB-2kiM@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 8283416ccf5d..f1b57e3fdf30 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -3634,6 +3634,7 @@ static int phylink_sfp_config_optical(struct phylink *pl) { __ETHTOOL_DECLARE_LINK_MODE_MASK(support); struct phylink_link_state config; + enum inband_type inband_type; phy_interface_t interface; int ret; @@ -3680,6 +3681,23 @@ static int phylink_sfp_config_optical(struct phylink *pl) phylink_dbg(pl, "optical SFP: chosen %s interface\n", phy_modes(interface)); + inband_type = phylink_get_inband_type(interface); + if (inband_type == INBAND_NONE) { + /* If this is the sole interface, and there is no inband + * support, clear the advertising mask and Autoneg bit in + * the support mask. Otherwise, just clear the Autoneg bit + * in the advertising mask. + */ + if (phy_interface_weight(pl->sfp_interfaces) == 1) { + linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + pl->sfp_support); + linkmode_zero(config.advertising); + } else { + linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + config.advertising); + } + } + if (!phylink_validate_pcs_inband_autoneg(pl, interface, config.advertising)) { phylink_err(pl, "autoneg setting not compatible with PCS"); -- cgit v1.2.3 From d4736737110ffa83d29f1c5d17b26113864205f6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 31 Aug 2025 20:20:07 +0200 Subject: net: ethernet: mtk_eth_soc: fix tx vlan tag for llc packets When sending llc packets with vlan tx offload, the hardware fails to actually add the tag. Deal with this by fixing it up in software. Fixes: 656e705243fd ("net-next: mediatek: add support for MT7623 ethernet") Reported-by: Thibaut VARENE Signed-off-by: Felix Fietkau Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250831182007.51619-1-nbd@nbd.name Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 5a5fcde76dc0..e68997a29191 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1761,6 +1761,13 @@ static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) bool gso = false; int tx_num; + if (skb_vlan_tag_present(skb) && + !eth_proto_is_802_3(eth_hdr(skb)->h_proto)) { + skb = __vlan_hwaccel_push_inside(skb); + if (!skb) + goto dropped; + } + /* normally we can rely on the stack not calling this more than once, * however we have 2 queues running on the same ring so we need to lock * the ring access @@ -1806,8 +1813,9 @@ static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) drop: spin_unlock(ð->page_lock); - stats->tx_dropped++; dev_kfree_skb_any(skb); +dropped: + stats->tx_dropped++; return NETDEV_TX_OK; } -- cgit v1.2.3 From a7195a3d67dace056af7ca65144a11874df79562 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 1 Sep 2025 12:20:19 +0100 Subject: net: pcs: rzn1-miic: Correct MODCTRL register offset Correct the Mode Control Register (MODCTRL) offset for RZ/N MIIC. According to the R-IN Engine and Ethernet Peripherals Manual (Rev.1.30) [0], Table 10.1 "Ethernet Accessory Register List", MODCTRL is at offset 0x8, not 0x20 as previously defined. Offset 0x20 actually maps to the Port Trigger Control Register (PTCTRL), which controls PTP_MODE[3:0] and RGMII_CLKSEL[4]. Using this incorrect definition prevented the driver from configuring the SW_MODE[4:0] bits in MODCTRL, which control the internal connection of Ethernet ports. As a result, the MIIC could not be switched into the correct mode, leading to link setup failures and non-functional Ethernet ports on affected systems. [0] https://www.renesas.com/en/document/mah/rzn1d-group-rzn1s-group-rzn1l-group-users-manual-r-engine-and-ethernet-peripherals?r=1054571 Fixes: 7dc54d3b8d91 ("net: pcs: add Renesas MII converter driver") Cc: stable@kernel.org Signed-off-by: Lad Prabhakar Reviewed-by: Wolfram Sang Reviewed-by: Russell King (Oracle) Reviewed-by: Geert Uytterhoeven Tested-by: Wolfram Sang Link: https://patch.msgid.link/20250901112019.16278-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-rzn1-miic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/pcs/pcs-rzn1-miic.c b/drivers/net/pcs/pcs-rzn1-miic.c index d79bb9b06cd2..ce73d9474d5b 100644 --- a/drivers/net/pcs/pcs-rzn1-miic.c +++ b/drivers/net/pcs/pcs-rzn1-miic.c @@ -19,7 +19,7 @@ #define MIIC_PRCMD 0x0 #define MIIC_ESID_CODE 0x4 -#define MIIC_MODCTRL 0x20 +#define MIIC_MODCTRL 0x8 #define MIIC_MODCTRL_SW_MODE GENMASK(4, 0) #define MIIC_CONVCTRL(port) (0x100 + (port) * 4) -- cgit v1.2.3 From 6ead38147ebb813f08be6ea8ef547a0e4c09559a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 1 Sep 2025 09:50:33 +0300 Subject: vxlan: Fix NPD when refreshing an FDB entry with a nexthop object VXLAN FDB entries can point to either a remote destination or an FDB nexthop group. The latter is usually used in EVPN deployments where learning is disabled. However, when learning is enabled, an incoming packet might try to refresh an FDB entry that points to an FDB nexthop group and therefore does not have a remote. Such packets should be dropped, but they are only dropped after dereferencing the non-existent remote, resulting in a NPD [1] which can be reproduced using [2]. Fix by dropping such packets earlier. Remove the misleading comment from first_remote_rcu(). [1] BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] CPU: 13 UID: 0 PID: 361 Comm: mausezahn Not tainted 6.17.0-rc1-virtme-g9f6b606b6b37 #1 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-4.fc41 04/01/2014 RIP: 0010:vxlan_snoop+0x98/0x1e0 [...] Call Trace: vxlan_encap_bypass+0x209/0x240 encap_bypass_if_local+0xb1/0x100 vxlan_xmit_one+0x1375/0x17e0 vxlan_xmit+0x6b4/0x15f0 dev_hard_start_xmit+0x5d/0x1c0 __dev_queue_xmit+0x246/0xfd0 packet_sendmsg+0x113a/0x1850 __sock_sendmsg+0x38/0x70 __sys_sendto+0x126/0x180 __x64_sys_sendto+0x24/0x30 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x4b/0x53 [2] #!/bin/bash ip address add 192.0.2.1/32 dev lo ip address add 192.0.2.2/32 dev lo ip nexthop add id 1 via 192.0.2.3 fdb ip nexthop add id 10 group 1 fdb ip link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass ip link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning bridge fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020 bridge fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q Fixes: 1274e1cc4226 ("vxlan: ecmp support for mac fdb entries") Reported-by: Marlin Cremers Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250901065035.159644-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 8 ++++---- drivers/net/vxlan/vxlan_private.h | 4 +--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index f32be2e301f2..0f6a7c89a669 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1445,6 +1445,10 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev, if (READ_ONCE(f->updated) != now) WRITE_ONCE(f->updated, now); + /* Don't override an fdb with nexthop with a learnt entry */ + if (rcu_access_pointer(f->nh)) + return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS; + if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) && rdst->remote_ifindex == ifindex)) return SKB_NOT_DROPPED_YET; @@ -1453,10 +1457,6 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev, if (f->state & (NUD_PERMANENT | NUD_NOARP)) return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS; - /* Don't override an fdb with nexthop with a learnt entry */ - if (rcu_access_pointer(f->nh)) - return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS; - if (net_ratelimit()) netdev_info(dev, "%pM migrated from %pIS to %pIS\n", diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h index 6c625fb29c6c..99fe772ad679 100644 --- a/drivers/net/vxlan/vxlan_private.h +++ b/drivers/net/vxlan/vxlan_private.h @@ -61,9 +61,7 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port) return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; } -/* First remote destination for a forwarding entry. - * Guaranteed to be non-NULL because remotes are never deleted. - */ +/* First remote destination for a forwarding entry. */ static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb) { if (rcu_access_pointer(fdb->nh)) -- cgit v1.2.3 From 1f5d2fd1ca04a23c18b1bde9a43ce2fa2ffa1bce Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 1 Sep 2025 09:50:34 +0300 Subject: vxlan: Fix NPD in {arp,neigh}_reduce() when using nexthop objects When the "proxy" option is enabled on a VXLAN device, the device will suppress ARP requests and IPv6 Neighbor Solicitation messages if it is able to reply on behalf of the remote host. That is, if a matching and valid neighbor entry is configured on the VXLAN device whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The code currently assumes that the FDB entry for the neighbor's MAC address points to a valid remote destination, but this is incorrect if the entry is associated with an FDB nexthop group. This can result in a NPD [1][3] which can be reproduced using [2][4]. Fix by checking that the remote destination exists before dereferencing it. [1] BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] CPU: 4 UID: 0 PID: 365 Comm: arping Not tainted 6.17.0-rc2-virtme-g2a89cb21162c #2 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-4.fc41 04/01/2014 RIP: 0010:vxlan_xmit+0xb58/0x15f0 [...] Call Trace: dev_hard_start_xmit+0x5d/0x1c0 __dev_queue_xmit+0x246/0xfd0 packet_sendmsg+0x113a/0x1850 __sock_sendmsg+0x38/0x70 __sys_sendto+0x126/0x180 __x64_sys_sendto+0x24/0x30 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x4b/0x53 [2] #!/bin/bash ip address add 192.0.2.1/32 dev lo ip nexthop add id 1 via 192.0.2.2 fdb ip nexthop add id 10 group 1 fdb ip link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 4789 proxy ip neigh add 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm dev vx0 bridge fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10 arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3 [3] BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] CPU: 13 UID: 0 PID: 372 Comm: ndisc6 Not tainted 6.17.0-rc2-virtmne-g6ee90cb26014 #3 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1v996), BIOS 1.17.0-4.fc41 04/01/2x014 RIP: 0010:vxlan_xmit+0x803/0x1600 [...] Call Trace: dev_hard_start_xmit+0x5d/0x1c0 __dev_queue_xmit+0x246/0xfd0 ip6_finish_output2+0x210/0x6c0 ip6_finish_output+0x1af/0x2b0 ip6_mr_output+0x92/0x3e0 ip6_send_skb+0x30/0x90 rawv6_sendmsg+0xe6e/0x12e0 __sock_sendmsg+0x38/0x70 __sys_sendto+0x126/0x180 __x64_sys_sendto+0x24/0x30 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x7f383422ec77 [4] #!/bin/bash ip address add 2001:db8:1::1/128 dev lo ip nexthop add id 1 via 2001:db8:1::1 fdb ip nexthop add id 10 group 1 fdb ip link add name vx0 up type vxlan id 10010 local 2001:db8:1::1 dstport 4789 proxy ip neigh add 2001:db8:1::3 lladdr 00:11:22:33:44:55 nud perm dev vx0 bridge fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10 ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0 Fixes: 1274e1cc4226 ("vxlan: ecmp support for mac fdb entries") Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250901065035.159644-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 0f6a7c89a669..dab864bc733c 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1877,6 +1877,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) n = neigh_lookup(&arp_tbl, &tip, dev); if (n) { + struct vxlan_rdst *rdst = NULL; struct vxlan_fdb *f; struct sk_buff *reply; @@ -1887,7 +1888,9 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) rcu_read_lock(); f = vxlan_find_mac_tx(vxlan, n->ha, vni); - if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) { + if (f) + rdst = first_remote_rcu(f); + if (rdst && vxlan_addr_any(&rdst->remote_ip)) { /* bridge-local neighbor */ neigh_release(n); rcu_read_unlock(); @@ -2044,6 +2047,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev); if (n) { + struct vxlan_rdst *rdst = NULL; struct vxlan_fdb *f; struct sk_buff *reply; @@ -2053,7 +2057,9 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) } f = vxlan_find_mac_tx(vxlan, n->ha, vni); - if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) { + if (f) + rdst = first_remote_rcu(f); + if (rdst && vxlan_addr_any(&rdst->remote_ip)) { /* bridge-local neighbor */ neigh_release(n); goto out; -- cgit v1.2.3 From 2c9fb925c2ccc6ee475134840cff6c6b73851730 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 1 Sep 2025 09:50:35 +0300 Subject: selftests: net: Add a selftest for VXLAN with FDB nexthop groups Add test cases for VXLAN with FDB nexthop groups, testing both IPv4 and IPv6. Test basic Tx functionality as well as some corner cases. Example output: # ./test_vxlan_nh.sh TEST: VXLAN FDB nexthop: IPv4 basic Tx [ OK ] TEST: VXLAN FDB nexthop: IPv6 basic Tx [ OK ] TEST: VXLAN FDB nexthop: learning [ OK ] TEST: VXLAN FDB nexthop: IPv4 proxy [ OK ] TEST: VXLAN FDB nexthop: IPv6 proxy [ OK ] Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250901065035.159644-4-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/test_vxlan_nh.sh | 223 +++++++++++++++++++++++++++ 2 files changed, 224 insertions(+) create mode 100755 tools/testing/selftests/net/test_vxlan_nh.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index b31a71f2b372..c7e03e1d6f63 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -99,6 +99,7 @@ TEST_GEN_PROGS += bind_wildcard TEST_GEN_PROGS += bind_timewait TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh +TEST_PROGS += test_vxlan_nh.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += test_neigh.sh diff --git a/tools/testing/selftests/net/test_vxlan_nh.sh b/tools/testing/selftests/net/test_vxlan_nh.sh new file mode 100755 index 000000000000..20f3369f776b --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_nh.sh @@ -0,0 +1,223 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +TESTS=" + basic_tx_ipv4 + basic_tx_ipv6 + learning + proxy_ipv4 + proxy_ipv6 +" +VERBOSE=0 + +################################################################################ +# Utilities + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + stderr= + fi + + out=$(eval "$cmd" "$stderr") + rc=$? + if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +################################################################################ +# Cleanup + +exit_cleanup_all() +{ + cleanup_all_ns + exit "${EXIT_STATUS}" +} + +################################################################################ +# Tests + +nh_stats_get() +{ + ip -n "$ns1" -s -j nexthop show id 10 | jq ".[][\"group_stats\"][][\"packets\"]" +} + +tc_stats_get() +{ + tc_rule_handle_stats_get "dev dummy1 egress" 101 ".packets" "-n $ns1" +} + +basic_tx_common() +{ + local af_str=$1; shift + local proto=$1; shift + local local_addr=$1; shift + local plen=$1; shift + local remote_addr=$1; shift + + RET=0 + + # Test basic Tx functionality. Check that stats are incremented on + # both the FDB nexthop group and the egress device. + + run_cmd "ip -n $ns1 link add name dummy1 up type dummy" + run_cmd "ip -n $ns1 route add $remote_addr/$plen dev dummy1" + run_cmd "tc -n $ns1 qdisc add dev dummy1 clsact" + run_cmd "tc -n $ns1 filter add dev dummy1 egress proto $proto pref 1 handle 101 flower ip_proto udp dst_ip $remote_addr dst_port 4789 action pass" + + run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789" + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a own -b 00:11:22:33:44:55 -c 1 -q" + + busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" nh_stats_get > /dev/null + check_err $? "FDB nexthop group stats did not increase" + + busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" tc_stats_get > /dev/null + check_err $? "tc filter stats did not increase" + + log_test "VXLAN FDB nexthop: $af_str basic Tx" +} + +basic_tx_ipv4() +{ + basic_tx_common "IPv4" ipv4 192.0.2.1 32 192.0.2.2 +} + +basic_tx_ipv6() +{ + basic_tx_common "IPv6" ipv6 2001:db8:1::1 128 2001:db8:1::2 +} + +learning() +{ + RET=0 + + # When learning is enabled on the VXLAN device, an incoming packet + # might try to refresh an FDB entry that points to an FDB nexthop group + # instead of an ordinary remote destination. Check that the kernel does + # not crash in this situation. + + run_cmd "ip -n $ns1 address add 192.0.2.1/32 dev lo" + run_cmd "ip -n $ns1 address add 192.0.2.2/32 dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via 192.0.2.3 fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass" + run_cmd "ip -n $ns1 link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning" + + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020" + run_cmd "bridge -n $ns1 fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q" + + log_test "VXLAN FDB nexthop: learning" +} + +proxy_common() +{ + local af_str=$1; shift + local local_addr=$1; shift + local plen=$1; shift + local remote_addr=$1; shift + local neigh_addr=$1; shift + local ping_cmd=$1; shift + + RET=0 + + # When the "proxy" option is enabled on the VXLAN device, the device + # will suppress ARP requests and IPv6 Neighbor Solicitation messages if + # it is able to reply on behalf of the remote host. That is, if a + # matching and valid neighbor entry is configured on the VXLAN device + # whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The + # FDB entry for the neighbor's MAC address might point to an FDB + # nexthop group instead of an ordinary remote destination. Check that + # the kernel does not crash in this situation. + + run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789 proxy" + + run_cmd "ip -n $ns1 neigh add $neigh_addr lladdr 00:11:22:33:44:55 nud perm dev vx0" + + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10" + + run_cmd "ip netns exec $ns1 $ping_cmd" + + log_test "VXLAN FDB nexthop: $af_str proxy" +} + +proxy_ipv4() +{ + proxy_common "IPv4" 192.0.2.1 32 192.0.2.2 192.0.2.3 \ + "arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3" +} + +proxy_ipv6() +{ + proxy_common "IPv6" 2001:db8:1::1 128 2001:db8:1::2 2001:db8:1::3 \ + "ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0" +} + +################################################################################ +# Usage + +usage() +{ + cat < Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +while getopts ":t:pvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$((VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +require_command mausezahn +require_command arping +require_command ndisc6 +require_command jq + +if ! ip nexthop help 2>&1 | grep -q "stats"; then + echo "SKIP: iproute2 ip too old, missing nexthop stats support" + exit "$ksft_skip" +fi + +trap exit_cleanup_all EXIT + +for t in $TESTS +do + setup_ns ns1; $t; cleanup_all_ns; +done -- cgit v1.2.3 From 3a5f55500f3e93cf4d62351c753452279b088b4b Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Mon, 1 Sep 2025 20:37:25 +0800 Subject: ipv6: annotate data-races around devconf->rpl_seg_enabled devconf->rpl_seg_enabled can be changed concurrently from /proc/sys/net/ipv6/conf, annotate lockless reads on it. Signed-off-by: Yue Haibing Link: https://patch.msgid.link/20250901123726.1972881-2-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv6/exthdrs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index d1ef9644f826..a23eb8734e15 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -494,10 +494,8 @@ static int ipv6_rpl_srh_rcv(struct sk_buff *skb) idev = __in6_dev_get(skb->dev); - accept_rpl_seg = net->ipv6.devconf_all->rpl_seg_enabled; - if (accept_rpl_seg > idev->cnf.rpl_seg_enabled) - accept_rpl_seg = idev->cnf.rpl_seg_enabled; - + accept_rpl_seg = min(READ_ONCE(net->ipv6.devconf_all->rpl_seg_enabled), + READ_ONCE(idev->cnf.rpl_seg_enabled)); if (!accept_rpl_seg) { kfree_skb(skb); return -1; -- cgit v1.2.3 From f63e7c8a83892781f6ceb55566f9497639c44555 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 1 Sep 2025 15:32:23 +0800 Subject: net: dsa: mv88e6xxx: Fix fwnode reference leaks in mv88e6xxx_port_setup_leds Fix multiple fwnode reference leaks: 1. The function calls fwnode_get_named_child_node() to get the "leds" node, but never calls fwnode_handle_put(leds) to release this reference. 2. Within the fwnode_for_each_child_node() loop, the early return paths that don't properly release the "led" fwnode reference. This fix follows the same pattern as commit d029edefed39 ("net dsa: qca8k: fix usages of device_get_named_child_node()") Fixes: 94a2a84f5e9e ("net: dsa: mv88e6xxx: Support LED control") Cc: stable@vger.kernel.org Signed-off-by: Miaoqian Lin Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20250901073224.2273103-1-linmq006@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/leds.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/leds.c b/drivers/net/dsa/mv88e6xxx/leds.c index 1c88bfaea46b..ab3bc645da56 100644 --- a/drivers/net/dsa/mv88e6xxx/leds.c +++ b/drivers/net/dsa/mv88e6xxx/leds.c @@ -779,7 +779,8 @@ int mv88e6xxx_port_setup_leds(struct mv88e6xxx_chip *chip, int port) continue; if (led_num > 1) { dev_err(dev, "invalid LED specified port %d\n", port); - return -EINVAL; + ret = -EINVAL; + goto err_put_led; } if (led_num == 0) @@ -823,17 +824,25 @@ int mv88e6xxx_port_setup_leds(struct mv88e6xxx_chip *chip, int port) init_data.devname_mandatory = true; init_data.devicename = kasprintf(GFP_KERNEL, "%s:0%d:0%d", chip->info->name, port, led_num); - if (!init_data.devicename) - return -ENOMEM; + if (!init_data.devicename) { + ret = -ENOMEM; + goto err_put_led; + } ret = devm_led_classdev_register_ext(dev, l, &init_data); kfree(init_data.devicename); if (ret) { dev_err(dev, "Failed to init LED %d for port %d", led_num, port); - return ret; + goto err_put_led; } } + fwnode_handle_put(leds); return 0; + +err_put_led: + fwnode_handle_put(led); + fwnode_handle_put(leds); + return ret; } -- cgit v1.2.3 From f8f15f6742b8874e59c9c715d0af3474608310ad Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 29 Aug 2025 15:48:28 +0300 Subject: wifi: cw1200: cap SSID length in cw1200_do_join() If the ssidie[1] length is more that 32 it leads to memory corruption. Fixes: a910e4a94f69 ("cw1200: add driver for the ST-E CW1100 & CW1200 WLAN chipsets") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/e91fb43fcedc4893b604dfb973131661510901a7.1756456951.git.dan.carpenter@linaro.org Signed-off-by: Johannes Berg --- drivers/net/wireless/st/cw1200/sta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/st/cw1200/sta.c b/drivers/net/wireless/st/cw1200/sta.c index b1dd76e8aecb..5d8eaa700779 100644 --- a/drivers/net/wireless/st/cw1200/sta.c +++ b/drivers/net/wireless/st/cw1200/sta.c @@ -1291,7 +1291,7 @@ static void cw1200_do_join(struct cw1200_common *priv) rcu_read_lock(); ssidie = ieee80211_bss_get_ie(bss, WLAN_EID_SSID); if (ssidie) { - join.ssid_len = ssidie[1]; + join.ssid_len = min(ssidie[1], IEEE80211_MAX_SSID_LEN); memcpy(join.ssid, &ssidie[2], join.ssid_len); } rcu_read_unlock(); -- cgit v1.2.3 From c786794bd27b0d7a5fd9063695df83206009be59 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 29 Aug 2025 15:48:35 +0300 Subject: wifi: libertas: cap SSID len in lbs_associate() If the ssid_eid[1] length is more that 32 it leads to memory corruption. Fixes: a910e4a94f69 ("cw1200: add driver for the ST-E CW1100 & CW1200 WLAN chipsets") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/2a40f5ec7617144aef412034c12919a4927d90ad.1756456951.git.dan.carpenter@linaro.org Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/libertas/cfg.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index 94dd488becaf..caba7491cd5a 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -1151,10 +1151,13 @@ static int lbs_associate(struct lbs_private *priv, /* add SSID TLV */ rcu_read_lock(); ssid_eid = ieee80211_bss_get_ie(bss, WLAN_EID_SSID); - if (ssid_eid) - pos += lbs_add_ssid_tlv(pos, ssid_eid + 2, ssid_eid[1]); - else + if (ssid_eid) { + u32 ssid_len = min(ssid_eid[1], IEEE80211_MAX_SSID_LEN); + + pos += lbs_add_ssid_tlv(pos, ssid_eid + 2, ssid_len); + } else { lbs_deb_assoc("no SSID\n"); + } rcu_read_unlock(); /* add DS param TLV */ -- cgit v1.2.3 From 62b635dcd69c4fde7ce1de4992d71420a37e51e3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 29 Aug 2025 15:48:45 +0300 Subject: wifi: cfg80211: sme: cap SSID length in __cfg80211_connect_result() If the ssid->datalen is more than IEEE80211_MAX_SSID_LEN (32) it would lead to memory corruption so add some bounds checking. Fixes: c38c70185101 ("wifi: cfg80211: Set SSID if it is not already set") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/0aaaae4a3ed37c6252363c34ae4904b1604e8e32.1756456951.git.dan.carpenter@linaro.org Signed-off-by: Johannes Berg --- net/wireless/sme.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 826ec0a6355f..3a028ff287fb 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -900,13 +900,16 @@ void __cfg80211_connect_result(struct net_device *dev, if (!wdev->u.client.ssid_len) { rcu_read_lock(); for_each_valid_link(cr, link) { + u32 ssid_len; + ssid = ieee80211_bss_get_elem(cr->links[link].bss, WLAN_EID_SSID); if (!ssid || !ssid->datalen) continue; - memcpy(wdev->u.client.ssid, ssid->data, ssid->datalen); + ssid_len = min(ssid->datalen, IEEE80211_MAX_SSID_LEN); + memcpy(wdev->u.client.ssid, ssid->data, ssid_len); wdev->u.client.ssid_len = ssid->datalen; break; } -- cgit v1.2.3 From fe9e4d0c39311d0f97b024147a0d155333f388b5 Mon Sep 17 00:00:00 2001 From: "Ajay.Kathat@microchip.com" Date: Fri, 29 Aug 2025 22:58:43 +0000 Subject: wifi: wilc1000: avoid buffer overflow in WID string configuration Fix the following copy overflow warning identified by Smatch checker. drivers/net/wireless/microchip/wilc1000/wlan_cfg.c:184 wilc_wlan_parse_response_frame() error: '__memcpy()' 'cfg->s[i]->str' copy overflow (512 vs 65537) This patch introduces size check before accessing the memory buffer. The checks are base on the WID type of received data from the firmware. For WID string configuration, the size limit is determined by individual element size in 'struct wilc_cfg_str_vals' that is maintained in 'len' field of 'struct wilc_cfg_str'. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-wireless/aLFbr9Yu9j_TQTey@stanley.mountain Suggested-by: Dan Carpenter Signed-off-by: Ajay Singh Link: https://patch.msgid.link/20250829225829.5423-1-ajay.kathat@microchip.com Signed-off-by: Johannes Berg --- drivers/net/wireless/microchip/wilc1000/wlan_cfg.c | 37 ++++++++++++++++------ drivers/net/wireless/microchip/wilc1000/wlan_cfg.h | 5 +-- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/wlan_cfg.c b/drivers/net/wireless/microchip/wilc1000/wlan_cfg.c index 131388886acb..cfabd5aebb54 100644 --- a/drivers/net/wireless/microchip/wilc1000/wlan_cfg.c +++ b/drivers/net/wireless/microchip/wilc1000/wlan_cfg.c @@ -41,10 +41,10 @@ static const struct wilc_cfg_word g_cfg_word[] = { }; static const struct wilc_cfg_str g_cfg_str[] = { - {WID_FIRMWARE_VERSION, NULL}, - {WID_MAC_ADDR, NULL}, - {WID_ASSOC_RES_INFO, NULL}, - {WID_NIL, NULL} + {WID_FIRMWARE_VERSION, 0, NULL}, + {WID_MAC_ADDR, 0, NULL}, + {WID_ASSOC_RES_INFO, 0, NULL}, + {WID_NIL, 0, NULL} }; #define WILC_RESP_MSG_TYPE_CONFIG_REPLY 'R' @@ -147,44 +147,58 @@ static void wilc_wlan_parse_response_frame(struct wilc *wl, u8 *info, int size) switch (FIELD_GET(WILC_WID_TYPE, wid)) { case WID_CHAR: + len = 3; + if (len + 2 > size) + return; + while (cfg->b[i].id != WID_NIL && cfg->b[i].id != wid) i++; if (cfg->b[i].id == wid) cfg->b[i].val = info[4]; - len = 3; break; case WID_SHORT: + len = 4; + if (len + 2 > size) + return; + while (cfg->hw[i].id != WID_NIL && cfg->hw[i].id != wid) i++; if (cfg->hw[i].id == wid) cfg->hw[i].val = get_unaligned_le16(&info[4]); - len = 4; break; case WID_INT: + len = 6; + if (len + 2 > size) + return; + while (cfg->w[i].id != WID_NIL && cfg->w[i].id != wid) i++; if (cfg->w[i].id == wid) cfg->w[i].val = get_unaligned_le32(&info[4]); - len = 6; break; case WID_STR: + len = 2 + get_unaligned_le16(&info[2]); + while (cfg->s[i].id != WID_NIL && cfg->s[i].id != wid) i++; - if (cfg->s[i].id == wid) + if (cfg->s[i].id == wid) { + if (len > cfg->s[i].len || (len + 2 > size)) + return; + memcpy(cfg->s[i].str, &info[2], - get_unaligned_le16(&info[2]) + 2); + len); + } - len = 2 + get_unaligned_le16(&info[2]); break; default: @@ -384,12 +398,15 @@ int wilc_wlan_cfg_init(struct wilc *wl) /* store the string cfg parameters */ wl->cfg.s[i].id = WID_FIRMWARE_VERSION; wl->cfg.s[i].str = str_vals->firmware_version; + wl->cfg.s[i].len = sizeof(str_vals->firmware_version); i++; wl->cfg.s[i].id = WID_MAC_ADDR; wl->cfg.s[i].str = str_vals->mac_address; + wl->cfg.s[i].len = sizeof(str_vals->mac_address); i++; wl->cfg.s[i].id = WID_ASSOC_RES_INFO; wl->cfg.s[i].str = str_vals->assoc_rsp; + wl->cfg.s[i].len = sizeof(str_vals->assoc_rsp); i++; wl->cfg.s[i].id = WID_NIL; wl->cfg.s[i].str = NULL; diff --git a/drivers/net/wireless/microchip/wilc1000/wlan_cfg.h b/drivers/net/wireless/microchip/wilc1000/wlan_cfg.h index 7038b74f8e8f..5ae74bced7d7 100644 --- a/drivers/net/wireless/microchip/wilc1000/wlan_cfg.h +++ b/drivers/net/wireless/microchip/wilc1000/wlan_cfg.h @@ -24,12 +24,13 @@ struct wilc_cfg_word { struct wilc_cfg_str { u16 id; + u16 len; u8 *str; }; struct wilc_cfg_str_vals { - u8 mac_address[7]; - u8 firmware_version[129]; + u8 mac_address[8]; + u8 firmware_version[130]; u8 assoc_rsp[WILC_MAX_ASSOC_RESP_FRAME_SIZE]; }; -- cgit v1.2.3 From b4ada0618eed0fbd1b1630f73deb048c592b06a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Tue, 2 Sep 2025 15:59:59 +0000 Subject: tools: ynl-gen: fix nested array counting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The blamed commit introduced the concept of split attribute counting, and later allocating an array to hold them, however TypeArrayNest wasn't updated to use the new counting variable. Abbreviated example from tools/net/ynl/generated/nl80211-user.c: nl80211_if_combination_attributes_parse(...): unsigned int n_limits = 0; [...] ynl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len) if (type == NL80211_IFACE_COMB_LIMITS) ynl_attr_for_each_nested(attr2, attr) dst->_count.limits++; if (n_limits) { dst->_count.limits = n_limits; /* allocate and parse attributes */ } In the above example n_limits is guaranteed to always be 0, hence the conditional is unsatisfiable and is optimized out. This patch changes the attribute counting to use n_limits++ in the attribute counting loop in the above example. Fixes: 58da455b31ba ("tools: ynl-gen: improve unwind on parsing errors") Signed-off-by: Asbjørn Sloth Tønnesen Link: https://patch.msgid.link/20250902160001.760953-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index ef032e17fec4..eb295756c3bf 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -830,7 +830,7 @@ class TypeArrayNest(Type): 'ynl_attr_for_each_nested(attr2, attr) {', '\tif (ynl_attr_validate(yarg, attr2))', '\t\treturn YNL_PARSE_CB_ERROR;', - f'\t{var}->_count.{self.c_name}++;', + f'\tn_{self.c_name}++;', '}'] return get_lines, None, local_vars -- cgit v1.2.3 From 5d6b58c932ec451a5c41482790eb5b1ecf165a94 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Sep 2025 18:36:03 +0000 Subject: net: lockless sock_i_ino() Followup of commit c51da3f7a161 ("net: remove sock_i_uid()") A recent syzbot report was the trigger for this change. Over the years, we had many problems caused by the read_lock[_bh](&sk->sk_callback_lock) in sock_i_uid(). We could fix smc_diag_dump_proto() or make a more radical move: Instead of waiting for new syzbot reports, cache the socket inode number in sk->sk_ino, so that we no longer need to acquire sk->sk_callback_lock in sock_i_ino(). This makes socket dumps faster (one less cache line miss, and two atomic ops avoided). Prior art: commit 25a9c8a4431c ("netlink: Add __sock_i_ino() for __netlink_diag_dump().") commit 4f9bf2a2f5aa ("tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH.") commit efc3dbc37412 ("rds: Make rds_sock_lock BH rather than IRQ safe.") Fixes: d2d6422f8bd1 ("x86: Allow to enable PREEMPT_RT.") Reported-by: syzbot+50603c05bbdf4dfdaffa@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68b73804.050a0220.3db4df.01d8.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250902183603.740428-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 17 +++++++++++++---- net/core/sock.c | 22 ---------------------- net/mptcp/protocol.c | 1 - net/netlink/diag.c | 2 +- 4 files changed, 14 insertions(+), 28 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index c8a4b283df6f..fb13322a11fc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -285,6 +285,7 @@ struct sk_filter; * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_uid: user id of owner + * @sk_ino: inode number (zero if orphaned) * @sk_prefer_busy_poll: prefer busypolling over softirq processing * @sk_busy_poll_budget: napi processing budget when busypolling * @sk_priority: %SO_PRIORITY setting @@ -518,6 +519,7 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; kuid_t sk_uid; + unsigned long sk_ino; spinlock_t sk_peer_lock; int sk_bind_phc; struct pid *sk_peer_pid; @@ -2056,6 +2058,10 @@ static inline int sk_rx_queue_get(const struct sock *sk) static inline void sk_set_socket(struct sock *sk, struct socket *sock) { sk->sk_socket = sock; + if (sock) { + WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid); + WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino); + } } static inline wait_queue_head_t *sk_sleep(struct sock *sk) @@ -2077,6 +2083,7 @@ static inline void sock_orphan(struct sock *sk) sk_set_socket(sk, NULL); sk->sk_wq = NULL; /* Note: sk_uid is unchanged. */ + WRITE_ONCE(sk->sk_ino, 0); write_unlock_bh(&sk->sk_callback_lock); } @@ -2087,20 +2094,22 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) rcu_assign_pointer(sk->sk_wq, &parent->wq); parent->sk = sk; sk_set_socket(sk, parent); - WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid); security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } +static inline unsigned long sock_i_ino(const struct sock *sk) +{ + /* Paired with WRITE_ONCE() in sock_graft() and sock_orphan() */ + return READ_ONCE(sk->sk_ino); +} + static inline kuid_t sk_uid(const struct sock *sk) { /* Paired with WRITE_ONCE() in sockfs_setattr() */ return READ_ONCE(sk->sk_uid); } -unsigned long __sock_i_ino(struct sock *sk); -unsigned long sock_i_ino(struct sock *sk); - static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) { return sk ? sk_uid(sk) : make_kuid(net->user_ns, 0); diff --git a/net/core/sock.c b/net/core/sock.c index 7c26ec8dce63..158bddd23134 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2780,28 +2780,6 @@ void sock_pfree(struct sk_buff *skb) EXPORT_SYMBOL(sock_pfree); #endif /* CONFIG_INET */ -unsigned long __sock_i_ino(struct sock *sk) -{ - unsigned long ino; - - read_lock(&sk->sk_callback_lock); - ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; - read_unlock(&sk->sk_callback_lock); - return ino; -} -EXPORT_SYMBOL(__sock_i_ino); - -unsigned long sock_i_ino(struct sock *sk) -{ - unsigned long ino; - - local_bh_disable(); - ino = __sock_i_ino(sk); - local_bh_enable(); - return ino; -} -EXPORT_SYMBOL(sock_i_ino); - /* * Allocate a skb from the socket's send buffer. */ diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9a287b75c1b3..e6fd97b21e9e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3554,7 +3554,6 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent) write_lock_bh(&sk->sk_callback_lock); rcu_assign_pointer(sk->sk_wq, &parent->wq); sk_set_socket(sk, parent); - WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid); write_unlock_bh(&sk->sk_callback_lock); } diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 61981e01fd6f..b8e58132e8af 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -168,7 +168,7 @@ mc_list: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - __sock_i_ino(sk)) < 0) { + sock_i_ino(sk)) < 0) { ret = 1; break; } -- cgit v1.2.3 From 3bc32fd9db47a20f38b0783364fdb2f2f1c97220 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 1 Sep 2025 12:52:56 +0100 Subject: net: phylink: move PHY interrupt request to non-fail path The blamed commit added code which could return an error after we requested the PHY interrupt. When we return an error, the caller will call phy_detach() which fails to free the interrupt. Rearrange the code such that failing operations happen before the interrupt is requested, thereby allowing phy_detach() to be used. Note that replacing phy_detach() with phy_disconnect() in these paths could lead to freeing an interrupt which was never requested. Fixes: 1942b1c6f687 ("net: phylink: make configuring clock-stop dependent on MAC support") Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1ut35k-00000001UEl-0iq6@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index f1b57e3fdf30..c7cb95aa8007 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2141,9 +2141,6 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, __ETHTOOL_LINK_MODE_MASK_NBITS, pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS, phy->advertising); - if (phy_interrupt_is_valid(phy)) - phy_request_interrupt(phy); - if (pl->config->mac_managed_pm) phy->mac_managed_pm = true; @@ -2160,6 +2157,9 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, ret = 0; } + if (ret == 0 && phy_interrupt_is_valid(phy)) + phy_request_interrupt(phy); + return ret; } -- cgit v1.2.3 From 9d28f94912589f04ab51fbccaef287d4f40e0d1f Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Mon, 1 Sep 2025 14:30:18 -0700 Subject: net: thunder_bgx: add a missing of_node_put phy_np needs to get freed, just like the other child nodes. Fixes: 5fc7cf179449 ("net: thunderx: Cleanup PHY probing code.") Signed-off-by: Rosen Penev Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250901213018.47392-1-rosenp@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 21495b5dce25..0f913db4814e 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1493,13 +1493,17 @@ static int bgx_init_of_phy(struct bgx *bgx) * this cortina phy, for which there is no driver * support, ignore it. */ - if (phy_np && - !of_device_is_compatible(phy_np, "cortina,cs4223-slice")) { - /* Wait until the phy drivers are available */ - pd = of_phy_find_device(phy_np); - if (!pd) - goto defer; - bgx->lmac[lmac].phydev = pd; + if (phy_np) { + if (!of_device_is_compatible(phy_np, "cortina,cs4223-slice")) { + /* Wait until the phy drivers are available */ + pd = of_phy_find_device(phy_np); + if (!pd) { + of_node_put(phy_np); + goto defer; + } + bgx->lmac[lmac].phydev = pd; + } + of_node_put(phy_np); } lmac++; -- cgit v1.2.3 From 9e3d71a92e561ccc77025689dab25d201fee7a3e Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Mon, 1 Sep 2025 14:33:14 -0700 Subject: net: thunder_bgx: decrement cleanup index before use All paths in probe that call goto defer do so before assigning phydev and thus it makes sense to cleanup the prior index. It also fixes a bug where index 0 does not get cleaned up. Fixes: b7d3e3d3d21a ("net: thunderx: Don't leak phy device references on -EPROBE_DEFER condition.") Signed-off-by: Rosen Penev Reviewed-by: Vadim Fedorenko Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250901213314.48599-1-rosenp@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 0f913db4814e..9efb60842ad1 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1519,11 +1519,11 @@ defer: * for phy devices we may have already found. */ while (lmac) { + lmac--; if (bgx->lmac[lmac].phydev) { put_device(&bgx->lmac[lmac].phydev->mdio.dev); bgx->lmac[lmac].phydev = NULL; } - lmac--; } of_node_put(node); return -EPROBE_DEFER; -- cgit v1.2.3 From a51160f8da850a65afbf165f5bbac7ffb388bf74 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Sep 2025 09:36:08 +0300 Subject: ipv4: Fix NULL vs error pointer check in inet_blackhole_dev_init() The inetdev_init() function never returns NULL. Check for error pointers instead. Fixes: 22600596b675 ("ipv4: give an IPv4 dev to blackhole_netdev") Signed-off-by: Dan Carpenter Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/aLaQWL9NguWmeM1i@stanley.mountain Signed-off-by: Jakub Kicinski --- net/ipv4/devinet.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c47d3828d4f6..942a887bf089 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -340,14 +340,13 @@ static void inetdev_destroy(struct in_device *in_dev) static int __init inet_blackhole_dev_init(void) { - int err = 0; + struct in_device *in_dev; rtnl_lock(); - if (!inetdev_init(blackhole_netdev)) - err = -ENOMEM; + in_dev = inetdev_init(blackhole_netdev); rtnl_unlock(); - return err; + return PTR_ERR_OR_ZERO(in_dev); } late_initcall(inet_blackhole_dev_init); -- cgit v1.2.3 From cc282f73bc0cbdf3ee7af2f2d3a2ef4e6b19242d Mon Sep 17 00:00:00 2001 From: Mahanta Jambigi Date: Tue, 2 Sep 2025 10:20:41 +0200 Subject: net/smc: Remove validation of reserved bits in CLC Decline message Currently SMC code is validating the reserved bits while parsing the incoming CLC decline message & when this validation fails, its treated as a protocol error. As a result, the SMC connection is terminated instead of falling back to TCP. As per RFC7609[1] specs we shouldn't be validating the reserved bits that is part of CLC message. This patch fixes this issue. CLC Decline message format can viewed here[2]. [1] https://datatracker.ietf.org/doc/html/rfc7609#page-92 [2] https://datatracker.ietf.org/doc/html/rfc7609#page-105 Fixes: 8ade200c269f ("net/smc: add v2 format of CLC decline message") Signed-off-by: Mahanta Jambigi Reviewed-by: Sidraya Jayagond Reviewed-by: Alexandra Winter Reviewed-by: Dust Li Link: https://patch.msgid.link/20250902082041.98996-1-mjambigi@linux.ibm.com Signed-off-by: Jakub Kicinski --- net/smc/smc_clc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 5a4db151fe95..08be56dfb3f2 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -426,8 +426,6 @@ smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc) { struct smc_clc_msg_hdr *hdr = &dclc->hdr; - if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D) - return false; if (hdr->version == SMC_V1) { if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline)) return false; -- cgit v1.2.3 From a125c8fb9ddbcb0602103a50727a476fd30dec01 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Tue, 2 Sep 2025 03:20:55 -0700 Subject: mctp: return -ENOPROTOOPT for unknown getsockopt options In mctp_getsockopt(), unrecognized options currently return -EINVAL. In contrast, mctp_setsockopt() returns -ENOPROTOOPT for unknown options. Update mctp_getsockopt() to also return -ENOPROTOOPT for unknown options. This aligns the behavior of getsockopt() and setsockopt(), and matches the standard kernel socket API convention for handling unsupported options. Fixes: 99ce45d5e7db ("mctp: Implement extended addressing") Signed-off-by: Alok Tiwari Link: https://patch.msgid.link/20250902102059.1370008-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- net/mctp/af_mctp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index df4e8cf33899..685524800d70 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -425,7 +425,7 @@ static int mctp_getsockopt(struct socket *sock, int level, int optname, return 0; } - return -EINVAL; + return -ENOPROTOOPT; } /* helpers for reading/writing the tag ioc, handling compatibility across the -- cgit v1.2.3 From 8156210d36a43e76372312c87eb5ea3dbb405a85 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Sep 2025 12:46:42 +0000 Subject: ax25: properly unshare skbs in ax25_kiss_rcv() Bernard Pidoux reported a regression apparently caused by commit c353e8983e0d ("net: introduce per netns packet chains"). skb->dev becomes NULL and we crash in __netif_receive_skb_core(). Before above commit, different kind of bugs or corruptions could happen without a major crash. But the root cause is that ax25_kiss_rcv() can queue/mangle input skb without checking if this skb is shared or not. Many thanks to Bernard Pidoux for his help, diagnosis and tests. We had a similar issue years ago fixed with commit 7aaed57c5c28 ("phonet: properly unshare skbs in phonet_rcv()"). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Bernard Pidoux Closes: https://lore.kernel.org/netdev/1713f383-c538-4918-bc64-13b3288cd542@free.fr/ Tested-by: Bernard Pidoux Signed-off-by: Eric Dumazet Cc: Joerg Reuter Cc: David Ranch Cc: Folkert van Heusden Reviewed-by: Dan Cross Link: https://patch.msgid.link/20250902124642.212705-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ax25/ax25_in.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 1cac25aca637..f2d66af86359 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -433,6 +433,10 @@ free: int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + skb_orphan(skb); if (!net_eq(dev_net(dev), &init_net)) { -- cgit v1.2.3 From 661a4f307fe0f80c1d544e09476ccba9037e8e65 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Aug 2025 19:17:32 +0200 Subject: selftests: netfilter: fix udpclash tool hang Yi Chen reports that 'udpclash' loops forever depending on compiler (and optimization level used); while (x == 1) gets optimized into for (;;). Add volatile qualifier to avoid that. While at it, also run it under timeout(1) and fix the resize script to not ignore the timeout passed as second parameter to insert_flood. Reported-by: Yi Chen Suggested-by: Yi Chen Fixes: 78a588363587 ("selftests: netfilter: add conntrack clash resolution test case") Signed-off-by: Florian Westphal --- tools/testing/selftests/net/netfilter/conntrack_clash.sh | 2 +- tools/testing/selftests/net/netfilter/conntrack_resize.sh | 5 +++-- tools/testing/selftests/net/netfilter/udpclash.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh index 606a43a60f73..7fc6c5dbd551 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_clash.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh @@ -99,7 +99,7 @@ run_one_clash_test() local entries local cre - if ! ip netns exec "$ns" ./udpclash $daddr $dport;then + if ! ip netns exec "$ns" timeout 30 ./udpclash $daddr $dport;then echo "INFO: did not receive expected number of replies for $daddr:$dport" ip netns exec "$ctns" conntrack -S # don't fail: check if clash resolution triggered after all. diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh index 788cd56ea4a0..615fe3c6f405 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_resize.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh @@ -187,7 +187,7 @@ ct_udpclash() [ -x udpclash ] || return while [ $now -lt $end ]; do - ip netns exec "$ns" ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 + ip netns exec "$ns" timeout 30 ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 now=$(date +%s) done @@ -277,6 +277,7 @@ check_taint() insert_flood() { local n="$1" + local timeout="$2" local r=0 r=$((RANDOM%$insert_count)) @@ -302,7 +303,7 @@ test_floodresize_all() read tainted_then < /proc/sys/kernel/tainted for n in "$nsclient1" "$nsclient2";do - insert_flood "$n" & + insert_flood "$n" "$timeout" & done # resize table constantly while flood/insert/dump/flushs diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c index 85c7b906ad08..79de163d61ab 100644 --- a/tools/testing/selftests/net/netfilter/udpclash.c +++ b/tools/testing/selftests/net/netfilter/udpclash.c @@ -29,7 +29,7 @@ struct thread_args { int sockfd; }; -static int wait = 1; +static volatile int wait = 1; static void *thread_main(void *varg) { -- cgit v1.2.3 From 4039ce7ef40474d5ba46f414c50cc7020b9cf8ae Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 7 Aug 2025 15:49:59 +0200 Subject: netfilter: nf_tables: Introduce NFTA_DEVICE_PREFIX This new attribute is supposed to be used instead of NFTA_DEVICE_NAME for simple wildcard interface specs. It holds a NUL-terminated string representing an interface name prefix to match on. While kernel code to distinguish full names from prefixes in NFTA_DEVICE_NAME is simpler than this solution, reusing the existing attribute with different semantics leads to confusion between different versions of kernel and user space though: * With old kernels, wildcards submitted by user space are accepted yet silently treated as regular names. * With old user space, wildcards submitted by kernel may cause crashes since libnftnl expects NUL-termination when there is none. Using a distinct attribute type sanitizes these situations as the receiving part detects and rejects the unexpected attribute nested in *_HOOK_DEVS attributes. Fixes: 6d07a289504a ("netfilter: nf_tables: Support wildcard netdev hook specs") Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 42 +++++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 2beb30be2c5f..8e0eb832bc01 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1784,10 +1784,12 @@ enum nft_synproxy_attributes { * enum nft_device_attributes - nf_tables device netlink attributes * * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) + * @NFTA_DEVICE_PREFIX: device name prefix, a simple wildcard (NLA_STRING) */ enum nft_devices_attributes { NFTA_DEVICE_UNSPEC, NFTA_DEVICE_NAME, + NFTA_DEVICE_PREFIX, __NFTA_DEVICE_MAX }; #define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 58c5425d61c2..c1082de09656 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1959,6 +1959,18 @@ nla_put_failure: return -ENOSPC; } +static bool hook_is_prefix(struct nft_hook *hook) +{ + return strlen(hook->ifname) >= hook->ifnamelen; +} + +static int nft_nla_put_hook_dev(struct sk_buff *skb, struct nft_hook *hook) +{ + int attr = hook_is_prefix(hook) ? NFTA_DEVICE_PREFIX : NFTA_DEVICE_NAME; + + return nla_put_string(skb, attr, hook->ifname); +} + static int nft_dump_basechain_hook(struct sk_buff *skb, const struct net *net, int family, const struct nft_base_chain *basechain, @@ -1990,16 +2002,15 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, if (!first) first = hook; - if (nla_put(skb, NFTA_DEVICE_NAME, - hook->ifnamelen, hook->ifname)) + if (nft_nla_put_hook_dev(skb, hook)) goto nla_put_failure; n++; } nla_nest_end(skb, nest_devs); if (n == 1 && - nla_put(skb, NFTA_HOOK_DEV, - first->ifnamelen, first->ifname)) + !hook_is_prefix(first) && + nla_put_string(skb, NFTA_HOOK_DEV, first->ifname)) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -2310,7 +2321,8 @@ void nf_tables_chain_destroy(struct nft_chain *chain) } static struct nft_hook *nft_netdev_hook_alloc(struct net *net, - const struct nlattr *attr) + const struct nlattr *attr, + bool prefix) { struct nf_hook_ops *ops; struct net_device *dev; @@ -2327,7 +2339,8 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, if (err < 0) goto err_hook_free; - hook->ifnamelen = nla_len(attr); + /* include the terminating NUL-char when comparing non-prefixes */ + hook->ifnamelen = strlen(hook->ifname) + !prefix; /* nf_tables_netdev_event() is called under rtnl_mutex, this is * indirectly serializing all the other holders of the commit_mutex with @@ -2374,14 +2387,22 @@ static int nf_tables_parse_netdev_hooks(struct net *net, struct nft_hook *hook, *next; const struct nlattr *tmp; int rem, n = 0, err; + bool prefix; nla_for_each_nested(tmp, attr, rem) { - if (nla_type(tmp) != NFTA_DEVICE_NAME) { + switch (nla_type(tmp)) { + case NFTA_DEVICE_NAME: + prefix = false; + break; + case NFTA_DEVICE_PREFIX: + prefix = true; + break; + default: err = -EINVAL; goto err_hook; } - hook = nft_netdev_hook_alloc(net, tmp); + hook = nft_netdev_hook_alloc(net, tmp, prefix); if (IS_ERR(hook)) { NL_SET_BAD_ATTR(extack, tmp); err = PTR_ERR(hook); @@ -2427,7 +2448,7 @@ static int nft_chain_parse_netdev(struct net *net, struct nlattr *tb[], int err; if (tb[NFTA_HOOK_DEV]) { - hook = nft_netdev_hook_alloc(net, tb[NFTA_HOOK_DEV]); + hook = nft_netdev_hook_alloc(net, tb[NFTA_HOOK_DEV], false); if (IS_ERR(hook)) { NL_SET_BAD_ATTR(extack, tb[NFTA_HOOK_DEV]); return PTR_ERR(hook); @@ -9458,8 +9479,7 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, list_for_each_entry_rcu(hook, hook_list, list, lockdep_commit_lock_is_held(net)) { - if (nla_put(skb, NFTA_DEVICE_NAME, - hook->ifnamelen, hook->ifname)) + if (nft_nla_put_hook_dev(skb, hook)) goto nla_put_failure; } nla_nest_end(skb, nest_devs); -- cgit v1.2.3 From 0a228624bcc00af41f281a2a84c928595a74c17d Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Mon, 1 Sep 2025 14:35:37 +0800 Subject: net: atm: fix memory leak in atm_register_sysfs when device_register fail When device_register() return error in atm_register_sysfs(), which can be triggered by kzalloc fail in device_private_init() or other reasons, kmemleak reports the following memory leaks: unreferenced object 0xffff88810182fb80 (size 8): comm "insmod", pid 504, jiffies 4294852464 hex dump (first 8 bytes): 61 64 75 6d 6d 79 30 00 adummy0. backtrace (crc 14dfadaf): __kmalloc_node_track_caller_noprof+0x335/0x450 kvasprintf+0xb3/0x130 kobject_set_name_vargs+0x45/0x120 dev_set_name+0xa9/0xe0 atm_register_sysfs+0xf3/0x220 atm_dev_register+0x40b/0x780 0xffffffffa000b089 do_one_initcall+0x89/0x300 do_init_module+0x27b/0x7d0 load_module+0x54cd/0x5ff0 init_module_from_file+0xe4/0x150 idempotent_init_module+0x32c/0x610 __x64_sys_finit_module+0xbd/0x120 do_syscall_64+0xa8/0x270 entry_SYSCALL_64_after_hwframe+0x77/0x7f When device_create_file() return error in atm_register_sysfs(), the same issue also can be triggered. Function put_device() should be called to release kobj->name memory and other device resource, instead of kfree(). Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array") Signed-off-by: Wang Liang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250901063537.1472221-1-wangliang74@huawei.com Signed-off-by: Paolo Abeni --- net/atm/resources.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/atm/resources.c b/net/atm/resources.c index b19d851e1f44..7c6fdedbcf4e 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -112,7 +112,9 @@ struct atm_dev *atm_dev_register(const char *type, struct device *parent, if (atm_proc_dev_register(dev) < 0) { pr_err("atm_proc_dev_register failed for dev %s\n", type); - goto out_fail; + mutex_unlock(&atm_dev_mutex); + kfree(dev); + return NULL; } if (atm_register_sysfs(dev, parent) < 0) { @@ -128,7 +130,7 @@ out: return dev; out_fail: - kfree(dev); + put_device(&dev->class_dev); dev = NULL; goto out; } -- cgit v1.2.3 From 8bbceba7dc5090c00105e006ce28d1292cfda8dd Mon Sep 17 00:00:00 2001 From: Abin Joseph Date: Wed, 3 Sep 2025 08:22:13 +0530 Subject: net: xilinx: axienet: Add error handling for RX metadata pointer retrieval Add proper error checking for dmaengine_desc_get_metadata_ptr() which can return an error pointer and lead to potential crashes or undefined behaviour if the pointer retrieval fails. Properly handle the error by unmapping DMA buffer, freeing the skb and returning early to prevent further processing with invalid data. Fixes: 6a91b846af85 ("net: axienet: Introduce dmaengine support") Signed-off-by: Abin Joseph Reviewed-by: Radhey Shyam Pandey Link: https://patch.msgid.link/20250903025213.3120181-1-abin.joseph@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 0d8a05fe541a..ec6d47dc984a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1168,6 +1168,15 @@ static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result) &meta_max_len); dma_unmap_single(lp->dev, skbuf_dma->dma_address, lp->max_frm_size, DMA_FROM_DEVICE); + + if (IS_ERR(app_metadata)) { + if (net_ratelimit()) + netdev_err(lp->ndev, "Failed to get RX metadata pointer\n"); + dev_kfree_skb_any(skb); + lp->ndev->stats.rx_dropped++; + goto rx_submit; + } + /* TODO: Derive app word index programmatically */ rx_len = (app_metadata[LEN_APP] & 0xFFFF); skb_put(skb, rx_len); @@ -1180,6 +1189,7 @@ static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result) u64_stats_add(&lp->rx_bytes, rx_len); u64_stats_update_end(&lp->rx_stat_sync); +rx_submit: for (i = 0; i < CIRC_SPACE(lp->rx_ring_head, lp->rx_ring_tail, RX_BUF_NUM_DEFAULT); i++) axienet_rx_submit_desc(lp->ndev); -- cgit v1.2.3 From 4844123fe0b853a4982c02666cb3fd863d701d50 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Wed, 3 Sep 2025 18:07:26 +0800 Subject: ppp: fix memory leak in pad_compress_skb If alloc_skb() fails in pad_compress_skb(), it returns NULL without releasing the old skb. The caller does: skb = pad_compress_skb(ppp, skb); if (!skb) goto drop; drop: kfree_skb(skb); When pad_compress_skb() returns NULL, the reference to the old skb is lost and kfree_skb(skb) ends up doing nothing, leading to a memory leak. Align pad_compress_skb() semantics with realloc(): only free the old skb if allocation and compression succeed. At the call site, use the new_skb variable so the original skb is not lost when pad_compress_skb() fails. Fixes: b3f9b92a6ec1 ("[PPP]: add PPP MPPE encryption module") Signed-off-by: Qingfang Deng Reviewed-by: Eric Dumazet Reviewed-by: Yue Haibing Link: https://patch.msgid.link/20250903100726.269839-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_generic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 824c8dc4120b..702a7f7183ce 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1744,7 +1744,6 @@ pad_compress_skb(struct ppp *ppp, struct sk_buff *skb) */ if (net_ratelimit()) netdev_err(ppp->dev, "ppp: compressor dropped pkt\n"); - kfree_skb(skb); consume_skb(new_skb); new_skb = NULL; } @@ -1845,9 +1844,10 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) "down - pkt dropped.\n"); goto drop; } - skb = pad_compress_skb(ppp, skb); - if (!skb) + new_skb = pad_compress_skb(ppp, skb); + if (!new_skb) goto drop; + skb = new_skb; } /* -- cgit v1.2.3 From b1ab3b029ff137f124c547452d0795e9de20ca63 Mon Sep 17 00:00:00 2001 From: Jeroen de Borst Date: Wed, 3 Sep 2025 10:56:49 -0700 Subject: gve: update MAINTAINERS Jeroen is leaving Google and Josh is taking his place as a maintainer. Signed-off-by: Jeroen de Borst Link: https://patch.msgid.link/20250903175649.23246-1-jeroendb@google.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1819c477eee3..c6f470250f6a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10388,7 +10388,7 @@ S: Maintained F: drivers/input/touchscreen/goodix* GOOGLE ETHERNET DRIVERS -M: Jeroen de Borst +M: Joshua Washington M: Harshitha Ramamurthy L: netdev@vger.kernel.org S: Maintained -- cgit v1.2.3 From 6a989d37302a41a8a8d6231a4c265fdb6b09d6eb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 3 Sep 2025 14:20:54 -0700 Subject: MAINTAINERS: add Sabrina to TLS maintainers Sabrina has been very helpful reviewing TLS patches, fixing bugs, and, I believe, the last one to implement any major feature in the TLS code base (rekeying). Add her as a maintainer. Acked-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250903212054.1885058-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c6f470250f6a..2dd6e8e5c0fa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17850,6 +17850,7 @@ F: net/ipv6/tcp*.c NETWORKING [TLS] M: John Fastabend M: Jakub Kicinski +M: Sabrina Dubroca L: netdev@vger.kernel.org S: Maintained F: include/net/tls.h -- cgit v1.2.3 From fd2004d82d8d8faa94879e3de3096c8511728637 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 3 Sep 2025 22:28:51 +0000 Subject: selftest: net: Fix weird setsockopt() in bind_bhash.c. bind_bhash.c passes (SO_REUSEADDR | SO_REUSEPORT) to setsockopt(). In the asm-generic definition, the value happens to match with the bare SO_REUSEPORT, (2 | 15) == 15, but not on some arch. arch/alpha/include/uapi/asm/socket.h:18:#define SO_REUSEADDR 0x0004 arch/alpha/include/uapi/asm/socket.h:24:#define SO_REUSEPORT 0x0200 arch/mips/include/uapi/asm/socket.h:24:#define SO_REUSEADDR 0x0004 /* Allow reuse of local addresses. */ arch/mips/include/uapi/asm/socket.h:33:#define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ arch/parisc/include/uapi/asm/socket.h:12:#define SO_REUSEADDR 0x0004 arch/parisc/include/uapi/asm/socket.h:18:#define SO_REUSEPORT 0x0200 arch/sparc/include/uapi/asm/socket.h:13:#define SO_REUSEADDR 0x0004 arch/sparc/include/uapi/asm/socket.h:20:#define SO_REUSEPORT 0x0200 include/uapi/asm-generic/socket.h:12:#define SO_REUSEADDR 2 include/uapi/asm-generic/socket.h:27:#define SO_REUSEPORT 15 Let's pass SO_REUSEPORT only. Fixes: c35ecb95c448 ("selftests/net: Add test for timing a bind request to a port with a populated bhash entry") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250903222938.2601522-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/bind_bhash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c index 57ff67a3751e..da04b0b19b73 100644 --- a/tools/testing/selftests/net/bind_bhash.c +++ b/tools/testing/selftests/net/bind_bhash.c @@ -75,7 +75,7 @@ static void *setup(void *arg) int *array = (int *)arg; for (i = 0; i < MAX_CONNECTIONS; i++) { - sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + sock_fd = bind_socket(SO_REUSEPORT, setup_addr); if (sock_fd < 0) { ret = sock_fd; pthread_exit(&ret); @@ -103,7 +103,7 @@ int main(int argc, const char *argv[]) setup_addr = use_v6 ? setup_addr_v6 : setup_addr_v4; - listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + listener_fd = bind_socket(SO_REUSEPORT, setup_addr); if (listen(listener_fd, 100) < 0) { perror("listen failed"); return -1; -- cgit v1.2.3 From 9b2bfdbf43adb9929c5ddcdd96efedbf1c88cf53 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 2 Sep 2025 14:12:59 +0200 Subject: phy: mscc: Stop taking ts_lock for tx_queue and use its own lock When transmitting a PTP frame which is timestamp using 2 step, the following warning appears if CONFIG_PROVE_LOCKING is enabled: ============================= [ BUG: Invalid wait context ] 6.17.0-rc1-00326-ge6160462704e #427 Not tainted ----------------------------- ptp4l/119 is trying to lock: c2a44ed4 (&vsc8531->ts_lock){+.+.}-{3:3}, at: vsc85xx_txtstamp+0x50/0xac other info that might help us debug this: context-{4:4} 4 locks held by ptp4l/119: #0: c145f068 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x58/0x1440 #1: c29df974 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: __dev_queue_xmit+0x5c4/0x1440 #2: c2aaaad0 (_xmit_ETHER#2){+.-.}-{2:2}, at: sch_direct_xmit+0x108/0x350 #3: c2aac170 (&lan966x->tx_lock){+.-.}-{2:2}, at: lan966x_port_xmit+0xd0/0x350 stack backtrace: CPU: 0 UID: 0 PID: 119 Comm: ptp4l Not tainted 6.17.0-rc1-00326-ge6160462704e #427 NONE Hardware name: Generic DT based system Call trace: unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x7c/0xac dump_stack_lvl from __lock_acquire+0x8e8/0x29dc __lock_acquire from lock_acquire+0x108/0x38c lock_acquire from __mutex_lock+0xb0/0xe78 __mutex_lock from mutex_lock_nested+0x1c/0x24 mutex_lock_nested from vsc85xx_txtstamp+0x50/0xac vsc85xx_txtstamp from lan966x_fdma_xmit+0xd8/0x3a8 lan966x_fdma_xmit from lan966x_port_xmit+0x1bc/0x350 lan966x_port_xmit from dev_hard_start_xmit+0xc8/0x2c0 dev_hard_start_xmit from sch_direct_xmit+0x8c/0x350 sch_direct_xmit from __dev_queue_xmit+0x680/0x1440 __dev_queue_xmit from packet_sendmsg+0xfa4/0x1568 packet_sendmsg from __sys_sendto+0x110/0x19c __sys_sendto from sys_send+0x18/0x20 sys_send from ret_fast_syscall+0x0/0x1c Exception stack(0xf0b05fa8 to 0xf0b05ff0) 5fa0: 00000001 0000000e 0000000e 0004b47a 0000003a 00000000 5fc0: 00000001 0000000e 00000000 00000121 0004af58 00044874 00000000 00000000 5fe0: 00000001 bee9d420 00025a10 b6e75c7c So, instead of using the ts_lock for tx_queue, use the spinlock that skb_buff_head has. Reviewed-by: Vadim Fedorenko Fixes: 7d272e63e0979d ("net: phy: mscc: timestamping and PHC support") Signed-off-by: Horatiu Vultur Link: https://patch.msgid.link/20250902121259.3257536-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mscc/mscc_ptp.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index 72847320cb65..d692df7d975c 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -456,12 +456,12 @@ static void vsc85xx_dequeue_skb(struct vsc85xx_ptp *ptp) *p++ = (reg >> 24) & 0xff; } - len = skb_queue_len(&ptp->tx_queue); + len = skb_queue_len_lockless(&ptp->tx_queue); if (len < 1) return; while (len--) { - skb = __skb_dequeue(&ptp->tx_queue); + skb = skb_dequeue(&ptp->tx_queue); if (!skb) return; @@ -486,7 +486,7 @@ static void vsc85xx_dequeue_skb(struct vsc85xx_ptp *ptp) * packet in the FIFO right now, reschedule it for later * packets. */ - __skb_queue_tail(&ptp->tx_queue, skb); + skb_queue_tail(&ptp->tx_queue, skb); } } @@ -1068,6 +1068,7 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, case HWTSTAMP_TX_ON: break; case HWTSTAMP_TX_OFF: + skb_queue_purge(&vsc8531->ptp->tx_queue); break; default: return -ERANGE; @@ -1092,9 +1093,6 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, mutex_lock(&vsc8531->ts_lock); - __skb_queue_purge(&vsc8531->ptp->tx_queue); - __skb_queue_head_init(&vsc8531->ptp->tx_queue); - /* Disable predictor while configuring the 1588 block */ val = vsc85xx_ts_read_csr(phydev, PROCESSOR, MSCC_PHY_PTP_INGR_PREDICTOR); @@ -1180,9 +1178,7 @@ static void vsc85xx_txtstamp(struct mii_timestamper *mii_ts, skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - mutex_lock(&vsc8531->ts_lock); - __skb_queue_tail(&vsc8531->ptp->tx_queue, skb); - mutex_unlock(&vsc8531->ts_lock); + skb_queue_tail(&vsc8531->ptp->tx_queue, skb); return; out: @@ -1548,6 +1544,7 @@ void vsc8584_ptp_deinit(struct phy_device *phydev) if (vsc8531->ptp->ptp_clock) { ptp_clock_unregister(vsc8531->ptp->ptp_clock); skb_queue_purge(&vsc8531->rx_skbs_list); + skb_queue_purge(&vsc8531->ptp->tx_queue); } } @@ -1571,7 +1568,7 @@ irqreturn_t vsc8584_handle_ts_interrupt(struct phy_device *phydev) if (rc & VSC85XX_1588_INT_FIFO_ADD) { vsc85xx_get_tx_ts(priv->ptp); } else if (rc & VSC85XX_1588_INT_FIFO_OVERFLOW) { - __skb_queue_purge(&priv->ptp->tx_queue); + skb_queue_purge(&priv->ptp->tx_queue); vsc85xx_ts_reset_fifo(phydev); } @@ -1591,6 +1588,7 @@ int vsc8584_ptp_probe(struct phy_device *phydev) mutex_init(&vsc8531->phc_lock); mutex_init(&vsc8531->ts_lock); skb_queue_head_init(&vsc8531->rx_skbs_list); + skb_queue_head_init(&vsc8531->ptp->tx_queue); /* Retrieve the shared load/save GPIO. Request it as non exclusive as * the same GPIO can be requested by all the PHYs of the same package. -- cgit v1.2.3