From 0eaa1f245ac03ed0c6394159360532726f666811 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 29 May 2026 08:57:05 +0300 Subject: wifi: iwlwifi: mvm: don't support the reset handshake for old firmwares -77.ucode doesn't contain the fixes for this flow it seems. Don't use the firmware reset handshake even if the firmware claims support for it. Fixes: 906d4eb84408 ("iwlwifi: support firmware reset handshake") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220600 Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260529085453.9307b81d9b02.I21bba9e649f4cd0e35d3ea6cd97a03258be5832f@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index ae177477b201..384bed95835d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1416,6 +1416,12 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_rf_cfg *cfg, fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_FW_RESET_HANDSHAKE); + /* Those firmware versions claim to support the fw_reset_handshake + * but they are buggy. + */ + if (IWL_UCODE_MAJOR(mvm->fw->ucode_ver) <= 77) + trans->conf.fw_reset_handshake = false; + trans->conf.queue_alloc_cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, WIDE_ID(DATA_PATH_GROUP, -- cgit v1.2.3 From 9bf1b409afc7c4a1f0340f3975846c4f3278643a Mon Sep 17 00:00:00 2001 From: Pagadala Yesu Anjaneyulu Date: Fri, 29 May 2026 08:57:06 +0300 Subject: wifi: iwlwifi: mld: send tx power constraints before link activation TX power constraints must be sent to the firmware before link activation. If not, the firmware will use default power values. Fix this by moving the iwl_mld_send_ap_tx_power_constraint_cmd() call from iwl_mld_start_ap_ibss() to iwl_mld_assign_vif_chanctx(), before iwl_mld_activate_link() for AP interfaces. Also update the guard in the function to allow it to run before link activation for AP interfaces. Signed-off-by: Pagadala Yesu Anjaneyulu Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260529085453.06c94b01efd2.Id43bdfe5eb030061c23348779687ba71b5f58182@changeid --- drivers/net/wireless/intel/iwlwifi/mld/ap.c | 4 ---- drivers/net/wireless/intel/iwlwifi/mld/mac80211.c | 7 +++++++ drivers/net/wireless/intel/iwlwifi/mld/power.c | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireless/intel/iwlwifi/mld/ap.c b/drivers/net/wireless/intel/iwlwifi/mld/ap.c index 5c59acc8c4c5..6598d9333333 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/ap.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/ap.c @@ -9,7 +9,6 @@ #include "ap.h" #include "hcmd.h" #include "tx.h" -#include "power.h" #include "key.h" #include "phy.h" #include "iwl-utils.h" @@ -273,9 +272,6 @@ int iwl_mld_start_ap_ibss(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx; int ret; - if (vif->type == NL80211_IFTYPE_AP) - iwl_mld_send_ap_tx_power_constraint_cmd(mld, vif, link); - ret = iwl_mld_update_beacon_template(mld, vif, link); if (ret) return ret; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index da6fd7471568..3c8daddc0bcb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -1150,6 +1150,13 @@ int iwl_mld_assign_vif_chanctx(struct ieee80211_hw *hw, if (iwl_mld_can_activate_link(mld, vif, link)) { iwl_mld_tlc_update_phy(mld, vif, link); + /* FW requires AP_TX_POWER_CONSTRAINTS_CMD before link + * activation for AP and after link activation for STA, + * for an unknown reason. + */ + if (vif->type == NL80211_IFTYPE_AP) + iwl_mld_send_ap_tx_power_constraint_cmd(mld, vif, link); + ret = iwl_mld_activate_link(mld, link); if (ret) goto err; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/power.c b/drivers/net/wireless/intel/iwlwifi/mld/power.c index 49b0d9f8f865..266fe16bb95d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/power.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/power.c @@ -366,7 +366,7 @@ iwl_mld_send_ap_tx_power_constraint_cmd(struct iwl_mld *mld, lockdep_assert_wiphy(mld->wiphy); - if (!mld_link->active) + if (!mld_link->active && vif->type != NL80211_IFTYPE_AP) return; if (link->chanreq.oper.chan->band != NL80211_BAND_6GHZ) -- cgit v1.2.3 From e0c121d545134af886b28c4c26d91abf5dd39c17 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 29 May 2026 08:57:07 +0300 Subject: wifi: iwlwifi: mvm: avoid oversized UATS command copy MCC_ALLOWED_AP_TYPE_CMD exceeds the fixed copied host-command buffer and triggers warnings in the gen2 enqueue path when command 0xc05 is sent. Use IWL_HCMD_DFL_NOCOPY as it was done before the offending commit. Fixes: 078df640ef05 ("wifi: iwlwifi: mld: add support for iwl_mcc_allowed_ap_type_cmd v2") Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260529085453.9af349ab459b.I348df3980764c15efce0099a35fe8a88fb2a6ee2@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index f05df3a3300e..6e507d6dcdd2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2025 Intel Corporation + * Copyright (C) 2012-2014, 2018-2026 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -459,9 +459,14 @@ static void iwl_mvm_phy_filter_init(struct iwl_mvm *mvm, static void iwl_mvm_uats_init(struct iwl_mvm *mvm) { + struct iwl_mcc_allowed_ap_type_cmd_v1 *cmd __free(kfree) = NULL; int cmd_id = WIDE_ID(REGULATORY_AND_NVM_GROUP, MCC_ALLOWED_AP_TYPE_CMD); - struct iwl_mcc_allowed_ap_type_cmd_v1 cmd = {}; + struct iwl_host_cmd hcmd = { + .id = cmd_id, + .len[0] = sizeof(*cmd), + .dataflags[0] = IWL_HCMD_DFL_NOCOPY, + }; u8 cmd_ver; int ret; @@ -485,14 +490,25 @@ static void iwl_mvm_uats_init(struct iwl_mvm *mvm) if (!mvm->fwrt.ap_type_cmd_valid) return; + /* Since we free the command immediately after iwl_mvm_send_cmd, we + * must send this command in SYNC mode. + */ + lockdep_assert_held(&mvm->mutex); + + cmd = kzalloc_obj(*cmd); + if (!cmd) + return; + BUILD_BUG_ON(sizeof(mvm->fwrt.ap_type_cmd.mcc_to_ap_type_map) != - sizeof(cmd.mcc_to_ap_type_map)); + sizeof(cmd->mcc_to_ap_type_map)); - memcpy(cmd.mcc_to_ap_type_map, + memcpy(cmd->mcc_to_ap_type_map, mvm->fwrt.ap_type_cmd.mcc_to_ap_type_map, sizeof(mvm->fwrt.ap_type_cmd.mcc_to_ap_type_map)); - ret = iwl_mvm_send_cmd_pdu(mvm, cmd_id, 0, sizeof(cmd), &cmd); + hcmd.data[0] = cmd; + + ret = iwl_mvm_send_cmd(mvm, &hcmd); if (ret < 0) IWL_ERR(mvm, "failed to send MCC_ALLOWED_AP_TYPE_CMD (%d)\n", ret); -- cgit v1.2.3 From 093305d801fae6ff9b8bb531fd78b579794c4f80 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 31 May 2026 13:30:19 +0300 Subject: wifi: iwlwifi: pcie: simplify the resume flow if fast resume is not used In most distributions, NetworkManager shuts the device down before entering system suspend, so fast suspend is typically not used. On older devices, resume currently tries to grab NIC access to infer whether the device was powered off while suspended. That probe is only meaningful for the fast-suspend path where the device is expected to remain alive. Unfortunately, for unclear reasons, grabbing NIC access was harmful as reported in the bugzilla ticket below. Workaround this issue by simply not grabbing NIC access if fast suspend is not used. Cc: stable@vger.kernel.org Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221501 Assisted-by: GitHub Copilot:gpt-5.3-codex Signed-off-by: Emmanuel Grumbach Link: https://patch.msgid.link/20260531133005.e2ed9e0cd44f.If283625983a843933e0c01561a421daff184e9e9@changeid Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 56 +++++++++++++++------------ 1 file changed, 32 insertions(+), 24 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index dc99e7ac4726..eb3c5a6dd088 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1225,33 +1225,41 @@ static int _iwl_pci_resume(struct device *device, bool restore) if (!trans->op_mode) return 0; - /* - * Scratch value was altered, this means the device was powered off, we - * need to reset it completely. - * Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan, - * but not bits [15:8]. So if we have bits set in lower word, assume - * the device is alive. - * Alternatively, if the scratch value is 0xFFFFFFFF, then we no longer - * have access to the device and consider it powered off. - * For older devices, just try silently to grab the NIC. - */ - if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) { - u32 scratch = iwl_read32(trans, CSR_FUNC_SCRATCH); - - if (!(scratch & CSR_FUNC_SCRATCH_POWER_OFF_MASK) || - scratch == ~0U) - device_was_powered_off = true; - } else { + if (test_bit(STATUS_DEVICE_ENABLED, &trans->status)) { /* - * bh are re-enabled by iwl_trans_pcie_release_nic_access, - * so re-enable them if _iwl_trans_pcie_grab_nic_access fails. + * Scratch value was altered, this means the device was powered + * off, we need to reset it completely. + * Note: MAC (bits 0:7) will be cleared upon suspend even with + * wowlan, but not bits [15:8]. So if we have bits set in lower + * word, assume the device is alive. + * Alternatively, if the scratch value is 0xFFFFFFFF, then we + * no longer have access to the device and consider it powered + * off. + * For older devices, just try silently to grab the NIC. */ - local_bh_disable(); - if (_iwl_trans_pcie_grab_nic_access(trans, true)) { - iwl_trans_pcie_release_nic_access(trans); + if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) { + u32 scratch = iwl_read32(trans, CSR_FUNC_SCRATCH); + + if (!(scratch & CSR_FUNC_SCRATCH_POWER_OFF_MASK) || + scratch == ~0U) { + IWL_DEBUG_WOWLAN(trans, + "Scratch 0x%08x indicates device was powered off\n", + scratch); + device_was_powered_off = true; + } } else { - device_was_powered_off = true; - local_bh_enable(); + /* + * bh are re-enabled by iwl_trans_pcie_release_nic_access, + * so re-enable them if _iwl_trans_pcie_grab_nic_access + * fails. + */ + local_bh_disable(); + if (_iwl_trans_pcie_grab_nic_access(trans, true)) { + iwl_trans_pcie_release_nic_access(trans); + } else { + device_was_powered_off = true; + local_bh_enable(); + } } } -- cgit v1.2.3 From 02896a7fa4cd3ec61d60ba30136841e4f04bdeac Mon Sep 17 00:00:00 2001 From: Nikolay Kuratov Date: Tue, 26 May 2026 19:29:32 +0300 Subject: net/mlx5: Reorder completion before putting command entry in cmd_work_handler Assuming callback != NULL && !page_queue, cmd_work_handler takes command entry with refcnt == 1 from mlx5_cmd_invoke. If either semaphore timeout or index allocation error happens, it does final cmd_ent_put(ent). To avoid access to freed memory, notify slotted completion before cmd_ent_put. This is theoretical issue found by Svace static analyser. Cc: stable@vger.kernel.org Fixes: 485d65e135712 ("net/mlx5: Add a timeout to acquire the command queue semaphore") Fixes: 0e2909c6bec90 ("net/mlx5: Fix variable not being completed when function returns") Signed-off-by: Nikolay Kuratov Reviewed-by: Md Haris Iqbal Reviewed-by: Moshe Shemesh Acked-by: Tariq Toukan Link: https://patch.msgid.link/20260526162932.501584-1-kniv@yandex-team.ru Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index c89417c1a1f9..e2895972cc82 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1002,12 +1002,13 @@ static void cmd_work_handler(struct work_struct *work) ent->callback(-EBUSY, ent->context); mlx5_free_cmd_msg(dev, ent->out); free_msg(dev, ent->in); + complete(&ent->slotted); cmd_ent_put(ent); } else { ent->ret = -EBUSY; complete(&ent->done); + complete(&ent->slotted); } - complete(&ent->slotted); return; } alloc_ret = cmd_alloc_index(cmd, ent); @@ -1017,13 +1018,14 @@ static void cmd_work_handler(struct work_struct *work) ent->callback(-EAGAIN, ent->context); mlx5_free_cmd_msg(dev, ent->out); free_msg(dev, ent->in); + complete(&ent->slotted); cmd_ent_put(ent); } else { ent->ret = -EAGAIN; complete(&ent->done); + complete(&ent->slotted); } up(&cmd->vars.sem); - complete(&ent->slotted); return; } } else { -- cgit v1.2.3 From 73bf3cca7de6a73f53b6a52dc3b1c82ae5667a4d Mon Sep 17 00:00:00 2001 From: Oscar Maes Date: Thu, 28 May 2026 16:03:20 +0200 Subject: pcnet32: stop holding device spin lock during napi_complete_done napi_complete_done may call gro_flush_normal (though not currently, as GRO is unsupported at the moment), which may result in packet TX. This will eventually result in calling pcnet32_start_xmit - resulting in a deadlock while trying to re-acquire the already locked spin lock. It is safe to split the spinlock block into two, because the hardware registers are still protected from concurrent access, and the two blocks perform unrelated operations that don't need to happen atomically. Fixes: 5b2ec6f2be51 ("pcnet32: use napi_complete_done()") Reviewed-by: Andrew Lunn Signed-off-by: Oscar Maes Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20260528140320.5556-1-oscmaes92@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pcnet32.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 911808ab13a7..4f3076d4ea34 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1407,8 +1407,10 @@ static int pcnet32_poll(struct napi_struct *napi, int budget) pcnet32_restart(dev, CSR0_START); netif_wake_queue(dev); } + spin_unlock_irqrestore(&lp->lock, flags); if (work_done < budget && napi_complete_done(napi, work_done)) { + spin_lock_irqsave(&lp->lock, flags); /* clear interrupt masks */ val = lp->a->read_csr(ioaddr, CSR3); val &= 0x00ff; @@ -1416,9 +1418,9 @@ static int pcnet32_poll(struct napi_struct *napi, int budget) /* Set interrupt enable. */ lp->a->write_csr(ioaddr, CSR0, CSR0_INTEN); + spin_unlock_irqrestore(&lp->lock, flags); } - spin_unlock_irqrestore(&lp->lock, flags); return work_done; } -- cgit v1.2.3 From 8173d22b211f615015f7b35f48ab11a6dd78dc99 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Fri, 29 May 2026 21:03:00 +0000 Subject: net: lan743x: permit VLAN-tagged packets up to configured MTU VLAN-tagged interfaces on lan743x devices were previously unreachable via SSH and failed to respond to large ping packets (e.g. "ping -s 1469" given MTU=1500). In these scenarios, "ethtool -S" reports non-zero "RX Oversize Frame Errors". According to Microchip AN2948, the MAC_RX FSE (VLAN field size enforcement) bit determines whether frames with VLAN tags exceeding the base MTU plus tag length are discarded. The driver must set the MAC_RX.FSE bit before setting MAC_RX.RXEN to allow VLAN-tagged frames up to the interface MTU, preventing them from being treated as oversized. As a result, both the base and VLAN-tagged interfaces can use the same MTU without receive errors. Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: David Thompson Reviewed-by: Thangaraj Samynathan Reviewed-by: Nicolai Buchwitz Tested-by: Nicolai Buchwitz # lan7430 on arm64 (RevPi Link: https://patch.msgid.link/20260529210300.433135-1-davthompson@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 32 +++++++++++++++++++++++++++ drivers/net/ethernet/microchip/lan743x_main.h | 1 + 2 files changed, 33 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index f3332417162e..ffac22883e49 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1219,6 +1219,36 @@ static void lan743x_mac_set_address(struct lan743x_adapter *adapter, "MAC address set to %pM\n", addr); } +static void lan743x_mac_rx_enable_fse(struct lan743x_adapter *adapter) +{ + u32 mac_rx; + bool rxen; + + mac_rx = lan743x_csr_read(adapter, MAC_RX); + if (mac_rx & MAC_RX_FSE_) + return; + + rxen = mac_rx & MAC_RX_RXEN_; + if (rxen) { + mac_rx &= ~MAC_RX_RXEN_; + lan743x_csr_write(adapter, MAC_RX, mac_rx); + lan743x_csr_wait_for_bit(adapter, MAC_RX, MAC_RX_RXD_, + 1, 1000, 20000, 100); + } + + /* Per AN2948, hardware prevents modification of the FSE bit while the + * MAC receiver is enabled (RXEN bit set). Use separate register write + * to assert the FSE bit before enabling the RXEN bit in MAC_RX + */ + mac_rx |= MAC_RX_FSE_; + lan743x_csr_write(adapter, MAC_RX, mac_rx); + + if (rxen) { + mac_rx |= MAC_RX_RXEN_; + lan743x_csr_write(adapter, MAC_RX, mac_rx); + } +} + static int lan743x_mac_init(struct lan743x_adapter *adapter) { bool mac_address_valid = true; @@ -1258,6 +1288,8 @@ static int lan743x_mac_init(struct lan743x_adapter *adapter) lan743x_mac_set_address(adapter, adapter->mac_address); eth_hw_addr_set(netdev, adapter->mac_address); + lan743x_mac_rx_enable_fse(adapter); + return 0; } diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 160d94a7cee6..1573c8f9c993 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -182,6 +182,7 @@ #define MAC_RX (0x104) #define MAC_RX_MAX_SIZE_SHIFT_ (16) #define MAC_RX_MAX_SIZE_MASK_ (0x3FFF0000) +#define MAC_RX_FSE_ BIT(2) #define MAC_RX_RXD_ BIT(1) #define MAC_RX_RXEN_ BIT(0) -- cgit v1.2.3 From b455410146bf723c7ebcb49ecd5becc0d6611482 Mon Sep 17 00:00:00 2001 From: Tapio Reijonen Date: Fri, 29 May 2026 06:18:57 +0000 Subject: net: fec: fix pinctrl default state restore order on resume In fec_resume(), fec_enet_clk_enable() is called before pinctrl_pm_select_default_state() in the non-WoL path, inverting the ordering used in fec_suspend() which correctly switches to the sleep pinctrl state before disabling clocks. For PHYs with the PHY_RST_AFTER_CLK_EN flag (e.g. TI DP83848 or SMSC LAN87xx), fec_enet_clk_enable() triggers a hardware reset pulse via the phy-reset GPIO. With the GPIO pin still in sleep pinctrl state at that point, the GPIO write has no physical effect and the PHY never receives the required reset after clock enable, leading to unreliable link establishment after system resume. Fix by restoring the default pinctrl state before enabling clocks, making resume the proper mirror of suspend. The call is made unconditionally: fec_suspend() only switches to the sleep pinctrl state on the non-WoL path and leaves the pins in the default state when WoL is enabled, so on a WoL resume the device is already in the default state and pinctrl_pm_select_default_state() is a no-op. Fixes: de40ed31b3c5 ("net: fec: add Wake-on-LAN support") Signed-off-by: Tapio Reijonen Reviewed-by: Wei Fang Link: https://patch.msgid.link/20260529-b4-fec-resume-pinctrl-order-v3-1-6eda0f592fca@vaisala.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index f89aa94ce020..6ebde65d7f1b 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -5594,6 +5594,7 @@ static int fec_resume(struct device *dev) if (fep->rpm_active) pm_runtime_force_resume(dev); + pinctrl_pm_select_default_state(&fep->pdev->dev); ret = fec_enet_clk_enable(ndev, true); if (ret) { rtnl_unlock(); @@ -5610,8 +5611,6 @@ static int fec_resume(struct device *dev) val &= ~(FEC_ECR_MAGICEN | FEC_ECR_SLEEP); writel(val, fep->hwp + FEC_ECNTRL); fep->wol_flag &= ~FEC_WOL_FLAG_SLEEP_ON; - } else { - pinctrl_pm_select_default_state(&fep->pdev->dev); } fec_restart(ndev); netif_tx_lock_bh(ndev); -- cgit v1.2.3 From 56d0885514491e5ed8f7593400879ab77c52504c Mon Sep 17 00:00:00 2001 From: Jonas Jelonek Date: Thu, 28 May 2026 20:52:40 +0000 Subject: net: sfp: initialize i2c_block_size at adapter configure time sfp->i2c_block_size is only assigned in sfp_sm_mod_probe(), which runs from the state machine timer after SFP_F_PRESENT has been set. Between those two points, sfp_module_eeprom() (the ethtool -m callback) gates only on SFP_F_PRESENT and can be entered with i2c_block_size still at its kzalloc'd value of 0. On a pure-I2C adapter, sfp_i2c_read() then issues an i2c_transfer() with msgs[1].len = 0 inside a loop that subtracts this_len from len each iteration; on adapters that succeed a zero-length read the loop never advances, spinning while holding rtnl_lock. This was previously addressed by initializing i2c_block_size in sfp_alloc() (commit 813c2dd78618), but the initialization was dropped when i2c_block_size was split from i2c_max_block_size. Initialize sfp->i2c_block_size from sfp->i2c_max_block_size in sfp_i2c_configure(), so the field is valid as soon as the adapter is known. sfp_sm_mod_probe() still reassigns it on each module insertion to recover from a per-module clamp to 1 (sfp_id_needs_byte_io). Fixes: 7662abf4db94 ("net: phy: sfp: Add support for SMBus module access") Cc: stable@vger.kernel.org Signed-off-by: Jonas Jelonek Link: https://patch.msgid.link/20260528205242.971410-2-jelonek.jonas@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index bd970f753beb..b94b9c433a21 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -822,6 +822,7 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c) return -EINVAL; } + sfp->i2c_block_size = sfp->i2c_max_block_size; return 0; } -- cgit v1.2.3 From a910fb8f7b9e4c566db363e6c2ec378dc7153995 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Fri, 29 May 2026 17:07:57 +0530 Subject: octeontx2-pf: Fix NDC sync operation errors On system reboot "rvu_nicpf 0002:03:00.0: NDC sync operation failed" error messages are shown, even if the operations is successful. This is due to wrong if error check in ndc_syc() function. Fixes: 42c45ac1419c ("octeontx2-af: Sync NIX and NPA contexts from NDC to LLC/DRAM") Signed-off-by: Geetha sowjanya Signed-off-by: Subbaraya Sundeep Reviewed-by: Simon Horman Link: https://patch.msgid.link/1780054677-17249-1-git-send-email-sbhatta@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index ee623476e5ff..f9fbf0c17648 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -3473,7 +3473,7 @@ static void otx2_ndc_sync(struct otx2_nic *pf) req->nix_lf_rx_sync = 1; req->npa_lf_sync = 1; - if (!otx2_sync_mbox_msg(mbox)) + if (otx2_sync_mbox_msg(mbox)) dev_err(pf->dev, "NDC sync operation failed\n"); mutex_unlock(&mbox->lock); -- cgit v1.2.3 From 9a85ec3dc28b6df246801c19e4d9bae6297a25b0 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Fri, 29 May 2026 17:07:05 +0530 Subject: octeontx2-af: Fix initialization of mcam's entry2target_pffunc field NPC mcam entry stores a mapping between mcam entry and target pcifunc. During initialization of this field, API kmalloc_array has been used which caused some junk values to array. Whereas, the array is expected to be initialized by 0. This patch fixes the same by using kcalloc instead of kmalloc_array. Fixes: 55307fcb9258 ("octeontx2-af: Add mbox messages to install and delete MCAM rules") Signed-off-by: Suman Ghosh Signed-off-by: Subbaraya Sundeep Reviewed-by: Simon Horman Link: https://patch.msgid.link/1780054625-17090-1-git-send-email-sbhatta@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 607d0cf1a778..6bbda0593fcd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -2192,8 +2192,8 @@ int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) goto free_entry_cntr_map; /* Alloc memory for saving target device of mcam rule */ - mcam->entry2target_pffunc = kmalloc_array(mcam->total_entries, - sizeof(u16), GFP_KERNEL); + mcam->entry2target_pffunc = kcalloc(mcam->total_entries, + sizeof(u16), GFP_KERNEL); if (!mcam->entry2target_pffunc) goto free_cntr_refcnt; -- cgit v1.2.3 From 672bd0519e27c357c43b7f8c0d653fce3817d06e Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Fri, 29 May 2026 19:11:47 +0200 Subject: ptp: vclock: Switch from RCU to SRCU The usage of PTP vClocks leads immediately to the following issues with ptp4l with LOCKDEP and DEBUG_ATOMIC_SLEEP enabled: "BUG: sleeping function called from invalid context". ptp_convert_timestamp() acquires a mutex_t within a RCU read section. This is illegal, because acquiring a mutex_t can result in voluntary scheduling request which is not allowed within a RCU read section. Replace the RCU usage with SRCU where sleeping is allowed. Reported-by: Florian Zeitz Closes: https://lore.kernel.org/all/00a8cce8-410e-4038-98af-49be6d93d7bd@schettke.com/ Fixes: 67d93ffc0f3c ("ptp: vclock: use mutex to fix "sleep on atomic" bug") Signed-off-by: Kurt Kanzenbach Reviewed-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20260529-vclock_rcu-v2-1-02a5531fab92@linutronix.de Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_vclock.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c index 915a4f6defc9..84cb527f59cc 100644 --- a/drivers/ptp/ptp_vclock.c +++ b/drivers/ptp/ptp_vclock.c @@ -19,6 +19,8 @@ static DEFINE_SPINLOCK(vclock_hash_lock); static DEFINE_READ_MOSTLY_HASHTABLE(vclock_hash, 8); +DEFINE_STATIC_SRCU(vclock_srcu); + static void ptp_vclock_hash_add(struct ptp_vclock *vclock) { spin_lock(&vclock_hash_lock); @@ -37,7 +39,7 @@ static void ptp_vclock_hash_del(struct ptp_vclock *vclock) spin_unlock(&vclock_hash_lock); - synchronize_rcu(); + synchronize_srcu(&vclock_srcu); } static int ptp_vclock_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) @@ -276,14 +278,16 @@ ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index) { unsigned int hash = vclock_index % HASH_SIZE(vclock_hash); struct ptp_vclock *vclock; - u64 ns; u64 vclock_ns = 0; + int srcu_idx; + u64 ns; ns = ktime_to_ns(*hwtstamp); - rcu_read_lock(); + srcu_idx = srcu_read_lock(&vclock_srcu); - hlist_for_each_entry_rcu(vclock, &vclock_hash[hash], vclock_hash_node) { + hlist_for_each_entry_srcu(vclock, &vclock_hash[hash], vclock_hash_node, + srcu_read_lock_held(&vclock_srcu)) { if (vclock->clock->index != vclock_index) continue; @@ -294,7 +298,7 @@ ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index) break; } - rcu_read_unlock(); + srcu_read_unlock(&vclock_srcu, srcu_idx); return ns_to_ktime(vclock_ns); } -- cgit v1.2.3 From b38cae85d1c45ff189d7ecb6ac36f41cdc3d84d0 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 2 Jun 2026 11:21:04 +0200 Subject: net: airoha: Fix use-after-free in metadata dst teardown airoha_metadata_dst_free() runs metadata_dst_free() which frees the metadata_dst with kfree() immediately, bypassing the RCU grace period. In the RX path, skb_dst_set_noref() sets a non-refcounted pointer from the skb to the metadata_dst. This function requires RCU read-side protection and the dst must remain valid until all RCU readers complete. Since metadata_dst_free() calls kfree() directly, an use-after-free can occur if any skb still holds a noref pointer to the dst when the driver tears it down. Replace metadata_dst_free() with dst_release() which properly goes through the refcount path: when the refcount drops to zero, it schedules the actual free via call_rcu_hurry(), ensuring all RCU readers have completed before the memory is freed. Fixes: af3cf757d5c9 ("net: airoha: Move DSA tag in DMA descriptor") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260602-airoha-mtk-metadata-uaf-fix-v1-1-3aaa99d83351@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index cecd66251dba..eab6a98d62b9 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2936,7 +2936,7 @@ static void airoha_metadata_dst_free(struct airoha_gdm_port *port) if (!port->dsa_meta[i]) continue; - metadata_dst_free(port->dsa_meta[i]); + dst_release(&port->dsa_meta[i]->dst); } } -- cgit v1.2.3 From 80df409e1a483676826a6c66e693dba6ac507751 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 2 Jun 2026 11:21:05 +0200 Subject: net: ethernet: mtk_eth_soc: Fix use-after-free in metadata dst teardown mtk_free_dev() calls metadata_dst_free() which frees the metadata_dst with kfree() immediately, bypassing the RCU grace period. In the RX path, skb_dst_set_noref() sets a non-refcounted pointer from the skb to the metadata_dst. This function requires RCU read-side protection and the dst must remain valid until all RCU readers complete. Since metadata_dst_free() calls kfree() directly, a use-after-free can occur if any skb still holds a noref pointer to the dst when the driver tears it down. Replace metadata_dst_free() with dst_release() which properly goes through the refcount path: when the refcount drops to zero, it schedules the actual free via call_rcu_hurry(), ensuring all RCU readers have completed before the memory is freed. Fixes: 2d7605a72906 ("net: ethernet: mtk_eth_soc: enable hardware DSA untagging") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260602-airoha-mtk-metadata-uaf-fix-v1-2-3aaa99d83351@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 8d225bc9f063..7d771168b990 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4491,7 +4491,7 @@ static int mtk_free_dev(struct mtk_eth *eth) for (i = 0; i < ARRAY_SIZE(eth->dsa_meta); i++) { if (!eth->dsa_meta[i]) break; - metadata_dst_free(eth->dsa_meta[i]); + dst_release(ð->dsa_meta[i]->dst); } return 0; -- cgit v1.2.3 From 1231623fd3b5aa6b41cce799ffb0d82e10914be4 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 29 May 2026 16:47:00 +0200 Subject: geneve: fix length used in GRO hint UDP checksum adjustment In geneve_post_decap_hint the length used for adjusting the UDP checksum should be 'skb->len - gro_hint->nested_tp_offset' (UDP length) instead of 'skb->len - gro_hint->nested_nh_offset' (IP length). Fixes: fd0dd796576e ("geneve: use GRO hint option in the RX path") Cc: Paolo Abeni Reported-by: Sashiko Closes: https://sashiko.dev/#/patchset/20260521131436.748832-1-jhs%40mojatatu.com Signed-off-by: Antoine Tenart Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260529144713.780938-1-atenart@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/geneve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index c6563367d382..715180c3a1b3 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -632,7 +632,7 @@ static int geneve_post_decap_hint(const struct sock *sk, struct sk_buff *skb, uh = udp_hdr(skb); uh->len = htons(skb->len - gro_hint->nested_tp_offset); if (uh->check) { - len = skb->len - gro_hint->nested_nh_offset; + len = skb->len - gro_hint->nested_tp_offset; skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; if (gro_hint->nested_is_v6) uh->check = ~udp_v6_check(len, &ipv6h->saddr, -- cgit v1.2.3 From a764b0e8317a863006e05732e1aefe821b9d8c2d Mon Sep 17 00:00:00 2001 From: ZhaoJinming Date: Mon, 1 Jun 2026 16:56:49 +0800 Subject: net: bonding: fix NULL pointer dereference in bond_do_ioctl() In bond_do_ioctl(), slave_dev is obtained via __dev_get_by_name() which can return NULL if the requested interface name does not exist. However, the subsequent slave_dbg() call is placed before the NULL check: slave_dev = __dev_get_by_name(net, ifr->ifr_slave); slave_dbg(bond_dev, slave_dev, "slave_dev=%p:\n", slave_dev); //here if (!slave_dev) return -ENODEV; The slave_dbg() macro expands to netdev_dbg(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ...) which unconditionally dereferences slave_dev->name before the NULL check is performed. This results in a NULL pointer dereference kernel oops when a user calls bonding ioctl (e.g. SIOCBONDENSLAVE, SIOCBONDRELEASE, etc.) with a non-existent slave interface name. This is reachable from userspace via the bonding ioctl interface with CAP_NET_ADMIN capability, making it a potential local denial-of-service vector. Fix by moving the slave_dbg() call after the NULL check. Fixes: e2a7420df2e0 ("bonding/main: convert to using slave printk macros") Cc: stable@vger.kernel.org # v5.2+ Signed-off-by: ZhaoJinming Link: https://patch.msgid.link/20260601085649.4029067-1-zhaojinming@uniontech.com Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 82e779f7916b..8e75453ce0ef 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4621,11 +4621,11 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd slave_dev = __dev_get_by_name(net, ifr->ifr_slave); - slave_dbg(bond_dev, slave_dev, "slave_dev=%p:\n", slave_dev); - if (!slave_dev) return -ENODEV; + slave_dbg(bond_dev, slave_dev, "slave_dev=%p:\n", slave_dev); + switch (cmd) { case SIOCBONDENSLAVE: res = bond_enslave(bond_dev, slave_dev, NULL); -- cgit v1.2.3 From 1d31eb27e570daa04f5373345f9ac98c95863be9 Mon Sep 17 00:00:00 2001 From: Nithin Dabilpuram Date: Tue, 2 Jun 2026 10:28:53 +0530 Subject: octeontx2-af: npc: Fix CPT channel mask in npc_install_flow Use the CPT-aware NIX channel mask in the npc_install_flow path so that when the host PF installs steering rules in kernel for a VF used from userspace (e.g. DPDK), MCAM entries see the same channel mask semantics as other RX paths. Fixes: 56bcef528bd8 ("octeontx2-af: Use npc_install_flow API for promisc and broadcast entries") Cc: Naveen Mamindlapalli Signed-off-by: Nithin Dabilpuram Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260602045853.1558530-1-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 1 + .../net/ethernet/marvell/octeontx2/af/rvu_npc.c | 32 +++++++++++----------- .../net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c | 2 +- 3 files changed, 18 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index de3fbd3d15d6..65397daae4c2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -1145,6 +1145,7 @@ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int lf, int slot); int rvu_cpt_ctx_flush(struct rvu *rvu, u16 pcifunc); int rvu_cpt_init(struct rvu *rvu); +u32 rvu_get_cpt_chan_mask(struct rvu *rvu); #define NDC_AF_BANK_MASK GENMASK_ULL(7, 0) #define NDC_AF_BANK_LINE_MASK GENMASK_ULL(31, 16) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 6bbda0593fcd..d301a3f0f87a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -701,6 +701,19 @@ void npc_set_mcam_action(struct rvu *rvu, struct npc_mcam *mcam, return rvu_write64(rvu, blkaddr, reg, cfg); } +u32 rvu_get_cpt_chan_mask(struct rvu *rvu) +{ + /* For cn10k the upper two bits of the channel number are + * cpt channel number. with masking out these bits in the + * mcam entry, same entry used for NIX will allow packets + * received from cpt for parsing. + */ + if (!is_rvu_otx2(rvu)) + return NIX_CHAN_CPT_X2P_MASK; + else + return 0xFFFu; +} + void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan, u8 *mac_addr) { @@ -750,7 +763,7 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, eth_broadcast_addr((u8 *)&req.mask.dmac); req.features = BIT_ULL(NPC_DMAC); req.channel = chan; - req.chan_mask = 0xFFFU; + req.chan_mask = rvu_get_cpt_chan_mask(rvu); req.intf = pfvf->nix_rx_intf; req.op = action.op; req.hdr.pcifunc = 0; /* AF is requester */ @@ -845,11 +858,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, * mcam entry, same entry used for NIX will allow packets * received from cpt for parsing. */ - if (!is_rvu_otx2(rvu)) { - req.chan_mask = NIX_CHAN_CPT_X2P_MASK; - } else { - req.chan_mask = 0xFFFU; - } + req.chan_mask = rvu_get_cpt_chan_mask(rvu); if (chan_cnt > 1) { if (!is_power_of_2(chan_cnt)) { @@ -1053,16 +1062,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, ether_addr_copy(req.mask.dmac, mac_addr); req.features = BIT_ULL(NPC_DMAC); - /* For cn10k the upper two bits of the channel number are - * cpt channel number. with masking out these bits in the - * mcam entry, same entry used for NIX will allow packets - * received from cpt for parsing. - */ - if (!is_rvu_otx2(rvu)) - req.chan_mask = NIX_CHAN_CPT_X2P_MASK; - else - req.chan_mask = 0xFFFU; - + req.chan_mask = rvu_get_cpt_chan_mask(rvu); req.channel = chan; req.intf = pfvf->nix_rx_intf; req.entry = index; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 6ae9cdcb608b..34f1e066707b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -1820,7 +1820,7 @@ process_flow: /* ignore chan_mask in case pf func is not AF, revisit later */ if (!is_pffunc_af(req->hdr.pcifunc)) - req->chan_mask = 0xFFF; + req->chan_mask = rvu_get_cpt_chan_mask(rvu); err = npc_check_unsupported_flows(rvu, req->features, req->intf); if (err) { -- cgit v1.2.3 From ab1ecaabe74b7d86c38ab2ab44bd56cdcc33645a Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Tue, 2 Jun 2026 19:46:59 +0800 Subject: rtase: Reset TX subqueue when clearing TX ring rtase_tx_clear() clears the TX ring and resets the ring indexes. However, the TX queue state and BQL accounting are not reset at the same time. This may leave __QUEUE_STATE_STACK_XOFF asserted after rtase_sw_reset(), preventing new TX packets from being scheduled. Reset the TX subqueue when clearing the TX ring so the TX queue state and BQL accounting are restored together. Fixes: 5a2a2f15244c ("rtase: Implement the rtase_down function") Cc: stable@vger.kernel.org Signed-off-by: Justin Lai Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20260602114659.12335-1-justinlai0215@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/rtase/rtase_main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index ef13109c49cf..6ccbefb5acf2 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -239,6 +239,8 @@ static void rtase_tx_clear(struct rtase_private *tp) rtase_tx_clear_range(ring, ring->dirty_idx, RTASE_NUM_DESC); ring->cur_idx = 0; ring->dirty_idx = 0; + + netdev_tx_reset_subqueue(tp->dev, i); } } -- cgit v1.2.3 From aa6ca1c5c338907817374b59f7551fd855a88754 Mon Sep 17 00:00:00 2001 From: Andy Roulin Date: Tue, 2 Jun 2026 11:51:36 -0700 Subject: vxlan: vnifilter: send notification on VNI add When a new VNI is added to a vxlan device with vnifilter enabled, no RTM_NEWTUNNEL notification is sent to userspace. This means 'bridge monitor vni' never shows VNI add events, even though VNI delete events are reported correctly. The bug is in vxlan_vni_add(), where the notification is guarded by 'if (changed)'. The 'changed' flag is set by vxlan_vni_update_group() only when the multicast group or remote IP is modified, but for a new VNI added without a group (e.g. in L3 VxLAN interface scenarios), the function returns early without setting changed=true. Since this is a new VNI, the notification should be sent unconditionally. The notification is not guarded by the return value of vxlan_vni_update_group() because, at this point, the VNI has already been inserted into the hash table and list with no rollback on error. The VNI will be visible in 'bridge vni show' regardless, so userspace should be informed. This is consistent with vxlan_vni_del() which also notifies unconditionally. The 'if (changed)' guard remains correct in vxlan_vni_update(), which handles the case where a VNI already exists and is being re-added -- there, we only want to notify if the group/remote actually changed. Reproducer: # ip link add vxlan100 type vxlan dstport 4789 local 10.0.0.1 \ nolearning external vnifilter # ip link set vxlan100 up # bridge monitor vni & # bridge vni add vni 1000 dev vxlan100 # no notification # bridge vni delete vni 1000 dev vxlan100 # notification received Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device") Reported-by: Chirag Shah Signed-off-by: Andy Roulin Reviewed-by: Petr Machata Link: https://patch.msgid.link/20260602185138.253265-2-aroulin@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_vnifilter.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c index 2042369379ff..f2a202d46892 100644 --- a/drivers/net/vxlan/vxlan_vnifilter.c +++ b/drivers/net/vxlan/vxlan_vnifilter.c @@ -759,8 +759,7 @@ static int vxlan_vni_add(struct vxlan_dev *vxlan, err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed, extack); - if (changed) - vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); + vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); return err; } -- cgit v1.2.3 From 84683b5b60c7274e2c8f7f413d39d78d3db5540f Mon Sep 17 00:00:00 2001 From: Andy Roulin Date: Tue, 2 Jun 2026 11:51:37 -0700 Subject: vxlan: vnifilter: fix spurious notification on VNI update When a VNI is re-added with the same attributes (e.g. same group or no group), vxlan_vni_update() sends a spurious RTM_NEWTUNNEL notification even though nothing changed. The bug is that 'if (changed)' tests whether the pointer is non-NULL, not the bool value it points to. Since every caller passes a valid pointer, the condition is always true and the notification fires unconditionally. Fix by dereferencing the pointer: 'if (*changed)'. Reproducer: # ip link add vxlan100 type vxlan dstport 4789 local 10.0.0.1 \ nolearning external vnifilter # ip link set vxlan100 up # bridge monitor vni & # bridge vni add vni 1000 dev vxlan100 # bridge vni add vni 1000 dev vxlan100 # spurious notification Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device") Signed-off-by: Andy Roulin Reviewed-by: Petr Machata Link: https://patch.msgid.link/20260602185138.253265-3-aroulin@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_vnifilter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c index f2a202d46892..3e76f4e21094 100644 --- a/drivers/net/vxlan/vxlan_vnifilter.c +++ b/drivers/net/vxlan/vxlan_vnifilter.c @@ -661,7 +661,7 @@ static int vxlan_vni_update(struct vxlan_dev *vxlan, if (ret) return ret; - if (changed) + if (*changed) vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); return 0; -- cgit v1.2.3 From 9fc237f8d49f06d05f0f8e80361047b718894e81 Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Wed, 3 Jun 2026 14:18:16 +0800 Subject: rtase: Avoid sleeping in get_stats64() The .ndo_get_stats64 callback must not sleep because it can be called when reading /proc/net/dev. rtase_get_stats64() calls rtase_dump_tally_counter(), which polls the tally counter dump bit with read_poll_timeout(). This may sleep while waiting for the hardware counter dump to complete. Use read_poll_timeout_atomic() instead to avoid sleeping in the get_stats64() path. Fixes: 079600489960 ("rtase: Implement net_device_ops") Cc: stable@vger.kernel.org Signed-off-by: Justin Lai Link: https://patch.msgid.link/20260603061816.31356-1-justinlai0215@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/rtase/rtase_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index 6ccbefb5acf2..55105d34bc79 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -1565,8 +1565,9 @@ static void rtase_dump_tally_counter(const struct rtase_private *tp) rtase_w32(tp, RTASE_DTCCR0, cmd); rtase_w32(tp, RTASE_DTCCR0, cmd | RTASE_COUNTER_DUMP); - err = read_poll_timeout(rtase_r32, val, !(val & RTASE_COUNTER_DUMP), - 10, 250, false, tp, RTASE_DTCCR0); + err = read_poll_timeout_atomic(rtase_r32, val, + !(val & RTASE_COUNTER_DUMP), + 10, 250, false, tp, RTASE_DTCCR0); if (err == -ETIMEDOUT) netdev_err(tp->dev, "error occurred in dump tally counter\n"); -- cgit v1.2.3 From b47ff80f280e18ad2310f44293cc057d9b64ff11 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 3 Jun 2026 12:35:14 +0000 Subject: bonding: annotate data-races arcound churn variables These fields are updated asynchronously by the bonding state machine in ad_churn_machine() while holding bond->mode_lock. bond_info_show_slave() and bond_fill_slave_info() read them without bond->mode_lock being held, we need to add READ_ONCE() and WRITE_ONCE() annotations. Note that AD_CHURN_MONITOR, AD_CHURN, and AD_NO_CHURN are defined exclusively in (kernel private) include/net/bond_3ad.h header. They should be moved to include/uapi/linux/if_bonding.h or userspace tools will have to hardcode their values. Fixes: 4916f2e2f3fc ("bonding: print churn state via netlink") Fixes: 14c9551a32eb ("bonding: Implement port churn-machine (AD standard 43.4.17).") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260603123514.388226-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_3ad.c | 18 ++++++++++-------- drivers/net/bonding/bond_netlink.c | 4 ++-- drivers/net/bonding/bond_procfs.c | 8 ++++---- 3 files changed, 16 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index f0aa7d2f2171..985ef66dc333 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1386,8 +1386,8 @@ static void ad_churn_machine(struct port *port) { if (port->sm_vars & AD_PORT_CHURNED) { port->sm_vars &= ~AD_PORT_CHURNED; - port->sm_churn_actor_state = AD_CHURN_MONITOR; - port->sm_churn_partner_state = AD_CHURN_MONITOR; + WRITE_ONCE(port->sm_churn_actor_state, AD_CHURN_MONITOR); + WRITE_ONCE(port->sm_churn_partner_state, AD_CHURN_MONITOR); port->sm_churn_actor_timer_counter = __ad_timer_to_ticks(AD_ACTOR_CHURN_TIMER, 0); port->sm_churn_partner_timer_counter = @@ -1398,20 +1398,22 @@ static void ad_churn_machine(struct port *port) !(--port->sm_churn_actor_timer_counter) && port->sm_churn_actor_state == AD_CHURN_MONITOR) { if (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION) { - port->sm_churn_actor_state = AD_NO_CHURN; + WRITE_ONCE(port->sm_churn_actor_state, AD_NO_CHURN); } else { - port->churn_actor_count++; - port->sm_churn_actor_state = AD_CHURN; + WRITE_ONCE(port->churn_actor_count, + port->churn_actor_count + 1); + WRITE_ONCE(port->sm_churn_actor_state, AD_CHURN); } } if (port->sm_churn_partner_timer_counter && !(--port->sm_churn_partner_timer_counter) && port->sm_churn_partner_state == AD_CHURN_MONITOR) { if (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) { - port->sm_churn_partner_state = AD_NO_CHURN; + WRITE_ONCE(port->sm_churn_partner_state, AD_NO_CHURN); } else { - port->churn_partner_count++; - port->sm_churn_partner_state = AD_CHURN; + WRITE_ONCE(port->churn_partner_count, + port->churn_partner_count + 1); + WRITE_ONCE(port->sm_churn_partner_state, AD_CHURN); } } } diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index c7d3e0602c83..90365d3f7ebf 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -82,10 +82,10 @@ static int bond_fill_slave_info(struct sk_buff *skb, goto nla_put_failure_rcu; if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE, - ad_port->sm_churn_actor_state)) + READ_ONCE(ad_port->sm_churn_actor_state))) goto nla_put_failure_rcu; if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE, - ad_port->sm_churn_partner_state)) + READ_ONCE(ad_port->sm_churn_partner_state))) goto nla_put_failure_rcu; } rcu_read_unlock(); diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index 3714aab1a3d9..3607b62f9b63 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -221,13 +221,13 @@ static void bond_info_show_slave(struct seq_file *seq, seq_printf(seq, "Aggregator ID: %d\n", agg->aggregator_identifier); seq_printf(seq, "Actor Churn State: %s\n", - bond_3ad_churn_desc(port->sm_churn_actor_state)); + bond_3ad_churn_desc(READ_ONCE(port->sm_churn_actor_state))); seq_printf(seq, "Partner Churn State: %s\n", - bond_3ad_churn_desc(port->sm_churn_partner_state)); + bond_3ad_churn_desc(READ_ONCE(port->sm_churn_partner_state))); seq_printf(seq, "Actor Churned Count: %d\n", - port->churn_actor_count); + READ_ONCE(port->churn_actor_count)); seq_printf(seq, "Partner Churned Count: %d\n", - port->churn_partner_count); + READ_ONCE(port->churn_partner_count)); if (capable(CAP_NET_ADMIN)) { seq_puts(seq, "details actor lacp pdu:\n"); -- cgit v1.2.3 From b6197b386677ae5268d4702e23849d9ad53051ad Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 3 Jun 2026 12:58:45 -0700 Subject: Reapply "bnxt_en: bring back rtnl_lock() in the bnxt_open() path" This reverts commit 850d9248d2eac662f869c766a598c877690c74e5. This reapplies commit 325eb217e41f ("bnxt_en: bring back rtnl_lock() in the bnxt_open() path"). Breno reports a lockdep warning in bnxt. During FW reset the driver may end up calling netif_set_real_num_tx_queues() (if queue count changes), so calls to bnxt_open() still require rtnl_lock. net/sched/sch_generic.c:1416 suspicious rcu_dereference_protected() usage! dev_qdisc_change_real_num_tx+0x54/0xe0 netif_set_real_num_tx_queues+0x4ed/0xa80 __bnxt_open_nic+0x9cb/0x3490 bnxt_open+0x1cb/0x370 bnxt_fw_reset_task+0x80d/0x1e80 process_scheduled_works+0x9c1/0x13b0 The reverted commit was just an optimization / experiment so let's go back to taking the lock. Reported-by: Breno Leitao Link: https://lore.kernel.org/ah726OtFX-Qw3U-R@gmail.com Fixes: 850d9248d2ea ("Revert "bnxt_en: bring back rtnl_lock() in the bnxt_open() path"") Acked-by: Stanislav Fomichev Reviewed-by: Michael Chan Reviewed-by: Breno Leitao Link: https://patch.msgid.link/20260603195845.2574426-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 36 +++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 008c34cff7b4..35e1f8f663c7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -14388,13 +14388,28 @@ static void bnxt_unlock_sp(struct bnxt *bp) netdev_unlock(bp->dev); } +/* Same as bnxt_lock_sp() with additional rtnl_lock */ +static void bnxt_rtnl_lock_sp(struct bnxt *bp) +{ + clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); + rtnl_lock(); + netdev_lock(bp->dev); +} + +static void bnxt_rtnl_unlock_sp(struct bnxt *bp) +{ + set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); + netdev_unlock(bp->dev); + rtnl_unlock(); +} + /* Only called from bnxt_sp_task() */ static void bnxt_reset(struct bnxt *bp, bool silent) { - bnxt_lock_sp(bp); + bnxt_rtnl_lock_sp(bp); if (test_bit(BNXT_STATE_OPEN, &bp->state)) bnxt_reset_task(bp, silent); - bnxt_unlock_sp(bp); + bnxt_rtnl_unlock_sp(bp); } /* Only called from bnxt_sp_task() */ @@ -14402,9 +14417,9 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) { int i; - bnxt_lock_sp(bp); + bnxt_rtnl_lock_sp(bp); if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { - bnxt_unlock_sp(bp); + bnxt_rtnl_unlock_sp(bp); return; } /* Disable and flush TPA before resetting the RX ring */ @@ -14443,7 +14458,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) } if (bp->flags & BNXT_FLAG_TPA) bnxt_set_tpa(bp, true); - bnxt_unlock_sp(bp); + bnxt_rtnl_unlock_sp(bp); } static void bnxt_fw_fatal_close(struct bnxt *bp) @@ -15358,15 +15373,17 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING; fallthrough; case BNXT_FW_RESET_STATE_OPENING: - while (!netdev_trylock(bp->dev)) { + while (!rtnl_trylock()) { bnxt_queue_fw_reset_work(bp, HZ / 10); return; } + netdev_lock(bp->dev); rc = bnxt_open(bp->dev); if (rc) { netdev_err(bp->dev, "bnxt_open() failed during FW reset\n"); bnxt_fw_reset_abort(bp, rc); netdev_unlock(bp->dev); + rtnl_unlock(); goto ulp_start; } @@ -15386,6 +15403,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bnxt_dl_health_fw_status_update(bp, true); } netdev_unlock(bp->dev); + rtnl_unlock(); bnxt_ulp_start(bp); bnxt_reenable_sriov(bp); netdev_lock(bp->dev); @@ -16379,7 +16397,7 @@ err_reset: rc); napi_enable_locked(&bnapi->napi); bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); - bnxt_reset_task(bp, true); + netif_close(dev); return rc; } @@ -17230,6 +17248,7 @@ static int bnxt_resume(struct device *device) struct bnxt *bp = netdev_priv(dev); int rc = 0; + rtnl_lock(); netdev_lock(dev); rc = pci_enable_device(bp->pdev); if (rc) { @@ -17274,6 +17293,7 @@ static int bnxt_resume(struct device *device) resume_exit: netdev_unlock(bp->dev); + rtnl_unlock(); if (!rc) { bnxt_ulp_start(bp); bnxt_reenable_sriov(bp); @@ -17445,6 +17465,7 @@ static void bnxt_io_resume(struct pci_dev *pdev) int err; netdev_info(bp->dev, "PCI Slot Resume\n"); + rtnl_lock(); netdev_lock(netdev); err = bnxt_hwrm_func_qcaps(bp); @@ -17462,6 +17483,7 @@ static void bnxt_io_resume(struct pci_dev *pdev) netif_device_attach(netdev); netdev_unlock(netdev); + rtnl_unlock(); if (!err) { bnxt_ulp_start(bp); bnxt_reenable_sriov(bp); -- cgit v1.2.3