diff options
189 files changed, 3479 insertions, 1154 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 37b105a443dd..0650fa014f24 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7077,6 +7077,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/debugobjec F: include/linux/debugobjects.h F: lib/debugobjects.c +DEC LANCE NETWORK DRIVER +M: "Maciej W. Rozycki" <macro@orcam.me.uk> +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/amd/declance.c + DECSTATION PLATFORM SUPPORT M: "Maciej W. Rozycki" <macro@orcam.me.uk> L: linux-mips@vger.kernel.org diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 2f59c0d6f9ec..a3643e67b33f 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -289,6 +289,9 @@ static inline void btintel_pcie_dump_debug_registers(struct hci_dev *hdev) skb_put_data(skb, buf, strlen(buf)); data->boot_stage_cache = reg; + if (reg & BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_WARNING) + bt_dev_warn(hdev, "Controller device warning (boot_stage: 0x%8.8x)", reg); + reg = btintel_pcie_rd_reg32(data, BTINTEL_PCIE_CSR_IPC_STATUS_REG); snprintf(buf, sizeof(buf), "ipc status: 0x%8.8x", reg); skb_put_data(skb, buf, strlen(buf)); @@ -880,8 +883,11 @@ static inline bool btintel_pcie_in_lockdown(struct btintel_pcie_data *data) static inline bool btintel_pcie_in_error(struct btintel_pcie_data *data) { - return (data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_ERR) || - (data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_ABORT_HANDLER); + if (data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_WARNING) + bt_dev_warn(data->hdev, "Controller device warning (boot_stage: 0x%8.8x)", + data->boot_stage_cache); + + return data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_ABORT_HANDLER; } static void btintel_pcie_msix_gp1_handler(struct btintel_pcie_data *data) @@ -914,7 +920,8 @@ static void btintel_pcie_msix_gp0_handler(struct btintel_pcie_data *data) data->img_resp_cache = reg; if (btintel_pcie_in_error(data)) { - bt_dev_err(data->hdev, "Controller in error state"); + bt_dev_err(data->hdev, "Controller in error state (boot_stage: 0x%8.8x)", + data->boot_stage_cache); btintel_pcie_dump_debug_registers(data->hdev); return; } diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h index 3c7bb708362d..f922abd1e7d8 100644 --- a/drivers/bluetooth/btintel_pcie.h +++ b/drivers/bluetooth/btintel_pcie.h @@ -48,7 +48,7 @@ #define BTINTEL_PCIE_CSR_BOOT_STAGE_OPFW (BIT(2)) #define BTINTEL_PCIE_CSR_BOOT_STAGE_ROM_LOCKDOWN (BIT(10)) #define BTINTEL_PCIE_CSR_BOOT_STAGE_IML_LOCKDOWN (BIT(11)) -#define BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_ERR (BIT(12)) +#define BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_WARNING (BIT(12)) #define BTINTEL_PCIE_CSR_BOOT_STAGE_ABORT_HANDLER (BIT(13)) #define BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_HALTED (BIT(14)) #define BTINTEL_PCIE_CSR_BOOT_STAGE_MAC_ACCESS_ON (BIT(16)) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index 6fb6ca274808..f70c1b0f8990 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -695,8 +695,13 @@ static int btmtk_usb_hci_wmt_sync(struct hci_dev *hdev, if (data->evt_skb == NULL) goto err_free_wc; - /* Parse and handle the return WMT event */ - wmt_evt = (struct btmtk_hci_wmt_evt *)data->evt_skb->data; + wmt_evt = skb_pull_data(data->evt_skb, sizeof(*wmt_evt)); + if (!wmt_evt) { + bt_dev_err(hdev, "WMT event too short (%u bytes)", + data->evt_skb->len); + err = -EINVAL; + goto err_free_skb; + } if (wmt_evt->whdr.op != hdr->op) { bt_dev_err(hdev, "Wrong op received %d expected %d", wmt_evt->whdr.op, hdr->op); @@ -712,6 +717,12 @@ static int btmtk_usb_hci_wmt_sync(struct hci_dev *hdev, status = BTMTK_WMT_PATCH_DONE; break; case BTMTK_WMT_FUNC_CTRL: + if (!skb_pull_data(data->evt_skb, + sizeof(wmt_evt_funcc->status))) { + err = -EINVAL; + goto err_free_skb; + } + wmt_evt_funcc = (struct btmtk_hci_wmt_evt_funcc *)wmt_evt; if (be16_to_cpu(wmt_evt_funcc->status) == 0x404) status = BTMTK_WMT_ON_DONE; diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c index fa679ad0acdf..8201fa7f61e8 100644 --- a/drivers/bluetooth/hci_ath.c +++ b/drivers/bluetooth/hci_ath.c @@ -191,6 +191,9 @@ static int ath_recv(struct hci_uart *hu, const void *data, int count) { struct ath_struct *ath = hu->priv; + if (!ath) + return -ENODEV; + ath->rx_skb = h4_recv_buf(hu, ath->rx_skb, data, count, ath_recv_pkts, ARRAY_SIZE(ath_recv_pkts)); if (IS_ERR(ath->rx_skb)) { diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index b386f91d8b46..db56eead27ce 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -585,6 +585,9 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; + if (!bcsp) + return -ENODEV; + BT_DBG("hu %p count %d rx_state %d rx_count %ld", hu, count, bcsp->rx_state, bcsp->rx_count); diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index a889a66a326f..767372707498 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -109,6 +109,9 @@ static int h4_recv(struct hci_uart *hu, const void *data, int count) { struct h4_struct *h4 = hu->priv; + if (!h4) + return -ENODEV; + h4->rx_skb = h4_recv_buf(hu, h4->rx_skb, data, count, h4_recv_pkts, ARRAY_SIZE(h4_recv_pkts)); if (IS_ERR(h4->rx_skb)) { diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index cfdf75dc2847..d35383718212 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c @@ -587,6 +587,9 @@ static int h5_recv(struct hci_uart *hu, const void *data, int count) struct h5 *h5 = hu->priv; const unsigned char *ptr = data; + if (!h5) + return -ENODEV; + BT_DBG("%s pending %zu count %d", hu->hdev->name, h5->rx_pending, count); diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c index 76d61af8a275..140ab55c9fc5 100644 --- a/drivers/bluetooth/virtio_bt.c +++ b/drivers/bluetooth/virtio_bt.c @@ -12,6 +12,7 @@ #include <net/bluetooth/hci_core.h> #define VERSION "0.1" +#define VIRTBT_RX_BUF_SIZE 1000 enum { VIRTBT_VQ_TX, @@ -33,11 +34,11 @@ static int virtbt_add_inbuf(struct virtio_bluetooth *vbt) struct sk_buff *skb; int err; - skb = alloc_skb(1000, GFP_KERNEL); + skb = alloc_skb(VIRTBT_RX_BUF_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; - sg_init_one(sg, skb->data, 1000); + sg_init_one(sg, skb->data, VIRTBT_RX_BUF_SIZE); err = virtqueue_add_inbuf(vq, sg, 1, skb, GFP_KERNEL); if (err < 0) { @@ -197,6 +198,7 @@ static int virtbt_shutdown_generic(struct hci_dev *hdev) static void virtbt_rx_handle(struct virtio_bluetooth *vbt, struct sk_buff *skb) { + size_t min_hdr; __u8 pkt_type; pkt_type = *((__u8 *) skb->data); @@ -204,16 +206,32 @@ static void virtbt_rx_handle(struct virtio_bluetooth *vbt, struct sk_buff *skb) switch (pkt_type) { case HCI_EVENT_PKT: + min_hdr = sizeof(struct hci_event_hdr); + break; case HCI_ACLDATA_PKT: + min_hdr = sizeof(struct hci_acl_hdr); + break; case HCI_SCODATA_PKT: + min_hdr = sizeof(struct hci_sco_hdr); + break; case HCI_ISODATA_PKT: - hci_skb_pkt_type(skb) = pkt_type; - hci_recv_frame(vbt->hdev, skb); + min_hdr = sizeof(struct hci_iso_hdr); break; default: kfree_skb(skb); - break; + return; + } + + if (skb->len < min_hdr) { + bt_dev_err_ratelimited(vbt->hdev, + "rx pkt_type 0x%02x payload %u < hdr %zu\n", + pkt_type, skb->len, min_hdr); + kfree_skb(skb); + return; } + + hci_skb_pkt_type(skb) = pkt_type; + hci_recv_frame(vbt->hdev, skb); } static void virtbt_rx_work(struct work_struct *work) @@ -227,8 +245,15 @@ static void virtbt_rx_work(struct work_struct *work) if (!skb) return; - skb_put(skb, len); - virtbt_rx_handle(vbt, skb); + if (!len || len > VIRTBT_RX_BUF_SIZE) { + bt_dev_err_ratelimited(vbt->hdev, + "rx reply len %u outside [1, %u]\n", + len, VIRTBT_RX_BUF_SIZE); + kfree_skb(skb); + } else { + skb_put(skb, len); + virtbt_rx_handle(vbt, skb); + } if (virtbt_add_inbuf(vbt) < 0) return; diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index b9423389c2ef..44d670904ad8 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -25,6 +25,9 @@ #include "mt7530.h" +#define MT7530_STATS_POLL_INTERVAL (1 * HZ) +#define MT7530_STATS_RATE_LIMIT (HZ / 10) + static struct mt753x_pcs *pcs_to_mt753x_pcs(struct phylink_pcs *pcs) { return container_of(pcs, struct mt753x_pcs, pcs); @@ -906,10 +909,9 @@ static void mt7530_get_rmon_stats(struct dsa_switch *ds, int port, *ranges = mt7530_rmon_ranges; } -static void mt7530_get_stats64(struct dsa_switch *ds, int port, - struct rtnl_link_stats64 *storage) +static void mt7530_read_port_stats64(struct mt7530_priv *priv, int port, + struct rtnl_link_stats64 *storage) { - struct mt7530_priv *priv = ds->priv; uint64_t data; /* MIB counter doesn't provide a FramesTransmittedOK but instead @@ -951,6 +953,54 @@ static void mt7530_get_stats64(struct dsa_switch *ds, int port, &storage->rx_crc_errors); } +static void mt7530_stats_refresh(struct mt7530_priv *priv) +{ + struct rtnl_link_stats64 stats = {}; + struct dsa_port *dp; + int port; + + dsa_switch_for_each_user_port(dp, priv->ds) { + port = dp->index; + + mt7530_read_port_stats64(priv, port, &stats); + + spin_lock_bh(&priv->stats_lock); + priv->ports[port].stats = stats; + priv->stats_last = jiffies; + spin_unlock_bh(&priv->stats_lock); + } +} + +static void mt7530_stats_poll(struct work_struct *work) +{ + struct mt7530_priv *priv = container_of(work, struct mt7530_priv, + stats_work.work); + + mt7530_stats_refresh(priv); + schedule_delayed_work(&priv->stats_work, + MT7530_STATS_POLL_INTERVAL); +} + +static void mt7530_get_stats64(struct dsa_switch *ds, int port, + struct rtnl_link_stats64 *storage) +{ + struct mt7530_priv *priv = ds->priv; + bool refresh; + + if (priv->bus) { + spin_lock_bh(&priv->stats_lock); + *storage = priv->ports[port].stats; + refresh = time_after(jiffies, priv->stats_last + + MT7530_STATS_RATE_LIMIT); + spin_unlock_bh(&priv->stats_lock); + if (refresh) + mod_delayed_work(system_percpu_wq, + &priv->stats_work, 0); + } else { + mt7530_read_port_stats64(priv, port, storage); + } +} + static void mt7530_get_eth_ctrl_stats(struct dsa_switch *ds, int port, struct ethtool_eth_ctrl_stats *ctrl_stats) { @@ -3137,9 +3187,24 @@ mt753x_setup(struct dsa_switch *ds) if (ret && priv->irq_domain) mt7530_free_mdio_irq(priv); + if (!ret && priv->bus) { + mt7530_stats_refresh(priv); + schedule_delayed_work(&priv->stats_work, + MT7530_STATS_POLL_INTERVAL); + } + return ret; } +static void +mt753x_teardown(struct dsa_switch *ds) +{ + struct mt7530_priv *priv = ds->priv; + + if (priv->bus) + cancel_delayed_work_sync(&priv->stats_work); +} + static int mt753x_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e) { @@ -3257,6 +3322,7 @@ static int mt7988_setup(struct dsa_switch *ds) static const struct dsa_switch_ops mt7530_switch_ops = { .get_tag_protocol = mtk_get_tag_protocol, .setup = mt753x_setup, + .teardown = mt753x_teardown, .preferred_default_local_cpu_port = mt753x_preferred_default_local_cpu_port, .get_strings = mt7530_get_strings, .get_ethtool_stats = mt7530_get_ethtool_stats, @@ -3395,6 +3461,9 @@ mt7530_probe_common(struct mt7530_priv *priv) priv->ds->ops = &mt7530_switch_ops; priv->ds->phylink_mac_ops = &mt753x_phylink_mac_ops; mutex_init(&priv->reg_mutex); + spin_lock_init(&priv->stats_lock); + INIT_DELAYED_WORK(&priv->stats_work, mt7530_stats_poll); + dev_set_drvdata(dev, priv); return 0; diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 3e0090bed298..dd33b0df3419 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -796,6 +796,7 @@ struct mt7530_fdb { * @pvid: The VLAN specified is to be considered a PVID at ingress. Any * untagged frames will be assigned to the related VLAN. * @sgmii_pcs: Pointer to PCS instance for SerDes ports + * @stats: Cached port statistics for MDIO-connected switches */ struct mt7530_port { bool enable; @@ -803,6 +804,7 @@ struct mt7530_port { u32 pm; u16 pvid; struct phylink_pcs *sgmii_pcs; + struct rtnl_link_stats64 stats; }; /* Port 5 mode definitions of the MT7530 switch */ @@ -875,6 +877,9 @@ struct mt753x_info { * @create_sgmii: Pointer to function creating SGMII PCS instance(s) * @active_cpu_ports: Holding the active CPU ports * @mdiodev: The pointer to the MDIO device structure + * @stats_lock: Protects cached per-port stats from concurrent access + * @stats_work: Delayed work for polling MIB counters on MDIO switches + * @stats_last: Jiffies timestamp of last MIB counter poll */ struct mt7530_priv { struct device *dev; @@ -900,6 +905,9 @@ struct mt7530_priv { int (*create_sgmii)(struct mt7530_priv *priv); u8 active_cpu_ports; struct mdio_device *mdiodev; + spinlock_t stats_lock; /* protects cached stats counters */ + struct delayed_work stats_work; + unsigned long stats_last; }; struct mt7530_hw_vlan_entry { diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index f8b3d53bccad..d0c0c0ec8a80 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2120,14 +2120,12 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, return NETDEV_TX_OK; error_unmap: - while (!list_empty(&tx_list)) { - e = list_first_entry(&tx_list, struct airoha_queue_entry, - list); + list_for_each_entry(e, &tx_list, list) { dma_unmap_single(dev->dev.parent, e->dma_addr, e->dma_len, DMA_TO_DEVICE); e->dma_addr = 0; - list_move_tail(&e->list, &q->tx_list); } + list_splice(&tx_list, &q->tx_list); spin_unlock_bh(&q->lock); error: diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 60b7e53206d1..3d3b09010d48 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -135,11 +135,11 @@ */ #define XGBE_TSTAMP_SSINC 20 #define XGBE_TSTAMP_SNSINC 0 -#define XGBE_PTP_ACT_CLK_FREQ 500000000 +#define XGBE_PTP_ACT_CLK_FREQ (NSEC_PER_SEC / XGBE_TSTAMP_SSINC) #define XGBE_V2_TSTAMP_SSINC 0xA #define XGBE_V2_TSTAMP_SNSINC 0 -#define XGBE_V2_PTP_ACT_CLK_FREQ 1000000000 +#define XGBE_V2_PTP_ACT_CLK_FREQ (NSEC_PER_SEC / XGBE_V2_TSTAMP_SSINC) /* Define maximum supported values */ #define XGBE_MAX_PPS_OUT 4 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8c55874f44ca..008c34cff7b4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3825,7 +3825,10 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp) if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { if (!bp->max_tpa_v2) return 0; - bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5); + bp->max_tpa = min_t(u16, bp->max_tpa_v2, MAX_TPA_P5); + /* Older P5 FW sets max_tpa_v2 low by mistake except NPAR */ + if (bp->max_tpa <= 32 && BNXT_CHIP_P5(bp) && !BNXT_NPAR(bp)) + bp->max_tpa = MAX_TPA_P5; } for (i = 0; i < bp->rx_nr_rings; i++) { @@ -17360,9 +17363,14 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) netdev_info(bp->dev, "PCI Slot Reset\n"); - if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && - test_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state)) - msleep(900); + if (test_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state)) { + /* After DPC, the chip should return CRS when the vendor ID + * config register is read until it is ready. On all chips, + * this is not happening reliably so add a 5-second delay as a + * workaround. + */ + msleep(5000); + } netdev_lock(netdev); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 53f336db4fcc..5d41dc1bc782 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -419,31 +419,13 @@ void bnxt_ptp_reapply_pps(struct bnxt *bp) } } -static int bnxt_get_target_cycles(struct bnxt_ptp_cfg *ptp, u64 target_ns, - u64 *cycles_delta) -{ - u64 cycles_now; - u64 nsec_now, nsec_delta; - int rc; - - rc = bnxt_refclk_read(ptp->bp, NULL, &cycles_now); - if (rc) - return rc; - - nsec_now = bnxt_timecounter_cyc2time(ptp, cycles_now); - - nsec_delta = target_ns - nsec_now; - *cycles_delta = div64_u64(nsec_delta << ptp->cc.shift, ptp->cc.mult); - return 0; -} - static int bnxt_ptp_perout_cfg(struct bnxt_ptp_cfg *ptp, struct ptp_clock_request *rq) { struct hwrm_func_ptp_cfg_input *req; struct bnxt *bp = ptp->bp; struct timespec64 ts; - u64 target_ns, delta; + u64 target_ns; u16 enables; int rc; @@ -451,10 +433,6 @@ static int bnxt_ptp_perout_cfg(struct bnxt_ptp_cfg *ptp, ts.tv_nsec = rq->perout.start.nsec; target_ns = timespec64_to_ns(&ts); - rc = bnxt_get_target_cycles(ptp, target_ns, &delta); - if (rc) - return rc; - rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_CFG); if (rc) return rc; @@ -468,7 +446,10 @@ static int bnxt_ptp_perout_cfg(struct bnxt_ptp_cfg *ptp, req->ptp_freq_adj_dll_phase = 0; req->ptp_freq_adj_ext_period = cpu_to_le32(NSEC_PER_SEC); req->ptp_freq_adj_ext_up = 0; - req->ptp_freq_adj_ext_phase_lower = cpu_to_le32(delta); + req->ptp_freq_adj_ext_phase_lower = + cpu_to_le32(lower_32_bits(target_ns)); + req->ptp_freq_adj_ext_phase_upper = + cpu_to_le32(upper_32_bits(target_ns)); return hwrm_req_send(bp, req); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 052bf69cfa4c..5c751933da6a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -175,8 +175,14 @@ int bnxt_register_dev(struct bnxt_en_dev *edev, ulp->handle = handle; rcu_assign_pointer(ulp->ulp_ops, ulp_ops); - if (test_bit(BNXT_STATE_OPEN, &bp->state)) - bnxt_hwrm_vnic_cfg(bp, &bp->vnic_info[BNXT_VNIC_DEFAULT]); + if (test_bit(BNXT_STATE_OPEN, &bp->state)) { + rc = bnxt_hwrm_vnic_cfg(bp, &bp->vnic_info[BNXT_VNIC_DEFAULT]); + if (rc) { + netdev_err(dev, "Failed to configure dual VNIC mode\n"); + RCU_INIT_POINTER(ulp->ulp_ops, NULL); + goto exit; + } + } edev->ulp_tbl->msix_requested = bnxt_get_ulp_msix_num(bp); diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 4824232f4890..065cbbf52686 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1491,6 +1491,11 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) gpage = gmac_get_queue_page(geth, port, mapping + PAGE_SIZE); if (!gpage) { dev_err(geth->dev, "could not find mapping\n"); + if (skb) { + napi_free_frags(&port->napi); + port->stats.rx_dropped++; + skb = NULL; + } continue; } page = gpage->page; diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index e663bb5e614e..e691144e8756 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -330,6 +330,7 @@ struct enetc_si { struct workqueue_struct *workqueue; struct work_struct rx_mode_task; struct dentry *debugfs_root; + struct enetc_msg_swbd msg; /* Only valid for VSI */ }; #define ENETC_SI_ALIGN 32 diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 6c4b374bcb0e..df8e95cc47d0 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -17,11 +17,36 @@ static void enetc_msg_vsi_write_msg(struct enetc_hw *hw, enetc_wr(hw, ENETC_VSIMSGSNDAR0, val); } +static void enetc_msg_dma_free(struct device *dev, struct enetc_msg_swbd *msg) +{ + if (msg->vaddr) { + dma_free_coherent(dev, msg->size, msg->vaddr, msg->dma); + msg->vaddr = NULL; + } +} + static int enetc_msg_vsi_send(struct enetc_si *si, struct enetc_msg_swbd *msg) { + struct device *dev = &si->pdev->dev; int timeout = 100; u32 vsimsgsr; + /* The VSI mailbox may be busy if last message was not yet processed + * by PSI. So need to check the mailbox status before sending. + */ + vsimsgsr = enetc_rd(&si->hw, ENETC_VSIMSGSR); + if (vsimsgsr & ENETC_VSIMSGSR_MB) { + /* It is safe to free the DMA buffer here, the caller does + * not access the DMA buffer if enetc_msg_vsi_send() fails. + */ + enetc_msg_dma_free(dev, msg); + dev_err(dev, "VSI mailbox is busy\n"); + return -EIO; + } + + /* Free the DMA buffer of the last message */ + enetc_msg_dma_free(dev, &si->msg); + si->msg = *msg; enetc_msg_vsi_write_msg(&si->hw, msg); do { @@ -32,12 +57,15 @@ static int enetc_msg_vsi_send(struct enetc_si *si, struct enetc_msg_swbd *msg) usleep_range(1000, 2000); } while (--timeout); - if (!timeout) + if (!timeout) { + dev_err(dev, "VSI mailbox timeout\n"); + return -ETIMEDOUT; + } /* check for message delivery error */ if (vsimsgsr & ENETC_VSIMSGSR_MS) { - dev_err(&si->pdev->dev, "VSI command execute error: %d\n", + dev_err(dev, "VSI command execute error: %d\n", ENETC_SIMSGSR_GET_MC(vsimsgsr)); return -EIO; } @@ -50,7 +78,6 @@ static int enetc_msg_vsi_set_primary_mac_addr(struct enetc_ndev_priv *priv, { struct enetc_msg_cmd_set_primary_mac *cmd; struct enetc_msg_swbd msg; - int err; msg.size = ALIGN(sizeof(struct enetc_msg_cmd_set_primary_mac), 64); msg.vaddr = dma_alloc_coherent(priv->dev, msg.size, &msg.dma, @@ -67,11 +94,7 @@ static int enetc_msg_vsi_set_primary_mac_addr(struct enetc_ndev_priv *priv, memcpy(&cmd->mac, saddr, sizeof(struct sockaddr)); /* send the command and wait */ - err = enetc_msg_vsi_send(priv->si, &msg); - - dma_free_coherent(priv->dev, msg.size, msg.vaddr, msg.dma); - - return err; + return enetc_msg_vsi_send(priv->si, &msg); } static int enetc_vf_set_mac_addr(struct net_device *ndev, void *addr) @@ -259,6 +282,7 @@ static void enetc_vf_remove(struct pci_dev *pdev) { struct enetc_si *si = pci_get_drvdata(pdev); struct enetc_ndev_priv *priv; + struct enetc_msg_swbd msg; priv = netdev_priv(si->ndev); unregister_netdev(si->ndev); @@ -270,7 +294,9 @@ static void enetc_vf_remove(struct pci_dev *pdev) free_netdev(si->ndev); + msg = si->msg; enetc_pci_remove(pdev); + enetc_msg_dma_free(&pdev->dev, &msg); } static const struct pci_device_id enetc_vf_id_table[] = { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c index 3debf2fae1a4..6f13296303cb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c @@ -249,34 +249,21 @@ DEFINE_SHOW_ATTRIBUTE(npc_defrag); int npc_cn20k_debugfs_init(struct rvu *rvu) { struct npc_priv_t *npc_priv = npc_priv_get(); - struct dentry *npc_dentry; - npc_dentry = debugfs_create_file("mcam_layout", 0444, rvu->rvu_dbg.npc, - npc_priv, &npc_mcam_layout_fops); + debugfs_create_file("mcam_layout", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_mcam_layout_fops); - if (!npc_dentry) - return -EFAULT; + debugfs_create_file("mcam_default", 0444, rvu->rvu_dbg.npc, + rvu, &npc_mcam_default_fops); - npc_dentry = debugfs_create_file("mcam_default", 0444, rvu->rvu_dbg.npc, - rvu, &npc_mcam_default_fops); + debugfs_create_file("vidx2idx", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_vidx2idx_map_fops); - if (!npc_dentry) - return -EFAULT; + debugfs_create_file("idx2vidx", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_idx2vidx_map_fops); - npc_dentry = debugfs_create_file("vidx2idx", 0444, rvu->rvu_dbg.npc, - npc_priv, &npc_vidx2idx_map_fops); - if (!npc_dentry) - return -EFAULT; - - npc_dentry = debugfs_create_file("idx2vidx", 0444, rvu->rvu_dbg.npc, - npc_priv, &npc_idx2vidx_map_fops); - if (!npc_dentry) - return -EFAULT; - - npc_dentry = debugfs_create_file("defrag", 0444, rvu->rvu_dbg.npc, - npc_priv, &npc_defrag_fops); - if (!npc_dentry) - return -EFAULT; + debugfs_create_file("defrag", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_defrag_fops); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index 7291fdb89b03..6b3f453fd500 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -798,7 +798,7 @@ program_mkex_extr: iounmap(mkex_prfl_addr); } -void +int npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, int index, bool enable) { @@ -808,7 +808,12 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, u64 cfg, hw_prio; u8 kw_type; - npc_mcam_idx_2_key_type(rvu, index, &kw_type); + if (index < 0 || index >= mcam->total_entries) + return -EINVAL; + + if (npc_mcam_idx_2_key_type(rvu, index, &kw_type)) + return -EINVAL; + if (kw_type == NPC_MCAM_KEY_X2) { cfg = rvu_read64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, @@ -819,7 +824,7 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, rvu_write64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), cfg); - return; + return 0; } /* For NPC_CN20K_MCAM_KEY_X4 keys, both the banks @@ -836,10 +841,12 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), cfg); } + + return 0; } -void -npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int bank, int index) +static void +npc_clear_x2_entry(struct rvu *rvu, int blkaddr, int bank, int index) { rvu_write64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(index, bank, 1), @@ -873,6 +880,33 @@ npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int bank, int index) NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(index, bank), 0); } +int +npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int mcam_idx) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + int bank = npc_get_bank(mcam, mcam_idx); + u8 kw_type; + int index; + + if (npc_mcam_idx_2_key_type(rvu, mcam_idx, &kw_type)) + return -EINVAL; + + index = mcam_idx & (mcam->banksize - 1); + + if (kw_type == NPC_MCAM_KEY_X2) { + npc_clear_x2_entry(rvu, blkaddr, bank, index); + return 0; + } + + /* For NPC_MCAM_KEY_X4 keys, both the banks + * need to be programmed with the same value. + */ + for (bank = 0; bank < mcam->banks_per_entry; bank++) + npc_clear_x2_entry(rvu, blkaddr, bank, index); + + return 0; +} + static void npc_cn20k_get_keyword(struct cn20k_mcam_entry *entry, int idx, u64 *cam0, u64 *cam1) { @@ -1014,48 +1048,27 @@ static void npc_cn20k_config_kw_x4(struct rvu *rvu, struct npc_mcam *mcam, kw, req_kw_type); } -static void -npc_cn20k_set_mcam_bank_cfg(struct rvu *rvu, int blkaddr, int mcam_idx, - int bank, u8 kw_type, bool enable, u8 hw_prio) -{ - struct npc_mcam *mcam = &rvu->hw->mcam; - u64 bank_cfg; - - bank_cfg = (u64)hw_prio << 24; - if (enable) - bank_cfg |= 0x1; - - if (kw_type == NPC_MCAM_KEY_X2) { - rvu_write64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), - bank_cfg); - return; - } - - /* For NPC_MCAM_KEY_X4 keys, both the banks - * need to be programmed with the same value. - */ - for (bank = 0; bank < mcam->banks_per_entry; bank++) { - rvu_write64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), - bank_cfg); - } -} - -void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, - u8 intf, struct cn20k_mcam_entry *entry, - bool enable, u8 hw_prio, u8 req_kw_type) +int npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, + u8 intf, struct cn20k_mcam_entry *entry, + bool enable, u8 hw_prio, u8 req_kw_type) { struct npc_mcam *mcam = &rvu->hw->mcam; int mcam_idx = index % mcam->banksize; int bank = index / mcam->banksize; + u64 bank_cfg = (u64)hw_prio << 24; int kw = 0; u8 kw_type; + if (index < 0 || index >= mcam->total_entries) + return -EINVAL; + + if (npc_mcam_idx_2_key_type(rvu, index, &kw_type)) + return -EINVAL; + /* Disable before mcam entry update */ - npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, false); + if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, false)) + return -EINVAL; - npc_mcam_idx_2_key_type(rvu, index, &kw_type); /* CAM1 takes the comparison value and * CAM0 specifies match for a bit in key being '0' or '1' or 'dontcare'. * CAM1<n> = 0 & CAM0<n> = 1 => match if key<n> = 0 @@ -1064,7 +1077,7 @@ void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, */ if (kw_type == NPC_MCAM_KEY_X2) { /* Clear mcam entry to avoid writes being suppressed by NPC */ - npc_cn20k_clear_mcam_entry(rvu, blkaddr, bank, mcam_idx); + npc_clear_x2_entry(rvu, blkaddr, bank, mcam_idx); npc_cn20k_config_kw_x2(rvu, mcam, blkaddr, mcam_idx, intf, entry, bank, kw_type, kw, req_kw_type); @@ -1085,44 +1098,55 @@ void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, bank, 1), entry->vtag_action); - goto set_cfg; - } - - /* Clear mcam entry to avoid writes being suppressed by NPC */ - npc_cn20k_clear_mcam_entry(rvu, blkaddr, 0, mcam_idx); - npc_cn20k_clear_mcam_entry(rvu, blkaddr, 1, mcam_idx); - npc_cn20k_config_kw_x4(rvu, mcam, blkaddr, - mcam_idx, intf, entry, - kw_type, req_kw_type); - for (bank = 0; bank < mcam->banks_per_entry; bank++) { - /* Set 'action' */ + /* Set HW priority */ rvu_write64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, - bank, 0), - entry->action); + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), + bank_cfg); - /* Set TAG 'action' */ - rvu_write64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, - bank, 1), - entry->vtag_action); + } else { + /* Clear mcam entry to avoid writes being suppressed by NPC */ + npc_clear_x2_entry(rvu, blkaddr, 0, mcam_idx); + npc_clear_x2_entry(rvu, blkaddr, 1, mcam_idx); - /* Set 'action2' for inline receive */ - rvu_write64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, - bank, 2), - entry->action2); + npc_cn20k_config_kw_x4(rvu, mcam, blkaddr, + mcam_idx, intf, entry, + kw_type, req_kw_type); + for (bank = 0; bank < mcam->banks_per_entry; bank++) { + /* Set 'action' */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, + bank, 0), + entry->action); + + /* Set TAG 'action' */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, + bank, 1), + entry->vtag_action); + + /* Set 'action2' for inline receive */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, + bank, 2), + entry->action2); + + /* Set HW priority */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), + bank_cfg); + } } -set_cfg: /* TODO: */ /* PF installing VF rule */ - npc_cn20k_set_mcam_bank_cfg(rvu, blkaddr, mcam_idx, bank, - kw_type, enable, hw_prio); + if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, enable)) + return -EINVAL; + + return 0; } -void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest) +int npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest) { struct npc_mcam *mcam = &rvu->hw->mcam; u64 cfg, sreg, dreg, soff, doff; @@ -1130,12 +1154,20 @@ void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest) int bank, i, sb, db; int dbank, sbank; + if (src >= mcam->total_entries || dest >= mcam->total_entries) + return -EINVAL; + dbank = npc_get_bank(mcam, dest); sbank = npc_get_bank(mcam, src); - npc_mcam_idx_2_key_type(rvu, src, &src_kwtype); - npc_mcam_idx_2_key_type(rvu, dest, &dest_kwtype); + + if (npc_mcam_idx_2_key_type(rvu, src, &src_kwtype)) + return -EINVAL; + + if (npc_mcam_idx_2_key_type(rvu, dest, &dest_kwtype)) + return -EINVAL; + if (src_kwtype != dest_kwtype) - return; + return -EINVAL; src &= (mcam->banksize - 1); dest &= (mcam->banksize - 1); @@ -1170,6 +1202,8 @@ void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest) if (src_kwtype == NPC_MCAM_KEY_X2) break; } + + return 0; } static void npc_cn20k_fill_entryword(struct cn20k_mcam_entry *entry, int idx, @@ -1179,21 +1213,37 @@ static void npc_cn20k_fill_entryword(struct cn20k_mcam_entry *entry, int idx, entry->kw_mask[idx] = cam1 ^ cam0; } -void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, - struct cn20k_mcam_entry *entry, - u8 *intf, u8 *ena, u8 *hw_prio) +int npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, + struct cn20k_mcam_entry *entry, + u8 *intf, u8 *ena, u8 *hw_prio) { struct npc_mcam *mcam = &rvu->hw->mcam; u64 cam0, cam1, bank_cfg, cfg; int kw = 0, bank; u8 kw_type; - npc_mcam_idx_2_key_type(rvu, index, &kw_type); + if (index >= mcam->total_entries) + return -EINVAL; + + if (npc_mcam_idx_2_key_type(rvu, index, &kw_type)) + return -EINVAL; bank = npc_get_bank(mcam, index); index &= (mcam->banksize - 1); cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 0)); + entry->action = cfg; + + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 1)); + entry->vtag_action = cfg; + + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 2)); + entry->action2 = cfg; + + cfg = rvu_read64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(index, bank, 1)) & 3; *intf = cfg; @@ -1242,7 +1292,7 @@ void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, bank, 0)); npc_cn20k_fill_entryword(entry, kw + 3, cam0, cam1); - goto read_action; + return 0; } for (bank = 0; bank < mcam->banks_per_entry; bank++, kw = kw + 4) { @@ -1287,17 +1337,7 @@ void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, npc_cn20k_fill_entryword(entry, kw + 3, cam0, cam1); } -read_action: - /* 'action' is set to same value for both bank '0' and '1'. - * Hence, reading bank '0' should be enough. - */ - cfg = rvu_read64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, 0, 0)); - entry->action = cfg; - - cfg = rvu_read64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, 0, 1)); - entry->vtag_action = cfg; + return 0; } int rvu_mbox_handler_npc_cn20k_mcam_write_entry(struct rvu *rvu, @@ -1335,11 +1375,10 @@ int rvu_mbox_handler_npc_cn20k_mcam_write_entry(struct rvu *rvu, if (is_pffunc_af(req->hdr.pcifunc)) nix_intf = req->intf; - npc_cn20k_config_mcam_entry(rvu, blkaddr, req->entry, nix_intf, - &req->entry_data, req->enable_entry, - req->hw_prio, req->req_kw_type); + rc = npc_cn20k_config_mcam_entry(rvu, blkaddr, req->entry, nix_intf, + &req->entry_data, req->enable_entry, + req->hw_prio, req->req_kw_type); - rc = 0; exit: mutex_unlock(&mcam->lock); return rc; @@ -1361,11 +1400,13 @@ int rvu_mbox_handler_npc_cn20k_mcam_read_entry(struct rvu *rvu, mutex_lock(&mcam->lock); rc = npc_mcam_verify_entry(mcam, pcifunc, req->entry); - if (!rc) - npc_cn20k_read_mcam_entry(rvu, blkaddr, req->entry, - &rsp->entry_data, &rsp->intf, - &rsp->enable, &rsp->hw_prio); + if (rc) + goto fail; + rc = npc_cn20k_read_mcam_entry(rvu, blkaddr, req->entry, + &rsp->entry_data, &rsp->intf, + &rsp->enable, &rsp->hw_prio); +fail: mutex_unlock(&mcam->lock); return rc; } @@ -1375,11 +1416,13 @@ int rvu_mbox_handler_npc_cn20k_mcam_alloc_and_write_entry(struct rvu *rvu, struct npc_mcam_alloc_and_write_entry_rsp *rsp) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); + struct npc_mcam_free_entry_req free_req = { 0 }; struct npc_mcam_alloc_entry_req entry_req; struct npc_mcam_alloc_entry_rsp entry_rsp; struct npc_mcam *mcam = &rvu->hw->mcam; u16 entry = NPC_MCAM_ENTRY_INVALID; - int blkaddr, rc; + struct msg_rsp free_rsp; + int blkaddr, rc, err; u8 nix_intf; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); @@ -1415,12 +1458,23 @@ int rvu_mbox_handler_npc_cn20k_mcam_alloc_and_write_entry(struct rvu *rvu, else nix_intf = pfvf->nix_rx_intf; - npc_cn20k_config_mcam_entry(rvu, blkaddr, entry, nix_intf, - &req->entry_data, req->enable_entry, - req->hw_prio, req->req_kw_type); + rc = npc_cn20k_config_mcam_entry(rvu, blkaddr, entry, nix_intf, + &req->entry_data, req->enable_entry, + req->hw_prio, req->req_kw_type); mutex_unlock(&mcam->lock); + if (rc) { + free_req.hdr.pcifunc = req->hdr.pcifunc; + free_req.entry = entry_rsp.entry; + err = rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &free_rsp); + if (err) + dev_err(rvu->dev, + "%s: Error to free mcam idx %u\n", + __func__, entry_rsp.entry); + return rc; + } + rsp->entry = entry_rsp.entry; return 0; } @@ -1480,9 +1534,9 @@ int rvu_mbox_handler_npc_cn20k_read_base_steer_rule(struct rvu *rvu, read_entry: /* Read the mcam entry */ - npc_cn20k_read_mcam_entry(rvu, blkaddr, index, - &rsp->entry, &intf, - &enable, &hw_prio); + rc = npc_cn20k_read_mcam_entry(rvu, blkaddr, index, + &rsp->entry, &intf, + &enable, &hw_prio); mutex_unlock(&mcam->lock); out: return rc; @@ -2305,6 +2359,7 @@ err2: __npc_subbank_mark_free(rvu, sb); err1: kfree(save); + *alloc_cnt = 0; return rc; } @@ -3482,7 +3537,7 @@ static int npc_defrag_alloc_free_slots(struct rvu *rvu, { int alloc_cnt1, alloc_cnt2; struct npc_subbank *sb; - int rc, sb_off, i; + int rc, sb_off, i, err; bool deleted; sb = &npc_priv.sb[f->idx]; @@ -3496,6 +3551,7 @@ static int npc_defrag_alloc_free_slots(struct rvu *rvu, NPC_MCAM_LOWER_PRIO, false, cnt, save, cnt, true, &alloc_cnt1); + if (alloc_cnt1 < cnt) { rc = __npc_subbank_alloc(rvu, sb, NPC_MCAM_KEY_X2, sb->b1b, @@ -3511,15 +3567,17 @@ static int npc_defrag_alloc_free_slots(struct rvu *rvu, dev_err(rvu->dev, "%s: Failed to alloc cnt=%u alloc_cnt1=%u alloc_cnt2=%u\n", __func__, cnt, alloc_cnt1, alloc_cnt2); + rc = -ENOSPC; goto fail_free_alloc; } + return 0; fail_free_alloc: for (i = 0; i < alloc_cnt1 + alloc_cnt2; i++) { - rc = npc_mcam_idx_2_subbank_idx(rvu, save[i], - &sb, &sb_off); - if (rc) { + err = npc_mcam_idx_2_subbank_idx(rvu, save[i], + &sb, &sb_off); + if (err) { dev_err(rvu->dev, "%s: Error to find subbank for mcam idx=%u\n", __func__, save[i]); @@ -3565,9 +3623,10 @@ int npc_defrag_move_vdx_to_free(struct rvu *rvu, struct npc_defrag_node *v, int cnt, u16 *save) { + u16 new_midx, old_midx, vidx, target_pf; struct npc_mcam *mcam = &rvu->hw->mcam; + struct rvu_npc_mcam_rule *rule, *tmp; int i, vidx_cnt, rc, sb_off; - u16 new_midx, old_midx, vidx; struct npc_subbank *sb; bool deleted; u16 pcifunc; @@ -3607,9 +3666,30 @@ int npc_defrag_move_vdx_to_free(struct rvu *rvu, NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(midx, bank)); - npc_cn20k_enable_mcam_entry(rvu, blkaddr, old_midx, false); - npc_cn20k_copy_mcam_entry(rvu, blkaddr, old_midx, new_midx); - npc_cn20k_enable_mcam_entry(rvu, blkaddr, new_midx, true); + /* If bug happened during copy/enable mcam, then there is a bug in allocation + * algorithm itself. There is no point in rewinding and returning, as it + * will face further issue. Return error after printing error + */ + if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, old_midx, false)) { + dev_err(rvu->dev, + "%s: Error happened while disabling old_mid=%u\n", + __func__, old_midx); + return -EFAULT; + } + + if (npc_cn20k_copy_mcam_entry(rvu, blkaddr, old_midx, new_midx)) { + dev_err(rvu->dev, + "%s: Error happened while copying old_midx=%u new_midx=%u\n", + __func__, old_midx, new_midx); + return -EFAULT; + } + + if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, new_midx, true)) { + dev_err(rvu->dev, + "%s: Error happened while enabling new_mid=%u\n", + __func__, new_midx); + return -EFAULT; + } midx = new_midx % mcam->banksize; bank = new_midx / mcam->banksize; @@ -3665,8 +3745,21 @@ int npc_defrag_move_vdx_to_free(struct rvu *rvu, mcam->entry2pfvf_map[new_midx] = pcifunc; /* Counter is not preserved */ mcam->entry2cntr_map[new_midx] = new_midx; + target_pf = mcam->entry2target_pffunc[old_midx]; + mcam->entry2target_pffunc[new_midx] = target_pf; + mcam->entry2target_pffunc[old_midx] = NPC_MCAM_INVALID_MAP; + npc_mcam_set_bit(mcam, new_midx); + /* Note: list order is not functionally required for mcam_rules */ + list_for_each_entry_safe(rule, tmp, &mcam->mcam_rules, list) { + if (rule->entry != old_midx) + continue; + + rule->entry = new_midx; + break; + } + /* Mark as invalid */ v->vidx[vidx_cnt - i - 1] = -1; save[cnt - i - 1] = -1; @@ -3935,6 +4028,13 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, void *val; int i, j; + for (i = 0; i < ARRAY_SIZE(ptr); i++) { + if (!ptr[i]) + continue; + + *ptr[i] = USHRT_MAX; + } + if (!npc_priv.init_done) return 0; @@ -3950,7 +4050,6 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, npc_dft_rule_name[NPC_DFT_RULE_PROMISC_ID], pcifunc); - *ptr[0] = USHRT_MAX; return -ESRCH; } @@ -3970,7 +4069,6 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, npc_dft_rule_name[NPC_DFT_RULE_UCAST_ID], pcifunc); - *ptr[3] = USHRT_MAX; return -ESRCH; } @@ -3990,7 +4088,6 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, __func__, npc_dft_rule_name[i], pcifunc); - *ptr[j] = USHRT_MAX; continue; } @@ -4085,7 +4182,7 @@ int rvu_mbox_handler_npc_get_dft_rl_idxs(struct rvu *rvu, struct msg_req *req, return 0; } -static bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc) +bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc) { return is_pf_cgxmapped(rvu, rvu_get_pf(rvu->pdev, pcifunc)) || is_lbk_vf(rvu, pcifunc); @@ -4093,11 +4190,11 @@ static bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc) void npc_cn20k_dft_rules_free(struct rvu *rvu, u16 pcifunc) { - struct npc_mcam_free_entry_req free_req = { 0 }; + struct npc_mcam *mcam = &rvu->hw->mcam; + u16 ptr[4] = {[0 ... 3] = USHRT_MAX}; + struct rvu_npc_mcam_rule *rule, *tmp; unsigned long index; - struct msg_rsp rsp; - u16 ptr[4]; - int rc, i; + int blkaddr, rc, i; void *map; if (!npc_priv.init_done) @@ -4155,14 +4252,43 @@ void npc_cn20k_dft_rules_free(struct rvu *rvu, u16 pcifunc) } free_rules: + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return; + for (int i = 0; i < 4; i++) { + if (ptr[i] == USHRT_MAX) + continue; - free_req.hdr.pcifunc = pcifunc; - free_req.all = 1; - rc = rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp); - if (rc) - dev_err(rvu->dev, - "%s: Error deleting default entries (pcifunc=%#x\n", - __func__, pcifunc); + mutex_lock(&mcam->lock); + npc_mcam_clear_bit(mcam, ptr[i]); + mcam->entry2pfvf_map[ptr[i]] = NPC_MCAM_INVALID_MAP; + npc_cn20k_enable_mcam_entry(rvu, blkaddr, ptr[i], false); + mcam->entry2target_pffunc[ptr[i]] = 0x0; + mutex_unlock(&mcam->lock); + + rc = npc_cn20k_idx_free(rvu, &ptr[i], 1); + if (rc) { + /* Non recoverable error. Let us WARN and return. Keep system alive to + * enable debugging + */ + WARN(1, "%s Error deleting default entries (pcifunc=%#x) mcam_idx=%u\n", + __func__, pcifunc, ptr[i]); + return; + } + } + + mutex_lock(&mcam->lock); + list_for_each_entry_safe(rule, tmp, &mcam->mcam_rules, list) { + for (int i = 0; i < 4; i++) { + if (ptr[i] != rule->entry) + continue; + + list_del(&rule->list); + kfree(rule); + break; + } + } + mutex_unlock(&mcam->lock); } int npc_cn20k_dft_rules_alloc(struct rvu *rvu, u16 pcifunc) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h index 815d0b257a7e..3d5eb952cc07 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h @@ -320,21 +320,21 @@ void npc_cn20k_dft_rules_free(struct rvu *rvu, u16 pcifunc); int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, u16 *mcast, u16 *promisc, u16 *ucast); -void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, - u8 intf, struct cn20k_mcam_entry *entry, - bool enable, u8 hw_prio, u8 req_kw_type); -void npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, - int index, bool enable); -void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, - u16 src, u16 dest); -void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, - struct cn20k_mcam_entry *entry, u8 *intf, - u8 *ena, u8 *hw_prio); -void npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, - int bank, int index); +int npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, + u8 intf, struct cn20k_mcam_entry *entry, + bool enable, u8 hw_prio, u8 req_kw_type); +int npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, + int index, bool enable); +int npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, + u16 src, u16 dest); +int npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, + struct cn20k_mcam_entry *entry, u8 *intf, + u8 *ena, u8 *hw_prio); +int npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int index); int npc_mcam_idx_2_key_type(struct rvu *rvu, u16 mcam_idx, u8 *key_type); u16 npc_cn20k_vidx2idx(u16 index); u16 npc_cn20k_idx2vidx(u16 idx); int npc_cn20k_defrag(struct rvu *rvu); +bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc); #endif /* NPC_CN20K_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index ef5b081162eb..f977734ae712 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -3577,6 +3577,9 @@ static int nix_update_mce_rule(struct rvu *rvu, u16 pcifunc, mcam_index = npc_get_nixlf_mcam_index(mcam, pcifunc & ~RVU_PFVF_FUNC_MASK, nixlf, type); + if (mcam_index < 0) + return -EINVAL; + err = nix_update_mce_list(rvu, pcifunc, mce_list, mce_idx, mcam_index, add); return err; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index c2ca5ed1d028..3c814d157ab9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -163,14 +163,35 @@ int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, if (rc) return -EFAULT; + if (is_lbk_vf(rvu, pcifunc)) { + if (promisc == USHRT_MAX) + return -EINVAL; + return promisc; + } + + if (is_cgx_vf(rvu, pcifunc)) { + if (ucast == USHRT_MAX) + return -EINVAL; + + return ucast; + } + switch (type) { case NIXLF_BCAST_ENTRY: + if (bcast == USHRT_MAX) + return -EINVAL; return bcast; case NIXLF_ALLMULTI_ENTRY: + if (mcast == USHRT_MAX) + return -EINVAL; return mcast; case NIXLF_PROMISC_ENTRY: + if (promisc == USHRT_MAX) + return -EINVAL; return promisc; case NIXLF_UCAST_ENTRY: + if (ucast == USHRT_MAX) + return -EINVAL; return ucast; default: return -EINVAL; @@ -238,10 +259,10 @@ void npc_enable_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, int actbank = bank; if (is_cn20k(rvu->pdev)) { - if (index < 0 || index >= mcam->banksize * mcam->banks) - return; - - return npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, enable); + if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, enable)) + dev_err(rvu->dev, "Error to %s mcam %u entry\n", + enable ? "enable" : "disable", index); + return; } index &= (mcam->banksize - 1); @@ -258,6 +279,13 @@ static void npc_clear_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, int bank = npc_get_bank(mcam, index); int actbank = bank; + if (is_cn20k(rvu->pdev)) { + if (npc_cn20k_clear_mcam_entry(rvu, blkaddr, index)) + dev_err(rvu->dev, "%s Failed to clear mcam %u\n", + __func__, index); + return; + } + index &= (mcam->banksize - 1); for (; bank < (actbank + mcam->banks_per_entry); bank++) { rvu_write64(rvu, blkaddr, @@ -424,6 +452,15 @@ static u64 npc_get_default_entry_action(struct rvu *rvu, struct npc_mcam *mcam, index = npc_get_nixlf_mcam_index(mcam, pf_func, nixlf, NIXLF_UCAST_ENTRY); + + if (index < 0) { + dev_err(rvu->dev, + "%s: failed to get ucast entry pcifunc:0x%x\n", + __func__, pf_func); + /* Action 0 is drop */ + return 0; + } + bank = npc_get_bank(mcam, index); index &= (mcam->banksize - 1); @@ -589,8 +626,8 @@ void npc_read_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, NPC_AF_MCAMEX_BANKX_CFG(src, sbank)) & 1; } -static void npc_copy_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, - int blkaddr, u16 src, u16 dest) +static int npc_copy_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, + int blkaddr, u16 src, u16 dest) { int dbank = npc_get_bank(mcam, dest); int sbank = npc_get_bank(mcam, src); @@ -630,6 +667,7 @@ static void npc_copy_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, NPC_AF_MCAMEX_BANKX_CFG(src, sbank)); rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_CFG(dest, dbank), cfg); + return 0; } u64 npc_get_mcam_action(struct rvu *rvu, struct npc_mcam *mcam, @@ -689,6 +727,12 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } /* Don't change the action if entry is already enabled * Otherwise RSS action may get overwritten. @@ -744,16 +788,38 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_PROMISC_ENTRY); + /* In cn20k, default indexes are installed only for CGX mapped + * and lbk interfaces + */ if (is_cgx_vf(rvu, pcifunc)) index = npc_get_nixlf_mcam_index(mcam, pcifunc & ~RVU_PFVF_FUNC_MASK, nixlf, NIXLF_PROMISC_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get promisc entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } + /* If the corresponding PF's ucast action is RSS, * use the same action for promisc also + * Please note that for lbk(s) "index" and "ucast_idx" + * will be same. */ - ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, - nixlf, NIXLF_UCAST_ENTRY); + if (is_lbk_vf(rvu, pcifunc)) + ucast_idx = index; + else + ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, + nixlf, NIXLF_UCAST_ENTRY); + if (ucast_idx < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast/promisc entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } + if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx)) *(u64 *)&action = npc_get_mcam_action(rvu, mcam, blkaddr, ucast_idx); @@ -827,6 +893,14 @@ void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_PROMISC_ENTRY); + + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get promisc entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } + npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); } @@ -867,6 +941,12 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_BCAST_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get bcast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } if (!hw->cap.nix_rx_multicast) { /* Early silicon doesn't support pkt replication, @@ -931,12 +1011,25 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_ALLMULTI_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get mcast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } /* If the corresponding PF's ucast action is RSS, * use the same action for multicast entry also */ ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (ucast_idx < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } + if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx)) *(u64 *)&action = npc_get_mcam_action(rvu, mcam, blkaddr, ucast_idx); @@ -1001,6 +1094,13 @@ void rvu_npc_enable_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_ALLMULTI_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get mcast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } + npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); } @@ -1113,8 +1213,12 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, index = mcam_index; } - if (index >= mcam->total_entries) + if (index < 0 || index >= mcam->total_entries) { + dev_err(rvu->dev, + "%s: Invalid mcam index, pcifunc=%#x\n", + __func__, pcifunc); return; + } bank = npc_get_bank(mcam, index); index &= (mcam->banksize - 1); @@ -1158,16 +1262,18 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, /* If PF's promiscuous entry is enabled, * Set RSS action for that entry as well */ - npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, - blkaddr, alg_idx); + if (index >= 0) + npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, + blkaddr, alg_idx); index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_ALLMULTI_ENTRY); /* If PF's allmulti entry is enabled, * Set RSS action for that entry as well */ - npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, - blkaddr, alg_idx); + if (index >= 0) + npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, + blkaddr, alg_idx); } } @@ -1180,12 +1286,22 @@ void npc_enadis_default_mce_entry(struct rvu *rvu, u16 pcifunc, int index, blkaddr, mce_idx; struct rvu_pfvf *pfvf; + /* multicast pkt replication is not enabled for AF's VFs & SDP links */ + if (is_lbk_vf(rvu, pcifunc) || is_sdp_pfvf(rvu, pcifunc)) + return; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return; index = npc_get_nixlf_mcam_index(mcam, pcifunc & ~RVU_PFVF_FUNC_MASK, nixlf, type); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get entry for pcifunc=%#x, type=%u\n", + __func__, pcifunc, type); + return; + } /* disable MCAM entry when packet replication is not supported by hw */ if (!hw->cap.nix_rx_multicast && !is_vf(pcifunc)) { @@ -1214,6 +1330,10 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc, struct npc_mcam *mcam = &rvu->hw->mcam; int index, blkaddr; + /* only CGX or LBK interfaces have default entries */ + if (is_cn20k(rvu->pdev) && !npc_is_cgx_or_lbk(rvu, pcifunc)) + return; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return; @@ -1223,6 +1343,12 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc, pfvf->nix_rx_intf)) { index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); } @@ -2504,33 +2630,58 @@ void npc_mcam_clear_bit(struct npc_mcam *mcam, u16 index) static void npc_mcam_free_all_entries(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, u16 pcifunc) { + u16 dft_idxs[NPC_DFT_RULE_MAX_ID] = {[0 ... NPC_DFT_RULE_MAX_ID - 1] = USHRT_MAX}; + bool cn20k_dft_rl; u16 index, cntr; int rc; + npc_cn20k_dft_rules_idx_get(rvu, pcifunc, + &dft_idxs[NPC_DFT_RULE_BCAST_ID], + &dft_idxs[NPC_DFT_RULE_MCAST_ID], + &dft_idxs[NPC_DFT_RULE_PROMISC_ID], + &dft_idxs[NPC_DFT_RULE_UCAST_ID]); + /* Scan all MCAM entries and free the ones mapped to 'pcifunc' */ for (index = 0; index < mcam->bmap_entries; index++) { - if (mcam->entry2pfvf_map[index] == pcifunc) { + if (mcam->entry2pfvf_map[index] != pcifunc) + continue; + + cn20k_dft_rl = false; + + if (is_cn20k(rvu->pdev)) { + if (dft_idxs[NPC_DFT_RULE_BCAST_ID] == index || + dft_idxs[NPC_DFT_RULE_MCAST_ID] == index || + dft_idxs[NPC_DFT_RULE_PROMISC_ID] == index || + dft_idxs[NPC_DFT_RULE_UCAST_ID] == index) { + cn20k_dft_rl = true; + } + } + + /* Disable the entry */ + npc_enable_mcam_entry(rvu, mcam, blkaddr, index, false); + + if (!cn20k_dft_rl) { mcam->entry2pfvf_map[index] = NPC_MCAM_INVALID_MAP; /* Free the entry in bitmap */ npc_mcam_clear_bit(mcam, index); - /* Disable the entry */ - npc_enable_mcam_entry(rvu, mcam, blkaddr, index, false); - - /* Update entry2counter mapping */ - cntr = mcam->entry2cntr_map[index]; - if (cntr != NPC_MCAM_INVALID_MAP) - npc_unmap_mcam_entry_and_cntr(rvu, mcam, - blkaddr, index, - cntr); mcam->entry2target_pffunc[index] = 0x0; - if (is_cn20k(rvu->pdev)) { - rc = npc_cn20k_idx_free(rvu, &index, 1); - if (rc) - dev_err(rvu->dev, - "Failed to free mcam idx=%u pcifunc=%#x\n", - index, pcifunc); - } } + + /* Update entry2counter mapping */ + cntr = mcam->entry2cntr_map[index]; + if (cntr != NPC_MCAM_INVALID_MAP) + npc_unmap_mcam_entry_and_cntr(rvu, mcam, + blkaddr, index, + cntr); + + if (!is_cn20k(rvu->pdev) || cn20k_dft_rl) + continue; + + rc = npc_cn20k_idx_free(rvu, &index, 1); + if (rc) + dev_err(rvu->dev, + "Failed to free mcam idx=%u pcifunc=%#x\n", + index, pcifunc); } } @@ -3266,7 +3417,10 @@ int rvu_mbox_handler_npc_mcam_shift_entry(struct rvu *rvu, npc_enable_mcam_entry(rvu, mcam, blkaddr, new_entry, false); /* Copy rule from old entry to new entry */ - npc_copy_mcam_entry(rvu, mcam, blkaddr, old_entry, new_entry); + if (npc_copy_mcam_entry(rvu, mcam, blkaddr, old_entry, new_entry)) { + rc = NPC_MCAM_INVALID_REQ; + break; + } /* Copy counter mapping, if any */ cntr = mcam->entry2cntr_map[old_entry]; @@ -3284,7 +3438,8 @@ int rvu_mbox_handler_npc_mcam_shift_entry(struct rvu *rvu, /* If shift has failed then report the failed index */ if (index != req->shift_count) { - rc = NPC_MCAM_PERM_DENIED; + if (!rc) + rc = NPC_MCAM_PERM_DENIED; rsp->failed_entry_idx = index; } @@ -3851,6 +4006,12 @@ int rvu_mbox_handler_npc_read_base_steer_rule(struct rvu *rvu, /* Read the default ucast entry if there is no pkt steering rule */ index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (index < 0) { + mutex_unlock(&mcam->lock); + rc = NIX_AF_ERR_AF_LF_INVALID; + goto out; + } + read_entry: /* Read the mcam entry */ npc_read_mcam_entry(rvu, mcam, blkaddr, index, &rsp->entry, &intf, @@ -3924,6 +4085,12 @@ void rvu_npc_clear_ucast_entry(struct rvu *rvu, int pcifunc, int nixlf) ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (ucast_idx < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } npc_enable_mcam_entry(rvu, mcam, blkaddr, ucast_idx, false); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index b45798d9fdab..6ae9cdcb608b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -1444,7 +1444,7 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, struct msg_rsp write_rsp; struct mcam_entry *entry; bool new = false; - u16 entry_index; + int entry_index; int err; installed_features = req->features; @@ -1477,6 +1477,14 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, if (req->default_rule) { entry_index = npc_get_nixlf_mcam_index(mcam, target, nixlf, NIXLF_UCAST_ENTRY); + + if (entry_index < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast entry for target=%#x\n", + __func__, target); + return -EINVAL; + } + enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, entry_index); } @@ -1980,13 +1988,15 @@ static int npc_update_dmac_value(struct rvu *rvu, int npcblkaddr, ether_addr_copy(rule->packet.dmac, pfvf->mac_addr); - if (is_cn20k(rvu->pdev)) - npc_cn20k_read_mcam_entry(rvu, npcblkaddr, rule->entry, - cn20k_entry, &intf, - &enable, &hw_prio); - else + if (is_cn20k(rvu->pdev)) { + if (npc_cn20k_read_mcam_entry(rvu, npcblkaddr, rule->entry, + cn20k_entry, &intf, + &enable, &hw_prio)) + return -EINVAL; + } else { npc_read_mcam_entry(rvu, mcam, npcblkaddr, rule->entry, entry, &intf, &enable); + } npc_update_entry(rvu, NPC_DMAC, &mdata, ether_addr_to_u64(pfvf->mac_addr), 0, @@ -2038,8 +2048,12 @@ void npc_mcam_enable_flows(struct rvu *rvu, u16 target) continue; } - if (rule->vfvlan_cfg) - npc_update_dmac_value(rvu, blkaddr, rule, pfvf); + if (rule->vfvlan_cfg) { + if (npc_update_dmac_value(rvu, blkaddr, rule, pfvf)) + dev_err(rvu->dev, + "Update dmac failed for %u, target=%#x\n", + rule->entry, target); + } if (rule->rx_action.op == NIX_RX_ACTION_DEFAULT) { if (!def_ucast_rule) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c index 6a50b6dec0fa..d9adb993e64d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c @@ -1070,29 +1070,37 @@ static struct psp_dev_ops mlx5_psp_ops = { void mlx5e_psp_unregister(struct mlx5e_priv *priv) { - if (!priv->psp || !priv->psp->psp) + struct mlx5e_psp *psp = priv->psp; + + if (!psp || !psp->psp) return; - psp_dev_unregister(priv->psp->psp); + psp_dev_unregister(psp->psp); + psp->psp = NULL; } void mlx5e_psp_register(struct mlx5e_priv *priv) { + struct mlx5e_psp *psp = priv->psp; + struct psp_dev *psd; + /* FW Caps missing */ if (!priv->psp) return; - priv->psp->caps.assoc_drv_spc = sizeof(u32); - priv->psp->caps.versions = 1 << PSP_VERSION_HDR0_AES_GCM_128; + psp->caps.assoc_drv_spc = sizeof(u32); + psp->caps.versions = 1 << PSP_VERSION_HDR0_AES_GCM_128; if (MLX5_CAP_PSP(priv->mdev, psp_crypto_esp_aes_gcm_256_encrypt) && MLX5_CAP_PSP(priv->mdev, psp_crypto_esp_aes_gcm_256_decrypt)) - priv->psp->caps.versions |= 1 << PSP_VERSION_HDR0_AES_GCM_256; + psp->caps.versions |= 1 << PSP_VERSION_HDR0_AES_GCM_256; - priv->psp->psp = psp_dev_create(priv->netdev, &mlx5_psp_ops, - &priv->psp->caps, NULL); - if (IS_ERR(priv->psp->psp)) + psd = psp_dev_create(priv->netdev, &mlx5_psp_ops, &psp->caps, NULL); + if (IS_ERR(psd)) { mlx5_core_err(priv->mdev, "PSP failed to register due to %pe\n", - priv->psp->psp); + psd); + return; + } + psp->psp = psd; } int mlx5e_psp_init(struct mlx5e_priv *priv) @@ -1131,22 +1139,18 @@ int mlx5e_psp_init(struct mlx5e_priv *priv) if (!psp) return -ENOMEM; - priv->psp = psp; fs = mlx5e_accel_psp_fs_init(priv); if (IS_ERR(fs)) { err = PTR_ERR(fs); - goto out_err; + kfree(psp); + return err; } psp->fs = fs; + priv->psp = psp; mlx5_core_dbg(priv->mdev, "PSP attached to netdevice\n"); return 0; - -out_err: - priv->psp = NULL; - kfree(psp); - return err; } void mlx5e_psp_cleanup(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5a46870c4b74..8f2b3abe0092 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -6023,7 +6023,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (take_rtnl) rtnl_lock(); - mlx5e_psp_register(priv); /* update XDP supported features */ mlx5e_set_xdp_feature(priv); @@ -6036,7 +6035,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_health_destroy_reporters(priv); - mlx5e_psp_unregister(priv); mlx5e_ktls_cleanup(priv); mlx5e_psp_cleanup(priv); mlx5e_fs_cleanup(priv->fs); @@ -6160,6 +6158,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_fs_init_l2_addr(priv->fs, netdev); mlx5e_ipsec_init(priv); + mlx5e_psp_register(priv); err = mlx5e_macsec_init(priv); if (err) @@ -6230,6 +6229,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5_lag_remove_netdev(mdev, priv->netdev); mlx5_vxlan_reset_to_default(mdev->vxlan); mlx5e_macsec_cleanup(priv); + mlx5e_psp_unregister(priv); mlx5e_ipsec_cleanup(priv); } @@ -6774,9 +6774,11 @@ static int mlx5e_resume(struct auxiliary_device *adev) return err; actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); - if (actual_adev) - return _mlx5e_resume(actual_adev); - return 0; + if (actual_adev) { + err = _mlx5e_resume(actual_adev); + mlx5_sd_put_adev(actual_adev, adev); + } + return err; } static int _mlx5e_suspend(struct auxiliary_device *adev, bool pre_netdev_reg) @@ -6815,6 +6817,8 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state) err = _mlx5e_suspend(actual_adev, false); mlx5_sd_cleanup(mdev); + if (actual_adev) + mlx5_sd_put_adev(actual_adev, adev); return err; } @@ -6912,9 +6916,19 @@ static int mlx5e_probe(struct auxiliary_device *adev, return err; actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); - if (actual_adev) - return _mlx5e_probe(actual_adev); + if (actual_adev) { + err = _mlx5e_probe(actual_adev); + if (err) + goto sd_cleanup; + mlx5_sd_put_adev(actual_adev, adev); + } return 0; + +sd_cleanup: + mlx5_sd_cleanup(mdev); + if (actual_adev) + mlx5_sd_put_adev(actual_adev, adev); + return err; } static void _mlx5e_remove(struct auxiliary_device *adev) @@ -6966,6 +6980,8 @@ static void mlx5e_remove(struct auxiliary_device *adev) _mlx5e_remove(actual_adev); mlx5_sd_cleanup(mdev); + if (actual_adev) + mlx5_sd_put_adev(actual_adev, adev); } static const struct auxiliary_device_id mlx5e_id_table[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index 762c783156b4..6e199161b008 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -18,6 +18,7 @@ struct mlx5_sd { u8 host_buses; struct mlx5_devcom_comp_dev *devcom; struct dentry *dfs; + u8 state; bool primary; union { struct { /* primary */ @@ -31,6 +32,11 @@ struct mlx5_sd { }; }; +enum mlx5_sd_state { + MLX5_SD_STATE_DOWN = 0, + MLX5_SD_STATE_UP, +}; + static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev) { struct mlx5_sd *sd = mlx5_get_sd(dev); @@ -270,9 +276,6 @@ static void sd_unregister(struct mlx5_core_dev *dev) { struct mlx5_sd *sd = mlx5_get_sd(dev); - mlx5_devcom_comp_lock(sd->devcom); - mlx5_devcom_comp_set_ready(sd->devcom, false); - mlx5_devcom_comp_unlock(sd->devcom); mlx5_devcom_unregister_component(sd->devcom); } @@ -426,6 +429,7 @@ int mlx5_sd_init(struct mlx5_core_dev *dev) struct mlx5_core_dev *primary, *pos, *to; struct mlx5_sd *sd = mlx5_get_sd(dev); u8 alias_key[ACCESS_KEY_LEN]; + struct mlx5_sd *primary_sd; int err, i; err = sd_init(dev); @@ -440,10 +444,17 @@ int mlx5_sd_init(struct mlx5_core_dev *dev) if (err) goto err_sd_cleanup; + mlx5_devcom_comp_lock(sd->devcom); if (!mlx5_devcom_comp_is_ready(sd->devcom)) - return 0; + goto out; primary = mlx5_sd_get_primary(dev); + if (!primary) + goto out; + + primary_sd = mlx5_get_sd(primary); + if (primary_sd->state != MLX5_SD_STATE_DOWN) + goto out; for (i = 0; i < ACCESS_KEY_LEN; i++) alias_key[i] = get_random_u8(); @@ -452,9 +463,13 @@ int mlx5_sd_init(struct mlx5_core_dev *dev) if (err) goto err_sd_unregister; - sd->dfs = debugfs_create_dir("multi-pf", mlx5_debugfs_get_dev_root(primary)); - debugfs_create_x32("group_id", 0400, sd->dfs, &sd->group_id); - debugfs_create_file("primary", 0400, sd->dfs, primary, &dev_fops); + primary_sd->dfs = + debugfs_create_dir("multi-pf", + mlx5_debugfs_get_dev_root(primary)); + debugfs_create_x32("group_id", 0400, primary_sd->dfs, + &primary_sd->group_id); + debugfs_create_file("primary", 0400, primary_sd->dfs, primary, + &dev_fops); mlx5_sd_for_each_secondary(i, primary, pos) { char name[32]; @@ -464,7 +479,8 @@ int mlx5_sd_init(struct mlx5_core_dev *dev) goto err_unset_secondaries; snprintf(name, sizeof(name), "secondary_%d", i - 1); - debugfs_create_file(name, 0400, sd->dfs, pos, &dev_fops); + debugfs_create_file(name, 0400, primary_sd->dfs, pos, + &dev_fops); } @@ -472,6 +488,9 @@ int mlx5_sd_init(struct mlx5_core_dev *dev) sd->group_id, mlx5_devcom_comp_get_size(sd->devcom)); sd_print_group(primary); + primary_sd->state = MLX5_SD_STATE_UP; +out: + mlx5_devcom_comp_unlock(sd->devcom); return 0; err_unset_secondaries: @@ -479,8 +498,18 @@ err_unset_secondaries: mlx5_sd_for_each_secondary_to(i, primary, to, pos) sd_cmd_unset_secondary(pos); sd_cmd_unset_primary(primary); - debugfs_remove_recursive(sd->dfs); + debugfs_remove_recursive(primary_sd->dfs); + primary_sd->dfs = NULL; err_sd_unregister: + mlx5_sd_for_each_secondary(i, primary, pos) { + struct mlx5_sd *peer_sd = mlx5_get_sd(pos); + + primary_sd->secondaries[i - 1] = NULL; + peer_sd->primary_dev = NULL; + } + primary_sd->primary = false; + mlx5_devcom_comp_set_ready(sd->devcom, false); + mlx5_devcom_comp_unlock(sd->devcom); sd_unregister(dev); err_sd_cleanup: sd_cleanup(dev); @@ -491,42 +520,97 @@ void mlx5_sd_cleanup(struct mlx5_core_dev *dev) { struct mlx5_sd *sd = mlx5_get_sd(dev); struct mlx5_core_dev *primary, *pos; + struct mlx5_sd *primary_sd; int i; if (!sd) return; + mlx5_devcom_comp_lock(sd->devcom); if (!mlx5_devcom_comp_is_ready(sd->devcom)) - goto out; + goto out_unlock; primary = mlx5_sd_get_primary(dev); + if (!primary) + goto out_ready_false; + + primary_sd = mlx5_get_sd(primary); + if (primary_sd->state != MLX5_SD_STATE_UP) + goto out_clear_peers; + mlx5_sd_for_each_secondary(i, primary, pos) sd_cmd_unset_secondary(pos); sd_cmd_unset_primary(primary); - debugfs_remove_recursive(sd->dfs); + debugfs_remove_recursive(primary_sd->dfs); + primary_sd->dfs = NULL; sd_info(primary, "group id %#x, uncombined\n", sd->group_id); -out: + primary_sd->state = MLX5_SD_STATE_DOWN; +out_clear_peers: + mlx5_sd_for_each_secondary(i, primary, pos) { + struct mlx5_sd *peer_sd = mlx5_get_sd(pos); + + primary_sd->secondaries[i - 1] = NULL; + peer_sd->primary_dev = NULL; + } + primary_sd->primary = false; +out_ready_false: + mlx5_devcom_comp_set_ready(sd->devcom, false); +out_unlock: + mlx5_devcom_comp_unlock(sd->devcom); sd_unregister(dev); sd_cleanup(dev); } +/* Lock order: + * primary: actual_adev_lock -> SD devcom comp lock + * secondary: SD devcom comp lock -> (drop) -> actual_adev_lock + * The two locks are never held together, so no ABBA. + */ struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev, struct auxiliary_device *adev, int idx) { struct mlx5_sd *sd = mlx5_get_sd(dev); struct mlx5_core_dev *primary; + struct mlx5_adev *primary_adev; if (!sd) return adev; - if (!mlx5_devcom_comp_is_ready(sd->devcom)) + mlx5_devcom_comp_lock(sd->devcom); + if (!mlx5_devcom_comp_is_ready(sd->devcom)) { + mlx5_devcom_comp_unlock(sd->devcom); return NULL; + } primary = mlx5_sd_get_primary(dev); - if (dev == primary) + if (!primary || dev == primary) { + mlx5_devcom_comp_unlock(sd->devcom); return adev; + } + + primary_adev = primary->priv.adev[idx]; + get_device(&primary_adev->adev.dev); + mlx5_devcom_comp_unlock(sd->devcom); + + device_lock(&primary_adev->adev.dev); + /* Primary may have completed remove between dropping devcom and + * acquiring device_lock; recheck. + */ + if (!mlx5_devcom_comp_is_ready(sd->devcom)) { + device_unlock(&primary_adev->adev.dev); + put_device(&primary_adev->adev.dev); + return NULL; + } + return &primary_adev->adev; +} - return &primary->priv.adev[idx]->adev; +void mlx5_sd_put_adev(struct auxiliary_device *actual_adev, + struct auxiliary_device *adev) +{ + if (actual_adev != adev) { + device_unlock(&actual_adev->dev); + put_device(&actual_adev->dev); + } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h index 137efaf9aabc..9bfd5b9756b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h @@ -15,6 +15,8 @@ struct mlx5_core_dev *mlx5_sd_ch_ix_get_dev(struct mlx5_core_dev *primary, int c struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev, struct auxiliary_device *adev, int idx); +void mlx5_sd_put_adev(struct auxiliary_device *actual_adev, + struct auxiliary_device *adev); int mlx5_sd_init(struct mlx5_core_dev *dev); void mlx5_sd_cleanup(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index c406a3b56b37..4dea2bb58d2f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -826,7 +826,8 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd) netif_tx_stop_all_queues(netdev); if (fbnic_phylink_create(netdev)) { - fbnic_netdev_free(fbd); + free_netdev(netdev); + fbd->netdev = NULL; return NULL; } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 6a745bb71b5c..eb57b86fbe22 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -31,11 +31,11 @@ enum spx5_target_chiptype { SPX5_TARGET_CT_7552 = 0x7552, /* SparX-5-128 Enterprise */ SPX5_TARGET_CT_7556 = 0x7556, /* SparX-5-160 Enterprise */ SPX5_TARGET_CT_7558 = 0x7558, /* SparX-5-200 Enterprise */ - SPX5_TARGET_CT_7546TSN = 0x47546, /* SparX-5-64i Industrial */ - SPX5_TARGET_CT_7549TSN = 0x47549, /* SparX-5-90i Industrial */ - SPX5_TARGET_CT_7552TSN = 0x47552, /* SparX-5-128i Industrial */ - SPX5_TARGET_CT_7556TSN = 0x47556, /* SparX-5-160i Industrial */ - SPX5_TARGET_CT_7558TSN = 0x47558, /* SparX-5-200i Industrial */ + SPX5_TARGET_CT_7546TSN = 0x0546, /* SparX-5-64i Industrial */ + SPX5_TARGET_CT_7549TSN = 0x0549, /* SparX-5-90i Industrial */ + SPX5_TARGET_CT_7552TSN = 0x0552, /* SparX-5-128i Industrial */ + SPX5_TARGET_CT_7556TSN = 0x0556, /* SparX-5-160i Industrial */ + SPX5_TARGET_CT_7558TSN = 0x0558, /* SparX-5-200i Industrial */ SPX5_TARGET_CT_LAN9694 = 0x9694, /* lan969x-40 */ SPX5_TARGET_CT_LAN9691VAO = 0x9691, /* lan969x-40-VAO */ SPX5_TARGET_CT_LAN9694TSN = 0x9695, /* lan969x-40-TSN */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c index 04bc8fffaf96..62c49893de3c 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c @@ -1128,7 +1128,8 @@ int sparx5_port_init(struct sparx5 *sparx5, DEV2G5_PCS1G_SD_CFG(port->portno)); if (conf->portmode == PHY_INTERFACE_MODE_QSGMII || - conf->portmode == PHY_INTERFACE_MODE_SGMII) { + conf->portmode == PHY_INTERFACE_MODE_SGMII || + conf->portmode == PHY_INTERFACE_MODE_1000BASEX) { err = sparx5_serdes_set(sparx5, port, conf); if (err) return err; diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 098fbda0d128..d8e816882f02 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -43,8 +43,9 @@ static u64 mana_gd_r64(struct gdma_context *g, u64 offset) static int mana_gd_init_pf_regs(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); - void __iomem *sriov_base_va; + u64 remaining_barsize; u64 sriov_base_off; + u64 sriov_shm_off; gc->db_page_size = mana_gd_r32(gc, GDMA_PF_REG_DB_PAGE_SIZE) & 0xFFFF; @@ -73,10 +74,28 @@ static int mana_gd_init_pf_regs(struct pci_dev *pdev) gc->phys_db_page_base = gc->bar0_pa + gc->db_page_off; sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF); + if (sriov_base_off >= gc->bar0_size || + gc->bar0_size - sriov_base_off < + GDMA_PF_REG_SHM_OFF + sizeof(u64) || + !IS_ALIGNED(sriov_base_off, sizeof(u64))) { + dev_err(gc->dev, + "SRIOV base offset 0x%llx out of range or unaligned (BAR0 size 0x%llx)\n", + sriov_base_off, (u64)gc->bar0_size); + return -EPROTO; + } - sriov_base_va = gc->bar0_va + sriov_base_off; - gc->shm_base = sriov_base_va + - mana_gd_r64(gc, sriov_base_off + GDMA_PF_REG_SHM_OFF); + remaining_barsize = gc->bar0_size - sriov_base_off; + sriov_shm_off = mana_gd_r64(gc, sriov_base_off + GDMA_PF_REG_SHM_OFF); + if (sriov_shm_off >= remaining_barsize || + remaining_barsize - sriov_shm_off < SMC_APERTURE_SIZE || + !IS_ALIGNED(sriov_shm_off, sizeof(u32))) { + dev_err(gc->dev, + "SRIOV SHM offset 0x%llx out of range or unaligned (BAR0 size 0x%llx)\n", + sriov_shm_off, (u64)gc->bar0_size); + return -EPROTO; + } + + gc->shm_base = gc->bar0_va + sriov_base_off + sriov_shm_off; return 0; } @@ -84,6 +103,7 @@ static int mana_gd_init_pf_regs(struct pci_dev *pdev) static int mana_gd_init_vf_regs(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); + u64 shm_off; gc->db_page_size = mana_gd_r32(gc, GDMA_REG_DB_PAGE_SIZE) & 0xFFFF; @@ -111,7 +131,17 @@ static int mana_gd_init_vf_regs(struct pci_dev *pdev) gc->db_page_base = gc->bar0_va + gc->db_page_off; gc->phys_db_page_base = gc->bar0_pa + gc->db_page_off; - gc->shm_base = gc->bar0_va + mana_gd_r64(gc, GDMA_REG_SHM_OFFSET); + shm_off = mana_gd_r64(gc, GDMA_REG_SHM_OFFSET); + if (shm_off >= gc->bar0_size || + gc->bar0_size - shm_off < SMC_APERTURE_SIZE || + !IS_ALIGNED(shm_off, sizeof(u32))) { + dev_err(gc->dev, + "SHM offset 0x%llx out of range or unaligned (BAR0 size 0x%llx)\n", + shm_off, (u64)gc->bar0_size); + return -EPROTO; + } + + gc->shm_base = gc->bar0_va + shm_off; return 0; } diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index a654b3699c4c..9afc786b297a 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -2520,9 +2520,12 @@ static void mana_destroy_rxq(struct mana_port_context *apc, napi_disable_locked(napi); netif_napi_del_locked(napi); } - xdp_rxq_info_unreg(&rxq->xdp_rxq); - mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); + if (xdp_rxq_info_is_reg(&rxq->xdp_rxq)) + xdp_rxq_info_unreg(&rxq->xdp_rxq); + + if (rxq->rxobj != INVALID_MANA_HANDLE) + mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); mana_deinit_cq(apc, &rxq->rx_cq); @@ -2796,9 +2799,6 @@ out: mana_destroy_rxq(apc, rxq, false); - if (cq) - mana_deinit_cq(apc, cq); - return NULL; } diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.c b/drivers/net/ethernet/microsoft/mana/shm_channel.c index 0f1679ebad96..d21b5db06e50 100644 --- a/drivers/net/ethernet/microsoft/mana/shm_channel.c +++ b/drivers/net/ethernet/microsoft/mana/shm_channel.c @@ -61,11 +61,6 @@ union smc_proto_hdr { }; }; /* HW DATA */ -#define SMC_APERTURE_BITS 256 -#define SMC_BASIC_UNIT (sizeof(u32)) -#define SMC_APERTURE_DWORDS (SMC_APERTURE_BITS / (SMC_BASIC_UNIT * 8)) -#define SMC_LAST_DWORD (SMC_APERTURE_DWORDS - 1) - static int mana_smc_poll_register(void __iomem *base, bool reset) { void __iomem *ptr = base + SMC_LAST_DWORD * SMC_BASIC_UNIT; diff --git a/drivers/net/ethernet/renesas/rtsn.c b/drivers/net/ethernet/renesas/rtsn.c index 03a2669f0518..ee8381b60b8d 100644 --- a/drivers/net/ethernet/renesas/rtsn.c +++ b/drivers/net/ethernet/renesas/rtsn.c @@ -797,11 +797,11 @@ static int rtsn_mdio_alloc(struct rtsn_private *priv) /* Enter config mode before registering the MDIO bus */ ret = rtsn_reset(priv); if (ret) - goto out_free_bus; + goto out_put_node; ret = rtsn_change_mode(priv, OCR_OPC_CONFIG); if (ret) - goto out_free_bus; + goto out_put_node; rtsn_modify(priv, MPIC, MPIC_PSMCS_MASK | MPIC_PSMHT_MASK, MPIC_PSMCS_DEFAULT | MPIC_PSMHT_DEFAULT); @@ -824,6 +824,8 @@ static int rtsn_mdio_alloc(struct rtsn_private *priv) return 0; +out_put_node: + of_node_put(mdio_node); out_free_bus: mdiobus_free(mii); return ret; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c index e2240b68ad98..2ab6ecac6422 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c @@ -100,6 +100,8 @@ static int nvt_gmac_probe(struct platform_device *pdev) if (!priv) return dev_err_probe(dev, -ENOMEM, "Failed to allocate private data\n"); + priv->dev = dev; + priv->regmap = syscon_regmap_lookup_by_phandle_args(dev->of_node, "nuvoton,sys", 1, &priv->macid); if (IS_ERR(priv->regmap)) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index d3772d01e00b..2451f6b20b11 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -2480,8 +2480,11 @@ int wx_sw_init(struct wx *wx) wx->oem_svid = pdev->subsystem_vendor; wx->oem_ssid = pdev->subsystem_device; wx->bus.device = PCI_SLOT(pdev->devfn); - wx->bus.func = FIELD_GET(WX_CFG_PORT_ST_LANID, - rd32(wx, WX_CFG_PORT_ST)); + if (pdev->is_virtfn) + wx->bus.func = PCI_FUNC(pdev->devfn); + else + wx->bus.func = FIELD_GET(WX_CFG_PORT_ST_LANID, + rd32(wx, WX_CFG_PORT_ST)); if (wx->oem_svid == PCI_VENDOR_ID_WANGXUN || pdev->is_virtfn) { diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c index 29cdbed2e5ec..94ff8f5f0b4c 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c @@ -99,8 +99,8 @@ int wx_request_msix_irqs_vf(struct wx *wx) } } - err = request_threaded_irq(wx->msix_entry->vector, wx_msix_misc_vf, - NULL, IRQF_ONESHOT, netdev->name, wx); + err = request_irq(wx->msix_entry->vector, wx_msix_misc_vf, + 0, netdev->name, wx); if (err) { wx_err(wx, "request_irq for msix_other failed: %d\n", err); goto free_queue_irqs; diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index a05af192caf3..a750768912b5 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -1182,7 +1182,8 @@ void nsim_destroy(struct netdevsim *ns) unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb, &ns->nn); - nsim_psp_uninit(ns); + if (nsim_dev_port_is_pf(ns->nsim_dev_port)) + nsim_psp_uninit(ns); rtnl_lock(); peer = rtnl_dereference(ns->peer); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 7e129dddbbe7..d909c4160ea1 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -120,7 +120,9 @@ struct netdevsim { u64_stats_t tx_packets; u64_stats_t tx_bytes; struct u64_stats_sync syncp; - struct psp_dev *dev; + struct psp_dev __rcu *dev; + struct dentry *rereg; + struct mutex rereg_lock; u32 spi; u32 assoc_cnt; } psp; diff --git a/drivers/net/netdevsim/psp.c b/drivers/net/netdevsim/psp.c index 0b4d717253b0..6936ecb8173e 100644 --- a/drivers/net/netdevsim/psp.c +++ b/drivers/net/netdevsim/psp.c @@ -19,6 +19,7 @@ nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, struct netdevsim *peer_ns, struct skb_ext **psp_ext) { enum skb_drop_reason rc = 0; + struct psp_dev *peer_psd; struct psp_assoc *pas; struct net *net; void **ptr; @@ -48,7 +49,8 @@ nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, } /* Now pretend we just received this frame */ - if (peer_ns->psp.dev->config.versions & (1 << pas->version)) { + peer_psd = rcu_dereference(peer_ns->psp.dev); + if (peer_psd && peer_psd->config.versions & (1 << pas->version)) { bool strip_icv = false; u8 generation; @@ -61,8 +63,7 @@ nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, skb_ext_reset(skb); skb->mac_len = ETH_HLEN; - if (psp_dev_rcv(skb, peer_ns->psp.dev->id, generation, - strip_icv)) { + if (psp_dev_rcv(skb, peer_psd->id, generation, strip_icv)) { rc = SKB_DROP_REASON_PSP_OUTPUT; goto out_unlock; } @@ -209,26 +210,50 @@ static struct psp_dev_caps nsim_psp_caps = { .assoc_drv_spc = sizeof(void *), }; -void nsim_psp_uninit(struct netdevsim *ns) +static void __nsim_psp_uninit(struct netdevsim *ns, bool teardown) { - if (!IS_ERR(ns->psp.dev)) - psp_dev_unregister(ns->psp.dev); + struct psp_dev *psd; + + psd = rcu_dereference_protected(ns->psp.dev, + teardown || + lockdep_is_held(&ns->psp.rereg_lock)); + if (psd) { + rcu_assign_pointer(ns->psp.dev, NULL); + synchronize_rcu(); + psp_dev_unregister(psd); + } WARN_ON(ns->psp.assoc_cnt); } +void nsim_psp_uninit(struct netdevsim *ns) +{ + debugfs_remove(ns->psp.rereg); + mutex_destroy(&ns->psp.rereg_lock); + __nsim_psp_uninit(ns, true); +} + static ssize_t nsim_psp_rereg_write(struct file *file, const char __user *data, size_t count, loff_t *ppos) { struct netdevsim *ns = file->private_data; - int err; + struct psp_dev *psd; + ssize_t ret; + + mutex_lock(&ns->psp.rereg_lock); + __nsim_psp_uninit(ns, false); - nsim_psp_uninit(ns); + psd = psp_dev_create(ns->netdev, &nsim_psp_ops, &nsim_psp_caps, ns); + if (IS_ERR(psd)) { + ret = PTR_ERR(psd); + goto out; + } - ns->psp.dev = psp_dev_create(ns->netdev, &nsim_psp_ops, - &nsim_psp_caps, ns); - err = PTR_ERR_OR_ZERO(ns->psp.dev); - return err ?: count; + rcu_assign_pointer(ns->psp.dev, psd); + ret = count; +out: + mutex_unlock(&ns->psp.rereg_lock); + return ret; } static const struct file_operations nsim_psp_rereg_fops = { @@ -241,14 +266,16 @@ static const struct file_operations nsim_psp_rereg_fops = { int nsim_psp_init(struct netdevsim *ns) { struct dentry *ddir = ns->nsim_dev_port->ddir; - int err; + struct psp_dev *psd; + + psd = psp_dev_create(ns->netdev, &nsim_psp_ops, &nsim_psp_caps, ns); + if (IS_ERR(psd)) + return PTR_ERR(psd); - ns->psp.dev = psp_dev_create(ns->netdev, &nsim_psp_ops, - &nsim_psp_caps, ns); - err = PTR_ERR_OR_ZERO(ns->psp.dev); - if (err) - return err; + rcu_assign_pointer(ns->psp.dev, psd); - debugfs_create_file("psp_rereg", 0200, ddir, ns, &nsim_psp_rereg_fops); + mutex_init(&ns->psp.rereg_lock); + ns->psp.rereg = debugfs_create_file("psp_rereg", 0200, ddir, ns, + &nsim_psp_rereg_fops); return 0; } diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c index db43a1f8a07a..22c555dd962e 100644 --- a/drivers/net/ovpn/io.c +++ b/drivers/net/ovpn/io.c @@ -85,17 +85,24 @@ static void ovpn_netdev_write(struct ovpn_peer *peer, struct sk_buff *skb) skb_scrub_packet(skb, true); /* network header reset in ovpn_decrypt_post() */ + skb_reset_mac_header(skb); skb_reset_transport_header(skb); skb_reset_inner_headers(skb); /* cause packet to be "received" by the interface */ pkt_len = skb->len; + /* we may get here in process context in case of TCP connections, + * therefore we have to disable BHs to ensure gro_cells_receive() + * and dev_dstats_rx_add() do not get corrupted or enter deadlock + */ + local_bh_disable(); ret = gro_cells_receive(&peer->ovpn->gro_cells, skb); if (likely(ret == NET_RX_SUCCESS)) { /* update RX stats with the size of decrypted packet */ ovpn_peer_stats_increment_rx(&peer->vpn_stats, pkt_len); dev_dstats_rx_add(peer->ovpn->dev, pkt_len); } + local_bh_enable(); } void ovpn_decrypt_post(void *data, int ret) diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c index 5198d66dbbc0..b64beade8dd9 100644 --- a/drivers/net/phy/bcm-phy-lib.c +++ b/drivers/net/phy/bcm-phy-lib.c @@ -563,6 +563,15 @@ void bcm_phy_get_stats(struct phy_device *phydev, u64 *shadow, } EXPORT_SYMBOL_GPL(bcm_phy_get_stats); +void bcm_phy_update_stats_shadow(struct phy_device *phydev, u64 *shadow) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(bcm_phy_hw_stats); i++) + bcm_phy_get_stat(phydev, shadow, i); +} +EXPORT_SYMBOL_GPL(bcm_phy_update_stats_shadow); + void bcm_phy_r_rc_cal_reset(struct phy_device *phydev) { /* Reset R_CAL/RC_CAL Engine */ diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h index bceddbc860eb..bba94ce96195 100644 --- a/drivers/net/phy/bcm-phy-lib.h +++ b/drivers/net/phy/bcm-phy-lib.h @@ -85,6 +85,7 @@ int bcm_phy_get_sset_count(struct phy_device *phydev); void bcm_phy_get_strings(struct phy_device *phydev, u8 *data); void bcm_phy_get_stats(struct phy_device *phydev, u64 *shadow, struct ethtool_stats *stats, u64 *data); +void bcm_phy_update_stats_shadow(struct phy_device *phydev, u64 *shadow); void bcm_phy_r_rc_cal_reset(struct phy_device *phydev); int bcm_phy_28nm_a0b0_afe_config_init(struct phy_device *phydev); int bcm_phy_enable_jumbo(struct phy_device *phydev); diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 00e8fa14aa77..71a163f62c0e 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -807,6 +807,17 @@ static void bcm7xxx_28nm_get_phy_stats(struct phy_device *phydev, bcm_phy_get_stats(phydev, priv->stats, stats, data); } +static int bcm7xxx_28nm_suspend(struct phy_device *phydev) +{ + struct bcm7xxx_phy_priv *priv = phydev->priv; + + mutex_lock(&phydev->lock); + bcm_phy_update_stats_shadow(phydev, priv->stats); + mutex_unlock(&phydev->lock); + + return genphy_suspend(phydev); +} + static int bcm7xxx_28nm_probe(struct phy_device *phydev) { struct bcm7xxx_phy_priv *priv; @@ -849,6 +860,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .flags = PHY_IS_INTERNAL, \ .config_init = bcm7xxx_28nm_config_init, \ .resume = bcm7xxx_28nm_resume, \ + .suspend = bcm7xxx_28nm_suspend, \ .get_tunable = bcm7xxx_28nm_get_tunable, \ .set_tunable = bcm7xxx_28nm_set_tunable, \ .get_sset_count = bcm_phy_get_sset_count, \ @@ -866,6 +878,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .flags = PHY_IS_INTERNAL, \ .config_init = bcm7xxx_28nm_ephy_config_init, \ .resume = bcm7xxx_28nm_ephy_resume, \ + .suspend = bcm7xxx_28nm_suspend, \ .get_sset_count = bcm_phy_get_sset_count, \ .get_strings = bcm_phy_get_strings, \ .get_stats = bcm7xxx_28nm_get_phy_stats, \ @@ -902,6 +915,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .config_aneg = genphy_config_aneg, \ .read_status = genphy_read_status, \ .resume = bcm7xxx_16nm_ephy_resume, \ + .suspend = bcm7xxx_28nm_suspend, \ } static struct phy_driver bcm7xxx_driver[] = { diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index bf0c6a04481e..d1a4edb34ad2 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -592,8 +592,13 @@ static int bcm54xx_set_wakeup_irq(struct phy_device *phydev, bool state) static int bcm54xx_suspend(struct phy_device *phydev) { + struct bcm54xx_phy_priv *priv = phydev->priv; int ret = 0; + mutex_lock(&phydev->lock); + bcm_phy_update_stats_shadow(phydev, priv->stats); + mutex_unlock(&phydev->lock); + bcm54xx_ptp_stop(phydev); /* Acknowledge any Wake-on-LAN interrupt prior to suspend */ diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 2aa1dedd21b8..e211a523c258 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -4548,6 +4548,13 @@ static int lan8814_config_init(struct phy_device *phydev) struct kszphy_priv *lan8814 = phydev->priv; int ret; + if (phy_package_init_once(phydev)) + /* Reset the PHY */ + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_QSGMII_SOFT_RESET, + LAN8814_QSGMII_SOFT_RESET_BIT, + LAN8814_QSGMII_SOFT_RESET_BIT); + /* Based on the interface type select how the advertise ability is * encoded, to set as SGMII or as USGMII. */ @@ -4655,13 +4662,7 @@ static int lan8814_probe(struct phy_device *phydev) priv->is_ptp_available = err == LAN8814_REV_LAN8814 || err == LAN8814_REV_LAN8818; - if (phy_package_init_once(phydev)) { - /* Reset the PHY */ - lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, - LAN8814_QSGMII_SOFT_RESET, - LAN8814_QSGMII_SOFT_RESET_BIT, - LAN8814_QSGMII_SOFT_RESET_BIT); - + if (phy_package_probe_once(phydev)) { err = lan8814_release_coma_mode(phydev); if (err) return err; diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index df0bcfedddbc..293ef80c4e30 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -756,6 +756,7 @@ static void ax88772_mac_link_down(struct phylink_config *config, struct usbnet *dev = netdev_priv(to_net_dev(config->dev)); asix_write_medium_mode(dev, 0, 0); + usbnet_link_change(dev, false, false); } static void ax88772_mac_link_up(struct phylink_config *config, @@ -786,6 +787,7 @@ static void ax88772_mac_link_up(struct phylink_config *config, m |= AX_MEDIUM_RFC; asix_write_medium_mode(dev, m, 0); + usbnet_link_change(dev, true, false); } static const struct phylink_mac_ops ax88772_phylink_mac_ops = { diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index bb9929727eb9..0223a172851e 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -2012,6 +2012,14 @@ static const struct usb_device_id cdc_devs[] = { .driver_info = (unsigned long)&apple_private_interface_info, }, + /* Mac */ + { USB_DEVICE_INTERFACE_NUMBER(0x05ac, 0x1905, 0), + .driver_info = (unsigned long)&apple_private_interface_info, + }, + { USB_DEVICE_INTERFACE_NUMBER(0x05ac, 0x1905, 2), + .driver_info = (unsigned long)&apple_private_interface_info, + }, + /* Ericsson MBM devices like F5521gw */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_VENDOR, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 7337bf1b7d6a..1ace1d2398c9 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -10138,6 +10138,7 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_DELL, 0xb097) }, { USB_DEVICE(VENDOR_ID_ASUS, 0x1976) }, { USB_DEVICE(VENDOR_ID_TRENDNET, 0xe02b) }, + { USB_DEVICE(VENDOR_ID_TRENDNET, 0xe02c) }, {} }; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index e35df717e65e..0cfb19b760dd 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -972,7 +972,8 @@ static int veth_poll(struct napi_struct *napi, int budget) /* NAPI functions as RCU section */ peer_dev = rcu_dereference_check(priv->peer, rcu_read_lock_bh_held()); - peer_txq = peer_dev ? netdev_get_tx_queue(peer_dev, queue_idx) : NULL; + peer_txq = (peer_dev && queue_idx < peer_dev->real_num_tx_queues) ? + netdev_get_tx_queue(peer_dev, queue_idx) : NULL; xdp_set_return_frame_no_direct(); done = veth_xdp_rcv(rq, budget, &bq, &stats); diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 3bd57527b1be..15bfb78381d4 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -773,11 +773,6 @@ static void uhdlc_memclean(struct ucc_hdlc_private *priv) kfree(priv->tx_skbuff); priv->tx_skbuff = NULL; - if (priv->uf_regs) { - iounmap(priv->uf_regs); - priv->uf_regs = NULL; - } - if (priv->uccf) { ucc_fast_free(priv->uccf); priv->uccf = NULL; @@ -1255,12 +1250,12 @@ static void ucc_hdlc_remove(struct platform_device *pdev) uhdlc_memclean(priv); - if (priv->utdm->si_regs) { + if (priv->utdm && priv->utdm->si_regs) { iounmap(priv->utdm->si_regs); priv->utdm->si_regs = NULL; } - if (priv->utdm->siram) { + if (priv->utdm && priv->utdm->siram) { iounmap(priv->utdm->siram); priv->utdm->siram = NULL; } diff --git a/drivers/net/wireless/ath/ath10k/Kconfig b/drivers/net/wireless/ath/ath10k/Kconfig index 876aed765833..efb9f022d8c6 100644 --- a/drivers/net/wireless/ath/ath10k/Kconfig +++ b/drivers/net/wireless/ath/ath10k/Kconfig @@ -46,6 +46,7 @@ config ATH10K_SNOC depends on ARCH_QCOM || COMPILE_TEST depends on QCOM_SMEM depends on QCOM_RPROC_COMMON || QCOM_RPROC_COMMON=n + select POWER_SEQUENCING select QCOM_SCM select QCOM_QMI_HELPERS help diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 2519e2400d58..980a12fb2c6e 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -1838,10 +1838,22 @@ static struct ath12k_hw_group *ath12k_core_hw_group_alloc(struct ath12k_base *ab return ag; } +static void ath12k_core_free_wsi_info(struct ath12k_hw_group *ag) +{ + int i; + + for (i = 0; i < ag->num_devices; i++) { + of_node_put(ag->wsi_node[i]); + ag->wsi_node[i] = NULL; + } + ag->num_devices = 0; +} + static void ath12k_core_hw_group_free(struct ath12k_hw_group *ag) { mutex_lock(&ath12k_hw_group_mutex); + ath12k_core_free_wsi_info(ag); list_del(&ag->list); kfree(ag); @@ -1867,52 +1879,59 @@ static struct ath12k_hw_group *ath12k_core_hw_group_find_by_dt(struct ath12k_bas static int ath12k_core_get_wsi_info(struct ath12k_hw_group *ag, struct ath12k_base *ab) { - struct device_node *wsi_dev = ab->dev->of_node, *next_wsi_dev; - struct device_node *tx_endpoint, *next_rx_endpoint; - int device_count = 0; - - next_wsi_dev = wsi_dev; + struct device_node *next_wsi_dev; + int device_count = 0, ret = 0; + struct device_node *wsi_dev; - if (!next_wsi_dev) + wsi_dev = of_node_get(ab->dev->of_node); + if (!wsi_dev) return -ENODEV; do { - ag->wsi_node[device_count] = next_wsi_dev; + if (device_count >= ATH12K_MAX_DEVICES) { + ath12k_warn(ab, "device count in DT %d is more than limit %d\n", + device_count, ATH12K_MAX_DEVICES); + ret = -EINVAL; + break; + } + + ag->wsi_node[device_count++] = of_node_get(wsi_dev); - tx_endpoint = of_graph_get_endpoint_by_regs(next_wsi_dev, 0, -1); + struct device_node *tx_endpoint __free(device_node) = + of_graph_get_endpoint_by_regs(wsi_dev, 0, -1); if (!tx_endpoint) { - of_node_put(next_wsi_dev); - return -ENODEV; + ret = -ENODEV; + break; } - next_rx_endpoint = of_graph_get_remote_endpoint(tx_endpoint); + struct device_node *next_rx_endpoint __free(device_node) = + of_graph_get_remote_endpoint(tx_endpoint); if (!next_rx_endpoint) { - of_node_put(next_wsi_dev); - of_node_put(tx_endpoint); - return -ENODEV; + ret = -ENODEV; + break; } - of_node_put(tx_endpoint); - of_node_put(next_wsi_dev); - next_wsi_dev = of_graph_get_port_parent(next_rx_endpoint); if (!next_wsi_dev) { - of_node_put(next_rx_endpoint); - return -ENODEV; + ret = -ENODEV; + break; } - of_node_put(next_rx_endpoint); + of_node_put(wsi_dev); + wsi_dev = next_wsi_dev; + } while (ab->dev->of_node != wsi_dev); - device_count++; - if (device_count > ATH12K_MAX_DEVICES) { - ath12k_warn(ab, "device count in DT %d is more than limit %d\n", - device_count, ATH12K_MAX_DEVICES); - of_node_put(next_wsi_dev); - return -EINVAL; + if (ret) { + while (--device_count >= 0) { + of_node_put(ag->wsi_node[device_count]); + ag->wsi_node[device_count] = NULL; } - } while (wsi_dev != next_wsi_dev); - of_node_put(next_wsi_dev); + of_node_put(wsi_dev); + return ret; + } + + of_node_put(wsi_dev); ag->num_devices = device_count; return 0; @@ -1983,9 +2002,9 @@ static struct ath12k_hw_group *ath12k_core_hw_group_assign(struct ath12k_base *a ath12k_core_get_wsi_index(ag, ab)) { ath12k_dbg(ab, ATH12K_DBG_BOOT, "unable to get wsi info from dt, grouping single device"); + ath12k_core_free_wsi_info(ag); ag->id = ATH12K_INVALID_GROUP_ID; ag->num_devices = 1; - memset(ag->wsi_node, 0, sizeof(ag->wsi_node)); wsi->index = 0; } diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 250459facff3..b108ccd0f637 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -565,6 +565,9 @@ static int ath12k_dp_prepare_reo_update_elem(struct ath12k_dp *dp, lockdep_assert_held(&dp->dp_lock); + if (!peer->primary_link) + return 0; + elem = kzalloc_obj(*elem, GFP_ATOMIC); if (!elem) return -ENOMEM; @@ -1337,7 +1340,7 @@ void ath12k_dp_rx_deliver_msdu(struct ath12k_pdev_dp *dp_pdev, struct napi_struc bool is_mcbc = rxcb->is_mcbc; bool is_eapol = rxcb->is_eapol; - peer = ath12k_dp_peer_find_by_peerid(dp_pdev, rx_info->peer_id); + peer = ath12k_dp_peer_find_by_peerid(dp_pdev, rxcb->peer_id); pubsta = peer ? peer->sta : NULL; diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index fbdfe6424fd7..df2334f3bad6 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -788,7 +788,7 @@ struct ath12k_link_vif *ath12k_mac_get_arvif(struct ath12k *ar, u32 vdev_id) /* To use the arvif returned, caller must have held rcu read lock. */ - WARN_ON(!rcu_read_lock_any_held()); + lockdep_assert_in_rcu_read_lock(); arvif_iter.vdev_id = vdev_id; arvif_iter.ar = ar; diff --git a/drivers/net/wireless/ath/ath12k/p2p.c b/drivers/net/wireless/ath/ath12k/p2p.c index 59589748f1a8..19ebcd1d8eb2 100644 --- a/drivers/net/wireless/ath/ath12k/p2p.c +++ b/drivers/net/wireless/ath/ath12k/p2p.c @@ -123,7 +123,7 @@ static void ath12k_p2p_noa_update_vdev_iter(void *data, u8 *mac, struct ath12k_p2p_noa_arg *arg = data; struct ath12k_link_vif *arvif; - WARN_ON(!rcu_read_lock_any_held()); + lockdep_assert_in_rcu_read_lock(); arvif = &ahvif->deflink; if (!arvif->is_created || arvif->ar != arg->ar || arvif->vdev_id != arg->vdev_id) return; diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 65a05a9520ff..b5e904a55aea 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -9778,7 +9778,7 @@ static void ath12k_wmi_rssi_dbm_conversion_params_info_event(struct ath12k_base *ab, struct sk_buff *skb) { - struct ath12k_wmi_rssi_dbm_conv_info_arg rssi_info; + struct ath12k_wmi_rssi_dbm_conv_info_arg rssi_info = {}; struct ath12k *ar; s32 noise_floor; u32 pdev_id; @@ -10251,7 +10251,7 @@ int ath12k_wmi_hw_data_filter_cmd(struct ath12k *ar, struct wmi_hw_data_filter_a { struct wmi_hw_data_filter_cmd *cmd; struct sk_buff *skb; - int len; + int ret, len; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10275,7 +10275,13 @@ int ath12k_wmi_hw_data_filter_cmd(struct ath12k *ar, struct wmi_hw_data_filter_a "wmi hw data filter enable %d filter_bitmap 0x%x\n", arg->enable, arg->hw_filter_bitmap); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_HW_DATA_FILTER_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_HW_DATA_FILTER_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_HW_DATA_FILTER_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_wow_host_wakeup_ind(struct ath12k *ar) @@ -10283,6 +10289,7 @@ int ath12k_wmi_wow_host_wakeup_ind(struct ath12k *ar) struct wmi_wow_host_wakeup_cmd *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10295,14 +10302,20 @@ int ath12k_wmi_wow_host_wakeup_ind(struct ath12k *ar) ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow host wakeup ind\n"); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_wow_enable(struct ath12k *ar) { struct wmi_wow_enable_cmd *cmd; struct sk_buff *skb; - int len; + int ret, len; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10317,7 +10330,13 @@ int ath12k_wmi_wow_enable(struct ath12k *ar) cmd->pause_iface_config = cpu_to_le32(WOW_IFACE_PAUSE_ENABLED); ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow enable\n"); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_WOW_ENABLE_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_wow_add_wakeup_event(struct ath12k *ar, u32 vdev_id, @@ -10327,6 +10346,7 @@ int ath12k_wmi_wow_add_wakeup_event(struct ath12k *ar, u32 vdev_id, struct wmi_wow_add_del_event_cmd *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10343,7 +10363,13 @@ int ath12k_wmi_wow_add_wakeup_event(struct ath12k *ar, u32 vdev_id, ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow add wakeup event %s enable %d vdev_id %d\n", wow_wakeup_event(event), enable, vdev_id); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_wow_add_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id, @@ -10356,6 +10382,7 @@ int ath12k_wmi_wow_add_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id, struct sk_buff *skb; void *ptr; size_t len; + int ret; len = sizeof(*cmd) + sizeof(*tlv) + /* array struct */ @@ -10435,7 +10462,13 @@ int ath12k_wmi_wow_add_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id, ath12k_dbg_dump(ar->ab, ATH12K_DBG_WMI, NULL, "wow bitmask: ", bitmap->bitmaskbuf, pattern_len); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ADD_WAKE_PATTERN_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ADD_WAKE_PATTERN_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_WOW_ADD_WAKE_PATTERN_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_wow_del_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id) @@ -10443,6 +10476,7 @@ int ath12k_wmi_wow_del_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id) struct wmi_wow_del_pattern_cmd *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10459,7 +10493,13 @@ int ath12k_wmi_wow_del_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id) ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow del pattern vdev_id %d pattern_id %d\n", vdev_id, pattern_id); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_DEL_WAKE_PATTERN_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_DEL_WAKE_PATTERN_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_WOW_DEL_WAKE_PATTERN_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } static struct sk_buff * @@ -10595,6 +10635,7 @@ int ath12k_wmi_wow_config_pno(struct ath12k *ar, u32 vdev_id, struct wmi_pno_scan_req_arg *pno_scan) { struct sk_buff *skb; + int ret; if (pno_scan->enable) skb = ath12k_wmi_op_gen_config_pno_start(ar, vdev_id, pno_scan); @@ -10604,7 +10645,13 @@ int ath12k_wmi_wow_config_pno(struct ath12k *ar, u32 vdev_id, if (IS_ERR_OR_NULL(skb)) return -ENOMEM; - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } static void ath12k_wmi_fill_ns_offload(struct ath12k *ar, @@ -10717,6 +10764,7 @@ int ath12k_wmi_arp_ns_offload(struct ath12k *ar, void *buf_ptr; size_t len; u8 ns_cnt, ns_ext_tuples = 0; + int ret; ns_cnt = offload->ipv6_count; @@ -10752,7 +10800,13 @@ int ath12k_wmi_arp_ns_offload(struct ath12k *ar, if (ns_ext_tuples) ath12k_wmi_fill_ns_offload(ar, offload, &buf_ptr, enable, 1); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_SET_ARP_NS_OFFLOAD_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_SET_ARP_NS_OFFLOAD_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_SET_ARP_NS_OFFLOAD_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_gtk_rekey_offload(struct ath12k *ar, @@ -10762,7 +10816,7 @@ int ath12k_wmi_gtk_rekey_offload(struct ath12k *ar, struct wmi_gtk_rekey_offload_cmd *cmd; struct sk_buff *skb; __le64 replay_ctr; - int len; + int ret, len; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10789,7 +10843,13 @@ int ath12k_wmi_gtk_rekey_offload(struct ath12k *ar, ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "offload gtk rekey vdev: %d %d\n", arvif->vdev_id, enable); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_GTK_OFFLOAD_CMDID offload\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_gtk_rekey_getinfo(struct ath12k *ar, @@ -10797,7 +10857,7 @@ int ath12k_wmi_gtk_rekey_getinfo(struct ath12k *ar, { struct wmi_gtk_rekey_offload_cmd *cmd; struct sk_buff *skb; - int len; + int ret, len; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10811,7 +10871,13 @@ int ath12k_wmi_gtk_rekey_getinfo(struct ath12k *ar, ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "get gtk rekey vdev_id: %d\n", arvif->vdev_id); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_GTK_OFFLOAD_CMDID getinfo\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_sta_keepalive(struct ath12k *ar, @@ -10822,6 +10888,7 @@ int ath12k_wmi_sta_keepalive(struct ath12k *ar, struct wmi_sta_keepalive_cmd *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd) + sizeof(*arp); skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, len); @@ -10849,7 +10916,13 @@ int ath12k_wmi_sta_keepalive(struct ath12k *ar, "wmi sta keepalive vdev %d enabled %d method %d interval %d\n", arg->vdev_id, arg->enabled, arg->method, arg->interval); - return ath12k_wmi_cmd_send(wmi, skb, WMI_STA_KEEPALIVE_CMDID); + ret = ath12k_wmi_cmd_send(wmi, skb, WMI_STA_KEEPALIVE_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_STA_KEEPALIVE_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_mlo_setup(struct ath12k *ar, struct wmi_mlo_setup_arg *mlo_params) diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 05c9c07591fc..6ca31d4ea437 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -1738,7 +1738,8 @@ ath5k_tx_frame_completed(struct ath5k_hw *ah, struct sk_buff *skb, } info->status.rates[ts->ts_final_idx].count = ts->ts_final_retry; - info->status.rates[ts->ts_final_idx + 1].idx = -1; + if (ts->ts_final_idx + 1 < IEEE80211_TX_MAX_RATES) + info->status.rates[ts->ts_final_idx + 1].idx = -1; if (unlikely(ts->ts_status)) { ah->stats.ack_fail++; diff --git a/drivers/net/wireless/broadcom/b43/xmit.c b/drivers/net/wireless/broadcom/b43/xmit.c index 7651b1bdb592..f0b082596637 100644 --- a/drivers/net/wireless/broadcom/b43/xmit.c +++ b/drivers/net/wireless/broadcom/b43/xmit.c @@ -702,7 +702,8 @@ void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr) * key index, but the ucode passed it slightly different. */ keyidx = b43_kidx_to_raw(dev, keyidx); - B43_WARN_ON(keyidx >= ARRAY_SIZE(dev->key)); + if (B43_WARN_ON(keyidx >= ARRAY_SIZE(dev->key))) + goto drop; if (dev->key[keyidx].algorithm != B43_SEC_ALGO_NONE) { wlhdr_len = ieee80211_hdrlen(fctl); diff --git a/drivers/net/wireless/broadcom/b43legacy/xmit.c b/drivers/net/wireless/broadcom/b43legacy/xmit.c index efd63f4ce74f..ee199d4eaf03 100644 --- a/drivers/net/wireless/broadcom/b43legacy/xmit.c +++ b/drivers/net/wireless/broadcom/b43legacy/xmit.c @@ -476,7 +476,8 @@ void b43legacy_rx(struct b43legacy_wldev *dev, * key index, but the ucode passed it slightly different. */ keyidx = b43legacy_kidx_to_raw(dev, keyidx); - B43legacy_WARN_ON(keyidx >= dev->max_nr_keys); + if (B43legacy_WARN_ON(keyidx >= dev->max_nr_keys)) + goto drop; if (dev->key[keyidx].algorithm != B43legacy_SEC_ALGO_NONE) { /* Remove PROTECTED flag to mark it as decrypted. */ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 30f6fcb68632..8fb595733b9c 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -2476,8 +2476,9 @@ static void brcmf_sdio_bus_stop(struct device *dev) brcmf_dbg(TRACE, "Enter\n"); if (bus->watchdog_tsk) { + get_task_struct(bus->watchdog_tsk); send_sig(SIGTERM, bus->watchdog_tsk, 1); - kthread_stop(bus->watchdog_tsk); + kthread_stop_put(bus->watchdog_tsk); bus->watchdog_tsk = NULL; } @@ -4567,8 +4568,9 @@ void brcmf_sdio_remove(struct brcmf_sdio *bus) if (bus) { /* Stop watchdog task */ if (bus->watchdog_tsk) { + get_task_struct(bus->watchdog_tsk); send_sig(SIGTERM, bus->watchdog_tsk, 1); - kthread_stop(bus->watchdog_tsk); + kthread_stop_put(bus->watchdog_tsk); bus->watchdog_tsk = NULL; } diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index 4fae0e335136..5cc0c5cac257 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c @@ -310,6 +310,7 @@ static void if_usb_disconnect(struct usb_interface *intf) struct lbs_private *priv = cardp->priv; cardp->surprise_removed = 1; + wake_up(&cardp->fw_wq); if (priv) { lbs_stop_card(priv); @@ -633,9 +634,10 @@ static inline void process_cmdrequest(int recvlength, uint8_t *recvbuff, unsigned long flags; u8 i; - if (recvlength > LBS_CMD_BUFFER_SIZE) { + if (recvlength < MESSAGE_HEADER_LEN || + recvlength > LBS_CMD_BUFFER_SIZE) { lbs_deb_usbd(&cardp->udev->dev, - "The receive buffer is too large\n"); + "The receive buffer is invalid: %d\n", recvlength); kfree_skb(skb); return; } diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h index 591602beeec6..3cdf9ded876d 100644 --- a/drivers/net/wireless/rsi/rsi_common.h +++ b/drivers/net/wireless/rsi/rsi_common.h @@ -70,12 +70,11 @@ static inline int rsi_create_kthread(struct rsi_common *common, return 0; } -static inline int rsi_kill_thread(struct rsi_thread *handle) +static inline void rsi_kill_thread(struct rsi_thread *handle) { atomic_inc(&handle->thread_done); rsi_set_event(&handle->event); - - return kthread_stop(handle->task); + wait_for_completion(&handle->completion); } void rsi_mac80211_detach(struct rsi_hw *hw); diff --git a/drivers/net/wireless/st/cw1200/pm.c b/drivers/net/wireless/st/cw1200/pm.c index 84eb15d729c7..120f0379f81d 100644 --- a/drivers/net/wireless/st/cw1200/pm.c +++ b/drivers/net/wireless/st/cw1200/pm.c @@ -264,14 +264,12 @@ int cw1200_wow_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) wiphy_err(priv->hw->wiphy, "PM request failed: %d. WoW is disabled.\n", ret); cw1200_wow_resume(hw); - mutex_unlock(&priv->conf_mutex); return -EBUSY; } /* Force resume if event is coming from the device. */ if (atomic_read(&priv->bh_rx)) { cw1200_wow_resume(hw); - mutex_unlock(&priv->conf_mutex); return -EAGAIN; } diff --git a/drivers/net/wwan/t7xx/t7xx_modem_ops.c b/drivers/net/wwan/t7xx/t7xx_modem_ops.c index 7968e208dd37..adb29d30c63f 100644 --- a/drivers/net/wwan/t7xx/t7xx_modem_ops.c +++ b/drivers/net/wwan/t7xx/t7xx_modem_ops.c @@ -457,8 +457,20 @@ static int t7xx_parse_host_rt_data(struct t7xx_fsm_ctl *ctl, struct t7xx_sys_inf offset = sizeof(struct feature_query); for (i = 0; i < FEATURE_COUNT && offset < data_length; i++) { + size_t remaining = data_length - offset; + size_t feat_data_len, feat_total; + + if (remaining < sizeof(*rt_feature)) + break; + rt_feature = data + offset; - offset += sizeof(*rt_feature) + le32_to_cpu(rt_feature->data_len); + feat_data_len = le32_to_cpu(rt_feature->data_len); + + if (feat_data_len > remaining - sizeof(*rt_feature)) + break; + + feat_total = sizeof(*rt_feature) + feat_data_len; + offset += feat_total; ft_spt_cfg = FIELD_GET(FEATURE_MSK, core->feature_set[i]); if (ft_spt_cfg != MTK_FEATURE_MUST_BE_SUPPORTED) @@ -468,8 +480,10 @@ static int t7xx_parse_host_rt_data(struct t7xx_fsm_ctl *ctl, struct t7xx_sys_inf if (ft_spt_st != MTK_FEATURE_MUST_BE_SUPPORTED) return -EINVAL; - if (i == RT_ID_MD_PORT_ENUM || i == RT_ID_AP_PORT_ENUM) - t7xx_port_enum_msg_handler(ctl->md, rt_feature->data); + if (i == RT_ID_MD_PORT_ENUM || i == RT_ID_AP_PORT_ENUM) { + t7xx_port_enum_msg_handler(ctl->md, rt_feature->data, + feat_data_len); + } } return 0; diff --git a/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c b/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c index ae632ef96698..f869e4ed9ee9 100644 --- a/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c +++ b/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c @@ -117,6 +117,7 @@ static int fsm_ee_message_handler(struct t7xx_port *port, struct t7xx_fsm_ctl *c * t7xx_port_enum_msg_handler() - Parse the port enumeration message to create/remove nodes. * @md: Modem context. * @msg: Message. + * @msg_len: Length of @msg in bytes. * * Used to control create/remove device node. * @@ -124,12 +125,18 @@ static int fsm_ee_message_handler(struct t7xx_port *port, struct t7xx_fsm_ctl *c * * 0 - Success. * * -EFAULT - Message check failure. */ -int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg) +int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg, size_t msg_len) { struct device *dev = &md->t7xx_dev->pdev->dev; unsigned int version, port_count, i; struct port_msg *port_msg = msg; + if (msg_len < sizeof(*port_msg)) { + dev_err(dev, "Port enum msg too short for header: need %zu, have %zu\n", + sizeof(*port_msg), msg_len); + return -EINVAL; + } + version = FIELD_GET(PORT_MSG_VERSION, le32_to_cpu(port_msg->info)); if (version != PORT_ENUM_VER || le32_to_cpu(port_msg->head_pattern) != PORT_ENUM_HEAD_PATTERN || @@ -141,6 +148,13 @@ int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg) } port_count = FIELD_GET(PORT_MSG_PRT_CNT, le32_to_cpu(port_msg->info)); + + if (msg_len < struct_size(port_msg, data, port_count)) { + dev_err(dev, "Port enum msg too short: need %zu, have %zu\n", + struct_size(port_msg, data, port_count), msg_len); + return -EINVAL; + } + for (i = 0; i < port_count; i++) { u32 port_info = le32_to_cpu(port_msg->data[i]); unsigned int ch_id; @@ -191,7 +205,7 @@ static int control_msg_handler(struct t7xx_port *port, struct sk_buff *skb) case CTL_ID_PORT_ENUM: skb_pull(skb, sizeof(*ctrl_msg_h)); - ret = t7xx_port_enum_msg_handler(ctl->md, (struct port_msg *)skb->data); + ret = t7xx_port_enum_msg_handler(ctl->md, (struct port_msg *)skb->data, skb->len); if (!ret) ret = port_ctl_send_msg_to_md(port, CTL_ID_PORT_ENUM, 0); else diff --git a/drivers/net/wwan/t7xx/t7xx_port_proxy.h b/drivers/net/wwan/t7xx/t7xx_port_proxy.h index f0918b36e899..7c3190bf0fcf 100644 --- a/drivers/net/wwan/t7xx/t7xx_port_proxy.h +++ b/drivers/net/wwan/t7xx/t7xx_port_proxy.h @@ -103,7 +103,7 @@ void t7xx_port_proxy_reset(struct port_proxy *port_prox); void t7xx_port_proxy_uninit(struct port_proxy *port_prox); int t7xx_port_proxy_init(struct t7xx_modem *md); void t7xx_port_proxy_md_status_notify(struct port_proxy *port_prox, unsigned int state); -int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg); +int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg, size_t msg_len); int t7xx_port_proxy_chl_enable_disable(struct port_proxy *port_prox, unsigned int ch_id, bool en_flag); void t7xx_port_proxy_set_cfg(struct t7xx_modem *md, enum port_cfg_id cfg_id); diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 77c778d84d4c..a81b46af5118 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -146,6 +146,9 @@ struct xt_match { /* Called when user tries to insert an entry of this type. */ int (*checkentry)(const struct xt_mtchk_param *); + /* Called to validate hooks based on the match configuration. */ + int (*check_hooks)(const struct xt_mtchk_param *); + /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_mtdtor_param *); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT @@ -187,6 +190,9 @@ struct xt_target { /* Should return 0 on success or an error code otherwise (-Exxxx). */ int (*checkentry)(const struct xt_tgchk_param *); + /* Called to validate hooks based on the target configuration. */ + int (*check_hooks)(const struct xt_tgchk_param *); + /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_tgdtor_param *); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT @@ -279,8 +285,10 @@ bool xt_find_jump_offset(const unsigned int *offsets, int xt_check_proc_name(const char *name, unsigned int size); +int xt_check_hooks_match(struct xt_mtchk_param *par); int xt_check_match(struct xt_mtchk_param *, unsigned int size, u16 proto, bool inv_proto); +int xt_check_hooks_target(struct xt_tgchk_param *par); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u16 proto, bool inv_proto); diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index dc3975ff1b2e..cf0fd03dd7a2 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -21,6 +21,11 @@ enum hk_type { HK_TYPE_MAX, /* + * HK_TYPE_KTHREAD is now an alias of HK_TYPE_DOMAIN + */ + HK_TYPE_KTHREAD = HK_TYPE_DOMAIN, + + /* * The following housekeeping types are only set by the nohz_full * boot commandline option. So they can share the same value. */ @@ -29,7 +34,6 @@ enum hk_type { HK_TYPE_RCU = HK_TYPE_KERNEL_NOISE, HK_TYPE_MISC = HK_TYPE_KERNEL_NOISE, HK_TYPE_WQ = HK_TYPE_KERNEL_NOISE, - HK_TYPE_KTHREAD = HK_TYPE_KERNEL_NOISE }; #ifdef CONFIG_CPU_ISOLATION diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a7bffb908c1e..aa600fbf9a53 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2495,7 +2495,7 @@ void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type); int hci_abort_conn(struct hci_conn *conn, u8 reason); -u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, +void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, __u8 ltk[16], __u8 key_size); diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index e0ca3904ff8e..2f312d1f67d6 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -99,6 +99,7 @@ FN(FRAG_TOO_FAR) \ FN(TCP_MINTTL) \ FN(IPV6_BAD_EXTHDR) \ + FN(IPV6_TOO_MANY_EXTHDRS) \ FN(IPV6_NDISC_FRAG) \ FN(IPV6_NDISC_HOP_LIMIT) \ FN(IPV6_NDISC_BAD_CODE) \ @@ -494,6 +495,11 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_MINTTL, /** @SKB_DROP_REASON_IPV6_BAD_EXTHDR: Bad IPv6 extension header. */ SKB_DROP_REASON_IPV6_BAD_EXTHDR, + /** + * @SKB_DROP_REASON_IPV6_TOO_MANY_EXTHDRS: Number of IPv6 extension + * headers in the packet exceeds IP6_MAX_EXT_HDRS_CNT. + */ + SKB_DROP_REASON_IPV6_TOO_MANY_EXTHDRS, /** @SKB_DROP_REASON_IPV6_NDISC_FRAG: invalid frag (suppress_frag_ndisc). */ SKB_DROP_REASON_IPV6_NDISC_FRAG, /** @SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT: invalid hop limit. */ diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 72d325c81313..02762ce73a0c 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -491,6 +491,7 @@ struct ip_vs_est_kt_data { DECLARE_BITMAP(avail, IPVS_EST_NTICKS); /* tick has space for ests */ unsigned long est_timer; /* estimation timer (jiffies) */ struct ip_vs_stats *calc_stats; /* Used for calculation */ + int needed; /* task is needed */ int tick_len[IPVS_EST_NTICKS]; /* est count */ int id; /* ktid per netns */ int chain_max; /* max ests per tick chain */ @@ -1411,7 +1412,7 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) return ipvs->sysctl_run_estimation; } -static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) +static inline const struct cpumask *__sysctl_est_cpulist(struct netns_ipvs *ipvs) { if (ipvs->est_cpulist_valid) return ipvs->sysctl_est_cpulist; @@ -1529,7 +1530,7 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) return 1; } -static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) +static inline const struct cpumask *__sysctl_est_cpulist(struct netns_ipvs *ipvs) { return housekeeping_cpumask(HK_TYPE_KTHREAD); } @@ -1564,6 +1565,18 @@ static inline int sysctl_svc_lfactor(struct netns_ipvs *ipvs) return READ_ONCE(ipvs->sysctl_svc_lfactor); } +static inline bool sysctl_est_cpulist_empty(struct netns_ipvs *ipvs) +{ + guard(rcu)(); + return cpumask_empty(__sysctl_est_cpulist(ipvs)); +} + +static inline unsigned int sysctl_est_cpulist_weight(struct netns_ipvs *ipvs) +{ + guard(rcu)(); + return cpumask_weight(__sysctl_est_cpulist(ipvs)); +} + /* IPVS core functions * (from ip_vs_core.c) */ @@ -1884,18 +1897,26 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats); void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats); void ip_vs_zero_estimator(struct ip_vs_stats *stats); void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats); -void ip_vs_est_reload_start(struct netns_ipvs *ipvs); +void ip_vs_est_reload_start(struct netns_ipvs *ipvs, bool restart); int ip_vs_est_kthread_start(struct netns_ipvs *ipvs, struct ip_vs_est_kt_data *kd); void ip_vs_est_kthread_stop(struct ip_vs_est_kt_data *kd); +static inline void ip_vs_stop_estimator_tot_stats(struct netns_ipvs *ipvs) +{ +#ifdef CONFIG_SYSCTL + ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s); + ipvs->tot_stats->s.est.ktid = -2; +#endif +} + static inline void ip_vs_est_stopped_recalc(struct netns_ipvs *ipvs) { #ifdef CONFIG_SYSCTL /* Stop tasks while cpulist is empty or if disabled with flag */ ipvs->est_stopped = !sysctl_run_estimation(ipvs) || (ipvs->est_cpulist_valid && - cpumask_empty(sysctl_est_cpulist(ipvs))); + sysctl_est_cpulist_empty(ipvs)); #endif } @@ -1911,7 +1932,7 @@ static inline bool ip_vs_est_stopped(struct netns_ipvs *ipvs) static inline int ip_vs_est_max_threads(struct netns_ipvs *ipvs) { unsigned int limit = IPVS_EST_CPU_KTHREADS * - cpumask_weight(sysctl_est_cpulist(ipvs)); + sysctl_est_cpulist_weight(ipvs); return max(1U, limit); } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d042afe7a245..1dec81faff28 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -90,6 +90,9 @@ struct ip_tunnel_info; #define IP6_DEFAULT_MAX_DST_OPTS_LEN INT_MAX /* No limit */ #define IP6_DEFAULT_MAX_HBH_OPTS_LEN INT_MAX /* No limit */ +/* Hard limit on traversed IPv6 extension headers */ +#define IP6_MAX_EXT_HDRS_CNT 12 + /* * Addr type * diff --git a/include/net/mana/shm_channel.h b/include/net/mana/shm_channel.h index 5199b41497ff..dbabcfb95daf 100644 --- a/include/net/mana/shm_channel.h +++ b/include/net/mana/shm_channel.h @@ -4,6 +4,12 @@ #ifndef _SHM_CHANNEL_H #define _SHM_CHANNEL_H +#define SMC_APERTURE_BITS 256 +#define SMC_BASIC_UNIT (sizeof(u32)) +#define SMC_APERTURE_DWORDS (SMC_APERTURE_BITS / (SMC_BASIC_UNIT * 8)) +#define SMC_LAST_DWORD (SMC_APERTURE_DWORDS - 1) +#define SMC_APERTURE_SIZE (SMC_APERTURE_BITS / 8) + struct shm_channel { struct device *dev; void __iomem *base; diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h index b175d271aec9..609bcf422a9b 100644 --- a/include/net/netfilter/nf_dup_netdev.h +++ b/include/net/netfilter/nf_dup_netdev.h @@ -3,10 +3,23 @@ #define _NF_DUP_NETDEV_H_ #include <net/netfilter/nf_tables.h> +#include <linux/netdevice.h> +#include <linux/sched.h> void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif); void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif); +#define NF_RECURSION_LIMIT 2 + +static inline u8 *nf_get_nf_dup_skb_recursion(void) +{ +#ifndef CONFIG_PREEMPT_RT + return this_cpu_ptr(&softnet_data.xmit.nf_dup_skb_recursion); +#else + return ¤t->net_xmit.nf_dup_skb_recursion; +#endif +} + struct nft_offload_ctx; struct nft_flow_rule; diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index b09c11c048d5..7b23b245a5a8 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -148,9 +148,10 @@ struct flow_offload_tuple { /* All members above are keys for lookups, see flow_offload_hash(). */ struct { } __hash; - u8 dir:2, + u16 dir:2, xmit_type:3, encap_num:2, + needs_gso_segment:1, tun_num:2, in_vlan_ingress:2; u16 mtu; @@ -232,6 +233,7 @@ struct nf_flow_route { u32 hw_ifindex; u8 h_source[ETH_ALEN]; u8 h_dest[ETH_ALEN]; + u8 needs_gso_segment:1; } out; enum flow_offload_xmit_type xmit_type; } tuple[FLOW_OFFLOAD_DIR_MAX]; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 80ccd4dda8e0..6e27c56514df 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -275,7 +275,7 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MROUTE #ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES - struct mr_table *mrt; + struct mr_table __rcu *mrt; #else struct list_head mr_tables; struct fib_rules_ops *mr_rules_ops; diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index d44987d4515c..853c8d7644b5 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -330,11 +330,18 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) goto badframe; break; case BNEP_FILTER_MULTI_ADDR_SET: - case BNEP_FILTER_NET_TYPE_SET: - /* Pull: ctrl type (1 b), len (2 b), data (len bytes) */ - if (!skb_pull(skb, 3 + *(u16 *)(skb->data + 1) * 2)) + case BNEP_FILTER_NET_TYPE_SET: { + u8 *hdr; + + /* Pull ctrl type (1 b) + len (2 b) */ + hdr = skb_pull_data(skb, 3); + if (!hdr) + goto badframe; + /* Pull data (len bytes); length is big-endian */ + if (!skb_pull(skb, get_unaligned_be16(&hdr[1]))) goto badframe; break; + } default: kfree_skb(skb); return 0; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 3a0592599086..17b46ad6a349 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -480,40 +480,107 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle) return hci_setup_sync_conn(conn, handle); } -u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, - u16 to_multiplier) +struct le_conn_update_data { + struct hci_conn *conn; + u16 min; + u16 max; + u16 latency; + u16 to_multiplier; +}; + +static int le_conn_update_sync(struct hci_dev *hdev, void *data) { - struct hci_dev *hdev = conn->hdev; + struct le_conn_update_data *d = data; + struct hci_conn *conn = d->conn; struct hci_conn_params *params; struct hci_cp_le_conn_update cp; + u16 timeout; + u8 store_hint; + int err; + /* Verify connection is still alive and read conn fields under + * the same lock to prevent a concurrent disconnect from freeing + * or reusing the connection while we build the HCI command. + */ hci_dev_lock(hdev); - params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); - if (params) { - params->conn_min_interval = min; - params->conn_max_interval = max; - params->conn_latency = latency; - params->supervision_timeout = to_multiplier; + if (!hci_conn_valid(hdev, conn)) { + hci_dev_unlock(hdev); + return -ECANCELED; } - hci_dev_unlock(hdev); - memset(&cp, 0, sizeof(cp)); cp.handle = cpu_to_le16(conn->handle); - cp.conn_interval_min = cpu_to_le16(min); - cp.conn_interval_max = cpu_to_le16(max); - cp.conn_latency = cpu_to_le16(latency); - cp.supervision_timeout = cpu_to_le16(to_multiplier); + cp.conn_interval_min = cpu_to_le16(d->min); + cp.conn_interval_max = cpu_to_le16(d->max); + cp.conn_latency = cpu_to_le16(d->latency); + cp.supervision_timeout = cpu_to_le16(d->to_multiplier); cp.min_ce_len = cpu_to_le16(0x0000); cp.max_ce_len = cpu_to_le16(0x0000); + timeout = conn->conn_timeout; + + hci_dev_unlock(hdev); - hci_send_cmd(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp); + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CONN_UPDATE, + sizeof(cp), &cp, + HCI_EV_LE_CONN_UPDATE_COMPLETE, + timeout, NULL); + if (err) + return err; + + /* Update stored connection parameters after the controller has + * confirmed the update via the LE Connection Update Complete event. + */ + hci_dev_lock(hdev); + + params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); + if (params) { + params->conn_min_interval = d->min; + params->conn_max_interval = d->max; + params->conn_latency = d->latency; + params->supervision_timeout = d->to_multiplier; + store_hint = 0x01; + } else { + store_hint = 0x00; + } - if (params) - return 0x01; + hci_dev_unlock(hdev); - return 0x00; + mgmt_new_conn_param(hdev, &conn->dst, conn->dst_type, store_hint, + d->min, d->max, d->latency, d->to_multiplier); + + return 0; +} + +static void le_conn_update_complete(struct hci_dev *hdev, void *data, int err) +{ + struct le_conn_update_data *d = data; + + hci_conn_put(d->conn); + kfree(d); +} + +void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, + u16 to_multiplier) +{ + struct le_conn_update_data *d; + + d = kzalloc_obj(*d); + if (!d) + return; + + hci_conn_get(conn); + d->conn = conn; + d->min = min; + d->max = max; + d->latency = latency; + d->to_multiplier = to_multiplier; + + if (hci_cmd_sync_queue(conn->hdev, le_conn_update_sync, d, + le_conn_update_complete) < 0) { + hci_conn_put(conn); + kfree(d); + } } void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, @@ -2130,6 +2197,9 @@ static int create_big_sync(struct hci_dev *hdev, void *data) u32 flags = 0; int err; + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; + if (qos->bcast.out.phys == BIT(1)) flags |= MGMT_ADV_FLAG_SEC_2M; @@ -2204,11 +2274,24 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "conn %p", conn); + if (err == -ECANCELED) + goto done; + + hci_dev_lock(hdev); + + if (!hci_conn_valid(hdev, conn)) + goto unlock; + if (err) { bt_dev_err(hdev, "Unable to create BIG: %d", err); hci_connect_cfm(conn, err); hci_conn_del(conn); } + +unlock: + hci_dev_unlock(hdev); +done: + hci_conn_put(conn); } struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid, @@ -2336,10 +2419,11 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, BT_BOUND, &data); /* Queue start periodic advertising and create BIG */ - err = hci_cmd_sync_queue(hdev, create_big_sync, conn, + err = hci_cmd_sync_queue(hdev, create_big_sync, hci_conn_get(conn), create_big_complete); if (err < 0) { hci_conn_drop(conn); + hci_conn_put(conn); return ERR_PTR(err); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b2ee6b6a0f56..eea2f810aafa 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -7118,9 +7118,29 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, continue; } + if (ev->num_bis <= i) { + bt_dev_err(hdev, + "Not enough BIS handles for BIG 0x%2.2x", + ev->handle); + ev->status = HCI_ERROR_UNSPECIFIED; + hci_connect_cfm(conn, ev->status); + hci_conn_del(conn); + continue; + } + if (hci_conn_set_handle(conn, - __le16_to_cpu(ev->bis_handle[i++]))) + __le16_to_cpu(ev->bis_handle[i++]))) { + bt_dev_err(hdev, + "Failed to set BIS handle for BIG 0x%2.2x", + ev->handle); + /* Force error so BIG gets terminated as not all BIS + * could be connected. + */ + ev->status = HCI_ERROR_UNSPECIFIED; + hci_connect_cfm(conn, ev->status); + hci_conn_del(conn); continue; + } conn->state = BT_CONNECTED; set_bit(HCI_CONN_BIG_CREATED, &conn->flags); @@ -7129,7 +7149,10 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, hci_iso_setup_path(conn); } - if (!ev->status && !i) + /* If there is an unexpected error or if no BISes have been connected + * for the BIG, terminate it. + */ + if (ev->status == HCI_ERROR_UNSPECIFIED || (!ev->status && !i)) /* If no BISes have been connected for the BIG, * terminate. This is in case all bound connections * have been closed before the BIG creation @@ -7168,7 +7191,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, clear_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags); conn->num_bis = 0; - memset(conn->bis, 0, sizeof(conn->num_bis)); + memset(conn->bis, 0, sizeof(conn->bis)); for (i = 0; i < ev->num_bis; i++) { u16 handle = le16_to_cpu(ev->bis[i]); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 7bcf8c5ceaee..976f91eeb745 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1036,6 +1036,28 @@ static struct hidp_session *hidp_session_find(const bdaddr_t *bdaddr) } /* + * Consume session->conn: clear the member under hidp_session_sem, then + * l2cap_unregister_user() and l2cap_conn_put() the snapshot outside the + * sem. At most one caller wins; later callers see NULL and skip. The + * reference is the one hidp_session_new() took via l2cap_conn_get(). + */ +static void hidp_session_unregister_conn(struct hidp_session *session) +{ + struct l2cap_conn *conn; + + down_write(&hidp_session_sem); + conn = session->conn; + if (conn) + session->conn = NULL; + up_write(&hidp_session_sem); + + if (conn) { + l2cap_unregister_user(conn, &session->user); + l2cap_conn_put(conn); + } +} + +/* * Start session synchronously * This starts a session thread and waits until initialization * is done or returns an error if it couldn't be started. @@ -1311,8 +1333,7 @@ static int hidp_session_thread(void *arg) * Instead, this call has the same semantics as if user-space tried to * delete the session. */ - if (session->conn) - l2cap_unregister_user(session->conn, &session->user); + hidp_session_unregister_conn(session); hidp_session_put(session); @@ -1418,7 +1439,7 @@ int hidp_connection_del(struct hidp_conndel_req *req) HIDP_CTRL_VIRTUAL_CABLE_UNPLUG, NULL, 0); else - l2cap_unregister_user(session->conn, &session->user); + hidp_session_unregister_conn(session); hidp_session_put(session); diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index be145e2736b7..7cb2864fe872 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -347,6 +347,7 @@ static int iso_connect_bis(struct sock *sk) return -EHOSTUNREACH; hci_dev_lock(hdev); + lock_sock(sk); if (!bis_capable(hdev)) { err = -EOPNOTSUPP; @@ -399,13 +400,9 @@ static int iso_connect_bis(struct sock *sk) goto unlock; } - lock_sock(sk); - err = iso_chan_add(conn, sk, NULL); - if (err) { - release_sock(sk); + if (err) goto unlock; - } /* Update source addr of the socket */ bacpy(&iso_pi(sk)->src, &hcon->src); @@ -421,9 +418,8 @@ static int iso_connect_bis(struct sock *sk) iso_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo)); } - release_sock(sk); - unlock: + release_sock(sk); hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -444,6 +440,7 @@ static int iso_connect_cis(struct sock *sk) return -EHOSTUNREACH; hci_dev_lock(hdev); + lock_sock(sk); if (!cis_central_capable(hdev)) { err = -EOPNOTSUPP; @@ -498,13 +495,9 @@ static int iso_connect_cis(struct sock *sk) goto unlock; } - lock_sock(sk); - err = iso_chan_add(conn, sk, NULL); - if (err) { - release_sock(sk); + if (err) goto unlock; - } /* Update source addr of the socket */ bacpy(&iso_pi(sk)->src, &hcon->src); @@ -520,9 +513,8 @@ static int iso_connect_cis(struct sock *sk) iso_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo)); } - release_sock(sk); - unlock: + release_sock(sk); hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -1193,7 +1185,7 @@ static int iso_sock_connect(struct socket *sock, struct sockaddr_unsized *addr, release_sock(sk); - if (bacmp(&iso_pi(sk)->dst, BDADDR_ANY)) + if (bacmp(&sa->iso_bdaddr, BDADDR_ANY)) err = iso_connect_cis(sk); else err = iso_connect_bis(sk); @@ -2256,8 +2248,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) sk = iso_get_sock(hdev, &hdev->bdaddr, bdaddr, BT_LISTEN, iso_match_sid, ev1); if (sk && !ev1->status) { + lock_sock(sk); iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle); iso_pi(sk)->bc_sid = ev1->sid; + release_sock(sk); } goto done; @@ -2268,8 +2262,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) sk = iso_get_sock(hdev, &hdev->bdaddr, bdaddr, BT_LISTEN, iso_match_sid_past, ev1a); if (sk && !ev1a->status) { + lock_sock(sk); iso_pi(sk)->sync_handle = le16_to_cpu(ev1a->sync_handle); iso_pi(sk)->bc_sid = ev1a->sid; + release_sock(sk); } goto done; @@ -2296,27 +2292,35 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) ev2); if (sk) { - int err; - struct hci_conn *hcon = iso_pi(sk)->conn->hcon; + int err = 0; + bool big_sync; + struct hci_conn *hcon; + lock_sock(sk); + + hcon = iso_pi(sk)->conn->hcon; iso_pi(sk)->qos.bcast.encryption = ev2->encryption; if (ev2->num_bis < iso_pi(sk)->bc_num_bis) iso_pi(sk)->bc_num_bis = ev2->num_bis; - if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) && - !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) { + big_sync = !test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) && + !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags); + + if (big_sync) err = hci_conn_big_create_sync(hdev, hcon, &iso_pi(sk)->qos, iso_pi(sk)->sync_handle, iso_pi(sk)->bc_num_bis, iso_pi(sk)->bc_bis); - if (err) { - bt_dev_err(hdev, "hci_le_big_create_sync: %d", - err); - sock_put(sk); - sk = NULL; - } + + release_sock(sk); + + if (big_sync && err) { + bt_dev_err(hdev, "hci_le_big_create_sync: %d", + err); + sock_put(sk); + sk = NULL; } } @@ -2370,8 +2374,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) if (!base || base_len > BASE_MAX_LENGTH) goto done; + lock_sock(sk); memcpy(iso_pi(sk)->base, base, base_len); iso_pi(sk)->base_len = base_len; + release_sock(sk); } else { /* This is a PA data fragment. Keep pa_data_len set to 0 * until all data has been reassembled. diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 77dec104a9c3..7701528f1167 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4706,16 +4706,8 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP, sizeof(rsp), &rsp); - if (!err) { - u8 store_hint; - - store_hint = hci_le_conn_update(hcon, min, max, latency, - to_multiplier); - mgmt_new_conn_param(hcon->hdev, &hcon->dst, hcon->dst_type, - store_hint, min, max, latency, - to_multiplier); - - } + if (!err) + hci_le_conn_update(hcon, min, max, latency, to_multiplier); return 0; } @@ -5428,7 +5420,7 @@ static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn, * configured, the MPS field may be less than the current MPS * of that channel. */ - if (chan[i]->remote_mps >= mps && i) { + if (chan[i]->remote_mps > mps && num_scid > 1) { BT_ERR("chan %p decreased MPS %u -> %u", chan[i], chan[i]->remote_mps, mps); result = L2CAP_RECONF_INVALID_MPS; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 71e8c1b45bce..cf590a67d364 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1498,6 +1498,9 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) { struct sock *sk, *parent = chan->data; + if (!parent) + return NULL; + lock_sock(parent); /* Check for backlog size */ @@ -1657,6 +1660,9 @@ static void l2cap_sock_state_change_cb(struct l2cap_chan *chan, int state, { struct sock *sk = chan->data; + if (!sk) + return; + sk->sk_state = state; if (err) @@ -1758,6 +1764,9 @@ static long l2cap_sock_get_sndtimeo_cb(struct l2cap_chan *chan) { struct sock *sk = chan->data; + if (!sk) + return 0; + return READ_ONCE(sk->sk_sndtimeo); } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 611a9a94151e..d11bd5337d57 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1715,9 +1715,12 @@ static int rfcomm_recv_data(struct rfcomm_session *s, u8 dlci, int pf, struct sk } if (pf && d->cfc) { - u8 credits = *(u8 *) skb->data; skb_pull(skb, 1); + u8 *credits = skb_pull_data(skb, 1); - d->tx_credits += credits; + if (!credits) + goto drop; + + d->tx_credits += *credits; if (d->tx_credits) clear_bit(RFCOMM_TX_THROTTLED, &d->flags); } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 18826d4b9c0b..eba44525d41d 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -472,9 +472,13 @@ static struct sock *sco_get_sock_listen(bdaddr_t *src) sk1 = sk; } + sk = sk ? sk : sk1; + if (sk) + sock_hold(sk); + read_unlock(&sco_sk_list.lock); - return sk ? sk : sk1; + return sk; } static void sco_sock_destruct(struct sock *sk) @@ -515,11 +519,13 @@ static void sco_sock_kill(struct sock *sk) BT_DBG("sk %p state %d", sk, sk->sk_state); /* Sock is dead, so set conn->sk to NULL to avoid possible UAF */ + lock_sock(sk); if (sco_pi(sk)->conn) { sco_conn_lock(sco_pi(sk)->conn); sco_pi(sk)->conn->sk = NULL; sco_conn_unlock(sco_pi(sk)->conn); } + release_sock(sk); /* Kill poor orphan */ bt_sock_unlink(&sco_sk_list, sk); @@ -1365,40 +1371,51 @@ static int sco_sock_release(struct socket *sock) static void sco_conn_ready(struct sco_conn *conn) { - struct sock *parent; - struct sock *sk = conn->sk; + struct sock *parent, *sk; + + sco_conn_lock(conn); + sk = sco_sock_hold(conn); + sco_conn_unlock(conn); BT_DBG("conn %p", conn); if (sk) { lock_sock(sk); - sco_sock_clear_timer(sk); - sk->sk_state = BT_CONNECTED; - sk->sk_state_change(sk); + + /* conn->sk may have become NULL if racing with sk close, but + * due to held hdev->lock, it can't become different sk. + */ + if (conn->sk) { + sco_sock_clear_timer(sk); + sk->sk_state = BT_CONNECTED; + sk->sk_state_change(sk); + } + release_sock(sk); + sock_put(sk); } else { - sco_conn_lock(conn); - - if (!conn->hcon) { - sco_conn_unlock(conn); + if (!conn->hcon) return; - } + + lockdep_assert_held(&conn->hcon->hdev->lock); parent = sco_get_sock_listen(&conn->hcon->src); - if (!parent) { - sco_conn_unlock(conn); + if (!parent) return; - } lock_sock(parent); + sco_conn_lock(conn); + + /* hdev->lock guarantees conn->sk == NULL still here */ + + if (parent->sk_state != BT_LISTEN) + goto release; + sk = sco_sock_alloc(sock_net(parent), NULL, BTPROTO_SCO, GFP_ATOMIC, 0); - if (!sk) { - release_sock(parent); - sco_conn_unlock(conn); - return; - } + if (!sk) + goto release; sco_sock_init(sk, parent); @@ -1417,9 +1434,10 @@ static void sco_conn_ready(struct sco_conn *conn) /* Wake up parent */ parent->sk_data_ready(parent); - release_sock(parent); - +release: sco_conn_unlock(conn); + release_sock(parent); + sock_put(parent); } } diff --git a/net/core/dev.c b/net/core/dev.c index 06c195906231..8bfa8313ef62 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -371,7 +371,7 @@ static void netdev_name_node_alt_free(struct rcu_head *head) static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node) { netdev_name_node_del(name_node); - list_del(&name_node->list); + list_del_rcu(&name_node->list); call_rcu(&name_node->rcu, netdev_name_node_alt_free); } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 4381e0fc25bf..84faace50ac2 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -608,14 +608,16 @@ EXPORT_SYMBOL_GPL(__netpoll_setup); /* * Returns a pointer to a string representation of the identifier used * to select the egress interface for the given netpoll instance. buf - * must be a buffer of length at least MAC_ADDR_STR_LEN + 1. + * is used to format np->dev_mac when np->dev_name is empty; bufsz must + * be at least MAC_ADDR_STR_LEN + 1 to fit the formatted MAC address + * and its NUL terminator. */ -static char *egress_dev(struct netpoll *np, char *buf) +static char *egress_dev(struct netpoll *np, char *buf, size_t bufsz) { if (np->dev_name[0]) return np->dev_name; - snprintf(buf, MAC_ADDR_STR_LEN, "%pM", np->dev_mac); + snprintf(buf, bufsz, "%pM", np->dev_mac); return buf; } @@ -645,7 +647,7 @@ static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev) if (!IS_ENABLED(CONFIG_IPV6)) { np_err(np, "IPv6 is not supported %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return -EINVAL; } @@ -667,7 +669,7 @@ static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev) } if (err) { np_err(np, "no IPv6 address for %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return err; } @@ -687,14 +689,14 @@ static int netpoll_take_ipv4(struct netpoll *np, struct net_device *ndev) in_dev = __in_dev_get_rtnl(ndev); if (!in_dev) { np_err(np, "no IP address for %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return -EDESTADDRREQ; } ifa = rtnl_dereference(in_dev->ifa_list); if (!ifa) { np_err(np, "no IP address for %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return -EDESTADDRREQ; } @@ -736,7 +738,8 @@ int netpoll_setup(struct netpoll *np) ndev = dev_getbyhwaddr(net, ARPHRD_ETHER, np->dev_mac); if (!ndev) { - np_err(np, "%s doesn't exist, aborting\n", egress_dev(np, buf)); + np_err(np, "%s doesn't exist, aborting\n", + egress_dev(np, buf, sizeof(buf))); err = -ENODEV; goto unlock; } @@ -744,14 +747,14 @@ int netpoll_setup(struct netpoll *np) if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); err = -EBUSY; goto put; } if (!netif_running(ndev)) { np_info(np, "device %s not up yet, forcing it\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); err = dev_open(ndev, NULL); if (err) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b613bb6e07df..df042da422ef 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1572,6 +1572,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, port_guid.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); + memset(&vf_broadcast, 0, sizeof(vf_broadcast)); memcpy(vf_broadcast.broadcast, dev->broadcast, dev->addr_len); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 5fb812443a08..4366cbac3f06 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -124,9 +124,14 @@ static void ah_output_done(void *data, int err) struct iphdr *top_iph = ip_hdr(skb); struct ip_auth_hdr *ah = ip_auth_hdr(skb); int ihl = ip_hdrlen(skb); + int seqhi_len = 0; + __be32 *seqhi; + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); iph = AH_SKB_CB(skb)->tmp; - icv = ah_tmp_icv(iph, ihl); + seqhi = (__be32 *)((char *)iph + ihl); + icv = ah_tmp_icv(seqhi, seqhi_len); memcpy(ah->auth_data, icv, ahp->icv_trunc_len); top_iph->tos = iph->tos; @@ -270,12 +275,17 @@ static void ah_input_done(void *data, int err) struct ip_auth_hdr *ah = ip_auth_hdr(skb); int ihl = ip_hdrlen(skb); int ah_hlen = (ah->hdrlen + 2) << 2; + int seqhi_len = 0; + __be32 *seqhi; if (err) goto out; + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); work_iph = AH_SKB_CB(skb)->tmp; - auth_data = ah_tmp_auth(work_iph, ihl); + seqhi = (__be32 *)((char *)work_iph + ihl); + auth_data = ah_tmp_auth(seqhi, seqhi_len); icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 6dfc0bcdef65..6a5febbdbee4 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -873,7 +873,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) nfrags = 1; goto skip_cow; - } else if (!skb_has_frag_list(skb)) { + } else if (!skb_has_frag_list(skb) && + !skb_has_shared_frag(skb)) { nfrags = skb_shinfo(skb)->nr_frags; nfrags++; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a674fb44ec25..a9ad39064f3b 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -122,16 +122,29 @@ * contradict to specs provided this delay is small enough. */ -#define IGMP_V1_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \ - IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ - ((in_dev)->mr_v1_seen && \ - time_before(jiffies, (in_dev)->mr_v1_seen))) -#define IGMP_V2_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \ - IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ - ((in_dev)->mr_v2_seen && \ - time_before(jiffies, (in_dev)->mr_v2_seen))) +static bool IGMP_V1_SEEN(const struct in_device *in_dev) +{ + unsigned long seen; + + if (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1) + return true; + if (IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1) + return true; + seen = READ_ONCE(in_dev->mr_v1_seen); + return seen && time_before(jiffies, seen); +} + +static bool IGMP_V2_SEEN(const struct in_device *in_dev) +{ + unsigned long seen; + + if (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2) + return true; + if (IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2) + return true; + seen = READ_ONCE(in_dev->mr_v2_seen); + return seen && time_before(jiffies, seen); +} static int unsolicited_report_interval(struct in_device *in_dev) { @@ -954,23 +967,21 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, int max_delay; int mark = 0; struct net *net = dev_net(in_dev->dev); - + unsigned long seen; if (len == 8) { + seen = jiffies + READ_ONCE(in_dev->mr_qrv) * READ_ONCE(in_dev->mr_qi) + + READ_ONCE(in_dev->mr_qri); if (ih->code == 0) { /* Alas, old v1 router presents here. */ max_delay = IGMP_QUERY_RESPONSE_INTERVAL; - in_dev->mr_v1_seen = jiffies + - (in_dev->mr_qrv * in_dev->mr_qi) + - in_dev->mr_qri; + WRITE_ONCE(in_dev->mr_v1_seen, seen); group = 0; } else { /* v2 router present */ max_delay = ih->code*(HZ/IGMP_TIMER_SCALE); - in_dev->mr_v2_seen = jiffies + - (in_dev->mr_qrv * in_dev->mr_qi) + - in_dev->mr_qri; + WRITE_ONCE(in_dev->mr_v2_seen, seen); } /* cancel the interface change timer */ WRITE_ONCE(in_dev->mr_ifc_count, 0); @@ -995,6 +1006,8 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (!max_delay) max_delay = 1; /* can't mod w/ 0 */ } else { /* v3 */ + unsigned long mr_qi; + if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) return true; @@ -1015,15 +1028,16 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * received value was zero, use the default or statically * configured value. */ - in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); - in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; - + WRITE_ONCE(in_dev->mr_qrv, + ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv)); + mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; + WRITE_ONCE(in_dev->mr_qi, mr_qi); /* RFC3376, 8.3. Query Response Interval: * The number of seconds represented by the [Query Response * Interval] must be less than the [Query Interval]. */ - if (in_dev->mr_qri >= in_dev->mr_qi) - in_dev->mr_qri = (in_dev->mr_qi/HZ - 1)*HZ; + if (READ_ONCE(in_dev->mr_qri) >= mr_qi) + WRITE_ONCE(in_dev->mr_qri, (mr_qi/HZ - 1) * HZ); if (!group) { /* general query */ if (ih3->nsrcs) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d8083b9033c2..5b957a831e7c 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -179,7 +179,8 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, seq = read_seqbegin(&base->lock); p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp); - if (p) + /* Make sure tree was not modified during our lookup. */ + if (p && !read_seqretry(&base->lock, seq)) return p; /* retry an exact lookup, taking the lock before. diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e4790cc7b5c2..5bcd73cbdb41 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1233,6 +1233,8 @@ alloc_new_skb: if (err < 0) goto error; copy = err; + if (!(flags & MSG_NO_SHARED_FRAGS)) + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; wmem_alloc_delta += copy; } else if (!zc) { int i = skb_shinfo(skb)->nr_frags; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2058ca860294..2628cd3a93a6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -537,15 +537,16 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) }; int err; + rcu_read_lock(); err = ipmr_fib_lookup(net, &fl4, &mrt); if (err < 0) { + rcu_read_unlock(); kfree_skb(skb); return err; } DEV_STATS_ADD(dev, tx_bytes, skb->len); DEV_STATS_INC(dev, tx_packets); - rcu_read_lock(); /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), @@ -1112,11 +1113,12 @@ static int ipmr_cache_report(const struct mr_table *mrt, msg->im_vif_hi = vifi >> 8; ipv4_pktinfo_prepare(mroute_sk, pkt, false); memcpy(skb->cb, pkt->cb, sizeof(skb->cb)); - /* Add our header */ - igmp = skb_put(skb, sizeof(struct igmphdr)); + /* Add our header. + * Note that code, csum and group fields are cleared. + */ + igmp = skb_put_zero(skb, sizeof(struct igmphdr)); igmp->type = assert; msg->im_msgtype = assert; - igmp->code = 0; ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ skb->transport_header = skb->network_header; } diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c index 5080fa5fbf6a..f9c6755f5ec5 100644 --- a/net/ipv4/netfilter/nf_socket_ipv4.c +++ b/net/ipv4/netfilter/nf_socket_ipv4.c @@ -94,6 +94,9 @@ struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, #endif int doff = 0; + if (ntohs(iph->frag_off) & IP_OFFSET) + return NULL; + if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { struct tcphdr _hdr; struct udphdr *hp; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8fc24c3743c5..c0526cc03980 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1827,7 +1827,6 @@ INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason reason; - struct sock *rsk; reason = psp_sk_rx_policy_check(sk, skb); if (reason) @@ -1863,24 +1862,21 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; if (nsk != sk) { reason = tcp_child_process(sk, nsk, skb); - if (reason) { - rsk = nsk; + sock_put(nsk); + if (reason) goto reset; - } return 0; } } else sock_rps_save_rxhash(sk, skb); reason = tcp_rcv_state_process(sk, skb); - if (reason) { - rsk = sk; + if (reason) goto reset; - } return 0; reset: - tcp_v4_send_reset(rsk, skb, sk_rst_convert_drop_reason(reason)); + tcp_v4_send_reset(sk, skb, sk_rst_convert_drop_reason(reason)); discard: sk_skb_reason_drop(sk, skb, reason); /* Be careful here. If this function gets more complicated and @@ -2193,8 +2189,10 @@ lookup: rst_reason = sk_rst_convert_drop_reason(drop_reason); tcp_v4_send_reset(nsk, skb, rst_reason); + sock_put(nsk); goto discard_and_relse; } + sock_put(nsk); sock_put(sk); return 0; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 199f0b579e89..e6092c3ac840 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -1012,6 +1012,6 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, } bh_unlock_sock(child); - sock_put(child); + return reason; } diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index c024aa77f25b..c3806c6ac96f 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -164,7 +164,7 @@ config IPV6_SIT select INET_TUNNEL select NET_IP_TUNNEL select IPV6_NDISC_NODETYPE - default y + default m help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -172,7 +172,7 @@ config IPV6_SIT into IPv4 packets. This is useful if you want to connect two IPv6 networks over an IPv4-only path. - Saying M here will produce a module called sit. If unsure, say Y. + Saying M here will produce a module called sit. If unsure, say M. config IPV6_SIT_6RD bool "IPv6: IPv6 Rapid Deployment (6RD)" diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index cb26beea4398..de1e68199a01 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -317,14 +317,19 @@ static void ah6_output_done(void *data, int err) struct ipv6hdr *top_iph = ipv6_hdr(skb); struct ip_auth_hdr *ah = ip_auth_hdr(skb); struct tmp_ext *iph_ext; + int seqhi_len = 0; + __be32 *seqhi; extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr); if (extlen) extlen += sizeof(*iph_ext); + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); iph_base = AH_SKB_CB(skb)->tmp; iph_ext = ah_tmp_ext(iph_base); - icv = ah_tmp_icv(iph_ext, extlen); + seqhi = (__be32 *)((char *)iph_ext + extlen); + icv = ah_tmp_icv(seqhi, seqhi_len); memcpy(ah->auth_data, icv, ahp->icv_trunc_len); memcpy(top_iph, iph_base, IPV6HDR_BASELEN); @@ -471,13 +476,18 @@ static void ah6_input_done(void *data, int err) struct ip_auth_hdr *ah = ip_auth_hdr(skb); int hdr_len = skb_network_header_len(skb); int ah_hlen = ipv6_authlen(ah); + int seqhi_len = 0; + __be32 *seqhi; if (err) goto out; + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); work_iph = AH_SKB_CB(skb)->tmp; auth_data = ah_tmp_auth(work_iph, hdr_len); - icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); + seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len); + icv = ah_tmp_icv(seqhi, seqhi_len); err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; if (err) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9f75313734f8..9c06c5a1419d 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -915,7 +915,8 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) nfrags = 1; goto skip_cow; - } else if (!skb_has_frag_list(skb)) { + } else if (!skb_has_frag_list(skb) && + !skb_has_shared_frag(skb)) { nfrags = skb_shinfo(skb)->nr_frags; nfrags++; diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 49e31e4ae7b7..9d06d487e8b1 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -73,6 +73,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, __be16 *frag_offp) { u8 nexthdr = *nexthdrp; + int exthdr_cnt = 0; *frag_offp = 0; @@ -82,6 +83,8 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, if (nexthdr == NEXTHDR_NONE) return -1; + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) + return -1; hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (!hp) return -1; @@ -190,6 +193,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, { unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); u8 nexthdr = ipv6_hdr(skb)->nexthdr; + int exthdr_cnt = 0; bool found; if (fragoff) @@ -216,6 +220,9 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, return -ENOENT; } + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) + return -EBADMSG; + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (!hp) return -EBADMSG; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 63fc8556b475..365b4059eb20 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -2262,10 +2262,11 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); + struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm p; - struct ip6_tnl *t; + struct ip6gre_net *ign; + ign = net_generic(t->net, ip6gre_net_id); t = ip6gre_changelink_common(dev, tb, data, &p, extack); if (IS_ERR(t)) return PTR_ERR(t); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 967b07aeb683..8972863c93ee 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -403,6 +403,7 @@ INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *)); void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, bool have_final) { + int exthdr_cnt = IP6CB(skb)->flags & IP6SKB_HOPBYHOP ? 1 : 0; const struct inet6_protocol *ipprot; struct inet6_dev *idev; unsigned int nhoff; @@ -487,6 +488,10 @@ resubmit_final: nexthdr = ret; goto resubmit_final; } else { + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) { + SKB_DR_SET(reason, IPV6_TOO_MANY_EXTHDRS); + goto discard; + } goto resubmit; } } else if (ret == 0) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7e92909ab5be..c14adcdd4396 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -468,6 +468,7 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) default: break; } + hdr = ipv6_hdr(skb); } /* @@ -582,6 +583,8 @@ int ip6_forward(struct sk_buff *skb) if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev)) { int proxied = ip6_forward_proxy_check(skb); + + hdr = ipv6_hdr(skb); if (proxied > 0) { /* It's tempting to decrease the hop limit * here by 1, as we do at the end of the @@ -1794,6 +1797,8 @@ alloc_new_skb: if (err < 0) goto error; copy = err; + if (!(flags & MSG_NO_SHARED_FRAGS)) + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; wmem_alloc_delta += copy; } else if (!zc) { int i = skb_shinfo(skb)->nr_frags; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index c468c83af0f2..9d1037ac082f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -399,11 +399,15 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) unsigned int nhoff = raw - skb->data; unsigned int off = nhoff + sizeof(*ipv6h); u8 nexthdr = ipv6h->nexthdr; + int exthdr_cnt = 0; while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { struct ipv6_opt_hdr *hdr; u16 optlen; + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) + break; + if (!pskb_may_pull(skb, off + sizeof(*hdr))) break; diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c index ced8bd44828e..893f2aeb4711 100644 --- a/net/ipv6/netfilter/nf_socket_ipv6.c +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -100,6 +100,7 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, const struct in6_addr *daddr = NULL, *saddr = NULL; struct ipv6hdr *iph = ipv6_hdr(skb), ipv6_var; struct sk_buff *data_skb = NULL; + unsigned short fragoff = 0; int doff = 0; int thoff = 0, tproto; #if IS_ENABLED(CONFIG_NF_CONNTRACK) @@ -107,8 +108,8 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, struct nf_conn const *ct; #endif - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); - if (tproto < 0) { + tproto = ipv6_find_hdr(skb, &thoff, -1, &fragoff, NULL); + if (tproto < 0 || fragoff) { pr_debug("unable to find transport header in IPv6 packet, dropping\n"); return NULL; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 19eb6b702227..e3d355d1fbd6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1645,6 +1645,10 @@ static unsigned int fib6_mtu(const struct fib6_result *res) rcu_read_lock(); idev = __in6_dev_get(dev); + if (!idev) { + rcu_read_unlock(); + return 0; + } mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); } @@ -4995,6 +4999,7 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg) rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) break; rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; + fib6_update_sernum(net, rt); rt6_multipath_rebalance(rt); break; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2c3f7a739709..d13d49bfef19 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -288,8 +288,10 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, saddr = &fl6->saddr; err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); - if (err) + if (err) { + dst_release(dst); goto failure; + } } /* set the source address */ @@ -1617,12 +1619,13 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_LISTEN) { struct sock *nsk = tcp_v6_cookie_check(sk, skb); + if (!nsk) + return 0; if (nsk != sk) { - if (nsk) { - reason = tcp_child_process(sk, nsk, skb); - if (reason) - goto reset; - } + reason = tcp_child_process(sk, nsk, skb); + sock_put(nsk); + if (reason) + goto reset; return 0; } } else @@ -1827,8 +1830,10 @@ lookup: rst_reason = sk_rst_convert_drop_reason(drop_reason); tcp_v6_send_reset(nsk, skb, rst_reason); + sock_put(nsk); goto discard_and_relse; } + sock_put(nsk); sock_put(sk); return 0; } diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index ea2f805d3b01..9b586fcec485 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -88,8 +88,10 @@ int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, dst = ip6_route_input_lookup(dev_net(skb->dev), skb->dev, &fl6, skb, flags); - if (dst->error) + if (dst->error) { + dst_release(dst); goto drop; + } skb_dst_set(skb, dst); } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 160ae65a5c64..0a0f27836d57 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -438,6 +438,15 @@ ieee80211_verify_sta_ht_mcs_support(struct ieee80211_sub_if_data *sdata, ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); /* + * Some Xfinity XB8 firmware advertises >1 spatial stream MCS indexes in + * their basic HT-MCS set. On cards with lower spatial streams, the check + * would fail, and we'd be stuck with no HT when it in fact work fine with + * its own supported rate. So check it only in strict mode. + */ + if (!ieee80211_hw_check(&sdata->local->hw, STRICT)) + return true; + + /* * P802.11REVme/D7.0 - 6.5.4.2.4 * ... * If the MLME of an HT STA receives an MLME-JOIN.request primitive @@ -9140,7 +9149,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss = (void *)cbss->priv; struct sta_info *new_sta = NULL; struct ieee80211_link_data *link; - bool have_sta = false; + struct sta_info *have_sta = NULL; bool mlo; int err; u16 new_links; @@ -9159,11 +9168,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, mlo = false; } - if (assoc) { - rcu_read_lock(); + if (assoc) have_sta = sta_info_get(sdata, ap_mld_addr); - rcu_read_unlock(); - } if (mlo && !have_sta && WARN_ON(sdata->vif.valid_links || sdata->vif.active_links)) @@ -9327,6 +9333,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, out_release_chan: ieee80211_link_release_channel(link); out_err: + if (mlo && have_sta) + WARN_ON(__sta_info_destroy(have_sta)); ieee80211_vif_set_links(sdata, 0, 0); return err; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3e5d1c47a5b0..d18e962126ce 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4971,7 +4971,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, struct sk_buff *skb = rx->skb; struct ieee80211_hdr *hdr = (void *)skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - static ieee80211_rx_result res; + ieee80211_rx_result res; int orig_len = skb->len; int hdrlen = ieee80211_hdrlen(hdr->frame_control); int snap_offs = hdrlen; @@ -5380,7 +5380,9 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (!link_sta) goto out; - ieee80211_rx_data_set_link(&rx, link_sta->link_id); + if (!ieee80211_rx_data_set_link(&rx, + link_sta->link_id)) + goto out; } if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) diff --git a/net/mac80211/tests/chan-mode.c b/net/mac80211/tests/chan-mode.c index adc069065e73..fa370831d617 100644 --- a/net/mac80211/tests/chan-mode.c +++ b/net/mac80211/tests/chan-mode.c @@ -65,6 +65,7 @@ static const struct determine_chan_mode_case { .ht_capa_mask = { .mcs.rx_mask[0] = 0xf7, }, + .strict = true, }, { .desc = "Masking out a RX rate in VHT capabilities", .conn_mode = IEEE80211_CONN_MODE_EHT, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b093bc203c81..2529b01e2cd5 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3700,11 +3700,11 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, struct ieee80211_local *local = container_of(work, struct ieee80211_local, radar_detected_work); struct cfg80211_chan_def chandef; - struct ieee80211_chanctx *ctx; + struct ieee80211_chanctx *ctx, *tmp; lockdep_assert_wiphy(local->hw.wiphy); - list_for_each_entry(ctx, &local->chanctx_list, list) { + list_for_each_entry_safe(ctx, tmp, &local->chanctx_list, list) { if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER) continue; diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index e1033643fab0..e4b230ef6099 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -920,9 +920,9 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test) static void mctp_test_route_input_null_eid(struct kunit *test) { struct mctp_hdr hdr = RX_HDR(1, 10, 0, FL_S | FL_E | FL_TO); + struct sockaddr_mctp addr = { 0 }; struct sk_buff *skb_pkt, *skb_sk; struct mctp_test_dev *dev; - struct sockaddr_mctp addr; struct socket *sock; u8 type = 0; int rc; diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c index c3987d5ade7a..6eef8d485c25 100644 --- a/net/mctp/test/utils.c +++ b/net/mctp/test/utils.c @@ -116,7 +116,7 @@ void mctp_test_destroy_dev(struct mctp_test_dev *dev) static int mctp_test_dst_output(struct mctp_dst *dst, struct sk_buff *skb) { skb->dev = dst->dev->dev; - dev_queue_xmit(skb); + dev_direct_xmit(skb, 0); return 0; } diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index 82ec15bcfd7f..082c46c0f50e 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -12,6 +12,7 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf struct sock *sk, *ssk; struct sk_buff *skb; struct tcp_sock *tp; + bool has_rxtstamp; /* on early fallback the subflow context is deleted by * subflow_syn_recv_sock() @@ -40,12 +41,13 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf */ tp->copied_seq += skb->len; subflow->ssn_offset += skb->len; + has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; /* Only the sequence delta is relevant */ MPTCP_SKB_CB(skb)->map_seq = -skb->len; MPTCP_SKB_CB(skb)->end_seq = 0; MPTCP_SKB_CB(skb)->offset = 0; - MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp; MPTCP_SKB_CB(skb)->cant_coalesce = 1; mptcp_data_lock(sk); diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 57a456690406..3c152bf66cd5 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -16,6 +16,7 @@ struct mptcp_pm_add_entry { struct list_head list; struct mptcp_addr_info addr; u8 retrans_times; + bool timer_done; struct timer_list add_timer; struct mptcp_sock *sock; struct rcu_head rcu; @@ -283,6 +284,9 @@ int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct mptcp_addr_info local, remote; + if (!__mptcp_subflow_active(subflow)) + continue; + mptcp_local_address((struct sock_common *)ssk, &local); if (!mptcp_addresses_equal(&local, addr, addr->port)) continue; @@ -305,18 +309,31 @@ static unsigned int mptcp_adjust_add_addr_timeout(struct mptcp_sock *msk) const struct net *net = sock_net((struct sock *)msk); unsigned int rto = mptcp_get_add_addr_timeout(net); struct mptcp_subflow_context *subflow; - unsigned int max = 0; + unsigned int max = 0, max_stale = 0; + + if (!rto) + return 0; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct inet_connection_sock *icsk = inet_csk(ssk); - if (icsk->icsk_rto > max) + if (!__mptcp_subflow_active(subflow)) + continue; + + if (unlikely(subflow->stale)) { + if (icsk->icsk_rto > max_stale) + max_stale = icsk->icsk_rto; + } else if (icsk->icsk_rto > max) { max = icsk->icsk_rto; + } } - if (max && max < rto) - rto = max; + if (max) + return min(max, rto); + + if (max_stale) + return min(max_stale, rto); return rto; } @@ -327,26 +344,22 @@ static void mptcp_pm_add_timer(struct timer_list *timer) add_timer); struct mptcp_sock *msk = entry->sock; struct sock *sk = (struct sock *)msk; - unsigned int timeout; + unsigned int timeout = 0; pr_debug("msk=%p\n", msk); - if (!msk) - return; - - if (inet_sk_state_load(sk) == TCP_CLOSE) - return; - - if (!entry->addr.id) - return; + bh_lock_sock(sk); + if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE)) + goto out; - if (mptcp_pm_should_add_signal_addr(msk)) { - sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); + if (sock_owned_by_user(sk)) { + /* Try again later. */ + timeout = HZ / 20; goto out; } timeout = mptcp_adjust_add_addr_timeout(msk); - if (!timeout) + if (!timeout || mptcp_pm_should_add_signal_addr(msk)) goto out; spin_lock_bh(&msk->pm.lock); @@ -359,8 +372,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer) } if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) - sk_reset_timer(sk, timer, - jiffies + (timeout << entry->retrans_times)); + timeout <<= entry->retrans_times; + else + timeout = 0; spin_unlock_bh(&msk->pm.lock); @@ -368,7 +382,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer) mptcp_pm_subflow_established(msk); out: - __sock_put(sk); + if (timeout) + sk_reset_timer(sk, timer, jiffies + timeout); + else + /* if sock_put calls sk_free: avoid waiting for this timer */ + entry->timer_done = true; + bh_unlock_sock(sk); + sock_put(sk); } struct mptcp_pm_add_entry * @@ -431,6 +451,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); reset_timer: + add_entry->timer_done = false; timeout = mptcp_adjust_add_addr_timeout(msk); if (timeout) sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout); @@ -451,7 +472,8 @@ static void mptcp_pm_free_anno_list(struct mptcp_sock *msk) spin_unlock_bh(&msk->pm.lock); list_for_each_entry_safe(entry, tmp, &free_list, list) { - sk_stop_timer_sync(sk, &entry->add_timer); + if (!entry->timer_done) + sk_stop_timer_sync(sk, &entry->add_timer); kfree_rcu(entry, rcu); } } diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index c9f1e5af3cd3..fc818b63752e 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -347,6 +347,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* check first for announce */ if (msk->pm.add_addr_signaled < endp_signal_max) { + u8 endp_id; + /* due to racing events on both ends we can reach here while * previous add address is still running: if we invoke now * mptcp_pm_announce_addr(), that will fail and the @@ -360,19 +362,20 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (!select_signal_address(pernet, msk, &local)) goto subflow; + /* Special case for ID0: set the correct ID */ + endp_id = local.addr.id; + if (endp_id == msk->mpc_endpoint_id) + local.addr.id = 0; + /* If the alloc fails, we are on memory pressure, not worth * continuing, and trying to create subflows. */ if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) return; - __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + __clear_bit(endp_id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; - /* Special case for ID0: set the correct ID */ - if (local.addr.id == msk->mpc_endpoint_id) - local.addr.id = 0; - mptcp_pm_announce_addr(msk, &local.addr, false); mptcp_pm_addr_send_ack(msk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 0efe40be2fde..1cf608e7357b 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -812,6 +812,10 @@ static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level, if (ret) break; } + + if (!ret) + sockopt_seq_inc(msk); + return ret; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e2cb9d23e4a0..d562e149606f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -581,7 +581,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup); if (!subflow_thmac_valid(subflow)) { - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKMAC); subflow->reset_reason = MPTCP_RST_EMPTCP; goto do_reset; } @@ -908,7 +908,7 @@ create_child: if (!subflow_hmac_valid(subflow_req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); - subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); + subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP); goto dispose_child; } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 2082bfb2d93c..9ea6b4fa78bf 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -267,27 +267,20 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) hash_key2 = hash_key; use2 = false; } + conn_tab_lock(t, cp, hash_key, hash_key2, use2, true /* new_hash */, &head, &head2); - spin_lock(&cp->lock); - - if (!(cp->flags & IP_VS_CONN_F_HASHED)) { - cp->flags |= IP_VS_CONN_F_HASHED; - WRITE_ONCE(cp->hn0.hash_key, hash_key); - WRITE_ONCE(cp->hn1.hash_key, hash_key2); - refcount_inc(&cp->refcnt); - hlist_bl_add_head_rcu(&cp->hn0.node, head); - if (use2) - hlist_bl_add_head_rcu(&cp->hn1.node, head2); - ret = 1; - } else { - pr_err("%s(): request for already hashed, called from %pS\n", - __func__, __builtin_return_address(0)); - ret = 0; - } - spin_unlock(&cp->lock); + cp->flags |= IP_VS_CONN_F_HASHED; + WRITE_ONCE(cp->hn0.hash_key, hash_key); + WRITE_ONCE(cp->hn1.hash_key, hash_key2); + refcount_inc(&cp->refcnt); + hlist_bl_add_head_rcu(&cp->hn0.node, head); + if (use2) + hlist_bl_add_head_rcu(&cp->hn1.node, head2); + conn_tab_unlock(head, head2); + ret = 1; /* Schedule resizing if load increases */ if (atomic_read(&ipvs->conn_count) > t->u_thresh && @@ -321,7 +314,6 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) conn_tab_lock(t, cp, hash_key, hash_key2, use2, false /* new_hash */, &head, &head2); - spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { /* Decrease refcnt and unlink conn only if we are last user */ @@ -334,7 +326,6 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) } } - spin_unlock(&cp->lock); conn_tab_unlock(head, head2); rcu_read_unlock(); @@ -637,6 +628,7 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) struct ip_vs_conn_hnode *hn; u32 hash_key, hash_key_new; struct ip_vs_conn_param p; + bool by_me = false; int ntbl; int dir; @@ -664,8 +656,16 @@ retry: t = rcu_dereference(t->new_tbl); ntbl++; /* We are lost? */ - if (ntbl >= 2) + if (ntbl >= 2) { + spin_lock_bh(&cp->lock); + if (cp->flags & IP_VS_CONN_F_NO_CPORT && by_me) + cp->cport = 0; + /* hn1 will be rehashed on next packet */ + spin_unlock_bh(&cp->lock); + IP_VS_ERR_RL("%s(): Too many ht changes for dir %d\n", + __func__, dir); return; + } } /* Rehashing during resize? Use the recent table for adds */ @@ -683,10 +683,13 @@ retry: if (head > head2 && t == t2) swap(head, head2); + /* Protect the cp->flags modification */ + spin_lock_bh(&cp->lock); + /* Lock seqcount only for the old bucket, even if we are on new table * because it affects the del operation, not the adding. */ - spin_lock_bh(&t->lock[hash_key & t->lock_mask].l); + spin_lock(&t->lock[hash_key & t->lock_mask].l); preempt_disable_nested(); write_seqcount_begin(&t->seqc[hash_key & t->seqc_mask]); @@ -704,14 +707,23 @@ retry: hlist_bl_unlock(head); write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]); preempt_enable_nested(); - spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l); + spin_unlock(&t->lock[hash_key & t->lock_mask].l); + spin_unlock_bh(&cp->lock); hash_key = hash_key_new; goto retry; } - spin_lock(&cp->lock); - if ((cp->flags & IP_VS_CONN_F_NO_CPORT) && - (cp->flags & IP_VS_CONN_F_HASHED)) { + /* Fill cport once, even if multiple packets try to do it */ + if (cp->flags & IP_VS_CONN_F_NO_CPORT && (!cp->cport || by_me)) { + /* If we race with resizing make sure cport is set for dir 1 */ + if (!cp->cport) { + cp->cport = cport; + by_me = true; + } + if (!dir) { + atomic_dec(&ipvs->no_cport_conns[af_id]); + cp->flags &= ~IP_VS_CONN_F_NO_CPORT; + } /* We do not recalc hash_key_r under lock, we assume the * parameters in cp do not change, i.e. cport is * the only possible change. @@ -726,21 +738,17 @@ retry: hlist_bl_del_rcu(&hn->node); hlist_bl_add_head_rcu(&hn->node, head_new); } - if (!dir) { - atomic_dec(&ipvs->no_cport_conns[af_id]); - cp->flags &= ~IP_VS_CONN_F_NO_CPORT; - cp->cport = cport; - } } - spin_unlock(&cp->lock); if (head != head2) hlist_bl_unlock(head2); hlist_bl_unlock(head); write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]); preempt_enable_nested(); - spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l); - if (dir--) + spin_unlock(&t->lock[hash_key & t->lock_mask].l); + + spin_unlock_bh(&cp->lock); + if (dir-- && by_me) goto next_dir; } @@ -1835,7 +1843,7 @@ static void ip_vs_conn_flush(struct netns_ipvs *ipvs) if (!rcu_dereference_protected(ipvs->conn_tab, 1)) return; - cancel_delayed_work_sync(&ipvs->conn_resize_work); + disable_delayed_work_sync(&ipvs->conn_resize_work); if (!atomic_read(&ipvs->conn_count)) goto unreg; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index f5b7a2047291..d40b404c1bf6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -237,7 +237,7 @@ int ip_vs_rht_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, int n, { if (!t) return 1 << min_bits; - n = roundup_pow_of_two(n); + n = n > 0 ? roundup_pow_of_two(n) : 1; if (lfactor < 0) { int factor = min(-lfactor, max_bits); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 6632daa87ded..c7c7f6a7a9f6 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -261,12 +261,28 @@ static void est_reload_work_handler(struct work_struct *work) if (!kd) continue; /* New config ? Stop kthread tasks */ - if (genid != genid_done) - ip_vs_est_kthread_stop(kd); + if (genid != genid_done) { + if (!id) { + /* Only we can stop kt 0 but not under mutex */ + mutex_unlock(&ipvs->est_mutex); + ip_vs_est_kthread_stop(kd); + mutex_lock(&ipvs->est_mutex); + if (!READ_ONCE(ipvs->enable)) + goto unlock; + /* kd for kt 0 is never destroyed */ + } else { + ip_vs_est_kthread_stop(kd); + } + } if (!kd->task && !ip_vs_est_stopped(ipvs)) { + bool start; + /* Do not start kthreads above 0 in calc phase */ - if ((!id || !ipvs->est_calc_phase) && - ip_vs_est_kthread_start(ipvs, kd) < 0) + if (id) + start = !ipvs->est_calc_phase; + else + start = kd->needed; + if (start && ip_vs_est_kthread_start(ipvs, kd) < 0) repeat = true; } } @@ -1102,6 +1118,24 @@ out: return dest; } +/* Put destination in trash */ +static void ip_vs_trash_put_dest(struct netns_ipvs *ipvs, + struct ip_vs_dest *dest, unsigned long istart, + bool cleanup) +{ + spin_lock_bh(&ipvs->dest_trash_lock); + IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), + refcount_read(&dest->refcnt)); + if (list_empty(&ipvs->dest_trash) && !cleanup) + mod_timer(&ipvs->dest_trash_timer, + jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); + /* dest lives in trash with reference */ + list_add(&dest->t_list, &ipvs->dest_trash); + dest->idle_start = istart; + spin_unlock_bh(&ipvs->dest_trash_lock); +} + static void ip_vs_dest_rcu_free(struct rcu_head *head) { struct ip_vs_dest *dest; @@ -1461,9 +1495,12 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ntohs(dest->vport)); ret = ip_vs_start_estimator(svc->ipvs, &dest->stats); + /* On error put back dest into the trash */ if (ret < 0) - return ret; - __ip_vs_update_dest(svc, dest, udest, 1); + ip_vs_trash_put_dest(svc->ipvs, dest, dest->idle_start, + false); + else + __ip_vs_update_dest(svc, dest, udest, 1); } else { /* * Allocate and initialize the dest structure @@ -1533,17 +1570,7 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, */ ip_vs_rs_unhash(dest); - spin_lock_bh(&ipvs->dest_trash_lock); - IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", - IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), - refcount_read(&dest->refcnt)); - if (list_empty(&ipvs->dest_trash) && !cleanup) - mod_timer(&ipvs->dest_trash_timer, - jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); - /* dest lives in trash with reference */ - list_add(&dest->t_list, &ipvs->dest_trash); - dest->idle_start = 0; - spin_unlock_bh(&ipvs->dest_trash_lock); + ip_vs_trash_put_dest(ipvs, dest, 0, cleanup); /* Queue up delayed work to expire all no destination connections. * No-op when CONFIG_SYSCTL is disabled. @@ -1812,11 +1839,16 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, *svc_p = svc; if (!READ_ONCE(ipvs->enable)) { + mutex_lock(&ipvs->est_mutex); + /* Now there is a service - full throttle */ WRITE_ONCE(ipvs->enable, 1); + ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); + /* Start estimation for first time */ - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); + mutex_unlock(&ipvs->est_mutex); } return 0; @@ -2032,6 +2064,9 @@ static int ip_vs_del_service(struct ip_vs_service *svc) cancel_delayed_work_sync(&ipvs->svc_resize_work); if (t) { rcu_assign_pointer(ipvs->svc_table, NULL); + /* Inform readers that table is removed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->svc_table_changes); while (1) { p = rcu_dereference_protected(t->new_tbl, 1); call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); @@ -2078,6 +2113,9 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) t = rcu_dereference_protected(ipvs->svc_table, 1); if (t) { rcu_assign_pointer(ipvs->svc_table, NULL); + /* Inform readers that table is removed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->svc_table_changes); while (1) { p = rcu_dereference_protected(t->new_tbl, 1); call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); @@ -2086,6 +2124,11 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) t = p; } } + /* Stop the tot_stats estimator early under service_mutex + * to avoid locking it again later. + */ + if (cleanup) + ip_vs_stop_estimator_tot_stats(ipvs); return 0; } @@ -2331,7 +2374,7 @@ static int ipvs_proc_est_cpumask_set(const struct ctl_table *table, /* est_max_threads may depend on cpulist size */ ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); ipvs->est_calc_phase = 1; - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); unlock: mutex_unlock(&ipvs->est_mutex); @@ -2351,11 +2394,14 @@ static int ipvs_proc_est_cpumask_get(const struct ctl_table *table, mutex_lock(&ipvs->est_mutex); - if (ipvs->est_cpulist_valid) - mask = *valp; - else - mask = (struct cpumask *)housekeeping_cpumask(HK_TYPE_KTHREAD); - ret = scnprintf(buffer, size, "%*pbl\n", cpumask_pr_args(mask)); + /* HK_TYPE_KTHREAD cpumask needs RCU protection */ + scoped_guard(rcu) { + if (ipvs->est_cpulist_valid) + mask = *valp; + else + mask = (struct cpumask *)housekeeping_cpumask(HK_TYPE_KTHREAD); + ret = scnprintf(buffer, size, "%*pbl\n", cpumask_pr_args(mask)); + } mutex_unlock(&ipvs->est_mutex); @@ -2411,7 +2457,7 @@ static int ipvs_proc_est_nice(const struct ctl_table *table, int write, mutex_lock(&ipvs->est_mutex); if (*valp != val) { *valp = val; - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); } mutex_unlock(&ipvs->est_mutex); } @@ -2438,7 +2484,7 @@ static int ipvs_proc_run_estimation(const struct ctl_table *table, int write, mutex_lock(&ipvs->est_mutex); if (*valp != val) { *valp = val; - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); } mutex_unlock(&ipvs->est_mutex); } @@ -2463,7 +2509,7 @@ static int ipvs_proc_conn_lfactor(const struct ctl_table *table, int write, if (val < -8 || val > 8) { ret = -EINVAL; } else { - *valp = val; + WRITE_ONCE(*valp, val); if (rcu_access_pointer(ipvs->conn_tab)) mod_delayed_work(system_unbound_wq, &ipvs->conn_resize_work, 0); @@ -2490,10 +2536,16 @@ static int ipvs_proc_svc_lfactor(const struct ctl_table *table, int write, if (val < -8 || val > 8) { ret = -EINVAL; } else { - *valp = val; - if (rcu_access_pointer(ipvs->svc_table)) + mutex_lock(&ipvs->service_mutex); + WRITE_ONCE(*valp, val); + /* Make sure the services are present */ + if (rcu_access_pointer(ipvs->svc_table) && + READ_ONCE(ipvs->enable) && + !test_bit(IP_VS_WORK_SVC_NORESIZE, + &ipvs->work_flags)) mod_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, 0); + mutex_unlock(&ipvs->service_mutex); } } return ret; @@ -3004,7 +3056,8 @@ static int ip_vs_status_show(struct seq_file *seq, void *v) int old_gen, new_gen; u32 counts[8]; u32 bucket; - int count; + u32 count; + int loops; u32 sum1; u32 sum; int i; @@ -3020,6 +3073,7 @@ static int ip_vs_status_show(struct seq_file *seq, void *v) if (!atomic_read(&ipvs->conn_count)) goto after_conns; old_gen = atomic_read(&ipvs->conn_tab_changes); + loops = 0; repeat_conn: smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */ @@ -3032,8 +3086,11 @@ repeat_conn: resched_score++; ip_vs_rht_walk_bucket_rcu(t, bucket, head) { count = 0; - hlist_bl_for_each_entry_rcu(hn, e, head, node) + hlist_bl_for_each_entry_rcu(hn, e, head, node) { count++; + if (count >= ARRAY_SIZE(counts) - 1) + break; + } } resched_score += count; if (resched_score >= 100) { @@ -3042,37 +3099,41 @@ repeat_conn: new_gen = atomic_read(&ipvs->conn_tab_changes); /* New table installed ? */ if (old_gen != new_gen) { + /* Too many changes? */ + if (++loops >= 5) + goto after_conns; old_gen = new_gen; goto repeat_conn; } } - counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++; + counts[count]++; } } for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++) sum += counts[i]; sum1 = sum - counts[0]; - seq_printf(seq, "Conn buckets empty:\t%u (%lu%%)\n", - counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U)); + seq_printf(seq, "Conn buckets empty:\t%u (%llu%%)\n", + counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U))); for (i = 1; i < ARRAY_SIZE(counts); i++) { if (!counts[i]) continue; - seq_printf(seq, "Conn buckets len-%d:\t%u (%lu%%)\n", + seq_printf(seq, "Conn buckets len-%d:\t%u (%llu%%)\n", i, counts[i], - (unsigned long)counts[i] * 100 / max(sum1, 1U)); + div_u64((u64)counts[i] * 100U, max(sum1, 1U))); } after_conns: t = rcu_dereference(ipvs->svc_table); count = ip_vs_get_num_services(ipvs); - seq_printf(seq, "Services:\t%d\n", count); + seq_printf(seq, "Services:\t%u\n", count); seq_printf(seq, "Service buckets:\t%d (%d bits, lfactor %d)\n", t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0); if (!count) goto after_svc; old_gen = atomic_read(&ipvs->svc_table_changes); + loops = 0; repeat_svc: smp_rmb(); /* ipvs->svc_table and svc_table_changes */ @@ -3086,8 +3147,11 @@ repeat_svc: ip_vs_rht_walk_bucket_rcu(t, bucket, head) { count = 0; hlist_bl_for_each_entry_rcu(svc, e, head, - s_list) + s_list) { count++; + if (count >= ARRAY_SIZE(counts) - 1) + break; + } } resched_score += count; if (resched_score >= 100) { @@ -3096,24 +3160,27 @@ repeat_svc: new_gen = atomic_read(&ipvs->svc_table_changes); /* New table installed ? */ if (old_gen != new_gen) { + /* Too many changes? */ + if (++loops >= 5) + goto after_svc; old_gen = new_gen; goto repeat_svc; } } - counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++; + counts[count]++; } } for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++) sum += counts[i]; sum1 = sum - counts[0]; - seq_printf(seq, "Service buckets empty:\t%u (%lu%%)\n", - counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U)); + seq_printf(seq, "Service buckets empty:\t%u (%llu%%)\n", + counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U))); for (i = 1; i < ARRAY_SIZE(counts); i++) { if (!counts[i]) continue; - seq_printf(seq, "Service buckets len-%d:\t%u (%lu%%)\n", + seq_printf(seq, "Service buckets len-%d:\t%u (%llu%%)\n", i, counts[i], - (unsigned long)counts[i] * 100 / max(sum1, 1U)); + div_u64((u64)counts[i] * 100U, max(sum1, 1U))); } after_svc: @@ -4967,7 +5034,14 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); - ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s); + if (ipvs->tot_stats->s.est.ktid != -2) { + /* Not stopped yet? This happens only on netns init error and + * we even do not need to lock the service_mutex for this case. + */ + mutex_lock(&ipvs->service_mutex); + ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s); + mutex_unlock(&ipvs->service_mutex); + } if (ipvs->est_cpulist_valid) free_cpumask_var(ipvs->sysctl_est_cpulist); @@ -5039,7 +5113,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) ipvs->net->proc_net, ip_vs_stats_percpu_show, NULL)) goto err_percpu; - if (!proc_create_net_single("ip_vs_status", 0, ipvs->net->proc_net, + if (!proc_create_net_single("ip_vs_status", 0440, ipvs->net->proc_net, ip_vs_status_show, NULL)) goto err_status; #endif diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 433ba3cab58c..ab09f5182951 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -68,6 +68,11 @@ and the limit of estimators per kthread - est_add_ktid: ktid where to add new ests, can point to empty slot where we should add kt data + - data protected by service_mutex: est_temp_list, est_add_ktid, + est_kt_count(R/W), est_kt_arr(R/W), est_genid_done, kd->needed(R/W) + - data protected by est_mutex: est_genid, est_max_threads, sysctl_est_cpulist, + est_cpulist_valid, sysctl_est_nice, est_stopped, sysctl_run_estimation, + est_kt_count(R), est_kt_arr(R), kd->needed(R), kd->task (id > 0) */ static struct lock_class_key __ipvs_est_key; @@ -227,14 +232,17 @@ static int ip_vs_estimation_kthread(void *data) } /* Schedule stop/start for kthread tasks */ -void ip_vs_est_reload_start(struct netns_ipvs *ipvs) +void ip_vs_est_reload_start(struct netns_ipvs *ipvs, bool restart) { + lockdep_assert_held(&ipvs->est_mutex); + /* Ignore reloads before first service is added */ if (!READ_ONCE(ipvs->enable)) return; ip_vs_est_stopped_recalc(ipvs); - /* Bump the kthread configuration genid */ - atomic_inc(&ipvs->est_genid); + /* Bump the kthread configuration genid if stopping is requested */ + if (restart) + atomic_inc(&ipvs->est_genid); queue_delayed_work(system_long_wq, &ipvs->est_reload_work, 0); } @@ -304,12 +312,17 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs) void *arr = NULL; int i; - if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads && - READ_ONCE(ipvs->enable) && ipvs->est_max_threads) - return -EINVAL; - mutex_lock(&ipvs->est_mutex); + /* Allow kt 0 data to be created before the services are added + * and limit the kthreads when services are present. + */ + if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads && + READ_ONCE(ipvs->enable) && ipvs->est_max_threads) { + ret = -EINVAL; + goto out; + } + for (i = 0; i < id; i++) { if (!ipvs->est_kt_arr[i]) break; @@ -333,6 +346,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs) kd->est_timer = jiffies; kd->id = id; ip_vs_est_set_params(ipvs, kd); + kd->needed = 1; /* Pre-allocate stats used in calc phase */ if (!id && !kd->calc_stats) { @@ -341,12 +355,8 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs) goto out; } - /* Start kthread tasks only when services are present */ - if (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) { - ret = ip_vs_est_kthread_start(ipvs, kd); - if (ret < 0) - goto out; - } + /* Request kthread to be started */ + ip_vs_est_reload_start(ipvs, false); if (arr) ipvs->est_kt_count++; @@ -482,12 +492,11 @@ out: /* Start estimation for stats */ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) { + struct ip_vs_est_kt_data *kd = ipvs->est_kt_count > 0 ? + ipvs->est_kt_arr[0] : NULL; struct ip_vs_estimator *est = &stats->est; int ret; - if (!ipvs->est_max_threads && READ_ONCE(ipvs->enable)) - ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); - est->ktid = -1; est->ktrow = IPVS_EST_NTICKS - 1; /* Initial delay */ @@ -496,8 +505,15 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) * will not allocate much memory, just for kt 0. */ ret = 0; - if (!ipvs->est_kt_count || !ipvs->est_kt_arr[0]) + if (!kd) { ret = ip_vs_est_add_kthread(ipvs); + } else if (!kd->needed) { + mutex_lock(&ipvs->est_mutex); + /* We have job for the kt 0 task */ + kd->needed = 1; + ip_vs_est_reload_start(ipvs, true); + mutex_unlock(&ipvs->est_mutex); + } if (ret >= 0) hlist_add_head(&est->list, &ipvs->est_temp_list); else @@ -578,16 +594,14 @@ void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) } end_kt0: - /* kt 0 is freed after all other kthreads and chains are empty */ + /* kt 0 task is stopped after all other kt slots and chains are empty */ if (ipvs->est_kt_count == 1 && hlist_empty(&ipvs->est_temp_list)) { kd = ipvs->est_kt_arr[0]; - if (!kd || !kd->est_count) { + if (kd && !kd->est_count) { mutex_lock(&ipvs->est_mutex); - if (kd) { - ip_vs_est_kthread_destroy(kd); - ipvs->est_kt_arr[0] = NULL; - } - ipvs->est_kt_count--; + /* Keep the kt0 data but request kthread_stop */ + kd->needed = 0; + ip_vs_est_reload_start(ipvs, true); mutex_unlock(&ipvs->est_mutex); ipvs->est_add_ktid = 0; } @@ -647,9 +661,9 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max) u64 val; INIT_HLIST_HEAD(&chain); - mutex_lock(&ipvs->service_mutex); + mutex_lock(&ipvs->est_mutex); kd = ipvs->est_kt_arr[0]; - mutex_unlock(&ipvs->service_mutex); + mutex_unlock(&ipvs->est_mutex); s = kd ? kd->calc_stats : NULL; if (!s) goto out; @@ -748,16 +762,16 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs) if (!ip_vs_est_calc_limits(ipvs, &chain_max)) return; - mutex_lock(&ipvs->service_mutex); - /* Stop all other tasks, so that we can immediately move the * estimators to est_temp_list without RCU grace period */ mutex_lock(&ipvs->est_mutex); for (id = 1; id < ipvs->est_kt_count; id++) { /* netns clean up started, abort */ - if (!READ_ONCE(ipvs->enable)) - goto unlock2; + if (kthread_should_stop() || !READ_ONCE(ipvs->enable)) { + mutex_unlock(&ipvs->est_mutex); + return; + } kd = ipvs->est_kt_arr[id]; if (!kd) continue; @@ -765,9 +779,11 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs) } mutex_unlock(&ipvs->est_mutex); + mutex_lock(&ipvs->service_mutex); + /* Move all estimators to est_temp_list but carefully, * all estimators and kthread data can be released while - * we reschedule. Even for kthread 0. + * we reschedule. */ step = 0; @@ -849,9 +865,7 @@ walk_chain: ip_vs_stop_estimator(ipvs, stats); /* Tasks are stopped, move without RCU grace period */ est->ktid = -1; - est->ktrow = row - kd->est_row; - if (est->ktrow < 0) - est->ktrow += IPVS_EST_NTICKS; + est->ktrow = delay; hlist_add_head(&est->list, &ipvs->est_temp_list); /* kd freed ? */ if (last) @@ -889,7 +903,6 @@ end_dequeue: if (genid == atomic_read(&ipvs->est_genid)) ipvs->est_calc_phase = 0; -unlock2: mutex_unlock(&ipvs->est_mutex); unlock: diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index e348fb90b8dc..3b0a70e154cd 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -13,22 +13,6 @@ #include <net/netfilter/nf_tables_offload.h> #include <net/netfilter/nf_dup_netdev.h> -#define NF_RECURSION_LIMIT 2 - -#ifndef CONFIG_PREEMPT_RT -static u8 *nf_get_nf_dup_skb_recursion(void) -{ - return this_cpu_ptr(&softnet_data.xmit.nf_dup_skb_recursion); -} -#else - -static u8 *nf_get_nf_dup_skb_recursion(void) -{ - return ¤t->net_xmit.nf_dup_skb_recursion; -} - -#endif - static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev, enum nf_dev_hooks hook) { diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 2c4140e6f53c..785d8c244a77 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -122,6 +122,7 @@ static int flow_offload_fill_route(struct flow_offload *flow, flow_tuple->tun = route->tuple[dir].in.tun; flow_tuple->encap_num = route->tuple[dir].in.num_encaps; + flow_tuple->needs_gso_segment = route->tuple[dir].out.needs_gso_segment; flow_tuple->tun_num = route->tuple[dir].in.num_tuns; switch (route->tuple[dir].xmit_type) { diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index fd56d663cb5b..9c05a50d6013 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -445,13 +445,13 @@ static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx, switch (skb->protocol) { case htons(ETH_P_8021Q): vlan_hdr = (struct vlan_hdr *)skb->data; - __skb_pull(skb, VLAN_HLEN); + skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vlan_hdr); skb_reset_network_header(skb); break; case htons(ETH_P_PPP_SES): skb->protocol = __nf_flow_pppoe_proto(skb); - skb_pull(skb, PPPOE_SES_HLEN); + skb_pull_rcsum(skb, PPPOE_SES_HLEN); skb_reset_network_header(skb); break; } @@ -462,23 +462,6 @@ static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx, nf_flow_ip_tunnel_pop(ctx, skb); } -struct nf_flow_xmit { - const void *dest; - const void *source; - struct net_device *outdev; -}; - -static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, - struct nf_flow_xmit *xmit) -{ - skb->dev = xmit->outdev; - dev_hard_header(skb, skb->dev, ntohs(skb->protocol), - xmit->dest, xmit->source, skb->len); - dev_queue_xmit(skb); - - return NF_STOLEN; -} - static struct flow_offload_tuple_rhash * nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx, struct nf_flowtable *flow_table, struct sk_buff *skb) @@ -524,7 +507,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx, return 0; } - if (skb_try_make_writable(skb, thoff + ctx->hdrsize)) + if (skb_ensure_writable(skb, thoff + ctx->hdrsize)) return -1; flow_offload_refresh(flow_table, flow, false); @@ -544,7 +527,34 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx, return 1; } -static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id) +/* Similar to skb_vlan_push. */ +static int nf_flow_vlan_push(struct sk_buff *skb, __be16 proto, u16 id, + u32 needed_headroom) +{ + if (skb_vlan_tag_present(skb)) { + struct vlan_hdr *vhdr; + + if (skb_cow_head(skb, needed_headroom + VLAN_HLEN)) + return -1; + + __skb_push(skb, VLAN_HLEN); + if (skb_mac_header_was_set(skb)) + skb->mac_header -= VLAN_HLEN; + + vhdr = (struct vlan_hdr *)skb->data; + skb->network_header -= VLAN_HLEN; + vhdr->h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + vhdr->h_vlan_encapsulated_proto = skb->protocol; + skb->protocol = skb->vlan_proto; + skb_postpush_rcsum(skb, skb->data, VLAN_HLEN); + } + __vlan_hwaccel_put_tag(skb, proto, id); + + return 0; +} + +static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id, + u32 needed_headroom) { int data_len = skb->len + sizeof(__be16); struct ppp_hdr { @@ -553,7 +563,7 @@ static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id) } *ph; __be16 proto; - if (skb_cow_head(skb, PPPOE_SES_HLEN)) + if (skb_cow_head(skb, needed_headroom + PPPOE_SES_HLEN)) return -1; switch (skb->protocol) { @@ -730,21 +740,24 @@ static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb, } static int nf_flow_encap_push(struct sk_buff *skb, - struct flow_offload_tuple *tuple) + struct flow_offload_tuple *tuple, + struct net_device *outdev) { + u32 needed_headroom = LL_RESERVED_SPACE(outdev); int i; - for (i = 0; i < tuple->encap_num; i++) { + for (i = tuple->encap_num - 1; i >= 0; i--) { switch (tuple->encap[i].proto) { case htons(ETH_P_8021Q): case htons(ETH_P_8021AD): - skb_reset_mac_header(skb); - if (skb_vlan_push(skb, tuple->encap[i].proto, - tuple->encap[i].id) < 0) + if (nf_flow_vlan_push(skb, tuple->encap[i].proto, + tuple->encap[i].id, + needed_headroom) < 0) return -1; break; case htons(ETH_P_PPP_SES): - if (nf_flow_pppoe_push(skb, tuple->encap[i].id) < 0) + if (nf_flow_pppoe_push(skb, tuple->encap[i].id, + needed_headroom) < 0) return -1; break; } @@ -753,6 +766,76 @@ static int nf_flow_encap_push(struct sk_buff *skb, return 0; } +struct nf_flow_xmit { + const void *dest; + const void *source; + struct net_device *outdev; + struct flow_offload_tuple *tuple; + bool needs_gso_segment; +}; + +static void __nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, + struct nf_flow_xmit *xmit) +{ + struct net_device *dev = xmit->outdev; + unsigned int hh_len = LL_RESERVED_SPACE(dev); + + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { + skb = skb_expand_head(skb, hh_len); + if (!skb) + return; + } + + skb->dev = dev; + dev_hard_header(skb, dev, ntohs(skb->protocol), + xmit->dest, xmit->source, skb->len); + dev_queue_xmit(skb); +} + +static unsigned int nf_flow_encap_gso_xmit(struct net *net, struct sk_buff *skb, + struct nf_flow_xmit *xmit) +{ + struct sk_buff *segs, *nskb; + + segs = skb_gso_segment(skb, 0); + if (IS_ERR(segs)) + return NF_DROP; + + if (segs) + consume_skb(skb); + else + segs = skb; + + skb_list_walk_safe(segs, segs, nskb) { + skb_mark_not_on_list(segs); + + if (nf_flow_encap_push(segs, xmit->tuple, xmit->outdev) < 0) { + kfree_skb(segs); + kfree_skb_list(nskb); + return NF_STOLEN; + } + __nf_flow_queue_xmit(net, segs, xmit); + } + + return NF_STOLEN; +} + +static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, + struct nf_flow_xmit *xmit) +{ + if (xmit->tuple->encap_num) { + if (skb_is_gso(skb) && xmit->needs_gso_segment) + return nf_flow_encap_gso_xmit(net, skb, xmit); + + if (nf_flow_encap_push(skb, xmit->tuple, xmit->outdev) < 0) + return NF_DROP; + } + + __nf_flow_queue_xmit(net, skb, xmit); + + return NF_STOLEN; +} + unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -797,9 +880,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_tunnel_v4_push(state->net, skb, other_tuple, &ip_daddr) < 0) return NF_DROP; - if (nf_flow_encap_push(skb, other_tuple) < 0) - return NF_DROP; - switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: rt = dst_rtable(tuplehash->tuple.dst_cache); @@ -829,6 +909,8 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, WARN_ON_ONCE(1); return NF_DROP; } + xmit.tuple = other_tuple; + xmit.needs_gso_segment = tuplehash->tuple.needs_gso_segment; return nf_flow_queue_xmit(state->net, skb, &xmit); } @@ -1037,7 +1119,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx, return 0; } - if (skb_try_make_writable(skb, thoff + ctx->hdrsize)) + if (skb_ensure_writable(skb, thoff + ctx->hdrsize)) return -1; flow_offload_refresh(flow_table, flow, false); @@ -1119,9 +1201,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, &ip6_daddr, encap_limit) < 0) return NF_DROP; - if (nf_flow_encap_push(skb, other_tuple) < 0) - return NF_DROP; - switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: rt = dst_rt6_info(tuplehash->tuple.dst_cache); @@ -1151,6 +1230,8 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, WARN_ON_ONCE(1); return NF_DROP; } + xmit.tuple = other_tuple; + xmit.needs_gso_segment = tuplehash->tuple.needs_gso_segment; return nf_flow_queue_xmit(state->net, skb, &xmit); } diff --git a/net/netfilter/nf_flow_table_path.c b/net/netfilter/nf_flow_table_path.c index 6bb9579dcc2a..9e88ea6a2eef 100644 --- a/net/netfilter/nf_flow_table_path.c +++ b/net/netfilter/nf_flow_table_path.c @@ -86,6 +86,7 @@ struct nft_forward_info { u8 ingress_vlans; u8 h_source[ETH_ALEN]; u8 h_dest[ETH_ALEN]; + bool needs_gso_segment; enum flow_offload_xmit_type xmit_type; }; @@ -138,8 +139,11 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack, path->encap.proto; info->num_encaps++; } - if (path->type == DEV_PATH_PPPOE) + if (path->type == DEV_PATH_PPPOE) { memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN); + info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT; + info->needs_gso_segment = 1; + } break; case DEV_PATH_BRIDGE: if (is_zero_ether_addr(info->h_source)) @@ -279,6 +283,7 @@ static void nft_dev_forward_path(const struct nft_pktinfo *pkt, memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN); route->tuple[dir].xmit_type = info.xmit_type; } + route->tuple[dir].out.needs_gso_segment = info.needs_gso_segment; } int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d20ce5c36d31..87387adbca65 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -407,6 +407,7 @@ static void nft_netdev_unregister_trans_hook(struct net *net, } static void nft_netdev_unregister_hooks(struct net *net, + const struct nft_table *table, struct list_head *hook_list, bool release_netdev) { @@ -414,8 +415,10 @@ static void nft_netdev_unregister_hooks(struct net *net, struct nf_hook_ops *ops; list_for_each_entry_safe(hook, next, hook_list, list) { - list_for_each_entry(ops, &hook->ops_list, list) - nf_unregister_net_hook(net, ops); + if (!(table->flags & NFT_TABLE_F_DORMANT)) { + list_for_each_entry(ops, &hook->ops_list, list) + nf_unregister_net_hook(net, ops); + } if (release_netdev) nft_netdev_hook_unlink_free_rcu(hook); } @@ -452,20 +455,25 @@ static void __nf_tables_unregister_hook(struct net *net, struct nft_base_chain *basechain; const struct nf_hook_ops *ops; - if (table->flags & NFT_TABLE_F_DORMANT || - !nft_is_base_chain(chain)) + if (!nft_is_base_chain(chain)) return; basechain = nft_base_chain(chain); ops = &basechain->ops; + /* must also be called for dormant tables */ + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) { + nft_netdev_unregister_hooks(net, table, &basechain->hook_list, + release_netdev); + return; + } + + if (table->flags & NFT_TABLE_F_DORMANT) + return; + if (basechain->type->ops_unregister) return basechain->type->ops_unregister(net, ops); - if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) - nft_netdev_unregister_hooks(net, &basechain->hook_list, - release_netdev); - else - nf_unregister_net_hook(net, &basechain->ops); + nf_unregister_net_hook(net, &basechain->ops); } static void nf_tables_unregister_hook(struct net *net, @@ -4205,6 +4213,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) struct nft_chain *chain; struct nft_ctx ctx = { .net = net, + .table = (struct nft_table *)table, .family = table->family, }; int err = 0; @@ -11281,11 +11290,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - if (!(table->flags & NFT_TABLE_F_DORMANT)) { - nft_netdev_unregister_hooks(net, - &nft_trans_chain_hooks(trans), - true); - } + nft_netdev_unregister_hooks(net, table, + &nft_trans_chain_hooks(trans), + true); free_percpu(nft_trans_chain_stats(trans)); kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 5ddd5b6e135f..8ab186f86dd4 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -153,7 +153,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) ptr = skb_network_header(skb) + pkt->nhoff; else { - if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) return false; ptr = skb->data + nft_thoff(pkt); } diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index decc725a33c2..0caa9304d2d0 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -261,10 +261,10 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return ret; } - nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); - nft_compat_wait_for_destructors(ctx->net); + nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); + ret = xt_check_target(&par, size, proto, inv); if (ret < 0) { if (ret == -ENOENT) { @@ -353,8 +353,6 @@ nla_put_failure: static int nft_target_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { - struct xt_target *target = expr->ops->data; - unsigned int hook_mask = 0; int ret; if (ctx->family != NFPROTO_IPV4 && @@ -377,11 +375,21 @@ static int nft_target_validate(const struct nft_ctx *ctx, const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops; + unsigned int hook_mask = 1 << ops->hooknum; + struct xt_target *target = expr->ops->data; + void *info = nft_expr_priv(expr); + struct xt_tgchk_param par; + union nft_entry e = {}; - hook_mask = 1 << ops->hooknum; if (target->hooks && !(hook_mask & target->hooks)) return -EINVAL; + nft_target_set_tgchk_param(&par, ctx, target, info, &e, 0, false); + + ret = xt_check_hooks_target(&par); + if (ret < 0) + return ret; + ret = nft_compat_chain_validate_dependency(ctx, target->table); if (ret < 0) return ret; @@ -515,10 +523,10 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return ret; } - nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); - nft_compat_wait_for_destructors(ctx->net); + nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); + return xt_check_match(&par, size, proto, inv); } @@ -614,8 +622,6 @@ static int nft_match_large_dump(struct sk_buff *skb, static int nft_match_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { - struct xt_match *match = expr->ops->data; - unsigned int hook_mask = 0; int ret; if (ctx->family != NFPROTO_IPV4 && @@ -638,11 +644,30 @@ static int nft_match_validate(const struct nft_ctx *ctx, const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops; + unsigned int hook_mask = 1 << ops->hooknum; + struct xt_match *match = expr->ops->data; + size_t size = XT_ALIGN(match->matchsize); + struct xt_mtchk_param par; + union nft_entry e = {}; + void *info; - hook_mask = 1 << ops->hooknum; if (match->hooks && !(hook_mask & match->hooks)) return -EINVAL; + if (NFT_EXPR_SIZE(size) > NFT_MATCH_LARGE_THRESH) { + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + + info = priv->info; + } else { + info = nft_expr_priv(expr); + } + + nft_match_set_mtchk_param(&par, ctx, match, info, &e, 0, false); + + ret = xt_check_hooks_match(&par); + if (ret < 0) + return ret; + ret = nft_compat_chain_validate_dependency(ctx, match->table); if (ret < 0) return ret; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 0407d6f708ae..e6a07c0df207 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -376,7 +376,7 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr, const struct sctp_chunkhdr *sch; struct sctp_chunkhdr _sch; - if (pkt->tprot != IPPROTO_SCTP) + if (pkt->tprot != IPPROTO_SCTP || pkt->fragoff) goto err; do { diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 4bce36c3a6a0..b9e88d7cf308 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -95,12 +95,15 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { + u8 *nf_dup_skb_recursion = nf_get_nf_dup_skb_recursion(); struct nft_fwd_neigh *priv = nft_expr_priv(expr); void *addr = ®s->data[priv->sreg_addr]; int oif = regs->data[priv->sreg_dev]; unsigned int verdict = NF_STOLEN; struct sk_buff *skb = pkt->skb; + int nhoff = skb_network_offset(skb); struct net_device *dev; + unsigned int hh_len; int neigh_table; switch (priv->nfproto) { @@ -111,7 +114,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, verdict = NFT_BREAK; goto out; } - if (skb_try_make_writable(skb, sizeof(*iph))) { + if (skb_ensure_writable(skb, nhoff + sizeof(*iph))) { verdict = NF_DROP; goto out; } @@ -132,7 +135,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, verdict = NFT_BREAK; goto out; } - if (skb_try_make_writable(skb, sizeof(*ip6h))) { + if (skb_ensure_writable(skb, nhoff + sizeof(*ip6h))) { verdict = NF_DROP; goto out; } @@ -151,13 +154,31 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, goto out; } + if (*nf_dup_skb_recursion > NF_RECURSION_LIMIT) { + verdict = NF_DROP; + goto out; + } + dev = dev_get_by_index_rcu(nft_net(pkt), oif); - if (dev == NULL) - return; + if (dev == NULL) { + verdict = NF_DROP; + goto out; + } + + hh_len = LL_RESERVED_SPACE(dev); + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { + skb = skb_expand_head(skb, hh_len); + if (!skb) { + verdict = NF_STOLEN; + goto out; + } + } skb->dev = dev; skb_clear_tstamp(skb); + (*nf_dup_skb_recursion)++; neigh_xmit(neigh_table, dev, addr, skb); + (*nf_dup_skb_recursion)--; out: regs->verdict.code = verdict; } diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index c02d5cb52143..45fe56da5044 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -33,7 +33,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, return; } - if (pkt->tprot != IPPROTO_TCP) { + if (pkt->tprot != IPPROTO_TCP || pkt->fragoff) { regs->verdict.code = NFT_BREAK; return; } diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index f2101af8c867..89be443734f6 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -30,8 +30,8 @@ static void nft_tproxy_eval_v4(const struct nft_expr *expr, __be16 tport = 0; struct sock *sk; - if (pkt->tprot != IPPROTO_TCP && - pkt->tprot != IPPROTO_UDP) { + if ((pkt->tprot != IPPROTO_TCP && + pkt->tprot != IPPROTO_UDP) || pkt->fragoff) { regs->verdict.code = NFT_BREAK; return; } @@ -97,8 +97,8 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr, memset(&taddr, 0, sizeof(taddr)); - if (pkt->tprot != IPPROTO_TCP && - pkt->tprot != IPPROTO_UDP) { + if ((pkt->tprot != IPPROTO_TCP && + pkt->tprot != IPPROTO_UDP) || pkt->fragoff) { regs->verdict.code = NFT_BREAK; return; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 9f837fb5ceb4..2c67c2e6b132 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -477,11 +477,9 @@ int xt_check_proc_name(const char *name, unsigned int size) } EXPORT_SYMBOL(xt_check_proc_name); -int xt_check_match(struct xt_mtchk_param *par, - unsigned int size, u16 proto, bool inv_proto) +static int xt_check_match_common(struct xt_mtchk_param *par, + unsigned int size, u16 proto, bool inv_proto) { - int ret; - if (XT_ALIGN(par->match->matchsize) != size && par->match->matchsize != -1) { /* @@ -530,6 +528,14 @@ int xt_check_match(struct xt_mtchk_param *par, par->match->proto); return -EINVAL; } + + return 0; +} + +static int xt_checkentry_match(struct xt_mtchk_param *par) +{ + int ret; + if (par->match->checkentry != NULL) { ret = par->match->checkentry(par); if (ret < 0) @@ -538,8 +544,34 @@ int xt_check_match(struct xt_mtchk_param *par, /* Flag up potential errors. */ return -EIO; } + + return 0; +} + +int xt_check_hooks_match(struct xt_mtchk_param *par) +{ + if (par->match->check_hooks != NULL) + return par->match->check_hooks(par); + return 0; } +EXPORT_SYMBOL_GPL(xt_check_hooks_match); + +int xt_check_match(struct xt_mtchk_param *par, + unsigned int size, u16 proto, bool inv_proto) +{ + int ret; + + ret = xt_check_match_common(par, size, proto, inv_proto); + if (ret < 0) + return ret; + + ret = xt_check_hooks_match(par); + if (ret < 0) + return ret; + + return xt_checkentry_match(par); +} EXPORT_SYMBOL_GPL(xt_check_match); /** xt_check_entry_match - check that matches end before start of target @@ -1012,11 +1044,9 @@ bool xt_find_jump_offset(const unsigned int *offsets, } EXPORT_SYMBOL(xt_find_jump_offset); -int xt_check_target(struct xt_tgchk_param *par, - unsigned int size, u16 proto, bool inv_proto) +static int xt_check_target_common(struct xt_tgchk_param *par, + unsigned int size, u16 proto, bool inv_proto) { - int ret; - if (XT_ALIGN(par->target->targetsize) != size) { pr_err_ratelimited("%s_tables: %s.%u target: invalid size %u (kernel) != (user) %u\n", xt_prefix[par->family], par->target->name, @@ -1061,6 +1091,23 @@ int xt_check_target(struct xt_tgchk_param *par, par->target->proto); return -EINVAL; } + + return 0; +} + +int xt_check_hooks_target(struct xt_tgchk_param *par) +{ + if (par->target->check_hooks != NULL) + return par->target->check_hooks(par); + + return 0; +} +EXPORT_SYMBOL_GPL(xt_check_hooks_target); + +static int xt_checkentry_target(struct xt_tgchk_param *par) +{ + int ret; + if (par->target->checkentry != NULL) { ret = par->target->checkentry(par); if (ret < 0) @@ -1071,6 +1118,22 @@ int xt_check_target(struct xt_tgchk_param *par, } return 0; } + +int xt_check_target(struct xt_tgchk_param *par, + unsigned int size, u16 proto, bool inv_proto) +{ + int ret; + + ret = xt_check_target_common(par, size, proto, inv_proto); + if (ret < 0) + return ret; + + ret = xt_check_hooks_target(par); + if (ret < 0) + return ret; + + return xt_checkentry_target(par); +} EXPORT_SYMBOL_GPL(xt_check_target); /** diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 498f5871c84a..d2aeacf94230 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -354,7 +354,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV4, .revision = 1, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v1, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, @@ -366,7 +366,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV4, .revision = 2, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v2, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, @@ -398,7 +398,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV6, .revision = 1, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v1, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, @@ -410,7 +410,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV6, .revision = 2, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v2, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 116a885adb3c..80e1634bc51f 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -247,6 +247,21 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) } #endif +static int tcpmss_tg4_check_hooks(const struct xt_tgchk_param *par) +{ + const struct xt_tcpmss_info *info = par->targinfo; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && + (par->hook_mask & ~((1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING))) != 0) { + pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); + return -EINVAL; + } + + return 0; +} + /* Must specify -p tcp --syn */ static inline bool find_syn_match(const struct xt_entry_match *m) { @@ -262,17 +277,9 @@ static inline bool find_syn_match(const struct xt_entry_match *m) static int tcpmss_tg4_check(const struct xt_tgchk_param *par) { - const struct xt_tcpmss_info *info = par->targinfo; const struct ipt_entry *e = par->entryinfo; const struct xt_entry_match *ematch; - if (info->mss == XT_TCPMSS_CLAMP_PMTU && - (par->hook_mask & ~((1 << NF_INET_FORWARD) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING))) != 0) { - pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); - return -EINVAL; - } if (par->nft_compat) return 0; @@ -286,17 +293,9 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par) #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static int tcpmss_tg6_check(const struct xt_tgchk_param *par) { - const struct xt_tcpmss_info *info = par->targinfo; const struct ip6t_entry *e = par->entryinfo; const struct xt_entry_match *ematch; - if (info->mss == XT_TCPMSS_CLAMP_PMTU && - (par->hook_mask & ~((1 << NF_INET_FORWARD) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING))) != 0) { - pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); - return -EINVAL; - } if (par->nft_compat) return 0; @@ -312,6 +311,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = { { .family = NFPROTO_IPV4, .name = "TCPMSS", + .check_hooks = tcpmss_tg4_check_hooks, .checkentry = tcpmss_tg4_check, .target = tcpmss_tg4, .targetsize = sizeof(struct xt_tcpmss_info), @@ -322,6 +322,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = { { .family = NFPROTO_IPV6, .name = "TCPMSS", + .check_hooks = tcpmss_tg4_check_hooks, .checkentry = tcpmss_tg6_check, .target = tcpmss_tg6, .targetsize = sizeof(struct xt_tcpmss_info), diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index e4bea1d346cf..5f60e7298a1e 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -86,6 +86,9 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info *tgi = par->targinfo; + if (par->fragoff) + return NF_DROP; + return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); } @@ -95,6 +98,9 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; + if (par->fragoff) + return NF_DROP; + return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); } @@ -106,6 +112,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; + unsigned short fragoff = 0; struct udphdr _hdr, *hp; struct sock *sk; const struct in6_addr *laddr; @@ -113,8 +120,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) int thoff = 0; int tproto; - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); - if (tproto < 0) + tproto = ipv6_find_hdr(skb, &thoff, -1, &fragoff, NULL); + if (tproto < 0 || fragoff) return NF_DROP; hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index a77088943107..913dbe3aa5e2 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -153,14 +153,10 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) return ret; } -static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) +static int addrtype_mt_check_hooks(const struct xt_mtchk_param *par) { - const char *errmsg = "both incoming and outgoing interface limitation cannot be selected"; struct xt_addrtype_info_v1 *info = par->matchinfo; - - if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN && - info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) - goto err; + const char *errmsg; if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && @@ -176,6 +172,21 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) goto err; } + return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; +} + +static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) +{ + const char *errmsg = "both incoming and outgoing interface limitation cannot be selected"; + struct xt_addrtype_info_v1 *info = par->matchinfo; + + if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN && + info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) + goto err; + #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) if (par->family == NFPROTO_IPV6) { if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) { @@ -211,6 +222,7 @@ static struct xt_match addrtype_mt_reg[] __read_mostly = { .family = NFPROTO_IPV4, .revision = 1, .match = addrtype_mt_v1, + .check_hooks = addrtype_mt_check_hooks, .checkentry = addrtype_mt_checkentry_v1, .matchsize = sizeof(struct xt_addrtype_info_v1), .me = THIS_MODULE @@ -221,6 +233,7 @@ static struct xt_match addrtype_mt_reg[] __read_mostly = { .family = NFPROTO_IPV6, .revision = 1, .match = addrtype_mt_v1, + .check_hooks = addrtype_mt_check_hooks, .checkentry = addrtype_mt_checkentry_v1, .matchsize = sizeof(struct xt_addrtype_info_v1), .me = THIS_MODULE diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c index 9520dd00070b..6d1a44ab5eee 100644 --- a/net/netfilter/xt_devgroup.c +++ b/net/netfilter/xt_devgroup.c @@ -33,14 +33,10 @@ static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) return true; } -static int devgroup_mt_checkentry(const struct xt_mtchk_param *par) +static int devgroup_mt_check_hooks(const struct xt_mtchk_param *par) { const struct xt_devgroup_info *info = par->matchinfo; - if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC | - XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST)) - return -EINVAL; - if (info->flags & XT_DEVGROUP_MATCH_SRC && par->hook_mask & ~((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | @@ -56,9 +52,21 @@ static int devgroup_mt_checkentry(const struct xt_mtchk_param *par) return 0; } +static int devgroup_mt_checkentry(const struct xt_mtchk_param *par) +{ + const struct xt_devgroup_info *info = par->matchinfo; + + if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC | + XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST)) + return -EINVAL; + + return 0; +} + static struct xt_match devgroup_mt_reg __read_mostly = { .name = "devgroup", .match = devgroup_mt, + .check_hooks = devgroup_mt_check_hooks, .checkentry = devgroup_mt_checkentry, .matchsize = sizeof(struct xt_devgroup_info), .family = NFPROTO_UNSPEC, diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c index b96e8203ac54..a8503f5d26bf 100644 --- a/net/netfilter/xt_ecn.c +++ b/net/netfilter/xt_ecn.c @@ -30,6 +30,10 @@ static bool match_tcp(const struct sk_buff *skb, struct xt_action_param *par) struct tcphdr _tcph; const struct tcphdr *th; + /* this is fine for IPv6 as ecn_mt_check6() enforces -p tcp */ + if (par->fragoff) + return false; + /* In practice, TCP match does this, so can't fail. But let's * be good citizens. */ diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 3bd127bfc114..2704b4b60d1e 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -658,6 +658,8 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, if (!(hinfo->cfg.mode & (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT))) return 0; + if (ntohs(ip_hdr(skb)->frag_off) & IP_OFFSET) + return -1; nexthdr = ip_hdr(skb)->protocol; break; #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) @@ -681,7 +683,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, return 0; nexthdr = ipv6_hdr(skb)->nexthdr; protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); - if ((int)protoff < 0) + if ((int)protoff < 0 || ntohs(frag_off) & IP6_OFFSET) return -1; break; } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index dc9485854002..e8807caede68 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -27,6 +27,9 @@ static bool xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) { + if (p->fragoff) + return false; + return nf_osf_match(skb, xt_family(p), xt_hooknum(p), xt_in(p), xt_out(p), p->matchinfo, xt_net(p), nf_osf_fingers); } diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index d2b0b52434fa..dd98f758176c 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -91,14 +91,10 @@ match_outdev: return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT)); } -static int physdev_mt_check(const struct xt_mtchk_param *par) +static int physdev_mt_check_hooks(const struct xt_mtchk_param *par) { const struct xt_physdev_info *info = par->matchinfo; - static bool brnf_probed __read_mostly; - if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || - info->bitmask & ~XT_PHYSDEV_OP_MASK) - return -EINVAL; if (info->bitmask & (XT_PHYSDEV_OP_OUT | XT_PHYSDEV_OP_ISOUT) && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && @@ -107,6 +103,18 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) return -EINVAL; } + return 0; +} + +static int physdev_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_physdev_info *info = par->matchinfo; + static bool brnf_probed __read_mostly; + + if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || + info->bitmask & ~XT_PHYSDEV_OP_MASK) + return -EINVAL; + #define X(memb) strnlen(info->memb, sizeof(info->memb)) >= sizeof(info->memb) if (info->bitmask & XT_PHYSDEV_OP_IN) { if (info->physindev[0] == '\0') @@ -141,6 +149,7 @@ static struct xt_match physdev_mt_reg[] __read_mostly = { { .name = "physdev", .family = NFPROTO_IPV4, + .check_hooks = physdev_mt_check_hooks, .checkentry = physdev_mt_check, .match = physdev_mt, .matchsize = sizeof(struct xt_physdev_info), @@ -149,6 +158,7 @@ static struct xt_match physdev_mt_reg[] __read_mostly = { { .name = "physdev", .family = NFPROTO_IPV6, + .check_hooks = physdev_mt_check_hooks, .checkentry = physdev_mt_check, .match = physdev_mt, .matchsize = sizeof(struct xt_physdev_info), diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index b5fa65558318..ff54e3a8581e 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -126,13 +126,10 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par) return ret; } -static int policy_mt_check(const struct xt_mtchk_param *par) +static int policy_mt_check_hooks(const struct xt_mtchk_param *par) { const struct xt_policy_info *info = par->matchinfo; - const char *errmsg = "neither incoming nor outgoing policy selected"; - - if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) - goto err; + const char *errmsg; if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) { @@ -144,6 +141,21 @@ static int policy_mt_check(const struct xt_mtchk_param *par) errmsg = "input policy not valid in POSTROUTING and OUTPUT"; goto err; } + + return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; +} + +static int policy_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_policy_info *info = par->matchinfo; + const char *errmsg = "neither incoming nor outgoing policy selected"; + + if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) + goto err; + if (info->len > XT_POLICY_MAX_ELEM) { errmsg = "too many policy elements"; goto err; @@ -158,6 +170,7 @@ static struct xt_match policy_mt_reg[] __read_mostly = { { .name = "policy", .family = NFPROTO_IPV4, + .check_hooks = policy_mt_check_hooks, .checkentry = policy_mt_check, .match = policy_mt, .matchsize = sizeof(struct xt_policy_info), @@ -166,6 +179,7 @@ static struct xt_match policy_mt_reg[] __read_mostly = { { .name = "policy", .family = NFPROTO_IPV6, + .check_hooks = policy_mt_check_hooks, .checkentry = policy_mt_check, .match = policy_mt, .matchsize = sizeof(struct xt_policy_info), diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 731bc2cafae4..4ae04bba9358 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -431,6 +431,29 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) } static int +set_target_v3_check_hooks(const struct xt_tgchk_param *par) +{ + const struct xt_set_info_target_v3 *info = par->targinfo; + + if (info->map_set.index != IPSET_INVALID_ID) { + if (strncmp(par->table, "mangle", 7)) { + pr_info_ratelimited("--map-set only usable from mangle table\n"); + return -EINVAL; + } + if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) | + (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) && + (par->hook_mask & ~(1 << NF_INET_FORWARD | + 1 << NF_INET_LOCAL_OUT | + 1 << NF_INET_POST_ROUTING))) { + pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); + return -EINVAL; + } + } + + return 0; +} + +static int set_target_v3_checkentry(const struct xt_tgchk_param *par) { const struct xt_set_info_target_v3 *info = par->targinfo; @@ -459,20 +482,6 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) } if (info->map_set.index != IPSET_INVALID_ID) { - if (strncmp(par->table, "mangle", 7)) { - pr_info_ratelimited("--map-set only usable from mangle table\n"); - ret = -EINVAL; - goto cleanup_del; - } - if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) | - (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) && - (par->hook_mask & ~(1 << NF_INET_FORWARD | - 1 << NF_INET_LOCAL_OUT | - 1 << NF_INET_POST_ROUTING))) { - pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); - ret = -EINVAL; - goto cleanup_del; - } index = ip_set_nfnl_get_byindex(par->net, info->map_set.index); if (index == IPSET_INVALID_ID) { @@ -672,6 +681,7 @@ static struct xt_target set_targets[] __read_mostly = { .family = NFPROTO_IPV4, .target = set_target_v3, .targetsize = sizeof(struct xt_set_info_target_v3), + .check_hooks = set_target_v3_check_hooks, .checkentry = set_target_v3_checkentry, .destroy = set_target_v3_destroy, .me = THIS_MODULE @@ -682,6 +692,7 @@ static struct xt_target set_targets[] __read_mostly = { .family = NFPROTO_IPV6, .target = set_target_v3, .targetsize = sizeof(struct xt_set_info_target_v3), + .check_hooks = set_target_v3_check_hooks, .checkentry = set_target_v3_checkentry, .destroy = set_target_v3_destroy, .me = THIS_MODULE diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index 0d32d4841cb3..b9da8269161d 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -32,6 +32,10 @@ tcpmss_mt(const struct sk_buff *skb, struct xt_action_param *par) u8 _opt[15 * 4 - sizeof(_tcph)]; unsigned int i, optlen; + /* this is fine for IPv6 as xt_tcpmss enforces -p tcp */ + if (par->fragoff) + return false; + /* If we don't have the whole header, drop packet. */ th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL) diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index b10e1602c6b1..cb5ea4424ffc 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -97,6 +97,9 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) goto error; } + vport->dev = dev; + netdev_hold(vport->dev, &vport->dev_tracker, GFP_KERNEL); + rtnl_unlock(); return vport; error: @@ -111,7 +114,7 @@ static struct vport *geneve_create(const struct vport_parms *parms) if (IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + return ovs_netdev_link(vport, true); } static struct vport_ops ovs_geneve_vport_ops = { diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 4014c9b5eb79..6cb5a697b396 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -63,6 +63,9 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms) return ERR_PTR(err); } + vport->dev = dev; + netdev_hold(vport->dev, &vport->dev_tracker, GFP_KERNEL); + rtnl_unlock(); return vport; } @@ -75,7 +78,7 @@ static struct vport *gre_create(const struct vport_parms *parms) if (IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + return ovs_netdev_link(vport, true); } static struct vport_ops ovs_gre_vport_ops = { diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 12055af832dc..c42642075685 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -73,37 +73,21 @@ static struct net_device *get_dpdev(const struct datapath *dp) return local->dev; } -struct vport *ovs_netdev_link(struct vport *vport, const char *name) +struct vport *ovs_netdev_link(struct vport *vport, bool tunnel) { int err; - vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), name); - if (!vport->dev) { + if (WARN_ON_ONCE(!vport->dev)) { err = -ENODEV; goto error_free_vport; } - /* Ensure that the device exists and that the provided - * name is not one of its aliases. - */ - if (strcmp(name, ovs_vport_name(vport))) { - err = -ENODEV; - goto error_put; - } - netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL); - if (vport->dev->flags & IFF_LOOPBACK || - (vport->dev->type != ARPHRD_ETHER && - vport->dev->type != ARPHRD_NONE) || - ovs_is_internal_dev(vport->dev)) { - err = -EINVAL; - goto error_put; - } rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, get_dpdev(vport->dp), NULL, NULL, NULL); if (err) - goto error_unlock; + goto error_put_unlock; err = netdev_rx_handler_register(vport->dev, netdev_frame_hook, vport); @@ -119,10 +103,11 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) error_master_upper_dev_unlink: netdev_upper_dev_unlink(vport->dev, get_dpdev(vport->dp)); -error_unlock: - rtnl_unlock(); -error_put: +error_put_unlock: + if (tunnel && vport->dev->reg_state == NETREG_REGISTERED) + rtnl_delete_link(vport->dev, 0, NULL); netdev_put(vport->dev, &vport->dev_tracker); + rtnl_unlock(); error_free_vport: ovs_vport_free(vport); return ERR_PTR(err); @@ -132,12 +117,39 @@ EXPORT_SYMBOL_GPL(ovs_netdev_link); static struct vport *netdev_create(const struct vport_parms *parms) { struct vport *vport; + int err; vport = ovs_vport_alloc(0, &ovs_netdev_vport_ops, parms); if (IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name); + if (!vport->dev) { + err = -ENODEV; + goto error_free_vport; + } + netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL); + + /* Ensure that the provided name is not an alias. */ + if (strcmp(parms->name, ovs_vport_name(vport))) { + err = -ENODEV; + goto error_put; + } + + if (vport->dev->flags & IFF_LOOPBACK || + (vport->dev->type != ARPHRD_ETHER && + vport->dev->type != ARPHRD_NONE) || + ovs_is_internal_dev(vport->dev)) { + err = -EINVAL; + goto error_put; + } + + return ovs_netdev_link(vport, false); +error_put: + netdev_put(vport->dev, &vport->dev_tracker); +error_free_vport: + ovs_vport_free(vport); + return ERR_PTR(err); } static void vport_netdev_free(struct rcu_head *rcu) @@ -196,9 +208,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) */ if (vport->dev->reg_state == NETREG_REGISTERED) rtnl_delete_link(vport->dev, 0, NULL); - rtnl_unlock(); + /* We can't put the device reference yet, since it can still be in + * use, but rtnl_unlock()->netdev_run_todo() will block until all + * the references are released, so the RCU call must be before it. + */ call_rcu(&vport->rcu, vport_netdev_free); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy); diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index c5d83a43bfc4..6c0d7366f986 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -13,7 +13,7 @@ struct vport *ovs_netdev_get_vport(struct net_device *dev); -struct vport *ovs_netdev_link(struct vport *vport, const char *name); +struct vport *ovs_netdev_link(struct vport *vport, bool tunnel); void ovs_netdev_detach_dev(struct vport *); int __init ovs_netdev_init(void); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 0b881b043bcf..c1b37b50d29e 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -126,6 +126,9 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) goto error; } + vport->dev = dev; + netdev_hold(vport->dev, &vport->dev_tracker, GFP_KERNEL); + rtnl_unlock(); return vport; error: @@ -140,7 +143,7 @@ static struct vport *vxlan_create(const struct vport_parms *parms) if (IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + return ovs_netdev_link(vport, true); } static struct vport_ops ovs_vxlan_netdev_vport_ops = { diff --git a/net/psp/psp_main.c b/net/psp/psp_main.c index 9508b6c38003..e45549f08eef 100644 --- a/net/psp/psp_main.c +++ b/net/psp/psp_main.c @@ -263,15 +263,16 @@ EXPORT_SYMBOL(psp_dev_encapsulate); /* Receive handler for PSP packets. * - * Presently it accepts only already-authenticated packets and does not - * support optional fields, such as virtualization cookies. The caller should - * ensure that skb->data is pointing to the mac header, and that skb->mac_len - * is set. This function does not currently adjust skb->csum (CHECKSUM_COMPLETE - * is not supported). + * Accepts only already-authenticated packets. The full PSP header is + * stripped according to psph->hdrlen; any optional fields it advertises + * (virtualization cookies, etc.) are ignored and discarded along with the + * rest of the header. The caller should ensure that skb->data is pointing + * to the mac header, and that skb->mac_len is set. This function does not + * currently adjust skb->csum (CHECKSUM_COMPLETE is not supported). */ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv) { - int l2_hlen = 0, l3_hlen, encap; + int l2_hlen = 0, l3_hlen, encap, psp_hlen; struct psp_skb_ext *pse; struct psphdr *psph; struct ethhdr *eth; @@ -312,18 +313,36 @@ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv) if (unlikely(uh->dest != htons(PSP_DEFAULT_UDP_PORT))) return -EINVAL; - pse = skb_ext_add(skb, SKB_EXT_PSP); - if (!pse) + psph = (struct psphdr *)(skb->data + l2_hlen + l3_hlen + + sizeof(struct udphdr)); + + /* Strip the full PSP header per psph->hdrlen; VC/options are pulled + * into the linear region only so they can be discarded with the + * rest of the header. + */ + psp_hlen = (psph->hdrlen + 1) * 8; + + if (unlikely(psp_hlen < sizeof(struct psphdr))) + return -EINVAL; + + if (psp_hlen > sizeof(struct psphdr) && + !pskb_may_pull(skb, l2_hlen + l3_hlen + + sizeof(struct udphdr) + psp_hlen)) return -EINVAL; psph = (struct psphdr *)(skb->data + l2_hlen + l3_hlen + sizeof(struct udphdr)); + + pse = skb_ext_add(skb, SKB_EXT_PSP); + if (!pse) + return -EINVAL; + pse->spi = psph->spi; pse->dev_id = dev_id; pse->generation = generation; pse->version = FIELD_GET(PSPHDR_VERFL_VERSION, psph->verfl); - encap = PSP_ENCAP_HLEN; + encap = sizeof(struct udphdr) + psp_hlen; encap += strip_icv ? PSP_TRL_SIZE : 0; if (proto == htons(ETH_P_IP)) { @@ -340,8 +359,9 @@ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv) ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) - encap); } - memmove(skb->data + PSP_ENCAP_HLEN, skb->data, l2_hlen + l3_hlen); - skb_pull(skb, PSP_ENCAP_HLEN); + memmove(skb->data + sizeof(struct udphdr) + psp_hlen, + skb->data, l2_hlen + l3_hlen); + skb_pull(skb, sizeof(struct udphdr) + psp_hlen); if (strip_icv) pskb_trim(skb, skb->len - PSP_TRL_SIZE); diff --git a/net/rds/message.c b/net/rds/message.c index eaa6f22601a4..25fedcb3cd00 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -131,24 +131,34 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs, */ static void rds_message_purge(struct rds_message *rm) { + struct rds_znotifier *znotifier; unsigned long i, flags; - bool zcopy = false; + bool zcopy; if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) return; spin_lock_irqsave(&rm->m_rs_lock, flags); + znotifier = rm->data.op_mmp_znotifier; + rm->data.op_mmp_znotifier = NULL; + zcopy = !!znotifier; + if (rm->m_rs) { struct rds_sock *rs = rm->m_rs; - if (rm->data.op_mmp_znotifier) { - zcopy = true; - rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier); + if (znotifier) { + rds_rm_zerocopy_callback(rs, znotifier); rds_wake_sk_sleep(rs); - rm->data.op_mmp_znotifier = NULL; } sock_put(rds_rs_to_sk(rs)); rm->m_rs = NULL; + } else if (znotifier) { + /* + * Zerocopy can fail before the message is queued on the + * socket, so there is no rs to carry the notification. + */ + mm_unaccount_pinned_pages(&znotifier->z_mmp); + kfree(rds_info_from_znotifier(znotifier)); } spin_unlock_irqrestore(&rm->m_rs_lock, flags); diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 13c6d1869a14..5862933be8d7 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -399,14 +399,14 @@ static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust); * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 */ -static void cobalt_newton_step(struct cobalt_vars *vars) +static void cobalt_newton_step(struct cobalt_vars *vars, u32 count) { u32 invsqrt, invsqrt2; u64 val; invsqrt = vars->rec_inv_sqrt; invsqrt2 = ((u64)invsqrt * invsqrt) >> 32; - val = (3LL << 32) - ((u64)vars->count * invsqrt2); + val = (3LL << 32) - ((u64)count * invsqrt2); val >>= 2; /* avoid overflow in following multiply */ val = (val * invsqrt) >> (32 - 2 + 1); @@ -414,12 +414,12 @@ static void cobalt_newton_step(struct cobalt_vars *vars) vars->rec_inv_sqrt = val; } -static void cobalt_invsqrt(struct cobalt_vars *vars) +static void cobalt_invsqrt(struct cobalt_vars *vars, u32 count) { - if (vars->count < REC_INV_SQRT_CACHE) - vars->rec_inv_sqrt = inv_sqrt_cache[vars->count]; + if (count < REC_INV_SQRT_CACHE) + vars->rec_inv_sqrt = inv_sqrt_cache[count]; else - cobalt_newton_step(vars); + cobalt_newton_step(vars, count); } static void cobalt_vars_init(struct cobalt_vars *vars) @@ -449,16 +449,19 @@ static bool cobalt_queue_full(struct cobalt_vars *vars, bool up = false; if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) { - up = !vars->p_drop; - vars->p_drop += p->p_inc; - if (vars->p_drop < p->p_inc) - vars->p_drop = ~0; - vars->blue_timer = now; - } - vars->dropping = true; - vars->drop_next = now; + u32 p_drop = vars->p_drop; + + up = !p_drop; + p_drop += p->p_inc; + if (p_drop < p->p_inc) + p_drop = ~0; + WRITE_ONCE(vars->p_drop, p_drop); + WRITE_ONCE(vars->blue_timer, now); + } + WRITE_ONCE(vars->dropping, true); + WRITE_ONCE(vars->drop_next, now); if (!vars->count) - vars->count = 1; + WRITE_ONCE(vars->count, 1); return up; } @@ -475,20 +478,20 @@ static bool cobalt_queue_empty(struct cobalt_vars *vars, if (vars->p_drop && ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) { if (vars->p_drop < p->p_dec) - vars->p_drop = 0; + WRITE_ONCE(vars->p_drop, 0); else - vars->p_drop -= p->p_dec; - vars->blue_timer = now; + WRITE_ONCE(vars->p_drop, vars->p_drop - p->p_dec); + WRITE_ONCE(vars->blue_timer, now); down = !vars->p_drop; } - vars->dropping = false; + WRITE_ONCE(vars->dropping, false); if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) { - vars->count--; - cobalt_invsqrt(vars); - vars->drop_next = cobalt_control(vars->drop_next, - p->interval, - vars->rec_inv_sqrt); + WRITE_ONCE(vars->count, vars->count - 1); + cobalt_invsqrt(vars, vars->count); + WRITE_ONCE(vars->drop_next, + cobalt_control(vars->drop_next, p->interval, + vars->rec_inv_sqrt)); } return down; @@ -507,6 +510,7 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, bool next_due, over_target; ktime_t schedule; u64 sojourn; + u32 count; /* The 'schedule' variable records, in its sign, whether 'now' is before or * after 'drop_next'. This allows 'drop_next' to be updated before the next @@ -528,21 +532,22 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, over_target = sojourn > p->target && sojourn > p->mtu_time * bulk_flows * 2 && sojourn > p->mtu_time * 4; - next_due = vars->count && ktime_to_ns(schedule) >= 0; + count = vars->count; + next_due = count && ktime_to_ns(schedule) >= 0; vars->ecn_marked = false; if (over_target) { if (!vars->dropping) { - vars->dropping = true; - vars->drop_next = cobalt_control(now, - p->interval, - vars->rec_inv_sqrt); + WRITE_ONCE(vars->dropping, true); + WRITE_ONCE(vars->drop_next, + cobalt_control(now, p->interval, + vars->rec_inv_sqrt)); } - if (!vars->count) - vars->count = 1; + if (!count) + count = 1; } else if (vars->dropping) { - vars->dropping = false; + WRITE_ONCE(vars->dropping, false); } if (next_due && vars->dropping) { @@ -550,23 +555,23 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, if (!(vars->ecn_marked = INET_ECN_set_ce(skb))) reason = QDISC_DROP_CONGESTED; - vars->count++; - if (!vars->count) - vars->count--; - cobalt_invsqrt(vars); - vars->drop_next = cobalt_control(vars->drop_next, - p->interval, - vars->rec_inv_sqrt); + count++; + if (!count) + count--; + cobalt_invsqrt(vars, count); + WRITE_ONCE(vars->drop_next, + cobalt_control(vars->drop_next, p->interval, + vars->rec_inv_sqrt)); schedule = ktime_sub(now, vars->drop_next); } else { while (next_due) { - vars->count--; - cobalt_invsqrt(vars); - vars->drop_next = cobalt_control(vars->drop_next, - p->interval, - vars->rec_inv_sqrt); + count--; + cobalt_invsqrt(vars, count); + WRITE_ONCE(vars->drop_next, + cobalt_control(vars->drop_next, p->interval, + vars->rec_inv_sqrt)); schedule = ktime_sub(now, vars->drop_next); - next_due = vars->count && ktime_to_ns(schedule) >= 0; + next_due = count && ktime_to_ns(schedule) >= 0; } } @@ -575,11 +580,12 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, get_random_u32() < vars->p_drop) reason = QDISC_DROP_FLOOD_PROTECTION; + WRITE_ONCE(vars->count, count); /* Overload the drop_next field as an activity timeout */ - if (!vars->count) - vars->drop_next = ktime_add_ns(now, p->interval); + if (!count) + WRITE_ONCE(vars->drop_next, ktime_add_ns(now, p->interval)); else if (ktime_to_ns(schedule) > 0 && reason == QDISC_DROP_UNSPEC) - vars->drop_next = now; + WRITE_ONCE(vars->drop_next, now); return reason; } @@ -914,7 +920,7 @@ static struct sk_buff *dequeue_head(struct cake_flow *flow) struct sk_buff *skb = flow->head; if (skb) { - flow->head = skb->next; + WRITE_ONCE(flow->head, skb->next); skb_mark_not_on_list(skb); } @@ -926,7 +932,7 @@ static struct sk_buff *dequeue_head(struct cake_flow *flow) static void flow_queue_add(struct cake_flow *flow, struct sk_buff *skb) { if (!flow->head) - flow->head = skb; + WRITE_ONCE(flow->head, skb); else flow->tail->next = skb; flow->tail = skb; @@ -1357,7 +1363,7 @@ found: if (elig_ack_prev) elig_ack_prev->next = elig_ack->next; else - flow->head = elig_ack->next; + WRITE_ONCE(flow->head, elig_ack->next); skb_mark_not_on_list(elig_ack); @@ -1595,11 +1601,11 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) len = qdisc_pkt_len(skb); q->buffer_used -= skb->truesize; - b->backlogs[idx] -= len; WRITE_ONCE(b->tin_backlog, b->tin_backlog - len); + WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] - len); sch->qstats.backlog -= len; - flow->dropped++; + WRITE_ONCE(flow->dropped, flow->dropped + 1); WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1); if (q->config->rate_flags & CAKE_FLAG_INGRESS) @@ -1824,11 +1830,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } /* stats */ - b->backlogs[idx] += slen; sch->qstats.backlog += slen; q->avg_window_bytes += slen; WRITE_ONCE(b->bytes, b->bytes + slen); WRITE_ONCE(b->tin_backlog, b->tin_backlog + slen); + WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + slen); qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen); consume_skb(skb); @@ -1861,11 +1867,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* stats */ WRITE_ONCE(b->packets, b->packets + 1); - b->backlogs[idx] += len - ack_pkt_len; sch->qstats.backlog += len - ack_pkt_len; q->avg_window_bytes += len - ack_pkt_len; WRITE_ONCE(b->bytes, b->bytes + len - ack_pkt_len); WRITE_ONCE(b->tin_backlog, b->tin_backlog + len - ack_pkt_len); + WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + len - ack_pkt_len); } if (q->overflow_timeout) @@ -1924,7 +1930,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, flow->set = CAKE_SET_SPARSE; WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count + 1); - flow->deficit = cake_get_flow_quantum(b, flow, q->config->flow_mode); + WRITE_ONCE(flow->deficit, cake_get_flow_quantum(b, flow, q->config->flow_mode)); } else if (flow->set == CAKE_SET_SPARSE_WAIT) { /* this flow was empty, accounted as a sparse flow, but actually * in the bulk rotation. @@ -1977,7 +1983,7 @@ static struct sk_buff *cake_dequeue_one(struct Qdisc *sch) if (flow->head) { skb = dequeue_head(flow); len = qdisc_pkt_len(skb); - b->backlogs[q->cur_flow] -= len; + WRITE_ONCE(b->backlogs[q->cur_flow], b->backlogs[q->cur_flow] - len); WRITE_ONCE(b->tin_backlog, b->tin_backlog - len); sch->qstats.backlog -= len; q->buffer_used -= skb->truesize; @@ -2166,7 +2172,8 @@ retry: } } - flow->deficit += cake_get_flow_quantum(b, flow, q->config->flow_mode); + WRITE_ONCE(flow->deficit, + flow->deficit + cake_get_flow_quantum(b, flow, q->config->flow_mode)); list_move_tail(&flow->flowchain, &b->old_flows); goto retry; @@ -2232,10 +2239,10 @@ retry: if (q->config->rate_flags & CAKE_FLAG_INGRESS) { len = cake_advance_shaper(q, b, skb, now, true); - flow->deficit -= len; + WRITE_ONCE(flow->deficit, flow->deficit - len); b->tin_deficit -= len; } - flow->dropped++; + WRITE_ONCE(flow->dropped, flow->dropped + 1); WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1); qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); qdisc_qstats_drop(sch); @@ -2259,7 +2266,7 @@ retry: delay < b->base_delay ? 2 : 8)); len = cake_advance_shaper(q, b, skb, now, false); - flow->deficit -= len; + WRITE_ONCE(flow->deficit, flow->deficit - len); b->tin_deficit -= len; if (ktime_after(q->time_next_packet, now) && sch->q.qlen) { @@ -3137,7 +3144,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, flow = &b->flows[idx % CAKE_QUEUES]; - if (flow->head) { + if (READ_ONCE(flow->head)) { sch_tree_lock(sch); skb = flow->head; while (skb) { @@ -3146,13 +3153,15 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, } sch_tree_unlock(sch); } - qs.backlog = b->backlogs[idx % CAKE_QUEUES]; - qs.drops = flow->dropped; + qs.backlog = READ_ONCE(b->backlogs[idx % CAKE_QUEUES]); + qs.drops = READ_ONCE(flow->dropped); } if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; if (flow) { ktime_t now = ktime_get(); + bool dropping; + u32 p_drop; stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP); if (!stats) @@ -3167,21 +3176,23 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, goto nla_put_failure; \ } while (0) - PUT_STAT_S32(DEFICIT, flow->deficit); - PUT_STAT_U32(DROPPING, flow->cvars.dropping); - PUT_STAT_U32(COBALT_COUNT, flow->cvars.count); - PUT_STAT_U32(P_DROP, flow->cvars.p_drop); - if (flow->cvars.p_drop) { + PUT_STAT_S32(DEFICIT, READ_ONCE(flow->deficit)); + dropping = READ_ONCE(flow->cvars.dropping); + PUT_STAT_U32(DROPPING, dropping); + PUT_STAT_U32(COBALT_COUNT, READ_ONCE(flow->cvars.count)); + p_drop = READ_ONCE(flow->cvars.p_drop); + PUT_STAT_U32(P_DROP, p_drop); + if (p_drop) { PUT_STAT_S32(BLUE_TIMER_US, ktime_to_us( ktime_sub(now, - flow->cvars.blue_timer))); + READ_ONCE(flow->cvars.blue_timer)))); } - if (flow->cvars.dropping) { + if (dropping) { PUT_STAT_S32(DROP_NEXT_US, ktime_to_us( ktime_sub(now, - flow->cvars.drop_next))); + READ_ONCE(flow->cvars.drop_next)))); } if (nla_nest_end(d->skb, stats) < 0) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 0664b2f2d6f2..24db54684e8a 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -117,7 +117,7 @@ static inline struct sk_buff *dequeue_head(struct fq_codel_flow *flow) { struct sk_buff *skb = flow->head; - flow->head = skb->next; + WRITE_ONCE(flow->head, skb->next); skb_mark_not_on_list(skb); return skb; } @@ -127,7 +127,7 @@ static inline void flow_queue_add(struct fq_codel_flow *flow, struct sk_buff *skb) { if (flow->head == NULL) - flow->head = skb; + WRITE_ONCE(flow->head, skb); else flow->tail->next = skb; flow->tail = skb; @@ -173,8 +173,8 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, } while (++i < max_packets && len < threshold); /* Tell codel to increase its signal strength also */ - flow->cvars.count += i; - q->backlogs[idx] -= len; + WRITE_ONCE(flow->cvars.count, flow->cvars.count + i); + WRITE_ONCE(q->backlogs[idx], q->backlogs[idx] - len); q->memory_usage -= mem; sch->qstats.drops += i; sch->qstats.backlog -= len; @@ -204,13 +204,13 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, codel_set_enqueue_time(skb); flow = &q->flows[idx]; flow_queue_add(flow, skb); - q->backlogs[idx] += qdisc_pkt_len(skb); + WRITE_ONCE(q->backlogs[idx], q->backlogs[idx] + qdisc_pkt_len(skb)); qdisc_qstats_backlog_inc(sch, skb); if (list_empty(&flow->flowchain)) { list_add_tail(&flow->flowchain, &q->new_flows); q->new_flow_count++; - flow->deficit = q->quantum; + WRITE_ONCE(flow->deficit, q->quantum); } get_codel_cb(skb)->mem_usage = skb->truesize; q->memory_usage += get_codel_cb(skb)->mem_usage; @@ -263,7 +263,8 @@ static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) flow = container_of(vars, struct fq_codel_flow, cvars); if (flow->head) { skb = dequeue_head(flow); - q->backlogs[flow - q->flows] -= qdisc_pkt_len(skb); + WRITE_ONCE(q->backlogs[flow - q->flows], + q->backlogs[flow - q->flows] - qdisc_pkt_len(skb)); q->memory_usage -= get_codel_cb(skb)->mem_usage; sch->q.qlen--; sch->qstats.backlog -= qdisc_pkt_len(skb); @@ -296,7 +297,7 @@ begin: flow = list_first_entry(head, struct fq_codel_flow, flowchain); if (flow->deficit <= 0) { - flow->deficit += q->quantum; + WRITE_ONCE(flow->deficit, flow->deficit + q->quantum); list_move_tail(&flow->flowchain, &q->old_flows); goto begin; } @@ -314,7 +315,7 @@ begin: goto begin; } qdisc_bstats_update(sch, skb); - flow->deficit -= qdisc_pkt_len(skb); + WRITE_ONCE(flow->deficit, flow->deficit - qdisc_pkt_len(skb)); if (q->cstats.drop_count) { qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, @@ -328,7 +329,7 @@ begin: static void fq_codel_flow_purge(struct fq_codel_flow *flow) { rtnl_kfree_skbs(flow->head, flow->tail); - flow->head = NULL; + WRITE_ONCE(flow->head, NULL); } static void fq_codel_reset(struct Qdisc *sch) @@ -656,21 +657,21 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, memset(&xstats, 0, sizeof(xstats)); xstats.type = TCA_FQ_CODEL_XSTATS_CLASS; - xstats.class_stats.deficit = flow->deficit; + xstats.class_stats.deficit = READ_ONCE(flow->deficit); xstats.class_stats.ldelay = - codel_time_to_us(flow->cvars.ldelay); - xstats.class_stats.count = flow->cvars.count; - xstats.class_stats.lastcount = flow->cvars.lastcount; - xstats.class_stats.dropping = flow->cvars.dropping; - if (flow->cvars.dropping) { - codel_tdiff_t delta = flow->cvars.drop_next - + codel_time_to_us(READ_ONCE(flow->cvars.ldelay)); + xstats.class_stats.count = READ_ONCE(flow->cvars.count); + xstats.class_stats.lastcount = READ_ONCE(flow->cvars.lastcount); + xstats.class_stats.dropping = READ_ONCE(flow->cvars.dropping); + if (xstats.class_stats.dropping) { + codel_tdiff_t delta = READ_ONCE(flow->cvars.drop_next) - codel_get_time(); xstats.class_stats.drop_next = (delta >= 0) ? codel_time_to_us(delta) : -codel_time_to_us(-delta); } - if (flow->head) { + if (READ_ONCE(flow->head)) { sch_tree_lock(sch); skb = flow->head; while (skb) { @@ -679,7 +680,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, } sch_tree_unlock(sch); } - qs.backlog = q->backlogs[idx]; + qs.backlog = READ_ONCE(q->backlogs[idx]); qs.drops = 0; } if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index fb53fbf0e328..b41f2def2e2c 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -219,16 +219,14 @@ void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params, * packet timestamp. */ if (!params->dq_rate_estimator) { - vars->qdelay = now - pie_get_enqueue_time(skb); + WRITE_ONCE(vars->qdelay, + backlog ? now - pie_get_enqueue_time(skb) : 0); if (vars->dq_tstamp != DTIME_INVALID) dtime = now - vars->dq_tstamp; vars->dq_tstamp = now; - if (backlog == 0) - vars->qdelay = 0; - if (dtime == 0) return; @@ -376,7 +374,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, if (qdelay > (PSCHED_NS2TICKS(250 * NSEC_PER_MSEC))) delta += MAX_PROB / (100 / 2); - vars->prob += delta; + WRITE_ONCE(vars->prob, vars->prob + delta); if (delta > 0) { /* prevent overflow */ @@ -401,7 +399,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, if (qdelay == 0 && qdelay_old == 0 && update_prob) /* Reduce drop probability to 98.4% */ - vars->prob -= vars->prob / 64; + WRITE_ONCE(vars->prob, vars->prob - vars->prob / 64); WRITE_ONCE(vars->qdelay, qdelay); vars->backlog_old = backlog; @@ -501,7 +499,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct pie_sched_data *q = qdisc_priv(sch); struct tc_pie_xstats st = { - .prob = q->vars.prob << BITS_PER_BYTE, + .prob = READ_ONCE(q->vars.prob) << BITS_PER_BYTE, .delay = ((u32)PSCHED_TICKS2NS(READ_ONCE(q->vars.qdelay))) / NSEC_PER_USEC, .packets_in = READ_ONCE(q->stats.packets_in), @@ -512,7 +510,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) }; /* avg_dq_rate is only valid if dq_rate_estimator is enabled */ - st.dq_rate_estimating = q->params.dq_rate_estimator; + st.dq_rate_estimating = READ_ONCE(q->params.dq_rate_estimator); /* unscale and return dq_rate in bytes per sec */ if (st.dq_rate_estimating) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 432b8a3000a5..4d0e44a2e7c6 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -162,7 +162,7 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch) struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; - skb = child->dequeue(child); + skb = qdisc_dequeue_peeked(child); if (skb) { qdisc_bstats_update(sch, skb); qdisc_qstats_backlog_dec(sch, skb); diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index bd5ef561030f..d3ee8e5479b3 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -441,7 +441,7 @@ static struct sk_buff *sfb_dequeue(struct Qdisc *sch) struct Qdisc *child = q->qdisc; struct sk_buff *skb; - skb = child->dequeue(q->qdisc); + skb = qdisc_dequeue_peeked(child); if (skb) { qdisc_bstats_update(sch, skb); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index c3f3181dba54..f39822babf88 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -225,7 +225,8 @@ static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) sfq_unlink(q, x, n, p); - d = q->slots[x].qlen--; + d = q->slots[x].qlen; + WRITE_ONCE(q->slots[x].qlen, d - 1); if (n == p && q->cur_depth == d) q->cur_depth--; sfq_link(q, x); @@ -238,7 +239,8 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x) sfq_unlink(q, x, n, p); - d = ++q->slots[x].qlen; + d = q->slots[x].qlen + 1; + WRITE_ONCE(q->slots[x].qlen, d); if (q->cur_depth < d) q->cur_depth = d; sfq_link(q, x); @@ -298,7 +300,7 @@ static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free) drop: skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot); len = qdisc_pkt_len(skb); - slot->backlog -= len; + WRITE_ONCE(slot->backlog, slot->backlog - len); sfq_dec(q, x); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); @@ -314,7 +316,7 @@ drop: q->tail = NULL; /* no more active slots */ else q->tail->next = slot->next; - q->ht[slot->hash] = SFQ_EMPTY_SLOT; + WRITE_ONCE(q->ht[slot->hash], SFQ_EMPTY_SLOT); goto drop; } @@ -364,10 +366,10 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) x = q->dep[0].next; /* get a free slot */ if (x >= SFQ_MAX_FLOWS) return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_MAXFLOWS); - q->ht[hash] = x; + WRITE_ONCE(q->ht[hash], x); slot = &q->slots[x]; slot->hash = hash; - slot->backlog = 0; /* should already be 0 anyway... */ + WRITE_ONCE(slot->backlog, 0); /* should already be 0 anyway... */ red_set_vars(&slot->vars); goto enqueue; } @@ -426,7 +428,7 @@ congestion_drop: head = slot_dequeue_head(slot); delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb); sch->qstats.backlog -= delta; - slot->backlog -= delta; + WRITE_ONCE(slot->backlog, slot->backlog - delta); qdisc_drop_reason(head, sch, to_free, QDISC_DROP_FLOW_LIMIT); slot_queue_add(slot, skb); @@ -436,7 +438,7 @@ congestion_drop: enqueue: qdisc_qstats_backlog_inc(sch, skb); - slot->backlog += qdisc_pkt_len(skb); + WRITE_ONCE(slot->backlog, slot->backlog + qdisc_pkt_len(skb)); slot_queue_add(slot, skb); sfq_inc(q, x); if (slot->qlen == 1) { /* The flow is new */ @@ -452,7 +454,7 @@ enqueue: */ q->tail = slot; /* We could use a bigger initial quantum for new flows */ - slot->allot = q->quantum; + WRITE_ONCE(slot->allot, q->quantum); } if (++sch->q.qlen <= q->limit) return NET_XMIT_SUCCESS; @@ -489,7 +491,7 @@ next_slot: slot = &q->slots[a]; if (slot->allot <= 0) { q->tail = slot; - slot->allot += q->quantum; + WRITE_ONCE(slot->allot, slot->allot + q->quantum); goto next_slot; } skb = slot_dequeue_head(slot); @@ -497,10 +499,10 @@ next_slot: qdisc_bstats_update(sch, skb); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); - slot->backlog -= qdisc_pkt_len(skb); + WRITE_ONCE(slot->backlog, slot->backlog - qdisc_pkt_len(skb)); /* Is the slot empty? */ if (slot->qlen == 0) { - q->ht[slot->hash] = SFQ_EMPTY_SLOT; + WRITE_ONCE(q->ht[slot->hash], SFQ_EMPTY_SLOT); next_a = slot->next; if (a == next_a) { q->tail = NULL; /* no more active slots */ @@ -508,7 +510,7 @@ next_slot: } q->tail->next = next_a; } else { - slot->allot -= qdisc_pkt_len(skb); + WRITE_ONCE(slot->allot, slot->allot - qdisc_pkt_len(skb)); } return skb; } @@ -549,9 +551,9 @@ static void sfq_rehash(struct Qdisc *sch) sfq_dec(q, i); __skb_queue_tail(&list, skb); } - slot->backlog = 0; + WRITE_ONCE(slot->backlog, 0); red_set_vars(&slot->vars); - q->ht[slot->hash] = SFQ_EMPTY_SLOT; + WRITE_ONCE(q->ht[slot->hash], SFQ_EMPTY_SLOT); } q->tail = NULL; @@ -570,7 +572,7 @@ drop: dropped++; continue; } - q->ht[hash] = x; + WRITE_ONCE(q->ht[hash], x); slot = &q->slots[x]; slot->hash = hash; } @@ -581,7 +583,7 @@ drop: slot->vars.qavg = red_calc_qavg(q->red_parms, &slot->vars, slot->backlog); - slot->backlog += qdisc_pkt_len(skb); + WRITE_ONCE(slot->backlog, slot->backlog + qdisc_pkt_len(skb)); sfq_inc(q, x); if (slot->qlen == 1) { /* The flow is new */ if (q->tail == NULL) { /* It is the first flow */ @@ -591,7 +593,7 @@ drop: q->tail->next = x; } q->tail = slot; - slot->allot = q->quantum; + WRITE_ONCE(slot->allot, q->quantum); } } sch->q.qlen -= dropped; @@ -905,16 +907,16 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct gnet_dump *d) { struct sfq_sched_data *q = qdisc_priv(sch); - sfq_index idx = q->ht[cl - 1]; + sfq_index idx = READ_ONCE(q->ht[cl - 1]); struct gnet_stats_queue qs = { 0 }; struct tc_sfq_xstats xstats = { 0 }; if (idx != SFQ_EMPTY_SLOT) { const struct sfq_slot *slot = &q->slots[idx]; - xstats.allot = slot->allot; - qs.qlen = slot->qlen; - qs.backlog = slot->backlog; + xstats.allot = READ_ONCE(slot->allot); + qs.qlen = READ_ONCE(slot->qlen); + qs.backlog = READ_ONCE(slot->backlog); } if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; @@ -930,7 +932,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < q->divisor; i++) { - if (q->ht[i] == SFQ_EMPTY_SLOT) { + if (READ_ONCE(q->ht[i]) == SFQ_EMPTY_SLOT) { arg->count++; continue; } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 1a565095376a..185dbed7de5d 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1628,12 +1628,8 @@ static void smc_connect_work(struct work_struct *work) lock_sock(&smc->sk); if (rc != 0 || smc->sk.sk_err) { smc->sk.sk_state = SMC_CLOSED; - if (rc == -EPIPE || rc == -EAGAIN) - smc->sk.sk_err = EPIPE; - else if (rc == -ECONNREFUSED) - smc->sk.sk_err = ECONNREFUSED; - else if (signal_pending(current)) - smc->sk.sk_err = -sock_intr_errno(timeo); + if (!smc->sk.sk_err) + smc->sk.sk_err = (rc == -EAGAIN) ? EPIPE : -rc; sock_put(&smc->sk); /* passive closing */ goto out; } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 798243eabb1f..2590e855f6a5 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2317,9 +2317,9 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, if (copied < 0) goto splice_requeue; - if (chunk < rxm->full_len) { - rxm->offset += len; - rxm->full_len -= len; + if (copied < rxm->full_len) { + rxm->offset += copied; + rxm->full_len -= copied; goto splice_requeue; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e2d787ca3e74..1cbf36ea043b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3323,6 +3323,9 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct sk_buff *skb; int answ = 0; + if (sk->sk_type != SOCK_STREAM) + return -EOPNOTSUPP; + mutex_lock(&u->iolock); skb = skb_peek(&sk->sk_receive_queue); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index a7967a345827..0783555e2526 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -607,6 +607,8 @@ static void unix_gc(struct work_struct *work) struct sk_buff_head hitlist; struct sk_buff *skb; + WRITE_ONCE(gc_in_progress, true); + spin_lock(&unix_gc_lock); if (unix_graph_state == UNIX_GRAPH_NOT_CYCLIC) { @@ -649,10 +651,8 @@ void unix_schedule_gc(struct user_struct *user) READ_ONCE(user->unix_inflight) < UNIX_INFLIGHT_SANE_USER) return; - if (!READ_ONCE(gc_in_progress)) { - WRITE_ONCE(gc_in_progress, true); + if (!READ_ONCE(gc_in_progress)) queue_work(system_dfl_wq, &unix_gc_work); - } if (user && READ_ONCE(unix_graph_cyclic_sccs)) flush_work(&unix_gc_work); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 416d533f493d..9b8014516f4f 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -447,7 +447,9 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, u32 len) { - if (vvs->buf_used + len > vvs->buf_alloc) + u64 skb_overhead = (skb_queue_len(&vvs->rx_queue) + 1) * SKB_TRUESIZE(0); + + if (skb_overhead + vvs->buf_used + len > vvs->buf_alloc) return false; vvs->rx_bytes += len; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f334cdef8958..7db9cd433801 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1276,6 +1276,18 @@ static int nl80211_prepare_wdev_dump(struct netlink_callback *cb, rtnl_unlock(); return -ENODEV; } + + /* + * The first invocation validated the wdev's netns against + * the caller via __cfg80211_wdev_from_attrs(). The wiphy + * may have moved netns between dumpit invocations (via + * NL80211_CMD_SET_WIPHY_NETNS), so re-check here. + */ + if (!net_eq(wiphy_net(wiphy), sock_net(cb->skb->sk))) { + rtnl_unlock(); + return -ENODEV; + } + *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; @@ -13867,6 +13879,19 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(net)) return PTR_ERR(net); + /* + * The caller already has CAP_NET_ADMIN over the source netns + * (enforced by GENL_UNS_ADMIN_PERM on the genl op). Mirror the + * convention used by net/core/rtnetlink.c::rtnl_get_net_ns_capable() + * and require CAP_NET_ADMIN over the target netns as well, so that + * a caller that is privileged in their own user namespace cannot + * push a wiphy into a netns where they have no privilege. + */ + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + put_net(net); + return -EPERM; + } + err = 0; /* check if anything to do */ @@ -19828,6 +19853,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .cmd = NL80211_CMD_SET_PMK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_pmk, + .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), }, @@ -19835,6 +19861,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .cmd = NL80211_CMD_DEL_PMK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_pmk, + .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 4c8ea0583f94..d6cd0de64d1f 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -88,7 +88,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.ftms_per_burst = 0; if (tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]) out->ftm.ftms_per_burst = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]); + nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]); if (capa->ftm.max_ftms_per_burst && (out->ftm.ftms_per_burst > capa->ftm.max_ftms_per_burst || diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 887abed25466..5e5786cd9af5 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -646,9 +646,42 @@ static u64 xsk_skb_destructor_get_addr(struct sk_buff *skb) return (u64)((uintptr_t)skb_shinfo(skb)->destructor_arg & ~0x1UL); } -static void xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr) +static struct xsk_addrs *__xsk_addrs_alloc(struct sk_buff *skb, u64 addr) { - skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL); + struct xsk_addrs *xsk_addr; + + xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL); + if (unlikely(!xsk_addr)) + return NULL; + + xsk_addr->addrs[0] = addr; + skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; + return xsk_addr; +} + +static struct xsk_addrs *xsk_addrs_alloc(struct sk_buff *skb) +{ + struct xsk_addrs *xsk_addr; + + if (!xsk_skb_destructor_is_addr(skb)) + return (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; + + xsk_addr = __xsk_addrs_alloc(skb, xsk_skb_destructor_get_addr(skb)); + if (likely(xsk_addr)) + xsk_addr->num_descs = 1; + return xsk_addr; +} + +static int xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr) +{ + if (IS_ENABLED(CONFIG_64BIT)) { + skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL); + return 0; + } + + if (unlikely(!__xsk_addrs_alloc(skb, addr))) + return -ENOMEM; + return 0; } static void xsk_inc_num_desc(struct sk_buff *skb) @@ -685,7 +718,7 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool, spin_lock_irqsave(&pool->cq_prod_lock, flags); idx = xskq_get_prod(pool->cq); - if (unlikely(num_descs > 1)) { + if (unlikely(!xsk_skb_destructor_is_addr(skb))) { xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; for (i = 0; i < num_descs; i++) { @@ -724,14 +757,20 @@ void xsk_destruct_skb(struct sk_buff *skb) sock_wfree(skb); } -static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs, - u64 addr) +static int xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs, + u64 addr) { + int err; + + err = xsk_skb_destructor_set_addr(skb, addr); + if (unlikely(err)) + return err; + skb->dev = xs->dev; skb->priority = READ_ONCE(xs->sk.sk_priority); skb->mark = READ_ONCE(xs->sk.sk_mark); skb->destructor = xsk_destruct_skb; - xsk_skb_destructor_set_addr(skb, addr); + return 0; } static void xsk_consume_skb(struct sk_buff *skb) @@ -740,7 +779,7 @@ static void xsk_consume_skb(struct sk_buff *skb) u32 num_descs = xsk_get_num_desc(skb); struct xsk_addrs *xsk_addr; - if (unlikely(num_descs > 1)) { + if (unlikely(!xsk_skb_destructor_is_addr(skb))) { xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; kmem_cache_free(xsk_tx_generic_cache, xsk_addr); } @@ -819,28 +858,19 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, return ERR_PTR(err); skb_reserve(skb, hr); - - xsk_skb_init_misc(skb, xs, desc->addr); if (desc->options & XDP_TX_METADATA) { err = xsk_skb_metadata(skb, buffer, desc, pool, hr); - if (unlikely(err)) + if (unlikely(err)) { + kfree_skb(skb); return ERR_PTR(err); + } } } else { struct xsk_addrs *xsk_addr; - if (xsk_skb_destructor_is_addr(skb)) { - xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, - GFP_KERNEL); - if (!xsk_addr) - return ERR_PTR(-ENOMEM); - - xsk_addr->num_descs = 1; - xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb); - skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; - } else { - xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; - } + xsk_addr = xsk_addrs_alloc(skb); + if (!xsk_addr) + return ERR_PTR(-ENOMEM); /* in case of -EOVERFLOW that could happen below, * xsk_consume_skb() will release this node as whole skb @@ -856,8 +886,11 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, addr = buffer - pool->addrs; for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) { - if (unlikely(i >= MAX_SKB_FRAGS)) + if (unlikely(i >= MAX_SKB_FRAGS)) { + if (!xs->skb) + kfree_skb(skb); return ERR_PTR(-EOVERFLOW); + } page = pool->umem->pgs[addr >> PAGE_SHIFT]; get_page(page); @@ -914,7 +947,6 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, if (unlikely(err)) goto free_err; - xsk_skb_init_misc(skb, xs, desc->addr); if (desc->options & XDP_TX_METADATA) { err = xsk_skb_metadata(skb, buffer, desc, xs->pool, hr); @@ -927,19 +959,10 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, struct page *page; u8 *vaddr; - if (xsk_skb_destructor_is_addr(skb)) { - xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, - GFP_KERNEL); - if (!xsk_addr) { - err = -ENOMEM; - goto free_err; - } - - xsk_addr->num_descs = 1; - xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb); - skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; - } else { - xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; + xsk_addr = xsk_addrs_alloc(skb); + if (!xsk_addr) { + err = -ENOMEM; + goto free_err; } if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) { @@ -964,18 +987,28 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, } } + if (!xs->skb) { + err = xsk_skb_init_misc(skb, xs, desc->addr); + if (unlikely(err)) + goto free_err; + } xsk_inc_num_desc(skb); return skb; free_err: - if (skb && !skb_shinfo(skb)->nr_frags) + if (skb && !xs->skb) kfree_skb(skb); if (err == -EOVERFLOW) { - /* Drop the packet */ - xsk_inc_num_desc(xs->skb); - xsk_drop_skb(xs->skb); + if (xs->skb) { + /* Drop the packet */ + xsk_inc_num_desc(xs->skb); + xsk_drop_skb(xs->skb); + } else { + xsk_cq_cancel_locked(xs->pool, 1); + xs->tx->invalid_descs++; + } xskq_cons_release(xs->tx); } else { /* Let application retry */ diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index cd7bc50872f6..d981cfdd8535 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -175,6 +175,9 @@ int xp_assign_dev(struct xsk_buff_pool *pool, if (force_zc && force_copy) return -EINVAL; + if (pool->tx_sw_csum && (netdev->priv_flags & IFF_TX_SKB_NO_LINEAR)) + return -EOPNOTSUPP; + if (xsk_get_pool_from_qid(netdev, queue_id)) return -EBUSY; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index a9652b422f51..cc35c2fcbbe0 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -66,7 +66,9 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph = ip_hdr(skb); int ihl = iph->ihl * 4; - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) + skb_set_inner_transport_header(skb, + skb_transport_offset(skb)); skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + @@ -167,7 +169,9 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) int hdr_len; iph = ipv6_hdr(skb); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) + skb_set_inner_transport_header(skb, + skb_transport_offset(skb)); hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr); if (hdr_len < 0) @@ -276,8 +280,10 @@ static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *top_iph; int flags; - skb_set_inner_network_header(skb, skb_network_offset(skb)); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) { + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + } skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + @@ -321,8 +327,10 @@ static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) struct ipv6hdr *top_iph; int dsfield; - skb_set_inner_network_header(skb, skb_network_offset(skb)); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) { + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + } skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1748d374abca..686014d39429 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -818,17 +818,17 @@ int __xfrm_state_delete(struct xfrm_state *x) spin_lock(&net->xfrm.xfrm_state_lock); list_del(&x->km.all); - hlist_del_rcu(&x->bydst); - hlist_del_rcu(&x->bysrc); - if (x->km.seq) - hlist_del_rcu(&x->byseq); + hlist_del_init_rcu(&x->bydst); + hlist_del_init_rcu(&x->bysrc); + if (!hlist_unhashed(&x->byseq)) + hlist_del_init_rcu(&x->byseq); if (!hlist_unhashed(&x->state_cache)) hlist_del_rcu(&x->state_cache); if (!hlist_unhashed(&x->state_cache_input)) hlist_del_rcu(&x->state_cache_input); - if (x->id.spi) - hlist_del_rcu(&x->byspi); + if (!hlist_unhashed(&x->byspi)) + hlist_del_init_rcu(&x->byspi); net->xfrm.state_num--; xfrm_nat_keepalive_state_updated(x); spin_unlock(&net->xfrm.xfrm_state_lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d56450f61669..38a90e5ee3d9 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -3323,6 +3323,7 @@ const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32), + [XFRM_MSG_MAPPING - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_mapping), [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), }; diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 85ca4d1ecf9e..82809d5b2478 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -31,6 +31,7 @@ TEST_PROGS = \ hw_stats_l3.sh \ hw_stats_l3_gre.sh \ iou-zcrx.py \ + ipsec_vxlan.py \ irq.py \ loopback.sh \ nic_timestamp.py \ diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config index dd50cb8a7911..8c132ace2b8d 100644 --- a/tools/testing/selftests/drivers/net/hw/config +++ b/tools/testing/selftests/drivers/net/hw/config @@ -3,6 +3,10 @@ CONFIG_FAIL_FUNCTION=y CONFIG_FAULT_INJECTION=y CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FUNCTION_ERROR_INJECTION=y +CONFIG_INET6_ESP=y +CONFIG_INET6_ESP_OFFLOAD=y +CONFIG_INET_ESP=y +CONFIG_INET_ESP_OFFLOAD=y CONFIG_IO_URING=y CONFIG_IPV6=y CONFIG_IPV6_GRE=y @@ -14,3 +18,4 @@ CONFIG_NETKIT=y CONFIG_NET_SCH_INGRESS=y CONFIG_UDMABUF=y CONFIG_VXLAN=y +CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py b/tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py new file mode 100755 index 000000000000..0740a4d85240 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +"""Traffic test for VXLAN + IPsec crypto-offload.""" + +import os + +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge +from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx +from lib.py import CmdExitFailure, NetDrvEpEnv, cmd, defer, ethtool, ip +from lib.py import Iperf3Runner + +# Inner tunnel addresses - TEST-NET-2 (RFC 5737) / doc prefix (RFC 3849) +INNER_V4_LOCAL = "198.51.100.1" +INNER_V4_REMOTE = "198.51.100.2" +INNER_V6_LOCAL = "2001:db8:100::1" +INNER_V6_REMOTE = "2001:db8:100::2" + +# ESP parameters +SPI_OUT = "0x1000" +SPI_IN = "0x1001" +# 128-bit key + 32-bit salt = 20 bytes hex, 128-bit ICV +ESP_AEAD = "aead 'rfc4106(gcm(aes))' 0x" + "01" * 20 + " 128" + + +def xfrm(args, host=None): + """Runs 'ip xfrm' via shell to preserve parentheses in algo names.""" + cmd(f"ip xfrm {args}", shell=True, host=host) + + +def check_xfrm_offload_support(): + """Skips if iproute2 lacks xfrm offload support.""" + out = cmd("ip xfrm state help", fail=False) + if "offload" not in out.stdout + out.stderr: + raise KsftSkipEx("iproute2 too old, missing xfrm offload") + + +def check_esp_hw_offload(cfg): + """Skips if device lacks esp-hw-offload support.""" + check_xfrm_offload_support() + try: + feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + except (CmdExitFailure, IndexError) as e: + raise KsftSkipEx(f"can't query features: {e}") from e + if not feat.get("esp-hw-offload", {}).get("active"): + raise KsftSkipEx("Device does not support esp-hw-offload") + + +def get_tx_drops(cfg): + """Returns TX dropped counter from the physical device.""" + stats = ip("-s -s link show dev " + cfg.ifname, json=True)[0] + return stats["stats64"]["tx"]["dropped"] + + +def setup_vxlan_ipsec(cfg, outer_ipver, inner_ipver): + """Sets up VXLAN tunnel with IPsec transport-mode crypto-offload.""" + vxlan_name = f"vx{os.getpid()}" + local_addr = cfg.addr_v[outer_ipver] + remote_addr = cfg.remote_addr_v[outer_ipver] + + if inner_ipver == "4": + inner_local = f"{INNER_V4_LOCAL}/24" + inner_remote = f"{INNER_V4_REMOTE}/24" + addr_extra = "" + else: + inner_local = f"{INNER_V6_LOCAL}/64" + inner_remote = f"{INNER_V6_REMOTE}/64" + addr_extra = " nodad" + + if outer_ipver == "6": + vxlan_opts = "udp6zerocsumtx udp6zerocsumrx" + else: + vxlan_opts = "noudpcsum" + + # VXLAN tunnel - local side + ip(f"link add {vxlan_name} type vxlan id 100 dstport 4789 {vxlan_opts} " + f"local {local_addr} remote {remote_addr} dev {cfg.ifname}") + defer(ip, f"link del {vxlan_name}") + ip(f"addr add {inner_local} dev {vxlan_name}{addr_extra}") + ip(f"link set {vxlan_name} up") + + # VXLAN tunnel - remote side + ip(f"link add {vxlan_name} type vxlan id 100 dstport 4789 {vxlan_opts} " + f"local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}", + host=cfg.remote) + defer(ip, f"link del {vxlan_name}", host=cfg.remote) + ip(f"addr add {inner_remote} dev {vxlan_name}{addr_extra}", + host=cfg.remote) + ip(f"link set {vxlan_name} up", host=cfg.remote) + + # xfrm state - local outbound SA + xfrm(f"state add src {local_addr} dst {remote_addr} " + f"proto esp spi {SPI_OUT} " + f"{ESP_AEAD} " + f"mode transport offload crypto dev {cfg.ifname} dir out") + defer(xfrm, f"state del src {local_addr} dst {remote_addr} " + f"proto esp spi {SPI_OUT}") + + # xfrm state - local inbound SA + xfrm(f"state add src {remote_addr} dst {local_addr} " + f"proto esp spi {SPI_IN} " + f"{ESP_AEAD} " + f"mode transport offload crypto dev {cfg.ifname} dir in") + defer(xfrm, f"state del src {remote_addr} dst {local_addr} " + f"proto esp spi {SPI_IN}") + + # xfrm state - remote outbound SA (mirror, software crypto) + xfrm(f"state add src {remote_addr} dst {local_addr} " + f"proto esp spi {SPI_IN} " + f"{ESP_AEAD} " + f"mode transport", + host=cfg.remote) + defer(xfrm, f"state del src {remote_addr} dst {local_addr} " + f"proto esp spi {SPI_IN}", host=cfg.remote) + + # xfrm state - remote inbound SA (mirror, software crypto) + xfrm(f"state add src {local_addr} dst {remote_addr} " + f"proto esp spi {SPI_OUT} " + f"{ESP_AEAD} " + f"mode transport", + host=cfg.remote) + defer(xfrm, f"state del src {local_addr} dst {remote_addr} " + f"proto esp spi {SPI_OUT}", host=cfg.remote) + + # xfrm policy - local out + xfrm(f"policy add src {local_addr} dst {remote_addr} " + f"proto udp dport 4789 dir out " + f"tmpl src {local_addr} dst {remote_addr} proto esp mode transport") + defer(xfrm, f"policy del src {local_addr} dst {remote_addr} " + f"proto udp dport 4789 dir out") + + # xfrm policy - local in + xfrm(f"policy add src {remote_addr} dst {local_addr} " + f"proto udp dport 4789 dir in " + f"tmpl src {remote_addr} dst {local_addr} proto esp mode transport") + defer(xfrm, f"policy del src {remote_addr} dst {local_addr} " + f"proto udp dport 4789 dir in") + + # xfrm policy - remote out + xfrm(f"policy add src {remote_addr} dst {local_addr} " + f"proto udp dport 4789 dir out " + f"tmpl src {remote_addr} dst {local_addr} proto esp mode transport", + host=cfg.remote) + defer(xfrm, f"policy del src {remote_addr} dst {local_addr} " + f"proto udp dport 4789 dir out", host=cfg.remote) + + # xfrm policy - remote in + xfrm(f"policy add src {local_addr} dst {remote_addr} " + f"proto udp dport 4789 dir in " + f"tmpl src {local_addr} dst {remote_addr} proto esp mode transport", + host=cfg.remote) + defer(xfrm, f"policy del src {local_addr} dst {remote_addr} " + f"proto udp dport 4789 dir in", host=cfg.remote) + + +def _vxlan_ipsec_variants(): + """Generates outer/inner IP version variants.""" + for outer in ["4", "6"]: + for inner in ["4", "6"]: + yield KsftNamedVariant(f"outer_v{outer}_inner_v{inner}", outer, inner) + + +@ksft_variants(_vxlan_ipsec_variants()) +def test_vxlan_ipsec_crypto_offload(cfg, outer_ipver, inner_ipver): + """Tests VXLAN+IPsec crypto-offload has no TX drops.""" + cfg.require_ipver(outer_ipver) + check_esp_hw_offload(cfg) + + setup_vxlan_ipsec(cfg, outer_ipver, inner_ipver) + + if inner_ipver == "4": + inner_local = INNER_V4_LOCAL + inner_remote = INNER_V4_REMOTE + ping = "ping" + else: + inner_local = INNER_V6_LOCAL + inner_remote = INNER_V6_REMOTE + ping = "ping -6" + + cmd(f"{ping} -c 1 -W 2 {inner_remote}") + + drops_before = get_tx_drops(cfg) + + runner = Iperf3Runner(cfg, server_ip=inner_local, + client_ip=inner_remote) + bw_gbps = runner.measure_bandwidth(reverse=True) + + cfg.wait_hw_stats_settle() + drops_after = get_tx_drops(cfg) + + ksft_eq(drops_after - drops_before, 0, + comment="TX drops during VXLAN+IPsec") + ksft_ge(bw_gbps, 0.1, + comment="Minimum 100Mbps over VXLAN+IPsec") + + +def main(): + """Runs VXLAN+IPsec crypto-offload GSO selftest.""" + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + ksft_run([test_vxlan_ipsec_crypto_offload], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index f181fa2d38fc..e24660e5c27f 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -48,7 +48,10 @@ class Iperf3Runner: Starts the iperf3 client with the configured options. """ cmdline = self._build_client(streams, duration, reverse) - return cmd(cmdline, background=background, host=self.env.remote) + kwargs = {"background": background, "host": self.env.remote} + if not background: + kwargs["timeout"] = duration + 5 + return cmd(cmdline, **kwargs) def measure_bandwidth(self, reverse=False): """ diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index a275ed584026..f3da38c54d27 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -96,6 +96,7 @@ TEST_PROGS := \ srv6_hl2encap_red_l2vpn_test.sh \ srv6_iptunnel_cache.sh \ stress_reuseport_listen.sh \ + tcp_ecmp_failover.sh \ tcp_fastopen_backup_key.sh \ test_bpf.sh \ test_bridge_backup_port.sh \ diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 5fea7e7df628..989a5975dcea 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -474,20 +474,24 @@ mptcp_lib_wait_local_port_listen() { wait_local_port_listen "${@}" "tcp" } +# $1: error file, $2: cmd, $3: expected msg, [$4: expected error] mptcp_lib_check_output() { local err="${1}" local cmd="${2}" local expected="${3}" + local exp_error="${4:-0}" local cmd_ret=0 local out - if ! out=$(${cmd} 2>"${err}"); then - cmd_ret=${?} - fi + out=$(${cmd} 2>"${err}") || cmd_ret=1 - if [ ${cmd_ret} -ne 0 ]; then - mptcp_lib_pr_fail "command execution '${cmd}' stderr" - cat "${err}" + if [ "${cmd_ret}" != "${exp_error}" ]; then + mptcp_lib_pr_fail "unexpected returned code for '${cmd}', info:" + if [ "${exp_error}" = 0 ]; then + cat "${err}" + else + echo "${out}" + fi return 2 elif [ "${out}" = "${expected}" ]; then return 0 diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 123d9d7a0278..04594dfc22b1 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -122,10 +122,12 @@ check() local cmd="$1" local expected="$2" local msg="$3" + local exp_error="$4" local rc=0 mptcp_lib_print_title "$msg" - mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} + mptcp_lib_check_output "${err}" "${cmd}" "${expected}" "${exp_error}" || + rc=${?} if [ ${rc} -eq 2 ]; then mptcp_lib_result_fail "${msg} # error ${rc}" ret=${KSFT_FAIL} @@ -158,13 +160,13 @@ check "show_endpoints" \ "3,10.0.1.3,signal backup")" "dump addrs" del_endpoint 2 -check "get_endpoint 2" "" "simple del addr" +check "get_endpoint 2" "" "simple del addr" 1 check "show_endpoints" \ "$(format_endpoints "1,10.0.1.1" \ "3,10.0.1.3,signal backup")" "dump addrs after del" add_endpoint 10.0.1.3 2>/dev/null -check "get_endpoint 4" "" "duplicate addr" +check "get_endpoint 4" "" "duplicate addr" 1 add_endpoint 10.0.1.4 flags signal check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment" @@ -173,7 +175,7 @@ for i in $(seq 5 9); do add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1 done check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit" -check "get_endpoint 10" "" "above hard addr limit" +check "get_endpoint 10" "" "above hard addr limit" 1 del_endpoint 9 for i in $(seq 10 255); do @@ -192,9 +194,13 @@ check "show_endpoints" \ flush_endpoint check "show_endpoints" "" "flush addrs" -add_endpoint 10.0.1.1 flags unknown -check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags" -flush_endpoint +# "unknown" flag is only supported by pm_nl_ctl +if ! mptcp_lib_is_ip_mptcp; then + add_endpoint 10.0.1.1 flags unknown + check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" \ + "ignore unknown flags" + flush_endpoint +fi set_limits 9 1 2>/dev/null check "get_limits" "${default_limits}" "rcv addrs above hard limit" diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index b327d3061ed5..3cdd953f6813 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -26,6 +26,7 @@ tests=" netlink_checks ovsnl: validate netlink attrs and settings upcall_interfaces ovs: test the upcall interfaces tunnel_metadata ovs: test extraction of tunnel metadata + tunnel_refcount ovs: test tunnel vport reference cleanup drop_reason drop: test drop reasons are emitted psample psample: Sampling packets with psample" @@ -830,6 +831,42 @@ test_tunnel_metadata() { return 0 } +test_tunnel_refcount() { + sbxname="test_tunnel_refcount" + sbx_add "${sbxname}" || return 1 + + ovs_sbx "${sbxname}" ip netns add trefns || return 1 + on_exit "ovs_sbx ${sbxname} ip netns del trefns" + + for tun_type in gre vxlan geneve; do + info "testing ${tun_type} tunnel vport refcount" + + ovs_sbx "${sbxname}" ip netns exec trefns \ + python3 $ovs_base/ovs-dpctl.py \ + add-dp dp-${tun_type} || return 1 + + ovs_sbx "${sbxname}" ip netns exec trefns \ + python3 $ovs_base/ovs-dpctl.py \ + add-if --no-lwt -t ${tun_type} \ + dp-${tun_type} ovs-${tun_type}0 || return 1 + + ovs_wait ip -netns trefns link show \ + ovs-${tun_type}0 >/dev/null 2>&1 || return 1 + + info "deleting dp - may hang if reference counting is broken" + ovs_sbx "${sbxname}" ip netns exec trefns \ + python3 $ovs_base/ovs-dpctl.py \ + del-dp dp-${tun_type} & + + dev_removed() { + ! ip -netns trefns link show "$1" >/dev/null 2>&1 + } + ovs_wait dev_removed dp-${tun_type} || return 1 + ovs_wait dev_removed ovs-${tun_type}0 || return 1 + done + return 0 +} + run_test() { ( tname="$1" diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 848f61fdcee0..bbe35e2718d2 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -11,7 +11,6 @@ import logging import math import multiprocessing import re -import socket import struct import sys import time @@ -2069,7 +2068,7 @@ class OvsVport(GenericNetlinkSocket): elif vport_type == "internal": return OvsVport.OVS_VPORT_TYPE_INTERNAL elif vport_type == "gre": - return OvsVport.OVS_VPORT_TYPE_INTERNAL + return OvsVport.OVS_VPORT_TYPE_GRE elif vport_type == "vxlan": return OvsVport.OVS_VPORT_TYPE_VXLAN elif vport_type == "geneve": @@ -2121,6 +2120,7 @@ class OvsVport(GenericNetlinkSocket): ) TUNNEL_DEFAULTS = [("geneve", 6081), + ("gre", 0), ("vxlan", 4789)] for tnl in TUNNEL_DEFAULTS: @@ -2129,9 +2129,13 @@ class OvsVport(GenericNetlinkSocket): dport = tnl[1] if not lwt: + if tnl[0] == "gre": + # GRE tunnels have no options. + break + vportopt = OvsVport.ovs_vport_msg.vportopts() vportopt["attrs"].append( - ["OVS_TUNNEL_ATTR_DST_PORT", socket.htons(dport)] + ["OVS_TUNNEL_ATTR_DST_PORT", dport] ) msg["attrs"].append( ["OVS_VPORT_ATTR_OPTIONS", vportopt] @@ -2145,6 +2149,9 @@ class OvsVport(GenericNetlinkSocket): geneve_port=dport, geneve_collect_metadata=True, geneve_udp_zero_csum6_rx=1) + elif tnl[0] == "gre": + ipr.link("add", ifname=vport_ifname, kind="gretap", + gre_collect_metadata=True) elif tnl[0] == "vxlan": ipr.link("add", ifname=vport_ifname, kind=tnl[0], vxlan_learning=0, vxlan_collect_metadata=1, @@ -2563,7 +2570,7 @@ def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()): if vpo: dpo = vpo.get_attr("OVS_TUNNEL_ATTR_DST_PORT") if dpo: - opts += " tnl-dport:%s" % socket.ntohs(dpo) + opts += " tnl-dport:%s" % dpo print( " port %d: %s (%s%s)" % ( @@ -2632,7 +2639,7 @@ def main(argv): "--ptype", type=str, default="netdev", - choices=["netdev", "internal", "geneve", "vxlan"], + choices=["netdev", "internal", "gre", "geneve", "vxlan"], help="Interface type (default netdev)", ) addifcmd.add_argument( @@ -2645,7 +2652,7 @@ def main(argv): addifcmd.add_argument( "-l", "--lwt", - type=bool, + action=argparse.BooleanOptionalAction, default=True, help="Use LWT infrastructure instead of vport (default true)." ) diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh index b50dbe45a4d0..c06e3135fbef 100755 --- a/tools/testing/selftests/net/ovpn/test.sh +++ b/tools/testing/selftests/net/ovpn/test.sh @@ -98,10 +98,10 @@ ovpn_run_basic_traffic() { sleep 0.3 ovpn_cmd_ok "send baseline traffic to peer ${p}" \ ip netns exec ovpn_peer0 \ - ping -qfc 500 -w 3 5.5.5.$((p + 1)) + ping -qfc 100 -w 3 5.5.5.$((p + 1)) ovpn_cmd_ok "send large-payload traffic to peer ${p}" \ ip netns exec ovpn_peer0 \ - ping -qfc 500 -s 3000 -w 3 5.5.5.$((p + 1)) + ping -qfc 100 -s 3000 -w 3 5.5.5.$((p + 1)) wait "${tcpdump_pid1}" || return 1 wait "${tcpdump_pid2}" || return 1 diff --git a/tools/testing/selftests/net/tcp_ecmp_failover.sh b/tools/testing/selftests/net/tcp_ecmp_failover.sh new file mode 100755 index 000000000000..5768aa8bff6a --- /dev/null +++ b/tools/testing/selftests/net/tcp_ecmp_failover.sh @@ -0,0 +1,216 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright 2026 Google LLC. +# +# This test verifies TCP flow failover between ECMP routes +# upon carrier loss on the active device. +# +# socat -----------------------------> socat +# | +# .-- veth-c1 -|- veth-s1 --. +# dummy0 -| | |-- dummy0 +# '-- veth-c2 -|- veth-s2 --' +# | +# + +REQUIRE_JQ=no +REQUIRE_MZ=no +NUM_NETIFS=0 + +source forwarding/lib.sh + +CLIENT_IP="10.0.59.1" +SERVER_IP="10.0.92.1" +CLIENT_IP6="2001:db8:5a9a::1" +SERVER_IP6="2001:db8:9292::1" + +setup_server() +{ + IP="ip -n $server" + NS_EXEC="ip netns exec $server" + + $IP link add dummy0 type dummy + $IP link set dummy0 up + + $IP -4 addr add $SERVER_IP/32 dev dummy0 + $IP -6 addr add $SERVER_IP6/128 dev dummy0 nodad + + $IP link set veth-s1 up + $IP link set veth-s2 up + + $IP -4 addr add 192.168.1.2/24 dev veth-s1 + $IP -4 addr add 192.168.2.2/24 dev veth-s2 + + $IP -4 route add $CLIENT_IP/32 \ + nexthop via 192.168.1.1 dev veth-s1 weight 1 \ + nexthop via 192.168.2.1 dev veth-s2 weight 1 + + $IP -6 addr add 2001:db8:1::2/64 dev veth-s1 nodad + $IP -6 addr add 2001:db8:2::2/64 dev veth-s2 nodad + + $IP -6 route add $CLIENT_IP6/128 \ + nexthop via 2001:db8:1::1 dev veth-s1 weight 1 \ + nexthop via 2001:db8:2::1 dev veth-s2 weight 1 +} + +setup_client() +{ + IP="ip -n $client" + NS_EXEC="ip netns exec $client" + + $IP link add dummy0 type dummy + $IP link set dummy0 up + + $IP -4 addr add $CLIENT_IP/32 dev dummy0 + $IP -6 addr add $CLIENT_IP6/128 dev dummy0 nodad + + $IP link set veth-c1 up + $IP link set veth-c2 up + + $IP -4 addr add 192.168.1.1/24 dev veth-c1 + $IP -4 addr add 192.168.2.1/24 dev veth-c2 + + $IP -4 route add $SERVER_IP/32 \ + nexthop via 192.168.1.2 dev veth-c1 weight 1 \ + nexthop via 192.168.2.2 dev veth-c2 weight 1 + + $IP -6 addr add 2001:db8:1::1/64 dev veth-c1 nodad + $IP -6 addr add 2001:db8:2::1/64 dev veth-c2 nodad + + $IP -6 route add $SERVER_IP6/128 \ + nexthop via 2001:db8:1::2 dev veth-c1 weight 1 \ + nexthop via 2001:db8:2::2 dev veth-c2 weight 1 + + # By default, tcp_retries1=3 triggers a route refresh + # after 3 retransmits (~5s). Ensure this never occurs + # for test stability. + $NS_EXEC sysctl -qw net.ipv4.tcp_retries1=100 + + # When NETDEV_CHANGE is issued for a dev tied to an ECMP + # route, RTNH_F_LINKDOWN is flagged and the sernum is + # bumped to invalidate the route via sk_dst_check(). + # + # Without ignore_routes_with_linkdown=1, subsequent + # lookups may still select the same RTNH_F_LINKDOWN route. + $NS_EXEC sysctl -qw net.ipv4.conf.veth-c1.ignore_routes_with_linkdown=1 + $NS_EXEC sysctl -qw net.ipv4.conf.veth-c2.ignore_routes_with_linkdown=1 + + $NS_EXEC sysctl -qw net.ipv6.conf.veth-c1.ignore_routes_with_linkdown=1 + $NS_EXEC sysctl -qw net.ipv6.conf.veth-c2.ignore_routes_with_linkdown=1 +} + +setup() +{ + setup_ns client server + + ip -n "$client" link add veth-c1 type veth peer veth-s1 netns "$server" + ip -n "$client" link add veth-c2 type veth peer veth-s2 netns "$server" + + setup_server + setup_client +} + +cleanup() +{ + cleanup_all_ns > /dev/null 2>&1 +} + +tcp_ecmp_failover() +{ + local pf=$1; shift + local server_ip=$1; shift + local client_ip=$1; shift + + RET=0 + + tcpdump_start veth-s1 "$server" + tcpdump_start veth-s2 "$server" + + ip netns exec "$server" \ + socat -u TCP-LISTEN:8080,pf="$pf",bind="$server_ip",reuseaddr /dev/null & + server_pid=$! + + # Wait for server to start listening. + # Sometimes client fails without this sleep. + sleep 1 + + ip netns exec "$client" \ + socat -u /dev/zero TCP:"$server_ip":8080,pf="$pf",bind="$client_ip" & + client_pid=$! + + # To capture enough packets. + sleep 3 + + tcpdump_stop veth-s1 + tcpdump_stop veth-s2 + + pkts_s1=$(tcpdump_show veth-s1 | wc -l) + pkts_s2=$(tcpdump_show veth-s2 | wc -l) + + tcpdump_cleanup veth-s1 + tcpdump_cleanup veth-s2 + + # Detect the device chosen by the client + if [ "$pkts_s1" -gt "$pkts_s2" ]; then + veth_down=veth-s1 + veth_up=veth-s2 + else + veth_down=veth-s2 + veth_up=veth-s1 + fi + + # Taking down $veth_down causes its peer to lose carrier, + # triggering NETDEV_CHANGE. This flags RTNH_F_LINKDOWN + # and bumps the sernum for the route associated with that + # peer, invalidating the cached dst in the TCP socket. + # + # Consequently, sk_dst_check() fails, forcing the subsequent + # lookup to select the remaining healthy route via $veth_up. + ip -n "$server" link set "$veth_down" down + + tcpdump_start "$veth_up" "$server" + + # To capture enough packets. + sleep 3 + + tcpdump_stop "$veth_up" + + kill -9 "$client_pid" > /dev/null 2>&1 + kill -9 "$server_pid" > /dev/null 2>&1 + wait 2> /dev/null + + pkts=$(tcpdump_show $veth_up | wc -l) + + tcpdump_cleanup "$veth_up" + + if [ "$pkts" -lt 1000 ]; then + RET=$ksft_fail + fi +} + +test_ipv4() +{ + setup + tcp_ecmp_failover IPv4 $SERVER_IP $CLIENT_IP + log_test "TCP IPv4 failover" + cleanup +} + +test_ipv6() +{ + setup + tcp_ecmp_failover IPv6 "[$SERVER_IP6]" "[$CLIENT_IP6]" + log_test "TCP IPv6 failover" + cleanup +} + +require_command socat +require_command tcpdump + +trap cleanup EXIT + +test_ipv4 +test_ipv6 + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 9e2ccea13d70..30a236b8e9f7 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -946,6 +946,49 @@ TEST_F(tls, peek_and_splice) EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } +TEST_F(tls, splice_to_pipe_small) +{ + int send_len = TLS_PAYLOAD_MAX_LEN; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + size_t total = 0; + int p[2]; + + memrnd(mem_send, sizeof(mem_send)); + + ASSERT_GE(pipe(p), 0); + + /* Shrink pipe to 1 page (typically 4096 bytes) to force multiple + * splice iterations for a 16384-byte TLS record. + */ + EXPECT_GE(fcntl(p[1], F_SETPIPE_SZ, 4096), 4096); + + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + + while (total < (size_t)send_len) { + ssize_t spliced, drained; + + spliced = splice(self->cfd, NULL, p[1], NULL, + send_len - total, 0); + EXPECT_GT(spliced, 0); + if (spliced <= 0) + break; + + drained = read(p[0], mem_recv + total, spliced); + EXPECT_EQ(drained, spliced); + if (drained <= 0) + break; + + total += drained; + } + + EXPECT_EQ(total, (size_t)send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); + + close(p[0]); + close(p[1]); +} + #define MAX_FRAGS 48 TEST_F(tls, splice_short) { diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index eefadd0546d3..b1f856cf62c1 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -1136,5 +1136,153 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "7a5f", + "name": "Force red to dequeue from its child's gso_skb with qfq leaf", + "category": [ + "qdisc", + "tbf", + "red", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b", + "$TC qdisc add dev $DUMMY parent 1: handle 2: red limit 757 min 16 max 24 avpkt 16", + "$TC qdisc add dev $DUMMY parent 2: handle 3: qfq", + "$TC class add dev $DUMMY classid 3:1 parent 3: qfq maxpkt 512 weight 1", + "$TC filter add dev $DUMMY parent 3: protocol ip prio 1 matchall classid 3:1 action ok" + ], + "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true", + "expExitCode": "0", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:", + "matchJSON": [ + { + "kind": "red", + "handle": "2:", + "bytes": 98, + "packets": 1, + "backlog": 0, + "qlen": 0 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "cdae", + "name": "Force sfb to dequeue from its child's gso_skb with qfq leaf", + "category": [ + "qdisc", + "tbf", + "sfb", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b", + "$TC qdisc add dev $DUMMY parent 1: handle 2: sfb", + "$TC qdisc add dev $DUMMY parent 2: handle 3: qfq", + "$TC class add dev $DUMMY classid 3:1 parent 3: qfq maxpkt 512 weight 1", + "$TC filter add dev $DUMMY parent 3: protocol ip prio 1 matchall classid 3:1 action ok" + ], + "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true", + "expExitCode": "0", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:", + "matchJSON": [ + { + "kind": "sfb", + "handle": "2:", + "bytes": 98, + "packets": 1, + "backlog": 0, + "qlen": 0 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "291d", + "name": "Force red to dequeue from its child's gso_skb with dualpi2 leaf", + "category": [ + "qdisc", + "tbf", + "red", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b", + "$TC qdisc add dev $DUMMY parent 1: handle 2: red limit 757 min 16 max 24 avpkt 16", + "$TC qdisc add dev $DUMMY parent 2: handle 3: dualpi2" + ], + "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true", + "expExitCode": "0", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:", + "matchJSON": [ + { + "kind": "red", + "handle": "2:", + "bytes": 98, + "packets": 1, + "backlog": 0, + "qlen": 0 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "9c6d", + "name": "Force sfb to dequeue from its child's gso_skb with dualpi2 leaf", + "category": [ + "qdisc", + "tbf", + "sfb", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b", + "$TC qdisc add dev $DUMMY parent 1: handle 2: sfb", + "$TC qdisc add dev $DUMMY parent 2: handle 3: dualpi2" + ], + "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true", + "expExitCode": "0", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:", + "matchJSON": [ + { + "kind": "sfb", + "handle": "2:", + "bytes": 98, + "packets": 1, + "backlog": 0, + "qlen": 0 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] } ] |
