diff options
Diffstat (limited to 'drivers/net/ethernet')
217 files changed, 20612 insertions, 4433 deletions
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index 4547a1b8b958..7677c745fb30 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -562,7 +562,7 @@ static void el3_common_remove (struct net_device *dev) } #ifdef CONFIG_EISA -static int __init el3_eisa_probe (struct device *device) +static int el3_eisa_probe(struct device *device) { short i; int ioaddr, irq, if_port; diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 2839af00f20c..1c5f3b273e6a 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -907,7 +907,7 @@ static struct eisa_device_id vortex_eisa_ids[] = { }; MODULE_DEVICE_TABLE(eisa, vortex_eisa_ids); -static int __init vortex_eisa_probe(struct device *device) +static int vortex_eisa_probe(struct device *device) { void __iomem *ioaddr; struct eisa_device *edev; diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 955d06b9cdba..0b13af8e4070 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -29,6 +29,7 @@ source "drivers/net/ethernet/apm/Kconfig" source "drivers/net/ethernet/apple/Kconfig" source "drivers/net/ethernet/arc/Kconfig" source "drivers/net/ethernet/atheros/Kconfig" +source "drivers/net/ethernet/aurora/Kconfig" source "drivers/net/ethernet/cadence/Kconfig" source "drivers/net/ethernet/adi/Kconfig" source "drivers/net/ethernet/broadcom/Kconfig" @@ -121,6 +122,7 @@ config FEALNX cards. <http://www.myson.com.tw/> source "drivers/net/ethernet/natsemi/Kconfig" +source "drivers/net/ethernet/netronome/Kconfig" source "drivers/net/ethernet/8390/Kconfig" config NET_NETX diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 4a2ee98738f0..38dc1a776a2b 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_NET_XGENE) += apm/ obj-$(CONFIG_NET_VENDOR_APPLE) += apple/ obj-$(CONFIG_NET_VENDOR_ARC) += arc/ obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/ +obj-$(CONFIG_NET_VENDOR_AURORA) += aurora/ obj-$(CONFIG_NET_CADENCE) += cadence/ obj-$(CONFIG_NET_BFIN) += adi/ obj-$(CONFIG_NET_VENDOR_BROADCOM) += broadcom/ @@ -52,6 +53,7 @@ obj-$(CONFIG_NET_VENDOR_MOXART) += moxa/ obj-$(CONFIG_NET_VENDOR_MYRI) += myricom/ obj-$(CONFIG_FEALNX) += fealnx.o obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/ +obj-$(CONFIG_NET_VENDOR_NETRONOME) += netronome/ obj-$(CONFIG_NET_NETX) += netx-eth.o obj-$(CONFIG_NET_VENDOR_NUVOTON) += nuvoton/ obj-$(CONFIG_NET_VENDOR_NVIDIA) += nvidia/ diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index 096531a73124..e0e95a15cab0 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -1912,21 +1912,21 @@ static struct platform_driver bfin_mac_driver = { }, }; +static struct platform_driver * const drivers[] = { + &bfin_mii_bus_driver, + &bfin_mac_driver, +}; + static int __init bfin_mac_init(void) { - int ret; - ret = platform_driver_register(&bfin_mii_bus_driver); - if (!ret) - return platform_driver_register(&bfin_mac_driver); - return -ENODEV; + return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); } module_init(bfin_mac_init); static void __exit bfin_mac_cleanup(void) { - platform_driver_unregister(&bfin_mac_driver); - platform_driver_unregister(&bfin_mii_bus_driver); + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } module_exit(bfin_mac_cleanup); diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index e2afabf3a465..7ccebae9cb48 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1500,10 +1500,11 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENODEV; } - if (!pci_set_dma_mask(pdev, PCNET32_DMA_MASK)) { + err = pci_set_dma_mask(pdev, PCNET32_DMA_MASK); + if (err) { if (pcnet32_debug & NETIF_MSG_PROBE) pr_err("architecture does not support 32bit PCI busmaster DMA\n"); - return -ENODEV; + return err; } if (!request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci")) { if (pcnet32_debug & NETIF_MSG_PROBE) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index c31e691d11fc..db55c9f6e8e1 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -869,7 +869,7 @@ void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata) pdata->mdio_bus = NULL; } -struct xgene_mac_ops xgene_gmac_ops = { +const struct xgene_mac_ops xgene_gmac_ops = { .init = xgene_gmac_init, .reset = xgene_gmac_reset, .rx_enable = xgene_gmac_rx_enable, @@ -879,7 +879,7 @@ struct xgene_mac_ops xgene_gmac_ops = { .set_mac_addr = xgene_gmac_set_mac_addr, }; -struct xgene_port_ops xgene_gport_ops = { +const struct xgene_port_ops xgene_gport_ops = { .reset = xgene_enet_reset, .cle_bypass = xgene_enet_cle_bypass, .shutdown = xgene_gport_shutdown, diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h index c153a1dc5ff7..8a9091039ab4 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -340,8 +340,8 @@ int xgene_enet_mdio_config(struct xgene_enet_pdata *pdata); void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata); bool xgene_ring_mgr_init(struct xgene_enet_pdata *p); -extern struct xgene_mac_ops xgene_gmac_ops; -extern struct xgene_port_ops xgene_gport_ops; +extern const struct xgene_mac_ops xgene_gmac_ops; +extern const struct xgene_port_ops xgene_gport_ops; extern struct xgene_ring_ops xgene_ring1_ops; #endif /* __XGENE_ENET_HW_H__ */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 991412ce6f48..2394191ad28e 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -450,12 +450,12 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } - pdata->ring_ops->wr_cmd(tx_ring, count); skb_tx_timestamp(skb); pdata->stats.tx_packets++; pdata->stats.tx_bytes += skb->len; + pdata->ring_ops->wr_cmd(tx_ring, count); return NETDEV_TX_OK; } @@ -682,16 +682,16 @@ static void xgene_enet_napi_disable(struct xgene_enet_pdata *pdata) static int xgene_enet_open(struct net_device *ndev) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); - struct xgene_mac_ops *mac_ops = pdata->mac_ops; + const struct xgene_mac_ops *mac_ops = pdata->mac_ops; int ret; mac_ops->tx_enable(pdata); mac_ops->rx_enable(pdata); + xgene_enet_napi_enable(pdata); ret = xgene_enet_register_irq(ndev); if (ret) return ret; - xgene_enet_napi_enable(pdata); if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) phy_start(pdata->phy_dev); @@ -706,7 +706,7 @@ static int xgene_enet_open(struct net_device *ndev) static int xgene_enet_close(struct net_device *ndev) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); - struct xgene_mac_ops *mac_ops = pdata->mac_ops; + const struct xgene_mac_ops *mac_ops = pdata->mac_ops; netif_stop_queue(ndev); @@ -715,13 +715,13 @@ static int xgene_enet_close(struct net_device *ndev) else cancel_delayed_work_sync(&pdata->link_work); - xgene_enet_napi_disable(pdata); - xgene_enet_free_irq(ndev); - xgene_enet_process_ring(pdata->rx_ring, -1); - mac_ops->tx_disable(pdata); mac_ops->rx_disable(pdata); + xgene_enet_free_irq(ndev); + xgene_enet_napi_disable(pdata); + xgene_enet_process_ring(pdata->rx_ring, -1); + return 0; } @@ -1084,7 +1084,7 @@ static const struct net_device_ops xgene_ndev_ops = { }; #ifdef CONFIG_ACPI -static int xgene_get_port_id_acpi(struct device *dev, +static void xgene_get_port_id_acpi(struct device *dev, struct xgene_enet_pdata *pdata) { acpi_status status; @@ -1097,24 +1097,19 @@ static int xgene_get_port_id_acpi(struct device *dev, pdata->port_id = temp; } - return 0; + return; } #endif -static int xgene_get_port_id_dt(struct device *dev, struct xgene_enet_pdata *pdata) +static void xgene_get_port_id_dt(struct device *dev, struct xgene_enet_pdata *pdata) { u32 id = 0; - int ret; - ret = of_property_read_u32(dev->of_node, "port-id", &id); - if (ret) { - pdata->port_id = 0; - ret = 0; - } else { - pdata->port_id = id & BIT(0); - } + of_property_read_u32(dev->of_node, "port-id", &id); - return ret; + pdata->port_id = id & BIT(0); + + return; } static int xgene_get_tx_delay(struct xgene_enet_pdata *pdata) @@ -1209,13 +1204,11 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) } if (dev->of_node) - ret = xgene_get_port_id_dt(dev, pdata); + xgene_get_port_id_dt(dev, pdata); #ifdef CONFIG_ACPI else - ret = xgene_get_port_id_acpi(dev, pdata); + xgene_get_port_id_acpi(dev, pdata); #endif - if (ret) - return ret; if (!device_get_mac_address(dev, ndev->dev_addr, ETH_ALEN)) eth_hw_addr_random(ndev); @@ -1423,7 +1416,7 @@ static int xgene_enet_probe(struct platform_device *pdev) struct net_device *ndev; struct xgene_enet_pdata *pdata; struct device *dev = &pdev->dev; - struct xgene_mac_ops *mac_ops; + const struct xgene_mac_ops *mac_ops; const struct of_device_id *of_id; int ret; @@ -1474,15 +1467,15 @@ static int xgene_enet_probe(struct platform_device *pdev) } ndev->hw_features = ndev->features; - ret = register_netdev(ndev); + ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (ret) { - netdev_err(ndev, "Failed to register netdev\n"); + netdev_err(ndev, "No usable DMA configuration\n"); goto err; } - ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64)); + ret = register_netdev(ndev); if (ret) { - netdev_err(ndev, "No usable DMA configuration\n"); + netdev_err(ndev, "Failed to register netdev\n"); goto err; } @@ -1490,14 +1483,17 @@ static int xgene_enet_probe(struct platform_device *pdev) if (ret) goto err; - xgene_enet_napi_add(pdata); mac_ops = pdata->mac_ops; - if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) + if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) { ret = xgene_enet_mdio_config(pdata); - else + if (ret) + goto err; + } else { INIT_DELAYED_WORK(&pdata->link_work, mac_ops->link_state); + } - return ret; + xgene_enet_napi_add(pdata); + return 0; err: unregister_netdev(ndev); free_netdev(ndev); @@ -1507,7 +1503,7 @@ err: static int xgene_enet_remove(struct platform_device *pdev) { struct xgene_enet_pdata *pdata; - struct xgene_mac_ops *mac_ops; + const struct xgene_mac_ops *mac_ops; struct net_device *ndev; pdata = platform_get_drvdata(pdev); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index a6e56b88c0a0..054caf055f0a 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -174,8 +174,8 @@ struct xgene_enet_pdata { int phy_mode; enum xgene_enet_rm rm; struct rtnl_link_stats64 stats; - struct xgene_mac_ops *mac_ops; - struct xgene_port_ops *port_ops; + const struct xgene_mac_ops *mac_ops; + const struct xgene_port_ops *port_ops; struct xgene_ring_ops *ring_ops; struct delayed_work link_work; u32 port_id; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c index 05b817e56fde..78475512b683 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c @@ -405,7 +405,7 @@ static void xgene_enet_link_state(struct work_struct *work) schedule_delayed_work(&p->link_work, poll_interval); } -struct xgene_mac_ops xgene_sgmac_ops = { +const struct xgene_mac_ops xgene_sgmac_ops = { .init = xgene_sgmac_init, .reset = xgene_sgmac_reset, .rx_enable = xgene_sgmac_rx_enable, @@ -416,7 +416,7 @@ struct xgene_mac_ops xgene_sgmac_ops = { .link_state = xgene_enet_link_state }; -struct xgene_port_ops xgene_sgport_ops = { +const struct xgene_port_ops xgene_sgport_ops = { .reset = xgene_enet_reset, .cle_bypass = xgene_enet_cle_bypass, .shutdown = xgene_enet_shutdown diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h index de432465009c..29a71b4dcc44 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h @@ -35,7 +35,7 @@ #define MPA_IDLE_WITH_QMI_EMPTY BIT(12) #define SG_RX_DV_GATE_REG_0_ADDR 0x0dfc -extern struct xgene_mac_ops xgene_sgmac_ops; -extern struct xgene_port_ops xgene_sgport_ops; +extern const struct xgene_mac_ops xgene_sgmac_ops; +extern const struct xgene_port_ops xgene_sgport_ops; #endif /* __XGENE_ENET_SGMAC_H__ */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index 7a28a48cb2c7..ba030dc1940b 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -326,7 +326,7 @@ static void xgene_enet_link_state(struct work_struct *work) schedule_delayed_work(&pdata->link_work, poll_interval); } -struct xgene_mac_ops xgene_xgmac_ops = { +const struct xgene_mac_ops xgene_xgmac_ops = { .init = xgene_xgmac_init, .reset = xgene_xgmac_reset, .rx_enable = xgene_xgmac_rx_enable, @@ -338,7 +338,7 @@ struct xgene_mac_ops xgene_xgmac_ops = { .link_state = xgene_enet_link_state }; -struct xgene_port_ops xgene_xgport_ops = { +const struct xgene_port_ops xgene_xgport_ops = { .reset = xgene_enet_reset, .cle_bypass = xgene_enet_xgcle_bypass, .shutdown = xgene_enet_shutdown, diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h index f8f908dbf51c..0a2dca8a1725 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h @@ -69,7 +69,7 @@ #define XG_ENET_SPARE_CFG_REG_1_ADDR 0x0410 #define XGENET_RX_DV_GATE_REG_0_ADDR 0x0804 -extern struct xgene_mac_ops xgene_xgmac_ops; -extern struct xgene_port_ops xgene_xgport_ops; +extern const struct xgene_mac_ops xgene_xgmac_ops; +extern const struct xgene_port_ops xgene_xgport_ops; #endif /* __XGENE_ENET_XGMAC_H__ */ diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index c8af3ce3ea38..d3763bc2c561 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -577,7 +577,6 @@ static int alx_alloc_rings(struct alx_priv *alx) alx->int_mask &= ~ALX_ISR_ALL_QUEUES; alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0; - alx->tx_ringsz = alx->tx_ringsz; netif_napi_add(alx->dev, &alx->napi, alx_poll, 64); @@ -1534,6 +1533,8 @@ static const struct pci_device_id alx_pci_tbl[] = { .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2200), .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, + { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2400), + .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8162), .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8171) }, diff --git a/drivers/net/ethernet/atheros/alx/reg.h b/drivers/net/ethernet/atheros/alx/reg.h index af006b44b2a6..0959e6824cb6 100644 --- a/drivers/net/ethernet/atheros/alx/reg.h +++ b/drivers/net/ethernet/atheros/alx/reg.h @@ -37,6 +37,7 @@ #define ALX_DEV_ID_AR8161 0x1091 #define ALX_DEV_ID_E2200 0xe091 +#define ALX_DEV_ID_E2400 0xe0a1 #define ALX_DEV_ID_AR8162 0x1090 #define ALX_DEV_ID_AR8171 0x10A1 #define ALX_DEV_ID_AR8172 0x10A0 diff --git a/drivers/net/ethernet/aurora/Kconfig b/drivers/net/ethernet/aurora/Kconfig new file mode 100644 index 000000000000..a3c7106fdf85 --- /dev/null +++ b/drivers/net/ethernet/aurora/Kconfig @@ -0,0 +1,20 @@ +config NET_VENDOR_AURORA + bool "Aurora VLSI devices" + help + If you have a network (Ethernet) device belonging to this class, + say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + questions about Aurora devices. If you say Y, you will be asked + for your specific device in the following questions. + +if NET_VENDOR_AURORA + +config AURORA_NB8800 + tristate "Aurora AU-NB8800 support" + select PHYLIB + help + Support for the AU-NB8800 gigabit Ethernet controller. + +endif diff --git a/drivers/net/ethernet/aurora/Makefile b/drivers/net/ethernet/aurora/Makefile new file mode 100644 index 000000000000..6cb528a2fc26 --- /dev/null +++ b/drivers/net/ethernet/aurora/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_AURORA_NB8800) += nb8800.o diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c new file mode 100644 index 000000000000..ecc4a334c507 --- /dev/null +++ b/drivers/net/ethernet/aurora/nb8800.c @@ -0,0 +1,1552 @@ +/* + * Copyright (C) 2015 Mans Rullgard <mans@mansr.com> + * + * Mostly rewritten, based on driver from Sigma Designs. Original + * copyright notice below. + * + * + * Driver for tangox SMP864x/SMP865x/SMP867x/SMP868x builtin Ethernet Mac. + * + * Copyright (C) 2005 Maxime Bizon <mbizon@freebox.fr> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/module.h> +#include <linux/etherdevice.h> +#include <linux/delay.h> +#include <linux/ethtool.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/of_device.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> +#include <linux/dma-mapping.h> +#include <linux/phy.h> +#include <linux/cache.h> +#include <linux/jiffies.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <asm/barrier.h> + +#include "nb8800.h" + +static void nb8800_tx_done(struct net_device *dev); +static int nb8800_dma_stop(struct net_device *dev); + +static inline u8 nb8800_readb(struct nb8800_priv *priv, int reg) +{ + return readb_relaxed(priv->base + reg); +} + +static inline u32 nb8800_readl(struct nb8800_priv *priv, int reg) +{ + return readl_relaxed(priv->base + reg); +} + +static inline void nb8800_writeb(struct nb8800_priv *priv, int reg, u8 val) +{ + writeb_relaxed(val, priv->base + reg); +} + +static inline void nb8800_writew(struct nb8800_priv *priv, int reg, u16 val) +{ + writew_relaxed(val, priv->base + reg); +} + +static inline void nb8800_writel(struct nb8800_priv *priv, int reg, u32 val) +{ + writel_relaxed(val, priv->base + reg); +} + +static inline void nb8800_maskb(struct nb8800_priv *priv, int reg, + u32 mask, u32 val) +{ + u32 old = nb8800_readb(priv, reg); + u32 new = (old & ~mask) | (val & mask); + + if (new != old) + nb8800_writeb(priv, reg, new); +} + +static inline void nb8800_maskl(struct nb8800_priv *priv, int reg, + u32 mask, u32 val) +{ + u32 old = nb8800_readl(priv, reg); + u32 new = (old & ~mask) | (val & mask); + + if (new != old) + nb8800_writel(priv, reg, new); +} + +static inline void nb8800_modb(struct nb8800_priv *priv, int reg, u8 bits, + bool set) +{ + nb8800_maskb(priv, reg, bits, set ? bits : 0); +} + +static inline void nb8800_setb(struct nb8800_priv *priv, int reg, u8 bits) +{ + nb8800_maskb(priv, reg, bits, bits); +} + +static inline void nb8800_clearb(struct nb8800_priv *priv, int reg, u8 bits) +{ + nb8800_maskb(priv, reg, bits, 0); +} + +static inline void nb8800_modl(struct nb8800_priv *priv, int reg, u32 bits, + bool set) +{ + nb8800_maskl(priv, reg, bits, set ? bits : 0); +} + +static inline void nb8800_setl(struct nb8800_priv *priv, int reg, u32 bits) +{ + nb8800_maskl(priv, reg, bits, bits); +} + +static inline void nb8800_clearl(struct nb8800_priv *priv, int reg, u32 bits) +{ + nb8800_maskl(priv, reg, bits, 0); +} + +static int nb8800_mdio_wait(struct mii_bus *bus) +{ + struct nb8800_priv *priv = bus->priv; + u32 val; + + return readl_poll_timeout_atomic(priv->base + NB8800_MDIO_CMD, + val, !(val & MDIO_CMD_GO), 1, 1000); +} + +static int nb8800_mdio_cmd(struct mii_bus *bus, u32 cmd) +{ + struct nb8800_priv *priv = bus->priv; + int err; + + err = nb8800_mdio_wait(bus); + if (err) + return err; + + nb8800_writel(priv, NB8800_MDIO_CMD, cmd); + udelay(10); + nb8800_writel(priv, NB8800_MDIO_CMD, cmd | MDIO_CMD_GO); + + return nb8800_mdio_wait(bus); +} + +static int nb8800_mdio_read(struct mii_bus *bus, int phy_id, int reg) +{ + struct nb8800_priv *priv = bus->priv; + u32 val; + int err; + + err = nb8800_mdio_cmd(bus, MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg)); + if (err) + return err; + + val = nb8800_readl(priv, NB8800_MDIO_STS); + if (val & MDIO_STS_ERR) + return 0xffff; + + return val & 0xffff; +} + +static int nb8800_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val) +{ + u32 cmd = MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg) | + MDIO_CMD_DATA(val) | MDIO_CMD_WR; + + return nb8800_mdio_cmd(bus, cmd); +} + +static void nb8800_mac_tx(struct net_device *dev, bool enable) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + while (nb8800_readl(priv, NB8800_TXC_CR) & TCR_EN) + cpu_relax(); + + nb8800_modb(priv, NB8800_TX_CTL1, TX_EN, enable); +} + +static void nb8800_mac_rx(struct net_device *dev, bool enable) +{ + nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_EN, enable); +} + +static void nb8800_mac_af(struct net_device *dev, bool enable) +{ + nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_AF_EN, enable); +} + +static void nb8800_start_rx(struct net_device *dev) +{ + nb8800_setl(netdev_priv(dev), NB8800_RXC_CR, RCR_EN); +} + +static int nb8800_alloc_rx(struct net_device *dev, unsigned int i, bool napi) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_rx_desc *rxd = &priv->rx_descs[i]; + struct nb8800_rx_buf *rxb = &priv->rx_bufs[i]; + int size = L1_CACHE_ALIGN(RX_BUF_SIZE); + dma_addr_t dma_addr; + struct page *page; + unsigned long offset; + void *data; + + data = napi ? napi_alloc_frag(size) : netdev_alloc_frag(size); + if (!data) + return -ENOMEM; + + page = virt_to_head_page(data); + offset = data - page_address(page); + + dma_addr = dma_map_page(&dev->dev, page, offset, RX_BUF_SIZE, + DMA_FROM_DEVICE); + + if (dma_mapping_error(&dev->dev, dma_addr)) { + skb_free_frag(data); + return -ENOMEM; + } + + rxb->page = page; + rxb->offset = offset; + rxd->desc.s_addr = dma_addr; + + return 0; +} + +static void nb8800_receive(struct net_device *dev, unsigned int i, + unsigned int len) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_rx_desc *rxd = &priv->rx_descs[i]; + struct page *page = priv->rx_bufs[i].page; + int offset = priv->rx_bufs[i].offset; + void *data = page_address(page) + offset; + dma_addr_t dma = rxd->desc.s_addr; + struct sk_buff *skb; + unsigned int size; + int err; + + size = len <= RX_COPYBREAK ? len : RX_COPYHDR; + + skb = napi_alloc_skb(&priv->napi, size); + if (!skb) { + netdev_err(dev, "rx skb allocation failed\n"); + dev->stats.rx_dropped++; + return; + } + + if (len <= RX_COPYBREAK) { + dma_sync_single_for_cpu(&dev->dev, dma, len, DMA_FROM_DEVICE); + memcpy(skb_put(skb, len), data, len); + dma_sync_single_for_device(&dev->dev, dma, len, + DMA_FROM_DEVICE); + } else { + err = nb8800_alloc_rx(dev, i, true); + if (err) { + netdev_err(dev, "rx buffer allocation failed\n"); + dev->stats.rx_dropped++; + return; + } + + dma_unmap_page(&dev->dev, dma, RX_BUF_SIZE, DMA_FROM_DEVICE); + memcpy(skb_put(skb, RX_COPYHDR), data, RX_COPYHDR); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + offset + RX_COPYHDR, len - RX_COPYHDR, + RX_BUF_SIZE); + } + + skb->protocol = eth_type_trans(skb, dev); + napi_gro_receive(&priv->napi, skb); +} + +static void nb8800_rx_error(struct net_device *dev, u32 report) +{ + if (report & RX_LENGTH_ERR) + dev->stats.rx_length_errors++; + + if (report & RX_FCS_ERR) + dev->stats.rx_crc_errors++; + + if (report & RX_FIFO_OVERRUN) + dev->stats.rx_fifo_errors++; + + if (report & RX_ALIGNMENT_ERROR) + dev->stats.rx_frame_errors++; + + dev->stats.rx_errors++; +} + +static int nb8800_poll(struct napi_struct *napi, int budget) +{ + struct net_device *dev = napi->dev; + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_rx_desc *rxd; + unsigned int last = priv->rx_eoc; + unsigned int next; + int work = 0; + + nb8800_tx_done(dev); + +again: + while (work < budget) { + struct nb8800_rx_buf *rxb; + unsigned int len; + + next = (last + 1) % RX_DESC_COUNT; + + rxb = &priv->rx_bufs[next]; + rxd = &priv->rx_descs[next]; + + if (!rxd->report) + break; + + len = RX_BYTES_TRANSFERRED(rxd->report); + + if (IS_RX_ERROR(rxd->report)) + nb8800_rx_error(dev, rxd->report); + else + nb8800_receive(dev, next, len); + + dev->stats.rx_packets++; + dev->stats.rx_bytes += len; + + if (rxd->report & RX_MULTICAST_PKT) + dev->stats.multicast++; + + rxd->report = 0; + last = next; + work++; + } + + if (work) { + priv->rx_descs[last].desc.config |= DESC_EOC; + wmb(); /* ensure new EOC is written before clearing old */ + priv->rx_descs[priv->rx_eoc].desc.config &= ~DESC_EOC; + priv->rx_eoc = last; + nb8800_start_rx(dev); + } + + if (work < budget) { + nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq); + + /* If a packet arrived after we last checked but + * before writing RX_ITR, the interrupt will be + * delayed, so we retrieve it now. + */ + if (priv->rx_descs[next].report) + goto again; + + napi_complete_done(napi, work); + } + + return work; +} + +static void __nb8800_tx_dma_start(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_tx_buf *txb; + u32 txc_cr; + + txb = &priv->tx_bufs[priv->tx_queue]; + if (!txb->ready) + return; + + txc_cr = nb8800_readl(priv, NB8800_TXC_CR); + if (txc_cr & TCR_EN) + return; + + nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc); + wmb(); /* ensure desc addr is written before starting DMA */ + nb8800_writel(priv, NB8800_TXC_CR, txc_cr | TCR_EN); + + priv->tx_queue = (priv->tx_queue + txb->chain_len) % TX_DESC_COUNT; +} + +static void nb8800_tx_dma_start(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + spin_lock_irq(&priv->tx_lock); + __nb8800_tx_dma_start(dev); + spin_unlock_irq(&priv->tx_lock); +} + +static void nb8800_tx_dma_start_irq(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + spin_lock(&priv->tx_lock); + __nb8800_tx_dma_start(dev); + spin_unlock(&priv->tx_lock); +} + +static int nb8800_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_tx_desc *txd; + struct nb8800_tx_buf *txb; + struct nb8800_dma_desc *desc; + dma_addr_t dma_addr; + unsigned int dma_len; + unsigned int align; + unsigned int next; + + if (atomic_read(&priv->tx_free) <= NB8800_DESC_LOW) { + netif_stop_queue(dev); + return NETDEV_TX_BUSY; + } + + align = (8 - (uintptr_t)skb->data) & 7; + + dma_len = skb->len - align; + dma_addr = dma_map_single(&dev->dev, skb->data + align, + dma_len, DMA_TO_DEVICE); + + if (dma_mapping_error(&dev->dev, dma_addr)) { + netdev_err(dev, "tx dma mapping error\n"); + kfree_skb(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + if (atomic_dec_return(&priv->tx_free) <= NB8800_DESC_LOW) { + netif_stop_queue(dev); + skb->xmit_more = 0; + } + + next = priv->tx_next; + txb = &priv->tx_bufs[next]; + txd = &priv->tx_descs[next]; + desc = &txd->desc[0]; + + next = (next + 1) % TX_DESC_COUNT; + + if (align) { + memcpy(txd->buf, skb->data, align); + + desc->s_addr = + txb->dma_desc + offsetof(struct nb8800_tx_desc, buf); + desc->n_addr = txb->dma_desc + sizeof(txd->desc[0]); + desc->config = DESC_BTS(2) | DESC_DS | align; + + desc++; + } + + desc->s_addr = dma_addr; + desc->n_addr = priv->tx_bufs[next].dma_desc; + desc->config = DESC_BTS(2) | DESC_DS | DESC_EOF | dma_len; + + if (!skb->xmit_more) + desc->config |= DESC_EOC; + + txb->skb = skb; + txb->dma_addr = dma_addr; + txb->dma_len = dma_len; + + if (!priv->tx_chain) { + txb->chain_len = 1; + priv->tx_chain = txb; + } else { + priv->tx_chain->chain_len++; + } + + netdev_sent_queue(dev, skb->len); + + priv->tx_next = next; + + if (!skb->xmit_more) { + smp_wmb(); + priv->tx_chain->ready = true; + priv->tx_chain = NULL; + nb8800_tx_dma_start(dev); + } + + return NETDEV_TX_OK; +} + +static void nb8800_tx_error(struct net_device *dev, u32 report) +{ + if (report & TX_LATE_COLLISION) + dev->stats.collisions++; + + if (report & TX_PACKET_DROPPED) + dev->stats.tx_dropped++; + + if (report & TX_FIFO_UNDERRUN) + dev->stats.tx_fifo_errors++; + + dev->stats.tx_errors++; +} + +static void nb8800_tx_done(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + unsigned int limit = priv->tx_next; + unsigned int done = priv->tx_done; + unsigned int packets = 0; + unsigned int len = 0; + + while (done != limit) { + struct nb8800_tx_desc *txd = &priv->tx_descs[done]; + struct nb8800_tx_buf *txb = &priv->tx_bufs[done]; + struct sk_buff *skb; + + if (!txd->report) + break; + + skb = txb->skb; + len += skb->len; + + dma_unmap_single(&dev->dev, txb->dma_addr, txb->dma_len, + DMA_TO_DEVICE); + + if (IS_TX_ERROR(txd->report)) { + nb8800_tx_error(dev, txd->report); + kfree_skb(skb); + } else { + consume_skb(skb); + } + + dev->stats.tx_packets++; + dev->stats.tx_bytes += TX_BYTES_TRANSFERRED(txd->report); + dev->stats.collisions += TX_EARLY_COLLISIONS(txd->report); + + txb->skb = NULL; + txb->ready = false; + txd->report = 0; + + done = (done + 1) % TX_DESC_COUNT; + packets++; + } + + if (packets) { + smp_mb__before_atomic(); + atomic_add(packets, &priv->tx_free); + netdev_completed_queue(dev, packets, len); + netif_wake_queue(dev); + priv->tx_done = done; + } +} + +static irqreturn_t nb8800_irq(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct nb8800_priv *priv = netdev_priv(dev); + irqreturn_t ret = IRQ_NONE; + u32 val; + + /* tx interrupt */ + val = nb8800_readl(priv, NB8800_TXC_SR); + if (val) { + nb8800_writel(priv, NB8800_TXC_SR, val); + + if (val & TSR_DI) + nb8800_tx_dma_start_irq(dev); + + if (val & TSR_TI) + napi_schedule_irqoff(&priv->napi); + + if (unlikely(val & TSR_DE)) + netdev_err(dev, "TX DMA error\n"); + + /* should never happen with automatic status retrieval */ + if (unlikely(val & TSR_TO)) + netdev_err(dev, "TX Status FIFO overflow\n"); + + ret = IRQ_HANDLED; + } + + /* rx interrupt */ + val = nb8800_readl(priv, NB8800_RXC_SR); + if (val) { + nb8800_writel(priv, NB8800_RXC_SR, val); + + if (likely(val & (RSR_RI | RSR_DI))) { + nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_poll); + napi_schedule_irqoff(&priv->napi); + } + + if (unlikely(val & RSR_DE)) + netdev_err(dev, "RX DMA error\n"); + + /* should never happen with automatic status retrieval */ + if (unlikely(val & RSR_RO)) + netdev_err(dev, "RX Status FIFO overflow\n"); + + ret = IRQ_HANDLED; + } + + return ret; +} + +static void nb8800_mac_config(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + bool gigabit = priv->speed == SPEED_1000; + u32 mac_mode_mask = RGMII_MODE | HALF_DUPLEX | GMAC_MODE; + u32 mac_mode = 0; + u32 slot_time; + u32 phy_clk; + u32 ict; + + if (!priv->duplex) + mac_mode |= HALF_DUPLEX; + + if (gigabit) { + if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII) + mac_mode |= RGMII_MODE; + + mac_mode |= GMAC_MODE; + phy_clk = 125000000; + + /* Should be 512 but register is only 8 bits */ + slot_time = 255; + } else { + phy_clk = 25000000; + slot_time = 128; + } + + ict = DIV_ROUND_UP(phy_clk, clk_get_rate(priv->clk)); + + nb8800_writeb(priv, NB8800_IC_THRESHOLD, ict); + nb8800_writeb(priv, NB8800_SLOT_TIME, slot_time); + nb8800_maskb(priv, NB8800_MAC_MODE, mac_mode_mask, mac_mode); +} + +static void nb8800_pause_config(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct phy_device *phydev = priv->phydev; + u32 rxcr; + + if (priv->pause_aneg) { + if (!phydev || !phydev->link) + return; + + priv->pause_rx = phydev->pause; + priv->pause_tx = phydev->pause ^ phydev->asym_pause; + } + + nb8800_modb(priv, NB8800_RX_CTL, RX_PAUSE_EN, priv->pause_rx); + + rxcr = nb8800_readl(priv, NB8800_RXC_CR); + if (!!(rxcr & RCR_FL) == priv->pause_tx) + return; + + if (netif_running(dev)) { + napi_disable(&priv->napi); + netif_tx_lock_bh(dev); + nb8800_dma_stop(dev); + nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx); + nb8800_start_rx(dev); + netif_tx_unlock_bh(dev); + napi_enable(&priv->napi); + } else { + nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx); + } +} + +static void nb8800_link_reconfigure(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct phy_device *phydev = priv->phydev; + int change = 0; + + if (phydev->link) { + if (phydev->speed != priv->speed) { + priv->speed = phydev->speed; + change = 1; + } + + if (phydev->duplex != priv->duplex) { + priv->duplex = phydev->duplex; + change = 1; + } + + if (change) + nb8800_mac_config(dev); + + nb8800_pause_config(dev); + } + + if (phydev->link != priv->link) { + priv->link = phydev->link; + change = 1; + } + + if (change) + phy_print_status(priv->phydev); +} + +static void nb8800_update_mac_addr(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + int i; + + for (i = 0; i < ETH_ALEN; i++) + nb8800_writeb(priv, NB8800_SRC_ADDR(i), dev->dev_addr[i]); + + for (i = 0; i < ETH_ALEN; i++) + nb8800_writeb(priv, NB8800_UC_ADDR(i), dev->dev_addr[i]); +} + +static int nb8800_set_mac_address(struct net_device *dev, void *addr) +{ + struct sockaddr *sock = addr; + + if (netif_running(dev)) + return -EBUSY; + + ether_addr_copy(dev->dev_addr, sock->sa_data); + nb8800_update_mac_addr(dev); + + return 0; +} + +static void nb8800_mc_init(struct net_device *dev, int val) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + nb8800_writeb(priv, NB8800_MC_INIT, val); + readb_poll_timeout_atomic(priv->base + NB8800_MC_INIT, val, !val, + 1, 1000); +} + +static void nb8800_set_rx_mode(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct netdev_hw_addr *ha; + int i; + + if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { + nb8800_mac_af(dev, false); + return; + } + + nb8800_mac_af(dev, true); + nb8800_mc_init(dev, 0); + + netdev_for_each_mc_addr(ha, dev) { + for (i = 0; i < ETH_ALEN; i++) + nb8800_writeb(priv, NB8800_MC_ADDR(i), ha->addr[i]); + + nb8800_mc_init(dev, 0xff); + } +} + +#define RX_DESC_SIZE (RX_DESC_COUNT * sizeof(struct nb8800_rx_desc)) +#define TX_DESC_SIZE (TX_DESC_COUNT * sizeof(struct nb8800_tx_desc)) + +static void nb8800_dma_free(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + unsigned int i; + + if (priv->rx_bufs) { + for (i = 0; i < RX_DESC_COUNT; i++) + if (priv->rx_bufs[i].page) + put_page(priv->rx_bufs[i].page); + + kfree(priv->rx_bufs); + priv->rx_bufs = NULL; + } + + if (priv->tx_bufs) { + for (i = 0; i < TX_DESC_COUNT; i++) + kfree_skb(priv->tx_bufs[i].skb); + + kfree(priv->tx_bufs); + priv->tx_bufs = NULL; + } + + if (priv->rx_descs) { + dma_free_coherent(dev->dev.parent, RX_DESC_SIZE, priv->rx_descs, + priv->rx_desc_dma); + priv->rx_descs = NULL; + } + + if (priv->tx_descs) { + dma_free_coherent(dev->dev.parent, TX_DESC_SIZE, priv->tx_descs, + priv->tx_desc_dma); + priv->tx_descs = NULL; + } +} + +static void nb8800_dma_reset(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_rx_desc *rxd; + struct nb8800_tx_desc *txd; + unsigned int i; + + for (i = 0; i < RX_DESC_COUNT; i++) { + dma_addr_t rx_dma = priv->rx_desc_dma + i * sizeof(*rxd); + + rxd = &priv->rx_descs[i]; + rxd->desc.n_addr = rx_dma + sizeof(*rxd); + rxd->desc.r_addr = + rx_dma + offsetof(struct nb8800_rx_desc, report); + rxd->desc.config = priv->rx_dma_config; + rxd->report = 0; + } + + rxd->desc.n_addr = priv->rx_desc_dma; + rxd->desc.config |= DESC_EOC; + + priv->rx_eoc = RX_DESC_COUNT - 1; + + for (i = 0; i < TX_DESC_COUNT; i++) { + struct nb8800_tx_buf *txb = &priv->tx_bufs[i]; + dma_addr_t r_dma = txb->dma_desc + + offsetof(struct nb8800_tx_desc, report); + + txd = &priv->tx_descs[i]; + txd->desc[0].r_addr = r_dma; + txd->desc[1].r_addr = r_dma; + txd->report = 0; + } + + priv->tx_next = 0; + priv->tx_queue = 0; + priv->tx_done = 0; + atomic_set(&priv->tx_free, TX_DESC_COUNT); + + nb8800_writel(priv, NB8800_RX_DESC_ADDR, priv->rx_desc_dma); + + wmb(); /* ensure all setup is written before starting */ +} + +static int nb8800_dma_init(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + unsigned int n_rx = RX_DESC_COUNT; + unsigned int n_tx = TX_DESC_COUNT; + unsigned int i; + int err; + + priv->rx_descs = dma_alloc_coherent(dev->dev.parent, RX_DESC_SIZE, + &priv->rx_desc_dma, GFP_KERNEL); + if (!priv->rx_descs) + goto err_out; + + priv->rx_bufs = kcalloc(n_rx, sizeof(*priv->rx_bufs), GFP_KERNEL); + if (!priv->rx_bufs) + goto err_out; + + for (i = 0; i < n_rx; i++) { + err = nb8800_alloc_rx(dev, i, false); + if (err) + goto err_out; + } + + priv->tx_descs = dma_alloc_coherent(dev->dev.parent, TX_DESC_SIZE, + &priv->tx_desc_dma, GFP_KERNEL); + if (!priv->tx_descs) + goto err_out; + + priv->tx_bufs = kcalloc(n_tx, sizeof(*priv->tx_bufs), GFP_KERNEL); + if (!priv->tx_bufs) + goto err_out; + + for (i = 0; i < n_tx; i++) + priv->tx_bufs[i].dma_desc = + priv->tx_desc_dma + i * sizeof(struct nb8800_tx_desc); + + nb8800_dma_reset(dev); + + return 0; + +err_out: + nb8800_dma_free(dev); + + return -ENOMEM; +} + +static int nb8800_dma_stop(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_tx_buf *txb = &priv->tx_bufs[0]; + struct nb8800_tx_desc *txd = &priv->tx_descs[0]; + int retry = 5; + u32 txcr; + u32 rxcr; + int err; + unsigned int i; + + /* wait for tx to finish */ + err = readl_poll_timeout_atomic(priv->base + NB8800_TXC_CR, txcr, + !(txcr & TCR_EN) && + priv->tx_done == priv->tx_next, + 1000, 1000000); + if (err) + return err; + + /* The rx DMA only stops if it reaches the end of chain. + * To make this happen, we set the EOC flag on all rx + * descriptors, put the device in loopback mode, and send + * a few dummy frames. The interrupt handler will ignore + * these since NAPI is disabled and no real frames are in + * the tx queue. + */ + + for (i = 0; i < RX_DESC_COUNT; i++) + priv->rx_descs[i].desc.config |= DESC_EOC; + + txd->desc[0].s_addr = + txb->dma_desc + offsetof(struct nb8800_tx_desc, buf); + txd->desc[0].config = DESC_BTS(2) | DESC_DS | DESC_EOF | DESC_EOC | 8; + memset(txd->buf, 0, sizeof(txd->buf)); + + nb8800_mac_af(dev, false); + nb8800_setb(priv, NB8800_MAC_MODE, LOOPBACK_EN); + + do { + nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc); + wmb(); + nb8800_writel(priv, NB8800_TXC_CR, txcr | TCR_EN); + + err = readl_poll_timeout_atomic(priv->base + NB8800_RXC_CR, + rxcr, !(rxcr & RCR_EN), + 1000, 100000); + } while (err && --retry); + + nb8800_mac_af(dev, true); + nb8800_clearb(priv, NB8800_MAC_MODE, LOOPBACK_EN); + nb8800_dma_reset(dev); + + return retry ? 0 : -ETIMEDOUT; +} + +static void nb8800_pause_adv(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + u32 adv = 0; + + if (!priv->phydev) + return; + + if (priv->pause_rx) + adv |= ADVERTISED_Pause | ADVERTISED_Asym_Pause; + if (priv->pause_tx) + adv ^= ADVERTISED_Asym_Pause; + + priv->phydev->supported |= adv; + priv->phydev->advertising |= adv; +} + +static int nb8800_open(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + int err; + + /* clear any pending interrupts */ + nb8800_writel(priv, NB8800_RXC_SR, 0xf); + nb8800_writel(priv, NB8800_TXC_SR, 0xf); + + err = nb8800_dma_init(dev); + if (err) + return err; + + err = request_irq(dev->irq, nb8800_irq, 0, dev_name(&dev->dev), dev); + if (err) + goto err_free_dma; + + nb8800_mac_rx(dev, true); + nb8800_mac_tx(dev, true); + + priv->phydev = of_phy_connect(dev, priv->phy_node, + nb8800_link_reconfigure, 0, + priv->phy_mode); + if (!priv->phydev) + goto err_free_irq; + + nb8800_pause_adv(dev); + + netdev_reset_queue(dev); + napi_enable(&priv->napi); + netif_start_queue(dev); + + nb8800_start_rx(dev); + phy_start(priv->phydev); + + return 0; + +err_free_irq: + free_irq(dev->irq, dev); +err_free_dma: + nb8800_dma_free(dev); + + return err; +} + +static int nb8800_stop(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + phy_stop(priv->phydev); + + netif_stop_queue(dev); + napi_disable(&priv->napi); + + nb8800_dma_stop(dev); + nb8800_mac_rx(dev, false); + nb8800_mac_tx(dev, false); + + phy_disconnect(priv->phydev); + priv->phydev = NULL; + + free_irq(dev->irq, dev); + + nb8800_dma_free(dev); + + return 0; +} + +static int nb8800_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + return phy_mii_ioctl(priv->phydev, rq, cmd); +} + +static const struct net_device_ops nb8800_netdev_ops = { + .ndo_open = nb8800_open, + .ndo_stop = nb8800_stop, + .ndo_start_xmit = nb8800_xmit, + .ndo_set_mac_address = nb8800_set_mac_address, + .ndo_set_rx_mode = nb8800_set_rx_mode, + .ndo_do_ioctl = nb8800_ioctl, + .ndo_change_mtu = eth_change_mtu, + .ndo_validate_addr = eth_validate_addr, +}; + +static int nb8800_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + if (!priv->phydev) + return -ENODEV; + + return phy_ethtool_gset(priv->phydev, cmd); +} + +static int nb8800_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + if (!priv->phydev) + return -ENODEV; + + return phy_ethtool_sset(priv->phydev, cmd); +} + +static int nb8800_nway_reset(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + if (!priv->phydev) + return -ENODEV; + + return genphy_restart_aneg(priv->phydev); +} + +static void nb8800_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pp) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + pp->autoneg = priv->pause_aneg; + pp->rx_pause = priv->pause_rx; + pp->tx_pause = priv->pause_tx; +} + +static int nb8800_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pp) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + priv->pause_aneg = pp->autoneg; + priv->pause_rx = pp->rx_pause; + priv->pause_tx = pp->tx_pause; + + nb8800_pause_adv(dev); + + if (!priv->pause_aneg) + nb8800_pause_config(dev); + else if (priv->phydev) + phy_start_aneg(priv->phydev); + + return 0; +} + +static const char nb8800_stats_names[][ETH_GSTRING_LEN] = { + "rx_bytes_ok", + "rx_frames_ok", + "rx_undersize_frames", + "rx_fragment_frames", + "rx_64_byte_frames", + "rx_127_byte_frames", + "rx_255_byte_frames", + "rx_511_byte_frames", + "rx_1023_byte_frames", + "rx_max_size_frames", + "rx_oversize_frames", + "rx_bad_fcs_frames", + "rx_broadcast_frames", + "rx_multicast_frames", + "rx_control_frames", + "rx_pause_frames", + "rx_unsup_control_frames", + "rx_align_error_frames", + "rx_overrun_frames", + "rx_jabber_frames", + "rx_bytes", + "rx_frames", + + "tx_bytes_ok", + "tx_frames_ok", + "tx_64_byte_frames", + "tx_127_byte_frames", + "tx_255_byte_frames", + "tx_511_byte_frames", + "tx_1023_byte_frames", + "tx_max_size_frames", + "tx_oversize_frames", + "tx_broadcast_frames", + "tx_multicast_frames", + "tx_control_frames", + "tx_pause_frames", + "tx_underrun_frames", + "tx_single_collision_frames", + "tx_multi_collision_frames", + "tx_deferred_collision_frames", + "tx_late_collision_frames", + "tx_excessive_collision_frames", + "tx_bytes", + "tx_frames", + "tx_collisions", +}; + +#define NB8800_NUM_STATS ARRAY_SIZE(nb8800_stats_names) + +static int nb8800_get_sset_count(struct net_device *dev, int sset) +{ + if (sset == ETH_SS_STATS) + return NB8800_NUM_STATS; + + return -EOPNOTSUPP; +} + +static void nb8800_get_strings(struct net_device *dev, u32 sset, u8 *buf) +{ + if (sset == ETH_SS_STATS) + memcpy(buf, &nb8800_stats_names, sizeof(nb8800_stats_names)); +} + +static u32 nb8800_read_stat(struct net_device *dev, int index) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + nb8800_writeb(priv, NB8800_STAT_INDEX, index); + + return nb8800_readl(priv, NB8800_STAT_DATA); +} + +static void nb8800_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *estats, u64 *st) +{ + unsigned int i; + u32 rx, tx; + + for (i = 0; i < NB8800_NUM_STATS / 2; i++) { + rx = nb8800_read_stat(dev, i); + tx = nb8800_read_stat(dev, i | 0x80); + st[i] = rx; + st[i + NB8800_NUM_STATS / 2] = tx; + } +} + +static const struct ethtool_ops nb8800_ethtool_ops = { + .get_settings = nb8800_get_settings, + .set_settings = nb8800_set_settings, + .nway_reset = nb8800_nway_reset, + .get_link = ethtool_op_get_link, + .get_pauseparam = nb8800_get_pauseparam, + .set_pauseparam = nb8800_set_pauseparam, + .get_sset_count = nb8800_get_sset_count, + .get_strings = nb8800_get_strings, + .get_ethtool_stats = nb8800_get_ethtool_stats, +}; + +static int nb8800_hw_init(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + u32 val; + + val = TX_RETRY_EN | TX_PAD_EN | TX_APPEND_FCS; + nb8800_writeb(priv, NB8800_TX_CTL1, val); + + /* Collision retry count */ + nb8800_writeb(priv, NB8800_TX_CTL2, 5); + + val = RX_PAD_STRIP | RX_AF_EN; + nb8800_writeb(priv, NB8800_RX_CTL, val); + + /* Chosen by fair dice roll */ + nb8800_writeb(priv, NB8800_RANDOM_SEED, 4); + + /* TX cycles per deferral period */ + nb8800_writeb(priv, NB8800_TX_SDP, 12); + + /* The following three threshold values have been + * experimentally determined for good results. + */ + + /* RX/TX FIFO threshold for partial empty (64-bit entries) */ + nb8800_writeb(priv, NB8800_PE_THRESHOLD, 0); + + /* RX/TX FIFO threshold for partial full (64-bit entries) */ + nb8800_writeb(priv, NB8800_PF_THRESHOLD, 255); + + /* Buffer size for transmit (64-bit entries) */ + nb8800_writeb(priv, NB8800_TX_BUFSIZE, 64); + + /* Configure tx DMA */ + + val = nb8800_readl(priv, NB8800_TXC_CR); + val &= TCR_LE; /* keep endian setting */ + val |= TCR_DM; /* DMA descriptor mode */ + val |= TCR_RS; /* automatically store tx status */ + val |= TCR_DIE; /* interrupt on DMA chain completion */ + val |= TCR_TFI(7); /* interrupt after 7 frames transmitted */ + val |= TCR_BTS(2); /* 32-byte bus transaction size */ + nb8800_writel(priv, NB8800_TXC_CR, val); + + /* TX complete interrupt after 10 ms or 7 frames (see above) */ + val = clk_get_rate(priv->clk) / 100; + nb8800_writel(priv, NB8800_TX_ITR, val); + + /* Configure rx DMA */ + + val = nb8800_readl(priv, NB8800_RXC_CR); + val &= RCR_LE; /* keep endian setting */ + val |= RCR_DM; /* DMA descriptor mode */ + val |= RCR_RS; /* automatically store rx status */ + val |= RCR_DIE; /* interrupt at end of DMA chain */ + val |= RCR_RFI(7); /* interrupt after 7 frames received */ + val |= RCR_BTS(2); /* 32-byte bus transaction size */ + nb8800_writel(priv, NB8800_RXC_CR, val); + + /* The rx interrupt can fire before the DMA has completed + * unless a small delay is added. 50 us is hopefully enough. + */ + priv->rx_itr_irq = clk_get_rate(priv->clk) / 20000; + + /* In NAPI poll mode we want to disable interrupts, but the + * hardware does not permit this. Delay 10 ms instead. + */ + priv->rx_itr_poll = clk_get_rate(priv->clk) / 100; + + nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq); + + priv->rx_dma_config = RX_BUF_SIZE | DESC_BTS(2) | DESC_DS | DESC_EOF; + + /* Flow control settings */ + + /* Pause time of 0.1 ms */ + val = 100000 / 512; + nb8800_writeb(priv, NB8800_PQ1, val >> 8); + nb8800_writeb(priv, NB8800_PQ2, val & 0xff); + + /* Auto-negotiate by default */ + priv->pause_aneg = true; + priv->pause_rx = true; + priv->pause_tx = true; + + nb8800_mc_init(dev, 0); + + return 0; +} + +static int nb8800_tangox_init(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + u32 pad_mode = PAD_MODE_MII; + + switch (priv->phy_mode) { + case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_GMII: + pad_mode = PAD_MODE_MII; + break; + + case PHY_INTERFACE_MODE_RGMII: + pad_mode = PAD_MODE_RGMII; + break; + + case PHY_INTERFACE_MODE_RGMII_TXID: + pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY; + break; + + default: + dev_err(dev->dev.parent, "unsupported phy mode %s\n", + phy_modes(priv->phy_mode)); + return -EINVAL; + } + + nb8800_writeb(priv, NB8800_TANGOX_PAD_MODE, pad_mode); + + return 0; +} + +static int nb8800_tangox_reset(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + int clk_div; + + nb8800_writeb(priv, NB8800_TANGOX_RESET, 0); + usleep_range(1000, 10000); + nb8800_writeb(priv, NB8800_TANGOX_RESET, 1); + + wmb(); /* ensure reset is cleared before proceeding */ + + clk_div = DIV_ROUND_UP(clk_get_rate(priv->clk), 2 * MAX_MDC_CLOCK); + nb8800_writew(priv, NB8800_TANGOX_MDIO_CLKDIV, clk_div); + + return 0; +} + +static const struct nb8800_ops nb8800_tangox_ops = { + .init = nb8800_tangox_init, + .reset = nb8800_tangox_reset, +}; + +static int nb8800_tango4_init(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + int err; + + err = nb8800_tangox_init(dev); + if (err) + return err; + + /* On tango4 interrupt on DMA completion per frame works and gives + * better performance despite generating more rx interrupts. + */ + + /* Disable unnecessary interrupt on rx completion */ + nb8800_clearl(priv, NB8800_RXC_CR, RCR_RFI(7)); + + /* Request interrupt on descriptor DMA completion */ + priv->rx_dma_config |= DESC_ID; + + return 0; +} + +static const struct nb8800_ops nb8800_tango4_ops = { + .init = nb8800_tango4_init, + .reset = nb8800_tangox_reset, +}; + +static const struct of_device_id nb8800_dt_ids[] = { + { + .compatible = "aurora,nb8800", + }, + { + .compatible = "sigma,smp8642-ethernet", + .data = &nb8800_tangox_ops, + }, + { + .compatible = "sigma,smp8734-ethernet", + .data = &nb8800_tango4_ops, + }, + { } +}; + +static int nb8800_probe(struct platform_device *pdev) +{ + const struct of_device_id *match; + const struct nb8800_ops *ops = NULL; + struct nb8800_priv *priv; + struct resource *res; + struct net_device *dev; + struct mii_bus *bus; + const unsigned char *mac; + void __iomem *base; + int irq; + int ret; + + match = of_match_device(nb8800_dt_ids, &pdev->dev); + if (match) + ops = match->data; + + irq = platform_get_irq(pdev, 0); + if (irq <= 0) { + dev_err(&pdev->dev, "No IRQ\n"); + return -EINVAL; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + dev_dbg(&pdev->dev, "AU-NB8800 Ethernet at %pa\n", &res->start); + + dev = alloc_etherdev(sizeof(*priv)); + if (!dev) + return -ENOMEM; + + platform_set_drvdata(pdev, dev); + SET_NETDEV_DEV(dev, &pdev->dev); + + priv = netdev_priv(dev); + priv->base = base; + + priv->phy_mode = of_get_phy_mode(pdev->dev.of_node); + if (priv->phy_mode < 0) + priv->phy_mode = PHY_INTERFACE_MODE_RGMII; + + priv->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(priv->clk)) { + dev_err(&pdev->dev, "failed to get clock\n"); + ret = PTR_ERR(priv->clk); + goto err_free_dev; + } + + ret = clk_prepare_enable(priv->clk); + if (ret) + goto err_free_dev; + + spin_lock_init(&priv->tx_lock); + + if (ops && ops->reset) { + ret = ops->reset(dev); + if (ret) + goto err_free_dev; + } + + bus = devm_mdiobus_alloc(&pdev->dev); + if (!bus) { + ret = -ENOMEM; + goto err_disable_clk; + } + + bus->name = "nb8800-mii"; + bus->read = nb8800_mdio_read; + bus->write = nb8800_mdio_write; + bus->parent = &pdev->dev; + snprintf(bus->id, MII_BUS_ID_SIZE, "%lx.nb8800-mii", + (unsigned long)res->start); + bus->priv = priv; + + ret = of_mdiobus_register(bus, pdev->dev.of_node); + if (ret) { + dev_err(&pdev->dev, "failed to register MII bus\n"); + goto err_disable_clk; + } + + priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); + if (!priv->phy_node) { + dev_err(&pdev->dev, "no PHY specified\n"); + ret = -ENODEV; + goto err_free_bus; + } + + priv->mii_bus = bus; + + ret = nb8800_hw_init(dev); + if (ret) + goto err_free_bus; + + if (ops && ops->init) { + ret = ops->init(dev); + if (ret) + goto err_free_bus; + } + + dev->netdev_ops = &nb8800_netdev_ops; + dev->ethtool_ops = &nb8800_ethtool_ops; + dev->flags |= IFF_MULTICAST; + dev->irq = irq; + + mac = of_get_mac_address(pdev->dev.of_node); + if (mac) + ether_addr_copy(dev->dev_addr, mac); + + if (!is_valid_ether_addr(dev->dev_addr)) + eth_hw_addr_random(dev); + + nb8800_update_mac_addr(dev); + + netif_carrier_off(dev); + + ret = register_netdev(dev); + if (ret) { + netdev_err(dev, "failed to register netdev\n"); + goto err_free_dma; + } + + netif_napi_add(dev, &priv->napi, nb8800_poll, NAPI_POLL_WEIGHT); + + netdev_info(dev, "MAC address %pM\n", dev->dev_addr); + + return 0; + +err_free_dma: + nb8800_dma_free(dev); +err_free_bus: + mdiobus_unregister(bus); +err_disable_clk: + clk_disable_unprepare(priv->clk); +err_free_dev: + free_netdev(dev); + + return ret; +} + +static int nb8800_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct nb8800_priv *priv = netdev_priv(ndev); + + unregister_netdev(ndev); + + mdiobus_unregister(priv->mii_bus); + + clk_disable_unprepare(priv->clk); + + nb8800_dma_free(ndev); + free_netdev(ndev); + + return 0; +} + +static struct platform_driver nb8800_driver = { + .driver = { + .name = "nb8800", + .of_match_table = nb8800_dt_ids, + }, + .probe = nb8800_probe, + .remove = nb8800_remove, +}; + +module_platform_driver(nb8800_driver); + +MODULE_DESCRIPTION("Aurora AU-NB8800 Ethernet driver"); +MODULE_AUTHOR("Mans Rullgard <mans@mansr.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/aurora/nb8800.h b/drivers/net/ethernet/aurora/nb8800.h new file mode 100644 index 000000000000..e5adbc2aac9f --- /dev/null +++ b/drivers/net/ethernet/aurora/nb8800.h @@ -0,0 +1,316 @@ +#ifndef _NB8800_H_ +#define _NB8800_H_ + +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/phy.h> +#include <linux/clk.h> +#include <linux/bitops.h> + +#define RX_DESC_COUNT 256 +#define TX_DESC_COUNT 256 + +#define NB8800_DESC_LOW 4 + +#define RX_BUF_SIZE 1552 + +#define RX_COPYBREAK 256 +#define RX_COPYHDR 128 + +#define MAX_MDC_CLOCK 2500000 + +/* Stargate Solutions SSN8800 core registers */ +#define NB8800_TX_CTL1 0x000 +#define TX_TPD BIT(5) +#define TX_APPEND_FCS BIT(4) +#define TX_PAD_EN BIT(3) +#define TX_RETRY_EN BIT(2) +#define TX_EN BIT(0) + +#define NB8800_TX_CTL2 0x001 + +#define NB8800_RX_CTL 0x004 +#define RX_BC_DISABLE BIT(7) +#define RX_RUNT BIT(6) +#define RX_AF_EN BIT(5) +#define RX_PAUSE_EN BIT(3) +#define RX_SEND_CRC BIT(2) +#define RX_PAD_STRIP BIT(1) +#define RX_EN BIT(0) + +#define NB8800_RANDOM_SEED 0x008 +#define NB8800_TX_SDP 0x14 +#define NB8800_TX_TPDP1 0x18 +#define NB8800_TX_TPDP2 0x19 +#define NB8800_SLOT_TIME 0x1c + +#define NB8800_MDIO_CMD 0x020 +#define MDIO_CMD_GO BIT(31) +#define MDIO_CMD_WR BIT(26) +#define MDIO_CMD_ADDR(x) ((x) << 21) +#define MDIO_CMD_REG(x) ((x) << 16) +#define MDIO_CMD_DATA(x) ((x) << 0) + +#define NB8800_MDIO_STS 0x024 +#define MDIO_STS_ERR BIT(31) + +#define NB8800_MC_ADDR(i) (0x028 + (i)) +#define NB8800_MC_INIT 0x02e +#define NB8800_UC_ADDR(i) (0x03c + (i)) + +#define NB8800_MAC_MODE 0x044 +#define RGMII_MODE BIT(7) +#define HALF_DUPLEX BIT(4) +#define BURST_EN BIT(3) +#define LOOPBACK_EN BIT(2) +#define GMAC_MODE BIT(0) + +#define NB8800_IC_THRESHOLD 0x050 +#define NB8800_PE_THRESHOLD 0x051 +#define NB8800_PF_THRESHOLD 0x052 +#define NB8800_TX_BUFSIZE 0x054 +#define NB8800_FIFO_CTL 0x056 +#define NB8800_PQ1 0x060 +#define NB8800_PQ2 0x061 +#define NB8800_SRC_ADDR(i) (0x06a + (i)) +#define NB8800_STAT_DATA 0x078 +#define NB8800_STAT_INDEX 0x07c +#define NB8800_STAT_CLEAR 0x07d + +#define NB8800_SLEEP_MODE 0x07e +#define SLEEP_MODE BIT(0) + +#define NB8800_WAKEUP 0x07f +#define WAKEUP BIT(0) + +/* Aurora NB8800 host interface registers */ +#define NB8800_TXC_CR 0x100 +#define TCR_LK BIT(12) +#define TCR_DS BIT(11) +#define TCR_BTS(x) (((x) & 0x7) << 8) +#define TCR_DIE BIT(7) +#define TCR_TFI(x) (((x) & 0x7) << 4) +#define TCR_LE BIT(3) +#define TCR_RS BIT(2) +#define TCR_DM BIT(1) +#define TCR_EN BIT(0) + +#define NB8800_TXC_SR 0x104 +#define TSR_DE BIT(3) +#define TSR_DI BIT(2) +#define TSR_TO BIT(1) +#define TSR_TI BIT(0) + +#define NB8800_TX_SAR 0x108 +#define NB8800_TX_DESC_ADDR 0x10c + +#define NB8800_TX_REPORT_ADDR 0x110 +#define TX_BYTES_TRANSFERRED(x) (((x) >> 16) & 0xffff) +#define TX_FIRST_DEFERRAL BIT(7) +#define TX_EARLY_COLLISIONS(x) (((x) >> 3) & 0xf) +#define TX_LATE_COLLISION BIT(2) +#define TX_PACKET_DROPPED BIT(1) +#define TX_FIFO_UNDERRUN BIT(0) +#define IS_TX_ERROR(r) ((r) & 0x07) + +#define NB8800_TX_FIFO_SR 0x114 +#define NB8800_TX_ITR 0x118 + +#define NB8800_RXC_CR 0x200 +#define RCR_FL BIT(13) +#define RCR_LK BIT(12) +#define RCR_DS BIT(11) +#define RCR_BTS(x) (((x) & 7) << 8) +#define RCR_DIE BIT(7) +#define RCR_RFI(x) (((x) & 7) << 4) +#define RCR_LE BIT(3) +#define RCR_RS BIT(2) +#define RCR_DM BIT(1) +#define RCR_EN BIT(0) + +#define NB8800_RXC_SR 0x204 +#define RSR_DE BIT(3) +#define RSR_DI BIT(2) +#define RSR_RO BIT(1) +#define RSR_RI BIT(0) + +#define NB8800_RX_SAR 0x208 +#define NB8800_RX_DESC_ADDR 0x20c + +#define NB8800_RX_REPORT_ADDR 0x210 +#define RX_BYTES_TRANSFERRED(x) (((x) >> 16) & 0xFFFF) +#define RX_MULTICAST_PKT BIT(9) +#define RX_BROADCAST_PKT BIT(8) +#define RX_LENGTH_ERR BIT(7) +#define RX_FCS_ERR BIT(6) +#define RX_RUNT_PKT BIT(5) +#define RX_FIFO_OVERRUN BIT(4) +#define RX_LATE_COLLISION BIT(3) +#define RX_ALIGNMENT_ERROR BIT(2) +#define RX_ERROR_MASK 0xfc +#define IS_RX_ERROR(r) ((r) & RX_ERROR_MASK) + +#define NB8800_RX_FIFO_SR 0x214 +#define NB8800_RX_ITR 0x218 + +/* Sigma Designs SMP86xx additional registers */ +#define NB8800_TANGOX_PAD_MODE 0x400 +#define PAD_MODE_MASK 0x7 +#define PAD_MODE_MII 0x0 +#define PAD_MODE_RGMII 0x1 +#define PAD_MODE_GTX_CLK_INV BIT(3) +#define PAD_MODE_GTX_CLK_DELAY BIT(4) + +#define NB8800_TANGOX_MDIO_CLKDIV 0x420 +#define NB8800_TANGOX_RESET 0x424 + +/* Hardware DMA descriptor */ +struct nb8800_dma_desc { + u32 s_addr; /* start address */ + u32 n_addr; /* next descriptor address */ + u32 r_addr; /* report address */ + u32 config; +} __aligned(8); + +#define DESC_ID BIT(23) +#define DESC_EOC BIT(22) +#define DESC_EOF BIT(21) +#define DESC_LK BIT(20) +#define DESC_DS BIT(19) +#define DESC_BTS(x) (((x) & 0x7) << 16) + +/* DMA descriptor and associated data for rx. + * Allocated from coherent memory. + */ +struct nb8800_rx_desc { + /* DMA descriptor */ + struct nb8800_dma_desc desc; + + /* Status report filled in by hardware */ + u32 report; +}; + +/* Address of buffer on rx ring */ +struct nb8800_rx_buf { + struct page *page; + unsigned long offset; +}; + +/* DMA descriptors and associated data for tx. + * Allocated from coherent memory. + */ +struct nb8800_tx_desc { + /* DMA descriptor. The second descriptor is used if packet + * data is unaligned. + */ + struct nb8800_dma_desc desc[2]; + + /* Status report filled in by hardware */ + u32 report; + + /* Bounce buffer for initial unaligned part of packet */ + u8 buf[8] __aligned(8); +}; + +/* Packet in tx queue */ +struct nb8800_tx_buf { + /* Currently queued skb */ + struct sk_buff *skb; + + /* DMA address of the first descriptor */ + dma_addr_t dma_desc; + + /* DMA address of packet data */ + dma_addr_t dma_addr; + + /* Length of DMA mapping, less than skb->len if alignment + * buffer is used. + */ + unsigned int dma_len; + + /* Number of packets in chain starting here */ + unsigned int chain_len; + + /* Packet chain ready to be submitted to hardware */ + bool ready; +}; + +struct nb8800_priv { + struct napi_struct napi; + + void __iomem *base; + + /* RX DMA descriptors */ + struct nb8800_rx_desc *rx_descs; + + /* RX buffers referenced by DMA descriptors */ + struct nb8800_rx_buf *rx_bufs; + + /* Current end of chain */ + u32 rx_eoc; + + /* Value for rx interrupt time register in NAPI interrupt mode */ + u32 rx_itr_irq; + + /* Value for rx interrupt time register in NAPI poll mode */ + u32 rx_itr_poll; + + /* Value for config field of rx DMA descriptors */ + u32 rx_dma_config; + + /* TX DMA descriptors */ + struct nb8800_tx_desc *tx_descs; + + /* TX packet queue */ + struct nb8800_tx_buf *tx_bufs; + + /* Number of free tx queue entries */ + atomic_t tx_free; + + /* First free tx queue entry */ + u32 tx_next; + + /* Next buffer to transmit */ + u32 tx_queue; + + /* Start of current packet chain */ + struct nb8800_tx_buf *tx_chain; + + /* Next buffer to reclaim */ + u32 tx_done; + + /* Lock for DMA activation */ + spinlock_t tx_lock; + + struct mii_bus *mii_bus; + struct device_node *phy_node; + struct phy_device *phydev; + + /* PHY connection type from DT */ + int phy_mode; + + /* Current link status */ + int speed; + int duplex; + int link; + + /* Pause settings */ + bool pause_aneg; + bool pause_rx; + bool pause_tx; + + /* DMA base address of rx descriptors, see rx_descs above */ + dma_addr_t rx_desc_dma; + + /* DMA base address of tx descriptors, see tx_descs above */ + dma_addr_t tx_desc_dma; + + struct clk *clk; +}; + +struct nb8800_ops { + int (*init)(struct net_device *dev); + int (*reset)(struct net_device *dev); +}; + +#endif /* _NB8800_H_ */ diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 8b1929e9f698..a54bafad3538 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -2884,33 +2884,21 @@ struct platform_driver bcm63xx_enet_shared_driver = { }, }; +static struct platform_driver * const drivers[] = { + &bcm63xx_enet_shared_driver, + &bcm63xx_enet_driver, + &bcm63xx_enetsw_driver, +}; + /* entry point */ static int __init bcm_enet_init(void) { - int ret; - - ret = platform_driver_register(&bcm63xx_enet_shared_driver); - if (ret) - return ret; - - ret = platform_driver_register(&bcm63xx_enet_driver); - if (ret) - platform_driver_unregister(&bcm63xx_enet_shared_driver); - - ret = platform_driver_register(&bcm63xx_enetsw_driver); - if (ret) { - platform_driver_unregister(&bcm63xx_enet_driver); - platform_driver_unregister(&bcm63xx_enet_shared_driver); - } - - return ret; + return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); } static void __exit bcm_enet_exit(void) { - platform_driver_unregister(&bcm63xx_enet_driver); - platform_driver_unregister(&bcm63xx_enetsw_driver); - platform_driver_unregister(&bcm63xx_enet_shared_driver); + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 0b214b5d944a..cae0956186ce 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -590,8 +590,6 @@ struct bnx2x_fastpath { /* The last maximal completed SGE */ u16 last_max_sge; __le16 *rx_cons_sb; - unsigned long rx_pkt, - rx_calls; /* TPA related */ struct bnx2x_agg_info *tpa_info; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index d9add7c02e42..b3552dd749c4 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -558,10 +558,8 @@ static int bnx2x_alloc_rx_sge(struct bnx2x *bp, struct bnx2x_fastpath *fp, put_page(pool->page); pool->page = alloc_pages(gfp_mask, PAGES_PER_SGE_SHIFT); - if (unlikely(!pool->page)) { - BNX2X_ERR("Can't alloc sge\n"); + if (unlikely(!pool->page)) return -ENOMEM; - } pool->offset = 0; } @@ -745,7 +743,7 @@ static void bnx2x_gro_receive(struct bnx2x *bp, struct bnx2x_fastpath *fp, bnx2x_gro_csum(bp, skb, bnx2x_gro_ipv6_csum); break; default: - BNX2X_ERR("Error: FW GRO supports only IPv4/IPv6, not 0x%04x\n", + WARN_ONCE(1, "Error: FW GRO supports only IPv4/IPv6, not 0x%04x\n", be16_to_cpu(skb->protocol)); } } @@ -1124,9 +1122,6 @@ next_cqe: bnx2x_update_rx_prod(bp, fp, bd_prod_fw, sw_comp_prod, fp->rx_sge_prod); - fp->rx_pkt += rx_pkt; - fp->rx_calls++; - return rx_pkt; } @@ -3206,42 +3201,32 @@ int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state) */ static int bnx2x_poll(struct napi_struct *napi, int budget) { - int work_done = 0; - u8 cos; struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath, napi); struct bnx2x *bp = fp->bp; + int rx_work_done; + u8 cos; - while (1) { #ifdef BNX2X_STOP_ON_ERROR - if (unlikely(bp->panic)) { - napi_complete(napi); - return 0; - } + if (unlikely(bp->panic)) { + napi_complete(napi); + return 0; + } #endif - for_each_cos_in_tx_queue(fp, cos) - if (bnx2x_tx_queue_has_work(fp->txdata_ptr[cos])) - bnx2x_tx_int(bp, fp->txdata_ptr[cos]); - - if (bnx2x_has_rx_work(fp)) { - work_done += bnx2x_rx_int(fp, budget - work_done); - - /* must not complete if we consumed full budget */ - if (work_done >= budget) - break; - } + for_each_cos_in_tx_queue(fp, cos) + if (bnx2x_tx_queue_has_work(fp->txdata_ptr[cos])) + bnx2x_tx_int(bp, fp->txdata_ptr[cos]); - /* Fall out from the NAPI loop if needed */ - if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) { + rx_work_done = (bnx2x_has_rx_work(fp)) ? bnx2x_rx_int(fp, budget) : 0; - /* No need to update SB for FCoE L2 ring as long as - * it's connected to the default SB and the SB - * has been updated when NAPI was scheduled. - */ - if (IS_FCOE_FP(fp)) { - napi_complete(napi); - break; - } + if (rx_work_done < budget) { + /* No need to update SB for FCoE L2 ring as long as + * it's connected to the default SB and the SB + * has been updated when NAPI was scheduled. + */ + if (IS_FCOE_FP(fp)) { + napi_complete(napi); + } else { bnx2x_update_fpsb_idx(fp); /* bnx2x_has_rx_work() reads the status block, * thus we need to ensure that status block indices @@ -3266,12 +3251,13 @@ static int bnx2x_poll(struct napi_struct *napi, int budget) bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID, le16_to_cpu(fp->fp_hc_idx), IGU_INT_ENABLE, 1); - break; + } else { + rx_work_done = budget; } } } - return work_done; + return rx_work_done; } /* we split the first BD into headers and data BDs @@ -4444,7 +4430,6 @@ static int bnx2x_alloc_rx_bds(struct bnx2x_fastpath *fp, /* Limit the CQE producer by the CQE ring size */ fp->rx_comp_prod = min_t(u16, NUM_RCQ_RINGS*RCQ_DESC_CNT, cqe_ring_prod); - fp->rx_pkt = fp->rx_calls = 0; bnx2x_fp_stats(bp, fp)->eth_q_stats.rx_skb_alloc_failed += failure_cnt; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index a3ce9f2a2335..820b7e04bb5f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -74,118 +74,115 @@ static const struct { static const struct { long offset; int size; - u32 flags; -#define STATS_FLAGS_PORT 1 -#define STATS_FLAGS_FUNC 2 -#define STATS_FLAGS_BOTH (STATS_FLAGS_FUNC | STATS_FLAGS_PORT) + bool is_port_stat; char string[ETH_GSTRING_LEN]; } bnx2x_stats_arr[] = { /* 1 */ { STATS_OFFSET32(total_bytes_received_hi), - 8, STATS_FLAGS_BOTH, "rx_bytes" }, + 8, false, "rx_bytes" }, { STATS_OFFSET32(error_bytes_received_hi), - 8, STATS_FLAGS_BOTH, "rx_error_bytes" }, + 8, false, "rx_error_bytes" }, { STATS_OFFSET32(total_unicast_packets_received_hi), - 8, STATS_FLAGS_BOTH, "rx_ucast_packets" }, + 8, false, "rx_ucast_packets" }, { STATS_OFFSET32(total_multicast_packets_received_hi), - 8, STATS_FLAGS_BOTH, "rx_mcast_packets" }, + 8, false, "rx_mcast_packets" }, { STATS_OFFSET32(total_broadcast_packets_received_hi), - 8, STATS_FLAGS_BOTH, "rx_bcast_packets" }, + 8, false, "rx_bcast_packets" }, { STATS_OFFSET32(rx_stat_dot3statsfcserrors_hi), - 8, STATS_FLAGS_PORT, "rx_crc_errors" }, + 8, true, "rx_crc_errors" }, { STATS_OFFSET32(rx_stat_dot3statsalignmenterrors_hi), - 8, STATS_FLAGS_PORT, "rx_align_errors" }, + 8, true, "rx_align_errors" }, { STATS_OFFSET32(rx_stat_etherstatsundersizepkts_hi), - 8, STATS_FLAGS_PORT, "rx_undersize_packets" }, + 8, true, "rx_undersize_packets" }, { STATS_OFFSET32(etherstatsoverrsizepkts_hi), - 8, STATS_FLAGS_PORT, "rx_oversize_packets" }, + 8, true, "rx_oversize_packets" }, /* 10 */{ STATS_OFFSET32(rx_stat_etherstatsfragments_hi), - 8, STATS_FLAGS_PORT, "rx_fragments" }, + 8, true, "rx_fragments" }, { STATS_OFFSET32(rx_stat_etherstatsjabbers_hi), - 8, STATS_FLAGS_PORT, "rx_jabbers" }, + 8, true, "rx_jabbers" }, { STATS_OFFSET32(no_buff_discard_hi), - 8, STATS_FLAGS_BOTH, "rx_discards" }, + 8, false, "rx_discards" }, { STATS_OFFSET32(mac_filter_discard), - 4, STATS_FLAGS_PORT, "rx_filtered_packets" }, + 4, true, "rx_filtered_packets" }, { STATS_OFFSET32(mf_tag_discard), - 4, STATS_FLAGS_PORT, "rx_mf_tag_discard" }, + 4, true, "rx_mf_tag_discard" }, { STATS_OFFSET32(pfc_frames_received_hi), - 8, STATS_FLAGS_PORT, "pfc_frames_received" }, + 8, true, "pfc_frames_received" }, { STATS_OFFSET32(pfc_frames_sent_hi), - 8, STATS_FLAGS_PORT, "pfc_frames_sent" }, + 8, true, "pfc_frames_sent" }, { STATS_OFFSET32(brb_drop_hi), - 8, STATS_FLAGS_PORT, "rx_brb_discard" }, + 8, true, "rx_brb_discard" }, { STATS_OFFSET32(brb_truncate_hi), - 8, STATS_FLAGS_PORT, "rx_brb_truncate" }, + 8, true, "rx_brb_truncate" }, { STATS_OFFSET32(pause_frames_received_hi), - 8, STATS_FLAGS_PORT, "rx_pause_frames" }, + 8, true, "rx_pause_frames" }, { STATS_OFFSET32(rx_stat_maccontrolframesreceived_hi), - 8, STATS_FLAGS_PORT, "rx_mac_ctrl_frames" }, + 8, true, "rx_mac_ctrl_frames" }, { STATS_OFFSET32(nig_timer_max), - 4, STATS_FLAGS_PORT, "rx_constant_pause_events" }, + 4, true, "rx_constant_pause_events" }, /* 20 */{ STATS_OFFSET32(rx_err_discard_pkt), - 4, STATS_FLAGS_BOTH, "rx_phy_ip_err_discards"}, + 4, false, "rx_phy_ip_err_discards"}, { STATS_OFFSET32(rx_skb_alloc_failed), - 4, STATS_FLAGS_BOTH, "rx_skb_alloc_discard" }, + 4, false, "rx_skb_alloc_discard" }, { STATS_OFFSET32(hw_csum_err), - 4, STATS_FLAGS_BOTH, "rx_csum_offload_errors" }, + 4, false, "rx_csum_offload_errors" }, { STATS_OFFSET32(driver_xoff), - 4, STATS_FLAGS_BOTH, "tx_exhaustion_events" }, + 4, false, "tx_exhaustion_events" }, { STATS_OFFSET32(total_bytes_transmitted_hi), - 8, STATS_FLAGS_BOTH, "tx_bytes" }, + 8, false, "tx_bytes" }, { STATS_OFFSET32(tx_stat_ifhcoutbadoctets_hi), - 8, STATS_FLAGS_PORT, "tx_error_bytes" }, + 8, true, "tx_error_bytes" }, { STATS_OFFSET32(total_unicast_packets_transmitted_hi), - 8, STATS_FLAGS_BOTH, "tx_ucast_packets" }, + 8, false, "tx_ucast_packets" }, { STATS_OFFSET32(total_multicast_packets_transmitted_hi), - 8, STATS_FLAGS_BOTH, "tx_mcast_packets" }, + 8, false, "tx_mcast_packets" }, { STATS_OFFSET32(total_broadcast_packets_transmitted_hi), - 8, STATS_FLAGS_BOTH, "tx_bcast_packets" }, + 8, false, "tx_bcast_packets" }, { STATS_OFFSET32(tx_stat_dot3statsinternalmactransmiterrors_hi), - 8, STATS_FLAGS_PORT, "tx_mac_errors" }, + 8, true, "tx_mac_errors" }, { STATS_OFFSET32(rx_stat_dot3statscarriersenseerrors_hi), - 8, STATS_FLAGS_PORT, "tx_carrier_errors" }, + 8, true, "tx_carrier_errors" }, /* 30 */{ STATS_OFFSET32(tx_stat_dot3statssinglecollisionframes_hi), - 8, STATS_FLAGS_PORT, "tx_single_collisions" }, + 8, true, "tx_single_collisions" }, { STATS_OFFSET32(tx_stat_dot3statsmultiplecollisionframes_hi), - 8, STATS_FLAGS_PORT, "tx_multi_collisions" }, + 8, true, "tx_multi_collisions" }, { STATS_OFFSET32(tx_stat_dot3statsdeferredtransmissions_hi), - 8, STATS_FLAGS_PORT, "tx_deferred" }, + 8, true, "tx_deferred" }, { STATS_OFFSET32(tx_stat_dot3statsexcessivecollisions_hi), - 8, STATS_FLAGS_PORT, "tx_excess_collisions" }, + 8, true, "tx_excess_collisions" }, { STATS_OFFSET32(tx_stat_dot3statslatecollisions_hi), - 8, STATS_FLAGS_PORT, "tx_late_collisions" }, + 8, true, "tx_late_collisions" }, { STATS_OFFSET32(tx_stat_etherstatscollisions_hi), - 8, STATS_FLAGS_PORT, "tx_total_collisions" }, + 8, true, "tx_total_collisions" }, { STATS_OFFSET32(tx_stat_etherstatspkts64octets_hi), - 8, STATS_FLAGS_PORT, "tx_64_byte_packets" }, + 8, true, "tx_64_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts65octetsto127octets_hi), - 8, STATS_FLAGS_PORT, "tx_65_to_127_byte_packets" }, + 8, true, "tx_65_to_127_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts128octetsto255octets_hi), - 8, STATS_FLAGS_PORT, "tx_128_to_255_byte_packets" }, + 8, true, "tx_128_to_255_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts256octetsto511octets_hi), - 8, STATS_FLAGS_PORT, "tx_256_to_511_byte_packets" }, + 8, true, "tx_256_to_511_byte_packets" }, /* 40 */{ STATS_OFFSET32(tx_stat_etherstatspkts512octetsto1023octets_hi), - 8, STATS_FLAGS_PORT, "tx_512_to_1023_byte_packets" }, + 8, true, "tx_512_to_1023_byte_packets" }, { STATS_OFFSET32(etherstatspkts1024octetsto1522octets_hi), - 8, STATS_FLAGS_PORT, "tx_1024_to_1522_byte_packets" }, + 8, true, "tx_1024_to_1522_byte_packets" }, { STATS_OFFSET32(etherstatspktsover1522octets_hi), - 8, STATS_FLAGS_PORT, "tx_1523_to_9022_byte_packets" }, + 8, true, "tx_1523_to_9022_byte_packets" }, { STATS_OFFSET32(pause_frames_sent_hi), - 8, STATS_FLAGS_PORT, "tx_pause_frames" }, + 8, true, "tx_pause_frames" }, { STATS_OFFSET32(total_tpa_aggregations_hi), - 8, STATS_FLAGS_FUNC, "tpa_aggregations" }, + 8, false, "tpa_aggregations" }, { STATS_OFFSET32(total_tpa_aggregated_frames_hi), - 8, STATS_FLAGS_FUNC, "tpa_aggregated_frames"}, + 8, false, "tpa_aggregated_frames"}, { STATS_OFFSET32(total_tpa_bytes_hi), - 8, STATS_FLAGS_FUNC, "tpa_bytes"}, + 8, false, "tpa_bytes"}, { STATS_OFFSET32(recoverable_error), - 4, STATS_FLAGS_FUNC, "recoverable_errors" }, + 4, false, "recoverable_errors" }, { STATS_OFFSET32(unrecoverable_error), - 4, STATS_FLAGS_FUNC, "unrecoverable_errors" }, + 4, false, "unrecoverable_errors" }, { STATS_OFFSET32(driver_filtered_tx_pkt), - 4, STATS_FLAGS_FUNC, "driver_filtered_tx_pkt" }, + 4, false, "driver_filtered_tx_pkt" }, { STATS_OFFSET32(eee_tx_lpi), - 4, STATS_FLAGS_PORT, "Tx LPI entry count"} + 4, true, "Tx LPI entry count"} }; #define BNX2X_NUM_STATS ARRAY_SIZE(bnx2x_stats_arr) @@ -3066,9 +3063,7 @@ static void bnx2x_self_test(struct net_device *dev, } } -#define IS_PORT_STAT(i) \ - ((bnx2x_stats_arr[i].flags & STATS_FLAGS_BOTH) == STATS_FLAGS_PORT) -#define IS_FUNC_STAT(i) (bnx2x_stats_arr[i].flags & STATS_FLAGS_FUNC) +#define IS_PORT_STAT(i) (bnx2x_stats_arr[i].is_port_stat) #define HIDE_PORT_STAT(bp) IS_VF(bp) /* ethtool statistics are displayed for all regular ethernet queues and the @@ -3093,7 +3088,7 @@ static int bnx2x_get_sset_count(struct net_device *dev, int stringset) num_strings = 0; if (HIDE_PORT_STAT(bp)) { for (i = 0; i < BNX2X_NUM_STATS; i++) - if (IS_FUNC_STAT(i)) + if (!IS_PORT_STAT(i)) num_strings++; } else num_strings += BNX2X_NUM_STATS; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 2273576404b4..6c4e3a69976f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10139,8 +10139,8 @@ static void __bnx2x_del_vxlan_port(struct bnx2x *bp, u16 port) DP(BNX2X_MSG_SP, "Invalid vxlan port\n"); return; } - bp->vxlan_dst_port--; - if (bp->vxlan_dst_port) + bp->vxlan_dst_port_count--; + if (bp->vxlan_dst_port_count) return; if (netif_running(bp->dev)) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f2d0dc9b1c41..11446adc03cc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3625,6 +3625,7 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp) pf->fw_fid = le16_to_cpu(resp->fid); pf->port_id = le16_to_cpu(resp->port_id); memcpy(pf->mac_addr, resp->perm_mac_address, ETH_ALEN); + memcpy(bp->dev->dev_addr, pf->mac_addr, ETH_ALEN); pf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx); pf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings); pf->max_tx_rings = le16_to_cpu(resp->max_tx_rings); @@ -3648,8 +3649,11 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp) vf->fw_fid = le16_to_cpu(resp->fid); memcpy(vf->mac_addr, resp->perm_mac_address, ETH_ALEN); - if (!is_valid_ether_addr(vf->mac_addr)) - random_ether_addr(vf->mac_addr); + if (is_valid_ether_addr(vf->mac_addr)) + /* overwrite netdev dev_adr with admin VF MAC */ + memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN); + else + random_ether_addr(bp->dev->dev_addr); vf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx); vf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings); @@ -3880,6 +3884,8 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp) #endif } +static int bnxt_cfg_rx_mode(struct bnxt *); + static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) { int rc = 0; @@ -3946,11 +3952,9 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) bp->vnic_info[0].rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; - rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); - if (rc) { - netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n", rc); + rc = bnxt_cfg_rx_mode(bp); + if (rc) goto err_out; - } rc = bnxt_hwrm_set_coal(bp); if (rc) @@ -4863,7 +4867,7 @@ static void bnxt_set_rx_mode(struct net_device *dev) } } -static void bnxt_cfg_rx_mode(struct bnxt *bp) +static int bnxt_cfg_rx_mode(struct bnxt *bp) { struct net_device *dev = bp->dev; struct bnxt_vnic_info *vnic = &bp->vnic_info[0]; @@ -4912,6 +4916,7 @@ static void bnxt_cfg_rx_mode(struct bnxt *bp) netdev_err(bp->dev, "HWRM vnic filter failure rc: %x\n", rc); vnic->uc_filter_count = i; + return rc; } } @@ -4920,6 +4925,8 @@ skip_uc: if (rc) netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n", rc); + + return rc; } static netdev_features_t bnxt_fix_features(struct net_device *dev, @@ -5210,13 +5217,27 @@ init_err: static int bnxt_change_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; + struct bnxt *bp = netdev_priv(dev); + int rc = 0; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; +#ifdef CONFIG_BNXT_SRIOV + if (BNXT_VF(bp) && is_valid_ether_addr(bp->vf.mac_addr)) + return -EADDRNOTAVAIL; +#endif + + if (ether_addr_equal(addr->sa_data, dev->dev_addr)) + return 0; + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + if (netif_running(dev)) { + bnxt_close_nic(bp, false, false); + rc = bnxt_open_nic(bp, false, false); + } - return 0; + return rc; } /* rtnl_lock held */ @@ -5684,15 +5705,12 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bnxt_set_tpa_flags(bp); bnxt_set_ring_params(bp); dflt_rings = netif_get_num_default_rss_queues(); - if (BNXT_PF(bp)) { - memcpy(dev->dev_addr, bp->pf.mac_addr, ETH_ALEN); + if (BNXT_PF(bp)) bp->pf.max_irqs = max_irqs; - } else { #if defined(CONFIG_BNXT_SRIOV) - memcpy(dev->dev_addr, bp->vf.mac_addr, ETH_ALEN); + else bp->vf.max_irqs = max_irqs; #endif - } bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings); bp->rx_nr_rings = min_t(int, dflt_rings, max_rx_rings); bp->tx_nr_rings_per_tc = min_t(int, dflt_rings, max_tx_rings); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index f4cf68861069..7a9af2887d8e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -804,10 +804,9 @@ void bnxt_update_vf_mac(struct bnxt *bp) if (!is_valid_ether_addr(resp->perm_mac_address)) goto update_vf_mac_exit; - if (ether_addr_equal(resp->perm_mac_address, bp->vf.mac_addr)) - goto update_vf_mac_exit; - - memcpy(bp->vf.mac_addr, resp->perm_mac_address, ETH_ALEN); + if (!ether_addr_equal(resp->perm_mac_address, bp->vf.mac_addr)) + memcpy(bp->vf.mac_addr, resp->perm_mac_address, ETH_ALEN); + /* overwrite netdev dev_adr with admin VF MAC */ memcpy(bp->dev->dev_addr, bp->vf.mac_addr, ETH_ALEN); update_vf_mac_exit: mutex_unlock(&bp->hwrm_cmd_lock); diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 88c1e1a834f8..169059c92f80 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -1682,6 +1682,8 @@ static void macb_init_hw(struct macb *bp) macb_set_hwaddr(bp); config = macb_mdc_clk_div(bp); + if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) + config |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL); config |= MACB_BF(RBOF, NET_IP_ALIGN); /* Make eth data aligned */ config |= MACB_BIT(PAE); /* PAuse Enable */ config |= MACB_BIT(DRFCS); /* Discard Rx FCS */ @@ -2416,6 +2418,8 @@ static int macb_init(struct platform_device *pdev) /* Set MII management clock divider */ val = macb_mdc_clk_div(bp); val |= macb_dbw(bp); + if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) + val |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL); macb_writel(bp, NCFGR, val); return 0; diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 6e1faea00ca8..d83b0db77821 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -215,12 +215,17 @@ /* GEM specific NCFGR bitfields. */ #define GEM_GBE_OFFSET 10 /* Gigabit mode enable */ #define GEM_GBE_SIZE 1 +#define GEM_PCSSEL_OFFSET 11 +#define GEM_PCSSEL_SIZE 1 #define GEM_CLK_OFFSET 18 /* MDC clock division */ #define GEM_CLK_SIZE 3 #define GEM_DBW_OFFSET 21 /* Data bus width */ #define GEM_DBW_SIZE 2 #define GEM_RXCOEN_OFFSET 24 #define GEM_RXCOEN_SIZE 1 +#define GEM_SGMIIEN_OFFSET 27 +#define GEM_SGMIIEN_SIZE 1 + /* Constants for data bus width. */ #define GEM_DBW32 0 /* 32 bit AMBA AHB data bus width */ diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index d3950b20feb9..688828865c48 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -120,10 +120,9 @@ * Calculated for SCLK of 700Mhz * value written should be a 1/16th of what is expected * - * 1 tick per 0.05usec = value of 2.2 - * This 10% would be covered in CQ timer thresh value + * 1 tick per 0.025usec */ -#define NICPF_CLK_PER_INT_TICK 2 +#define NICPF_CLK_PER_INT_TICK 1 /* Time to wait before we decide that a SQ is stuck. * @@ -266,6 +265,7 @@ struct nicvf { u8 tns_mode:1; u8 sqs_mode:1; u8 loopback_supported:1; + bool hw_tso; u16 mtu; struct queue_set *qs; #define MAX_SQS_PER_VF_SINGLE_NODE 5 @@ -490,6 +490,11 @@ static inline int nic_get_node_id(struct pci_dev *pdev) return ((addr >> NIC_NODE_ID_SHIFT) & NIC_NODE_ID_MASK); } +static inline bool pass1_silicon(struct pci_dev *pdev) +{ + return pdev->revision < 8; +} + int nicvf_set_real_num_queues(struct net_device *netdev, int tx_queues, int rx_queues); int nicvf_open(struct net_device *netdev); diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index c561fdcb79a7..9f80de4d5016 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -37,6 +37,7 @@ struct nicpf { #define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF) #define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map) (map & 0xF) u8 vf_lmac_map[MAX_LMAC]; + u8 lmac_cnt; struct delayed_work dwork; struct workqueue_struct *check_link; u8 link[MAX_LMAC]; @@ -54,11 +55,6 @@ struct nicpf { bool irq_allocated[NIC_PF_MSIX_VECTORS]; }; -static inline bool pass1_silicon(struct nicpf *nic) -{ - return nic->pdev->revision < 8; -} - /* Supported devices */ static const struct pci_device_id nic_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_NIC_PF) }, @@ -122,7 +118,7 @@ static void nic_send_msg_to_vf(struct nicpf *nic, int vf, union nic_mbx *mbx) * when PF writes to MBOX(1), in next revisions when * PF writes to MBOX(0) */ - if (pass1_silicon(nic)) { + if (pass1_silicon(nic->pdev)) { /* see the comment for nic_reg_write()/nic_reg_read() * functions above */ @@ -279,6 +275,7 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic) u64 lmac_credit; nic->num_vf_en = 0; + nic->lmac_cnt = 0; for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) { if (!(bgx_map & (1 << bgx))) @@ -288,6 +285,7 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic) nic->vf_lmac_map[next_bgx_lmac++] = NIC_SET_VF_LMAC_MAP(bgx, lmac); nic->num_vf_en += lmac_cnt; + nic->lmac_cnt += lmac_cnt; /* Program LMAC credits */ lmac_credit = (1ull << 1); /* channel credit enable */ @@ -397,7 +395,7 @@ static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg) padd = cpi % 8; /* 3 bits CS out of 6bits DSCP */ /* Leave RSS_SIZE as '0' to disable RSS */ - if (pass1_silicon(nic)) { + if (pass1_silicon(nic->pdev)) { nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi << 3), (vnic << 24) | (padd << 16) | (rssi_base + rssi)); @@ -467,7 +465,7 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg) } cpi_base = nic->cpi_base[cfg->vf_id]; - if (pass1_silicon(nic)) + if (pass1_silicon(nic->pdev)) idx_addr = NIC_PF_CPI_0_2047_CFG; else idx_addr = NIC_PF_MPI_0_2047_CFG; @@ -715,6 +713,13 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) case NIC_MBOX_MSG_CFG_DONE: /* Last message of VF config msg sequence */ nic->vf_enabled[vf] = true; + if (vf >= nic->lmac_cnt) + goto unlock; + + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + + bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, true); goto unlock; case NIC_MBOX_MSG_SHUTDOWN: /* First msg in VF teardown sequence */ @@ -722,6 +727,14 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) if (vf >= nic->num_vf_en) nic->sqs_used[vf - nic->num_vf_en] = false; nic->pqs_vf[vf] = 0; + + if (vf >= nic->lmac_cnt) + break; + + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + + bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, false); break; case NIC_MBOX_MSG_ALLOC_SQS: nic_alloc_sqs(nic, &mbx.sqs_alloc); @@ -940,7 +953,7 @@ static void nic_poll_for_link(struct work_struct *work) mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE; - for (vf = 0; vf < nic->num_vf_en; vf++) { + for (vf = 0; vf < nic->lmac_cnt; vf++) { /* Poll only if VF is UP */ if (!nic->vf_enabled[vf]) continue; @@ -1074,8 +1087,7 @@ static void nic_remove(struct pci_dev *pdev) if (nic->check_link) { /* Destroy work Queue */ - cancel_delayed_work(&nic->dwork); - flush_workqueue(nic->check_link); + cancel_delayed_work_sync(&nic->dwork); destroy_workqueue(nic->check_link); } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index af54c10945c2..a12b2e38cf61 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -112,6 +112,13 @@ static int nicvf_get_settings(struct net_device *netdev, cmd->supported = 0; cmd->transceiver = XCVR_EXTERNAL; + + if (!nic->link_up) { + cmd->duplex = DUPLEX_UNKNOWN; + ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN); + return 0; + } + if (nic->speed <= 1000) { cmd->port = PORT_MII; cmd->autoneg = AUTONEG_ENABLE; @@ -125,6 +132,13 @@ static int nicvf_get_settings(struct net_device *netdev, return 0; } +static u32 nicvf_get_link(struct net_device *netdev) +{ + struct nicvf *nic = netdev_priv(netdev); + + return nic->link_up; +} + static void nicvf_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { @@ -660,7 +674,7 @@ static int nicvf_set_channels(struct net_device *dev, static const struct ethtool_ops nicvf_ethtool_ops = { .get_settings = nicvf_get_settings, - .get_link = ethtool_op_get_link, + .get_link = nicvf_get_link, .get_drvinfo = nicvf_get_drvinfo, .get_msglevel = nicvf_get_msglevel, .set_msglevel = nicvf_set_msglevel, diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 7f709cbdcd87..c24cb2a86a42 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -525,14 +525,22 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, __func__, cqe_tx->sq_qs, cqe_tx->sq_idx, cqe_tx->sqe_ptr, hdr->subdesc_cnt); - nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); nicvf_check_cqe_tx_errs(nic, cq, cqe_tx); skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr]; - /* For TSO offloaded packets only one head SKB needs to be freed */ + /* For TSO offloaded packets only one SQE will have a valid SKB */ if (skb) { + nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); prefetch(skb); dev_consume_skb_any(skb); sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL; + } else { + /* In case of HW TSO, HW sends a CQE for each segment of a TSO + * packet instead of a single CQE for the whole TSO packet + * transmitted. Each of this CQE points to the same SQE, so + * avoid freeing same SQE multiple times. + */ + if (!nic->hw_tso) + nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); } } @@ -1057,6 +1065,7 @@ int nicvf_stop(struct net_device *netdev) netif_carrier_off(netdev); netif_tx_stop_all_queues(nic->netdev); + nic->link_up = false; /* Teardown secondary qsets first */ if (!nic->sqs_mode) { @@ -1211,9 +1220,6 @@ int nicvf_open(struct net_device *netdev) nic->drv_stats.txq_stop = 0; nic->drv_stats.txq_wake = 0; - netif_carrier_on(netdev); - netif_tx_start_all_queues(netdev); - return 0; cleanup: nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); @@ -1551,6 +1557,9 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO; + if (!pass1_silicon(nic->pdev)) + nic->hw_tso = true; + netdev->netdev_ops = &nicvf_netdev_ops; netdev->watchdog_timeo = NICVF_TX_TIMEOUT; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index e404ea837727..d0d1b5490061 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -18,14 +18,6 @@ #include "q_struct.h" #include "nicvf_queues.h" -struct rbuf_info { - struct page *page; - void *data; - u64 offset; -}; - -#define GET_RBUF_INFO(x) ((struct rbuf_info *)(x - NICVF_RCV_BUF_ALIGN_BYTES)) - /* Poll a register for a specific value */ static int nicvf_poll_reg(struct nicvf *nic, int qidx, u64 reg, int bit_pos, int bits, int val) @@ -86,8 +78,6 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem) static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, u32 buf_len, u64 **rbuf) { - u64 data; - struct rbuf_info *rinfo; int order = get_order(buf_len); /* Check if request can be accomodated in previous allocated page */ @@ -113,46 +103,28 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, nic->rb_page_offset = 0; } - data = (u64)page_address(nic->rb_page) + nic->rb_page_offset; - - /* Align buffer addr to cache line i.e 128 bytes */ - rinfo = (struct rbuf_info *)(data + NICVF_RCV_BUF_ALIGN_LEN(data)); - /* Save page address for reference updation */ - rinfo->page = nic->rb_page; - /* Store start address for later retrieval */ - rinfo->data = (void *)data; - /* Store alignment offset */ - rinfo->offset = NICVF_RCV_BUF_ALIGN_LEN(data); + *rbuf = (u64 *)((u64)page_address(nic->rb_page) + nic->rb_page_offset); - data += rinfo->offset; - - /* Give next aligned address to hw for DMA */ - *rbuf = (u64 *)(data + NICVF_RCV_BUF_ALIGN_BYTES); return 0; } -/* Retrieve actual buffer start address and build skb for received packet */ +/* Build skb around receive buffer */ static struct sk_buff *nicvf_rb_ptr_to_skb(struct nicvf *nic, u64 rb_ptr, int len) { + void *data; struct sk_buff *skb; - struct rbuf_info *rinfo; - rb_ptr = (u64)phys_to_virt(rb_ptr); - /* Get buffer start address and alignment offset */ - rinfo = GET_RBUF_INFO(rb_ptr); + data = phys_to_virt(rb_ptr); /* Now build an skb to give to stack */ - skb = build_skb(rinfo->data, RCV_FRAG_LEN); + skb = build_skb(data, RCV_FRAG_LEN); if (!skb) { - put_page(rinfo->page); + put_page(virt_to_page(data)); return NULL; } - /* Set correct skb->data */ - skb_reserve(skb, rinfo->offset + NICVF_RCV_BUF_ALIGN_BYTES); - - prefetch((void *)rb_ptr); + prefetch(skb->data); return skb; } @@ -196,7 +168,6 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) int head, tail; u64 buf_addr; struct rbdr_entry_t *desc; - struct rbuf_info *rinfo; if (!rbdr) return; @@ -212,16 +183,14 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) while (head != tail) { desc = GET_RBDR_DESC(rbdr, head); buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN; - rinfo = GET_RBUF_INFO((u64)phys_to_virt(buf_addr)); - put_page(rinfo->page); + put_page(virt_to_page(phys_to_virt(buf_addr))); head++; head &= (rbdr->dmem.q_len - 1); } /* Free SKB of tail desc */ desc = GET_RBDR_DESC(rbdr, tail); buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN; - rinfo = GET_RBUF_INFO((u64)phys_to_virt(buf_addr)); - put_page(rinfo->page); + put_page(virt_to_page(phys_to_virt(buf_addr))); /* Free RBDR ring */ nicvf_free_q_desc_mem(nic, &rbdr->dmem); @@ -330,7 +299,7 @@ static int nicvf_init_cmp_queue(struct nicvf *nic, return err; cq->desc = cq->dmem.base; - cq->thresh = CMP_QUEUE_CQE_THRESH; + cq->thresh = pass1_silicon(nic->pdev) ? 0 : CMP_QUEUE_CQE_THRESH; nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1; return 0; @@ -592,7 +561,7 @@ void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs, /* Set threshold value for interrupt generation */ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh); nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, - qidx, nic->cq_coalesce_usecs); + qidx, CMP_QUEUE_TIMER_THRESH); } /* Configures transmit queue */ @@ -956,7 +925,7 @@ static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb) { int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT; - if (skb_shinfo(skb)->gso_size) { + if (skb_shinfo(skb)->gso_size && !nic->hw_tso) { subdesc_cnt = nicvf_tso_count_subdescs(skb); return subdesc_cnt; } @@ -971,7 +940,7 @@ static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb) * First subdescriptor for every send descriptor. */ static inline void -nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry, +nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry, int subdesc_cnt, struct sk_buff *skb, int len) { int proto; @@ -1007,6 +976,15 @@ nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry, break; } } + + if (nic->hw_tso && skb_shinfo(skb)->gso_size) { + hdr->tso = 1; + hdr->tso_start = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr->tso_max_paysize = skb_shinfo(skb)->gso_size; + /* For non-tunneled pkts, point this to L2 ethertype */ + hdr->inner_l3_offset = skb_network_offset(skb) - 2; + nic->drv_stats.tx_tso++; + } } /* SQ GATHER subdescriptor @@ -1076,7 +1054,7 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq, data_left -= size; tso_build_data(skb, &tso, size); } - nicvf_sq_add_hdr_subdesc(sq, hdr_qentry, + nicvf_sq_add_hdr_subdesc(nic, sq, hdr_qentry, seg_subdescs - 1, skb, seg_len); sq->skbuff[hdr_qentry] = (u64)NULL; qentry = nicvf_get_nxt_sqentry(sq, qentry); @@ -1129,11 +1107,12 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb) qentry = nicvf_get_sq_desc(sq, subdesc_cnt); /* Check if its a TSO packet */ - if (skb_shinfo(skb)->gso_size) + if (skb_shinfo(skb)->gso_size && !nic->hw_tso) return nicvf_sq_append_tso(nic, sq, sq_num, qentry, skb); /* Add SQ header subdesc */ - nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, skb, skb->len); + nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1, + skb, skb->len); /* Add SQ gather subdescs */ qentry = nicvf_get_nxt_sqentry(sq, qentry); @@ -1234,153 +1213,93 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) return skb; } -/* Enable interrupt */ -void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx) +static u64 nicvf_int_type_to_mask(int int_type, int q_idx) { u64 reg_val; - reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S); - switch (int_type) { case NICVF_INTR_CQ: - reg_val |= ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); + reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); break; case NICVF_INTR_SQ: - reg_val |= ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); + reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); break; case NICVF_INTR_RBDR: - reg_val |= ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); + reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); break; case NICVF_INTR_PKT_DROP: - reg_val |= (1ULL << NICVF_INTR_PKT_DROP_SHIFT); + reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT); break; case NICVF_INTR_TCP_TIMER: - reg_val |= (1ULL << NICVF_INTR_TCP_TIMER_SHIFT); + reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT); break; case NICVF_INTR_MBOX: - reg_val |= (1ULL << NICVF_INTR_MBOX_SHIFT); + reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT); break; case NICVF_INTR_QS_ERR: - reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT); + reg_val = (1ULL << NICVF_INTR_QS_ERR_SHIFT); break; default: - netdev_err(nic->netdev, - "Failed to enable interrupt: unknown type\n"); - break; + reg_val = 0; } - nicvf_reg_write(nic, NIC_VF_ENA_W1S, reg_val); + return reg_val; +} + +/* Enable interrupt */ +void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx) +{ + u64 mask = nicvf_int_type_to_mask(int_type, q_idx); + + if (!mask) { + netdev_dbg(nic->netdev, + "Failed to enable interrupt: unknown type\n"); + return; + } + nicvf_reg_write(nic, NIC_VF_ENA_W1S, + nicvf_reg_read(nic, NIC_VF_ENA_W1S) | mask); } /* Disable interrupt */ void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx) { - u64 reg_val = 0; + u64 mask = nicvf_int_type_to_mask(int_type, q_idx); - switch (int_type) { - case NICVF_INTR_CQ: - reg_val |= ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); - break; - case NICVF_INTR_SQ: - reg_val |= ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); - break; - case NICVF_INTR_RBDR: - reg_val |= ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); - break; - case NICVF_INTR_PKT_DROP: - reg_val |= (1ULL << NICVF_INTR_PKT_DROP_SHIFT); - break; - case NICVF_INTR_TCP_TIMER: - reg_val |= (1ULL << NICVF_INTR_TCP_TIMER_SHIFT); - break; - case NICVF_INTR_MBOX: - reg_val |= (1ULL << NICVF_INTR_MBOX_SHIFT); - break; - case NICVF_INTR_QS_ERR: - reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT); - break; - default: - netdev_err(nic->netdev, + if (!mask) { + netdev_dbg(nic->netdev, "Failed to disable interrupt: unknown type\n"); - break; + return; } - nicvf_reg_write(nic, NIC_VF_ENA_W1C, reg_val); + nicvf_reg_write(nic, NIC_VF_ENA_W1C, mask); } /* Clear interrupt */ void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx) { - u64 reg_val = 0; + u64 mask = nicvf_int_type_to_mask(int_type, q_idx); - switch (int_type) { - case NICVF_INTR_CQ: - reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); - break; - case NICVF_INTR_SQ: - reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); - break; - case NICVF_INTR_RBDR: - reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); - break; - case NICVF_INTR_PKT_DROP: - reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT); - break; - case NICVF_INTR_TCP_TIMER: - reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT); - break; - case NICVF_INTR_MBOX: - reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT); - break; - case NICVF_INTR_QS_ERR: - reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT); - break; - default: - netdev_err(nic->netdev, + if (!mask) { + netdev_dbg(nic->netdev, "Failed to clear interrupt: unknown type\n"); - break; + return; } - nicvf_reg_write(nic, NIC_VF_INT, reg_val); + nicvf_reg_write(nic, NIC_VF_INT, mask); } /* Check if interrupt is enabled */ int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx) { - u64 reg_val; - u64 mask = 0xff; - - reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S); - - switch (int_type) { - case NICVF_INTR_CQ: - mask = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); - break; - case NICVF_INTR_SQ: - mask = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); - break; - case NICVF_INTR_RBDR: - mask = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); - break; - case NICVF_INTR_PKT_DROP: - mask = NICVF_INTR_PKT_DROP_MASK; - break; - case NICVF_INTR_TCP_TIMER: - mask = NICVF_INTR_TCP_TIMER_MASK; - break; - case NICVF_INTR_MBOX: - mask = NICVF_INTR_MBOX_MASK; - break; - case NICVF_INTR_QS_ERR: - mask = NICVF_INTR_QS_ERR_MASK; - break; - default: - netdev_err(nic->netdev, + u64 mask = nicvf_int_type_to_mask(int_type, q_idx); + /* If interrupt type is unknown, we treat it disabled. */ + if (!mask) { + netdev_dbg(nic->netdev, "Failed to check interrupt enable: unknown type\n"); - break; + return 0; } - return (reg_val & mask); + return mask & nicvf_reg_read(nic, NIC_VF_ENA_W1S); } void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index fb4957d09914..c5030a7f213a 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -75,18 +75,16 @@ */ #define CMP_QSIZE CMP_QUEUE_SIZE2 #define CMP_QUEUE_LEN (1ULL << (CMP_QSIZE + 10)) -#define CMP_QUEUE_CQE_THRESH 0 -#define CMP_QUEUE_TIMER_THRESH 220 /* 10usec */ +#define CMP_QUEUE_CQE_THRESH (NAPI_POLL_WEIGHT / 2) +#define CMP_QUEUE_TIMER_THRESH 80 /* ~2usec */ #define RBDR_SIZE RBDR_SIZE0 #define RCV_BUF_COUNT (1ULL << (RBDR_SIZE + 13)) #define MAX_RCV_BUF_COUNT (1ULL << (RBDR_SIZE6 + 13)) #define RBDR_THRESH (RCV_BUF_COUNT / 2) #define DMA_BUFFER_LEN 2048 /* In multiples of 128bytes */ -#define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \ - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + \ - (NICVF_RCV_BUF_ALIGN_BYTES * 2)) -#define RCV_DATA_OFFSET NICVF_RCV_BUF_ALIGN_BYTES +#define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define MAX_CQES_FOR_TX ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \ MAX_CQE_PER_PKT_XMIT) @@ -108,10 +106,6 @@ #define NICVF_SQ_BASE_ALIGN_BYTES 128 /* 7 bits */ #define NICVF_ALIGNED_ADDR(ADDR, ALIGN_BYTES) ALIGN(ADDR, ALIGN_BYTES) -#define NICVF_ADDR_ALIGN_LEN(ADDR, BYTES)\ - (NICVF_ALIGNED_ADDR(ADDR, BYTES) - BYTES) -#define NICVF_RCV_BUF_ALIGN_LEN(X)\ - (NICVF_ALIGNED_ADDR(X, NICVF_RCV_BUF_ALIGN_BYTES) - X) /* Queue enable/disable */ #define NICVF_SQ_EN BIT_ULL(19) diff --git a/drivers/net/ethernet/cavium/thunder/q_struct.h b/drivers/net/ethernet/cavium/thunder/q_struct.h index 3c1de97b1add..9e6d9876bfd0 100644 --- a/drivers/net/ethernet/cavium/thunder/q_struct.h +++ b/drivers/net/ethernet/cavium/thunder/q_struct.h @@ -545,25 +545,28 @@ struct sq_hdr_subdesc { u64 subdesc_cnt:8; u64 csum_l4:2; u64 csum_l3:1; - u64 rsvd0:5; + u64 csum_inner_l4:2; + u64 csum_inner_l3:1; + u64 rsvd0:2; u64 l4_offset:8; u64 l3_offset:8; u64 rsvd1:4; u64 tot_len:20; /* W0 */ - u64 tso_sdc_cont:8; - u64 tso_sdc_first:8; - u64 tso_l4_offset:8; - u64 tso_flags_last:12; - u64 tso_flags_first:12; - u64 rsvd2:2; + u64 rsvd2:24; + u64 inner_l4_offset:8; + u64 inner_l3_offset:8; + u64 tso_start:8; + u64 rsvd3:2; u64 tso_max_paysize:14; /* W1 */ #elif defined(__LITTLE_ENDIAN_BITFIELD) u64 tot_len:20; u64 rsvd1:4; u64 l3_offset:8; u64 l4_offset:8; - u64 rsvd0:5; + u64 rsvd0:2; + u64 csum_inner_l3:1; + u64 csum_inner_l4:2; u64 csum_l3:1; u64 csum_l4:2; u64 subdesc_cnt:8; @@ -574,12 +577,11 @@ struct sq_hdr_subdesc { u64 subdesc_type:4; /* W0 */ u64 tso_max_paysize:14; - u64 rsvd2:2; - u64 tso_flags_first:12; - u64 tso_flags_last:12; - u64 tso_l4_offset:8; - u64 tso_sdc_first:8; - u64 tso_sdc_cont:8; /* W1 */ + u64 rsvd3:2; + u64 tso_start:8; + u64 inner_l3_offset:8; + u64 inner_l4_offset:8; + u64 rsvd2:24; /* W1 */ #endif }; diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 180aa9fabf48..9df26c2263bc 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -186,6 +186,23 @@ void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac) } EXPORT_SYMBOL(bgx_set_lmac_mac); +void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) +{ + struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + u64 cfg; + + if (!bgx) + return; + + cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); + if (enable) + cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN; + else + cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN); + bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); +} +EXPORT_SYMBOL(bgx_lmac_rx_tx_enable); + static void bgx_sgmii_change_link_state(struct lmac *lmac) { struct bgx *bgx = lmac->bgx; @@ -612,6 +629,8 @@ static void bgx_poll_for_link(struct work_struct *work) lmac->last_duplex = 1; } else { lmac->link_up = 0; + lmac->last_speed = SPEED_UNKNOWN; + lmac->last_duplex = DUPLEX_UNKNOWN; } if (lmac->last_link != lmac->link_up) { @@ -654,8 +673,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) } /* Enable lmac */ - bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, - CMR_EN | CMR_PKT_RX_EN | CMR_PKT_TX_EN); + bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN); /* Restore default cfg, incase low level firmware changed it */ bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, 0x03); @@ -695,8 +713,7 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) lmac = &bgx->lmac[lmacid]; if (lmac->check_link) { /* Destroy work queue */ - cancel_delayed_work(&lmac->dwork); - flush_workqueue(lmac->check_link); + cancel_delayed_work_sync(&lmac->dwork); destroy_workqueue(lmac->check_link); } @@ -1009,6 +1026,9 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct bgx *bgx = NULL; u8 lmac; + /* Load octeon mdio driver */ + octeon_mdiobus_force_mod_depencency(); + bgx = devm_kzalloc(dev, sizeof(*bgx), GFP_KERNEL); if (!bgx) return -ENOMEM; diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 07b7ec66c60d..149e179363a1 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -182,6 +182,8 @@ enum MCAST_MODE { #define BCAST_ACCEPT 1 #define CAM_ACCEPT 1 +void octeon_mdiobus_force_mod_depencency(void); +void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable); void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac); unsigned bgx_get_map(int node); int bgx_get_lmac_count(int node, int bgx); diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig index a79813a17b6e..4d187f22c48b 100644 --- a/drivers/net/ethernet/chelsio/Kconfig +++ b/drivers/net/ethernet/chelsio/Kconfig @@ -65,13 +65,14 @@ config CHELSIO_T3 will be called cxgb3. config CHELSIO_T4 - tristate "Chelsio Communications T4/T5 Ethernet support" + tristate "Chelsio Communications T4/T5/T6 Ethernet support" depends on PCI && (IPV6 || IPV6=n) select FW_LOADER select MDIO ---help--- - This driver supports Chelsio T4 and T5 based gigabit, 10Gb Ethernet - adapter and T5 based 40Gb Ethernet adapter. + This driver supports Chelsio T4, T5 & T6 based gigabit, 10Gb Ethernet + adapter and T5/T6 based 40Gb and T6 based 25Gb, 50Gb and 100Gb + Ethernet adapters. For general information about Chelsio and our products, visit our website at <http://www.chelsio.com>. @@ -85,7 +86,7 @@ config CHELSIO_T4 will be called cxgb4. config CHELSIO_T4_DCB - bool "Data Center Bridging (DCB) Support for Chelsio T4/T5 cards" + bool "Data Center Bridging (DCB) Support for Chelsio T4/T5/T6 cards" default n depends on CHELSIO_T4 && DCB ---help--- @@ -107,12 +108,12 @@ config CHELSIO_T4_FCOE If unsure, say N. config CHELSIO_T4VF - tristate "Chelsio Communications T4/T5 Virtual Function Ethernet support" + tristate "Chelsio Communications T4/T5/T6 Virtual Function Ethernet support" depends on PCI ---help--- - This driver supports Chelsio T4 and T5 based gigabit, 10Gb Ethernet - adapters and T5 based 40Gb Ethernet adapters with PCI-E SR-IOV Virtual - Functions. + This driver supports Chelsio T4, T5 & T6 based gigabit, 10Gb Ethernet + adapters and T5/T6 based 40Gb and T6 based 25Gb, 50Gb and 100Gb + Ethernet adapters with PCI-E SR-IOV Virtual Functions. For general information about Chelsio and our products, visit our website at <http://www.chelsio.com>. diff --git a/drivers/net/ethernet/chelsio/cxgb/pm3393.c b/drivers/net/ethernet/chelsio/cxgb/pm3393.c index ec5e05052d99..eb462d7db427 100644 --- a/drivers/net/ethernet/chelsio/cxgb/pm3393.c +++ b/drivers/net/ethernet/chelsio/cxgb/pm3393.c @@ -570,7 +570,7 @@ static void pm3393_destroy(struct cmac *cmac) kfree(cmac); } -static struct cmac_ops pm3393_ops = { +static const struct cmac_ops pm3393_ops = { .destroy = pm3393_destroy, .reset = pm3393_reset, .interrupt_enable = pm3393_interrupt_enable, diff --git a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c index b0cb388f5e12..6f30b6f78553 100644 --- a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c +++ b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c @@ -666,7 +666,7 @@ static void mac_destroy(struct cmac *mac) kfree(mac); } -static struct cmac_ops vsc7326_ops = { +static const struct cmac_ops vsc7326_ops = { .destroy = mac_destroy, .reset = mac_reset, .interrupt_handler = mac_intr_handler, diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 8f7aa53a4c4b..60908eab3b3a 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -701,15 +701,16 @@ static ssize_t attr_store(struct device *d, ssize_t(*set) (struct net_device *, unsigned int), unsigned int min_val, unsigned int max_val) { - char *endp; ssize_t ret; unsigned int val; if (!capable(CAP_NET_ADMIN)) return -EPERM; - val = simple_strtoul(buf, &endp, 0); - if (endp == buf || val < min_val || val > max_val) + ret = kstrtouint(buf, 0, &val); + if (ret) + return ret; + if (val < min_val || val > max_val) return -EINVAL; rtnl_lock(); @@ -829,14 +830,15 @@ static ssize_t tm_attr_store(struct device *d, struct port_info *pi = netdev_priv(to_net_dev(d)); struct adapter *adap = pi->adapter; unsigned int val; - char *endp; ssize_t ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; - val = simple_strtoul(buf, &endp, 0); - if (endp == buf || val > 10000000) + ret = kstrtouint(buf, 0, &val); + if (ret) + return ret; + if (val > 10000000) return -EINVAL; rtnl_lock(); diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c index a22768c94200..ee04caa6c4d8 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c @@ -709,11 +709,21 @@ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p) return ret; } - p->cclk = simple_strtoul(vpd.cclk_data, NULL, 10); - p->mclk = simple_strtoul(vpd.mclk_data, NULL, 10); - p->uclk = simple_strtoul(vpd.uclk_data, NULL, 10); - p->mdc = simple_strtoul(vpd.mdc_data, NULL, 10); - p->mem_timing = simple_strtoul(vpd.mt_data, NULL, 10); + ret = kstrtouint(vpd.cclk_data, 10, &p->cclk); + if (ret) + return ret; + ret = kstrtouint(vpd.mclk_data, 10, &p->mclk); + if (ret) + return ret; + ret = kstrtouint(vpd.uclk_data, 10, &p->uclk); + if (ret) + return ret; + ret = kstrtouint(vpd.mdc_data, 10, &p->mdc); + if (ret) + return ret; + ret = kstrtouint(vpd.mt_data, 10, &p->mem_timing); + if (ret) + return ret; memcpy(p->sn, vpd.sn_data, SERNUM_LEN); /* Old eeproms didn't have port information */ @@ -723,8 +733,12 @@ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p) } else { p->port_type[0] = hex_to_bin(vpd.port0_data[0]); p->port_type[1] = hex_to_bin(vpd.port1_data[0]); - p->xauicfg[0] = simple_strtoul(vpd.xaui0cfg_data, NULL, 16); - p->xauicfg[1] = simple_strtoul(vpd.xaui1cfg_data, NULL, 16); + ret = kstrtou16(vpd.xaui0cfg_data, 16, &p->xauicfg[0]); + if (ret) + return ret; + ret = kstrtou16(vpd.xaui1cfg_data, 16, &p->xauicfg[1]); + if (ret) + return ret; } ret = hex2bin(p->eth_base, vpd.na_data, 6); diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c index c308429dd9c7..d288dcf6062f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c +++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c @@ -118,6 +118,11 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6) ret = clip6_get_mbox(dev, (const struct in6_addr *)lip); if (ret) { write_unlock_bh(&ctbl->lock); + dev_err(adap->pdev_dev, + "CLIP FW cmd failed with error %d, " + "Connections using %pI6c wont be " + "offloaded", + ret, ce->addr6.sin6_addr.s6_addr); return ret; } } else { @@ -127,6 +132,9 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6) } } else { write_unlock_bh(&ctbl->lock); + dev_info(adap->pdev_dev, "CLIP table overflow, " + "Connections using %pI6c wont be offloaded", + (void *)lip); return -ENOMEM; } write_unlock_bh(&ctbl->lock); @@ -146,6 +154,9 @@ void cxgb4_clip_release(const struct net_device *dev, const u32 *lip, u8 v6) int hash; int ret = -1; + if (!ctbl) + return; + hash = clip_addr_hash(ctbl, addr, v6); read_lock_bh(&ctbl->lock); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 55a47de544ea..e01e7228f607 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -483,6 +483,8 @@ struct sge_fl { /* SGE free-buffer queue state */ unsigned int pidx; /* producer index */ unsigned long alloc_failed; /* # of times buffer allocation failed */ unsigned long large_alloc_failed; + unsigned long mapping_err; /* # of RX Buffer DMA Mapping failures */ + unsigned long low; /* # of times momentarily starving */ unsigned long starving; /* RO fields */ unsigned int cntxt_id; /* SGE context id for the free list */ @@ -618,6 +620,7 @@ struct sge_ofld_txq { /* state for an SGE offload Tx queue */ struct adapter *adap; struct sk_buff_head sendq; /* list of backpressured packets */ struct tasklet_struct qresume_tsk; /* restarts the queue */ + bool service_ofldq_running; /* service_ofldq() is processing sendq */ u8 full; /* the Tx ring is full */ unsigned long mapping_err; /* # of I/O MMU packet mapping errors */ } ____cacheline_aligned_in_smp; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 4269944c5db5..0d579b192350 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2325,6 +2325,8 @@ do { \ TL("TxMapErr:", mapping_err); RL("FLAllocErr:", fl.alloc_failed); RL("FLLrgAlcErr:", fl.large_alloc_failed); + RL("FLMapErr:", fl.mapping_err); + RL("FLLow:", fl.low); RL("FLStarving:", fl.starving); } else if (iscsi_idx < iscsi_entries) { @@ -2359,6 +2361,8 @@ do { \ RL("RxNoMem:", stats.nomem); RL("FLAllocErr:", fl.alloc_failed); RL("FLLrgAlcErr:", fl.large_alloc_failed); + RL("FLMapErr:", fl.mapping_err); + RL("FLLow:", fl.low); RL("FLStarving:", fl.starving); } else if (rdma_idx < rdma_entries) { @@ -2388,6 +2392,8 @@ do { \ RL("RxNoMem:", stats.nomem); RL("FLAllocErr:", fl.alloc_failed); RL("FLLrgAlcErr:", fl.large_alloc_failed); + RL("FLMapErr:", fl.mapping_err); + RL("FLLow:", fl.low); RL("FLStarving:", fl.starving); } else if (ciq_idx < ciq_entries) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index a077f9476daf..2a61a42ab033 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -35,79 +35,79 @@ static void set_msglevel(struct net_device *dev, u32 val) } static const char stats_strings[][ETH_GSTRING_LEN] = { - "tx_octets_ok ", - "tx_frames_ok ", - "tx_broadcast_frames ", - "tx_multicast_frames ", - "tx_unicast_frames ", - "tx_error_frames ", - - "tx_frames_64 ", - "tx_frames_65_to_127 ", - "tx_frames_128_to_255 ", - "tx_frames_256_to_511 ", - "tx_frames_512_to_1023 ", - "tx_frames_1024_to_1518 ", - "tx_frames_1519_to_max ", - - "tx_frames_dropped ", - "tx_pause_frames ", - "tx_ppp0_frames ", - "tx_ppp1_frames ", - "tx_ppp2_frames ", - "tx_ppp3_frames ", - "tx_ppp4_frames ", - "tx_ppp5_frames ", - "tx_ppp6_frames ", - "tx_ppp7_frames ", - - "rx_octets_ok ", - "rx_frames_ok ", - "rx_broadcast_frames ", - "rx_multicast_frames ", - "rx_unicast_frames ", - - "rx_frames_too_long ", - "rx_jabber_errors ", - "rx_fcs_errors ", - "rx_length_errors ", - "rx_symbol_errors ", - "rx_runt_frames ", - - "rx_frames_64 ", - "rx_frames_65_to_127 ", - "rx_frames_128_to_255 ", - "rx_frames_256_to_511 ", - "rx_frames_512_to_1023 ", - "rx_frames_1024_to_1518 ", - "rx_frames_1519_to_max ", - - "rx_pause_frames ", - "rx_ppp0_frames ", - "rx_ppp1_frames ", - "rx_ppp2_frames ", - "rx_ppp3_frames ", - "rx_ppp4_frames ", - "rx_ppp5_frames ", - "rx_ppp6_frames ", - "rx_ppp7_frames ", - - "rx_bg0_frames_dropped ", - "rx_bg1_frames_dropped ", - "rx_bg2_frames_dropped ", - "rx_bg3_frames_dropped ", - "rx_bg0_frames_trunc ", - "rx_bg1_frames_trunc ", - "rx_bg2_frames_trunc ", - "rx_bg3_frames_trunc ", - - "tso ", - "tx_csum_offload ", - "rx_csum_good ", - "vlan_extractions ", - "vlan_insertions ", - "gro_packets ", - "gro_merged ", + "tx_octets_ok ", + "tx_frames_ok ", + "tx_broadcast_frames ", + "tx_multicast_frames ", + "tx_unicast_frames ", + "tx_error_frames ", + + "tx_frames_64 ", + "tx_frames_65_to_127 ", + "tx_frames_128_to_255 ", + "tx_frames_256_to_511 ", + "tx_frames_512_to_1023 ", + "tx_frames_1024_to_1518 ", + "tx_frames_1519_to_max ", + + "tx_frames_dropped ", + "tx_pause_frames ", + "tx_ppp0_frames ", + "tx_ppp1_frames ", + "tx_ppp2_frames ", + "tx_ppp3_frames ", + "tx_ppp4_frames ", + "tx_ppp5_frames ", + "tx_ppp6_frames ", + "tx_ppp7_frames ", + + "rx_octets_ok ", + "rx_frames_ok ", + "rx_broadcast_frames ", + "rx_multicast_frames ", + "rx_unicast_frames ", + + "rx_frames_too_long ", + "rx_jabber_errors ", + "rx_fcs_errors ", + "rx_length_errors ", + "rx_symbol_errors ", + "rx_runt_frames ", + + "rx_frames_64 ", + "rx_frames_65_to_127 ", + "rx_frames_128_to_255 ", + "rx_frames_256_to_511 ", + "rx_frames_512_to_1023 ", + "rx_frames_1024_to_1518 ", + "rx_frames_1519_to_max ", + + "rx_pause_frames ", + "rx_ppp0_frames ", + "rx_ppp1_frames ", + "rx_ppp2_frames ", + "rx_ppp3_frames ", + "rx_ppp4_frames ", + "rx_ppp5_frames ", + "rx_ppp6_frames ", + "rx_ppp7_frames ", + + "rx_bg0_frames_dropped ", + "rx_bg1_frames_dropped ", + "rx_bg2_frames_dropped ", + "rx_bg3_frames_dropped ", + "rx_bg0_frames_trunc ", + "rx_bg1_frames_trunc ", + "rx_bg2_frames_trunc ", + "rx_bg3_frames_trunc ", + + "tso ", + "tx_csum_offload ", + "rx_csum_good ", + "vlan_extractions ", + "vlan_insertions ", + "gro_packets ", + "gro_merged ", }; static char adapter_stats_strings[][ETH_GSTRING_LEN] = { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 0d147610a06f..edd706e739fb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4865,15 +4865,25 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } #if IS_ENABLED(CONFIG_IPV6) - adapter->clipt = t4_init_clip_tbl(adapter->clipt_start, - adapter->clipt_end); - if (!adapter->clipt) { - /* We tolerate a lack of clip_table, giving up - * some functionality + if ((CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) && + (!(t4_read_reg(adapter, LE_DB_CONFIG_A) & ASLIPCOMPEN_F))) { + /* CLIP functionality is not present in hardware, + * hence disable all offload features */ dev_warn(&pdev->dev, - "could not allocate Clip table, continuing\n"); + "CLIP not enabled in hardware, continuing\n"); adapter->params.offload = 0; + } else { + adapter->clipt = t4_init_clip_tbl(adapter->clipt_start, + adapter->clipt_end); + if (!adapter->clipt) { + /* We tolerate a lack of clip_table, giving up + * some functionality + */ + dev_warn(&pdev->dev, + "could not allocate Clip table, continuing\n"); + adapter->params.offload = 0; + } } #endif if (is_offload(adapter) && tid_init(&adapter->tids) < 0) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 48d8fbb1c220..8d35ce317f67 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -406,7 +406,7 @@ static void free_tx_desc(struct adapter *adap, struct sge_txq *q, */ static inline int reclaimable(const struct sge_txq *q) { - int hw_cidx = ntohs(q->stat->cidx); + int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx)); hw_cidx -= q->cidx; return hw_cidx < 0 ? hw_cidx + q->size : hw_cidx; } @@ -613,6 +613,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n, PCI_DMA_FROMDEVICE); if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) { __free_pages(pg, s->fl_pg_order); + q->mapping_err++; goto out; /* do not try small pages for this error */ } mapping |= RX_LARGE_PG_BUF; @@ -642,6 +643,7 @@ alloc_small_pages: PCI_DMA_FROMDEVICE); if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) { put_page(pg); + q->mapping_err++; goto out; } *d++ = cpu_to_be64(mapping); @@ -663,6 +665,7 @@ out: cred = q->avail - cred; if (unlikely(fl_starving(adap, q))) { smp_wmb(); + q->low++; set_bit(q->cntxt_id - adap->sge.egr_start, adap->sge.starving_fl); } @@ -1029,6 +1032,30 @@ static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q, *p = 0; } +static void *inline_tx_skb_header(const struct sk_buff *skb, + const struct sge_txq *q, void *pos, + int length) +{ + u64 *p; + int left = (void *)q->stat - pos; + + if (likely(length <= left)) { + memcpy(pos, skb->data, length); + pos += length; + } else { + memcpy(pos, skb->data, left); + memcpy(q->desc, skb->data + left, length - left); + pos = (void *)q->desc + (length - left); + } + /* 0-pad to multiple of 16 */ + p = PTR_ALIGN(pos, 8); + if ((uintptr_t)p & 8) { + *p = 0; + return p + 1; + } + return p; +} + /* * Figure out what HW csum a packet wants and return the appropriate control * bits. @@ -1320,7 +1347,7 @@ out_free: dev_kfree_skb_any(skb); */ static inline void reclaim_completed_tx_imm(struct sge_txq *q) { - int hw_cidx = ntohs(q->stat->cidx); + int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx)); int reclaim = hw_cidx - q->cidx; if (reclaim < 0) @@ -1542,24 +1569,50 @@ static void ofldtxq_stop(struct sge_ofld_txq *q, struct sk_buff *skb) } /** - * service_ofldq - restart a suspended offload queue + * service_ofldq - service/restart a suspended offload queue * @q: the offload queue * - * Services an offload Tx queue by moving packets from its packet queue - * to the HW Tx ring. The function starts and ends with the queue locked. + * Services an offload Tx queue by moving packets from its Pending Send + * Queue to the Hardware TX ring. The function starts and ends with the + * Send Queue locked, but drops the lock while putting the skb at the + * head of the Send Queue onto the Hardware TX Ring. Dropping the lock + * allows more skbs to be added to the Send Queue by other threads. + * The packet being processed at the head of the Pending Send Queue is + * left on the queue in case we experience DMA Mapping errors, etc. + * and need to give up and restart later. + * + * service_ofldq() can be thought of as a task which opportunistically + * uses other threads execution contexts. We use the Offload Queue + * boolean "service_ofldq_running" to make sure that only one instance + * is ever running at a time ... */ static void service_ofldq(struct sge_ofld_txq *q) { - u64 *pos; + u64 *pos, *before, *end; int credits; struct sk_buff *skb; + struct sge_txq *txq; + unsigned int left; unsigned int written = 0; unsigned int flits, ndesc; + /* If another thread is currently in service_ofldq() processing the + * Pending Send Queue then there's nothing to do. Otherwise, flag + * that we're doing the work and continue. Examining/modifying + * the Offload Queue boolean "service_ofldq_running" must be done + * while holding the Pending Send Queue Lock. + */ + if (q->service_ofldq_running) + return; + q->service_ofldq_running = true; + while ((skb = skb_peek(&q->sendq)) != NULL && !q->full) { - /* - * We drop the lock but leave skb on sendq, thus retaining - * exclusive access to the state of the queue. + /* We drop the lock while we're working with the skb at the + * head of the Pending Send Queue. This allows more skbs to + * be added to the Pending Send Queue while we're working on + * this one. We don't need to lock to guard the TX Ring + * updates because only one thread of execution is ever + * allowed into service_ofldq() at a time. */ spin_unlock(&q->sendq.lock); @@ -1583,9 +1636,32 @@ static void service_ofldq(struct sge_ofld_txq *q) } else { int last_desc, hdr_len = skb_transport_offset(skb); - memcpy(pos, skb->data, hdr_len); - write_sgl(skb, &q->q, (void *)pos + hdr_len, - pos + flits, hdr_len, + /* The WR headers may not fit within one descriptor. + * So we need to deal with wrap-around here. + */ + before = (u64 *)pos; + end = (u64 *)pos + flits; + txq = &q->q; + pos = (void *)inline_tx_skb_header(skb, &q->q, + (void *)pos, + hdr_len); + if (before > (u64 *)pos) { + left = (u8 *)end - (u8 *)txq->stat; + end = (void *)txq->desc + left; + } + + /* If current position is already at the end of the + * ofld queue, reset the current to point to + * start of the queue and update the end ptr as well. + */ + if (pos == (u64 *)txq->stat) { + left = (u8 *)end - (u8 *)txq->stat; + end = (void *)txq->desc + left; + pos = (void *)txq->desc; + } + + write_sgl(skb, &q->q, (void *)pos, + end, hdr_len, (dma_addr_t *)skb->head); #ifdef CONFIG_NEED_DMA_MAP_STATE skb->dev = q->adap->port[0]; @@ -1604,6 +1680,11 @@ static void service_ofldq(struct sge_ofld_txq *q) written = 0; } + /* Reacquire the Pending Send Queue Lock so we can unlink the + * skb we've just successfully transferred to the TX Ring and + * loop for the next skb which may be at the head of the + * Pending Send Queue. + */ spin_lock(&q->sendq.lock); __skb_unlink(skb, &q->sendq); if (is_ofld_imm(skb)) @@ -1611,6 +1692,11 @@ static void service_ofldq(struct sge_ofld_txq *q) } if (likely(written)) ring_tx_db(q->adap, &q->q, written); + + /*Indicate that no thread is processing the Pending Send Queue + * currently. + */ + q->service_ofldq_running = false; } /** @@ -1624,9 +1710,19 @@ static int ofld_xmit(struct sge_ofld_txq *q, struct sk_buff *skb) { skb->priority = calc_tx_flits_ofld(skb); /* save for restart */ spin_lock(&q->sendq.lock); + + /* Queue the new skb onto the Offload Queue's Pending Send Queue. If + * that results in this new skb being the only one on the queue, start + * servicing it. If there are other skbs already on the list, then + * either the queue is currently being processed or it's been stopped + * for some reason and it'll be restarted at a later time. Restart + * paths are triggered by events like experiencing a DMA Mapping Error + * or filling the Hardware TX Ring. + */ __skb_queue_tail(&q->sendq, skb); if (q->sendq.qlen == 1) service_ofldq(q); + spin_unlock(&q->sendq.lock); return NET_XMIT_SUCCESS; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 03ed00c49823..a8dda635456d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -162,6 +162,9 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x5095), /* Custom T540-CR-SO */ CH_PCI_ID_TABLE_FENTRY(0x5096), /* Custom T580-CR */ CH_PCI_ID_TABLE_FENTRY(0x5097), /* Custom T520-KR */ + CH_PCI_ID_TABLE_FENTRY(0x5098), /* Custom 2x40G QSFP */ + CH_PCI_ID_TABLE_FENTRY(0x5099), /* Custom 2x40G QSFP */ + CH_PCI_ID_TABLE_FENTRY(0x509a), /* Custom T520-CR */ /* T6 adapters: */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index fc3044c8ac1c..91b52a21a2e7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -2802,6 +2802,10 @@ #define HASHEN_V(x) ((x) << HASHEN_S) #define HASHEN_F HASHEN_V(1U) +#define ASLIPCOMPEN_S 17 +#define ASLIPCOMPEN_V(x) ((x) << ASLIPCOMPEN_S) +#define ASLIPCOMPEN_F ASLIPCOMPEN_V(1U) + #define REQQPARERR_S 16 #define REQQPARERR_V(x) ((x) << REQQPARERR_S) #define REQQPARERR_F REQQPARERR_V(1U) diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index 8966f3159bb2..3acde3b9b767 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -1990,7 +1990,7 @@ SetMulticastFilter(struct net_device *dev) static u_char de4x5_irq[] = EISA_ALLOWED_IRQ_LIST; -static int __init de4x5_eisa_probe (struct device *gendev) +static int de4x5_eisa_probe(struct device *gendev) { struct eisa_device *edev; u_long iobase; diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index ed41559bae77..b553409e04ad 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -98,8 +98,7 @@ static int csr0 = 0x01A00000 | 0x4800; #elif defined(__mips__) static int csr0 = 0x00200000 | 0x4000; #else -#warning Processor architecture undefined! -static int csr0 = 0x00A00000 | 0x4800; +static int csr0; #endif /* Operational parameters that usually are not changed. */ @@ -1982,6 +1981,12 @@ static int __init tulip_init (void) pr_info("%s", version); #endif + if (!csr0) { + pr_warn("tulip: unknown CPU architecture, using default csr0\n"); + /* default to 8 longword cache line alignment */ + csr0 = 0x00A00000 | 0x4800; + } + /* copy module parms into globals */ tulip_rx_copybreak = rx_copybreak; tulip_max_interrupt_work = max_interrupt_work; diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 9beb3d34d4ba..3c0e4d5c5fef 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -907,7 +907,7 @@ static void init_registers(struct net_device *dev) #elif defined(CONFIG_SPARC) || defined (CONFIG_PARISC) || defined(CONFIG_ARM) i |= 0x4800; #else -#warning Processor architecture undefined + dev_warn(&dev->dev, "unknown CPU architecture, using default csr0 setting\n"); i |= 0x4800; #endif iowrite32(i, ioaddr + PCIBusCfg); diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index ff76d4e9dc1b..bee32a9d9876 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -7,7 +7,8 @@ config NET_VENDOR_FREESCALE default y depends on FSL_SOC || QUICC_ENGINE || CPM1 || CPM2 || PPC_MPC512x || \ M523x || M527x || M5272 || M528x || M520x || M532x || \ - ARCH_MXC || ARCH_MXS || (PPC_MPC52xx && PPC_BESTCOMM) + ARCH_MXC || ARCH_MXS || (PPC_MPC52xx && PPC_BESTCOMM) || \ + ARCH_LAYERSCAPE ---help--- If you have a network (Ethernet) card belonging to this class, say Y. diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index b2a32209ffbf..d2328fc5da57 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3277,7 +3277,6 @@ static void fec_enet_get_queue_num(struct platform_device *pdev, int *num_tx, int *num_rx) { struct device_node *np = pdev->dev.of_node; - int err; *num_tx = *num_rx = 1; @@ -3285,13 +3284,9 @@ fec_enet_get_queue_num(struct platform_device *pdev, int *num_tx, int *num_rx) return; /* parse the num of tx and rx queues */ - err = of_property_read_u32(np, "fsl,num-tx-queues", num_tx); - if (err) - *num_tx = 1; + of_property_read_u32(np, "fsl,num-tx-queues", num_tx); - err = of_property_read_u32(np, "fsl,num-rx-queues", num_rx); - if (err) - *num_rx = 1; + of_property_read_u32(np, "fsl,num-rx-queues", num_rx); if (*num_tx < 1 || *num_tx > FEC_ENET_MAX_TX_QS) { dev_warn(&pdev->dev, "Invalid num_tx(=%d), fall back to 1\n", diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index afe7f39cdd7c..25553ee857b4 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -1084,27 +1084,23 @@ static struct platform_driver mpc52xx_fec_driver = { /* Module */ /* ======================================================================== */ +static struct platform_driver * const drivers[] = { +#ifdef CONFIG_FEC_MPC52xx_MDIO + &mpc52xx_fec_mdio_driver, +#endif + &mpc52xx_fec_driver, +}; + static int __init mpc52xx_fec_init(void) { -#ifdef CONFIG_FEC_MPC52xx_MDIO - int ret; - ret = platform_driver_register(&mpc52xx_fec_mdio_driver); - if (ret) { - pr_err("failed to register mdio driver\n"); - return ret; - } -#endif - return platform_driver_register(&mpc52xx_fec_driver); + return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); } static void __exit mpc52xx_fec_exit(void) { - platform_driver_unregister(&mpc52xx_fec_driver); -#ifdef CONFIG_FEC_MPC52xx_MDIO - platform_driver_unregister(&mpc52xx_fec_mdio_driver); -#endif + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 67b1850c034e..4ce60e0c8341 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -647,9 +647,9 @@ static int gfar_parse_group(struct device_node *np, if (model && strcasecmp(model, "FEC")) { gfar_irq(grp, RX)->irq = irq_of_parse_and_map(np, 1); gfar_irq(grp, ER)->irq = irq_of_parse_and_map(np, 2); - if (gfar_irq(grp, TX)->irq == NO_IRQ || - gfar_irq(grp, RX)->irq == NO_IRQ || - gfar_irq(grp, ER)->irq == NO_IRQ) + if (!gfar_irq(grp, TX)->irq || + !gfar_irq(grp, RX)->irq || + !gfar_irq(grp, ER)->irq) return -EINVAL; } diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c index 664d0c261269..b40fba929d65 100644 --- a/drivers/net/ethernet/freescale/gianfar_ptp.c +++ b/drivers/net/ethernet/freescale/gianfar_ptp.c @@ -467,7 +467,7 @@ static int gianfar_ptp_probe(struct platform_device *dev) etsects->irq = platform_get_irq(dev, 0); - if (etsects->irq == NO_IRQ) { + if (etsects->irq < 0) { pr_err("irq not in device tree\n"); goto no_node; } diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index cec95ac8687d..6ca94dc3dda3 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -35,7 +35,7 @@ #include <linux/phy.h> #include <linux/types.h> -#define HNAE_DRIVER_VERSION "1.3.0" +#define HNAE_DRIVER_VERSION "2.0" #define HNAE_DRIVER_NAME "hns" #define HNAE_COPYRIGHT "Copyright(c) 2015 Huawei Corporation." #define HNAE_DRIVER_STRING "Hisilicon Network Subsystem Driver" @@ -63,6 +63,7 @@ do { \ #define AE_VERSION_1 ('6' << 16 | '6' << 8 | '0') #define AE_VERSION_2 ('1' << 24 | '6' << 16 | '1' << 8 | '0') +#define AE_IS_VER1(ver) ((ver) == AE_VERSION_1) #define AE_NAME_SIZE 16 /* some said the RX and TX RCB format should not be the same in the future. But @@ -144,23 +145,61 @@ enum hnae_led_state { #define HNS_RXD_ASID_S 24 #define HNS_RXD_ASID_M (0xff << HNS_RXD_ASID_S) +#define HNSV2_TXD_BUFNUM_S 0 +#define HNSV2_TXD_BUFNUM_M (0x7 << HNSV2_TXD_BUFNUM_S) +#define HNSV2_TXD_RI_B 1 +#define HNSV2_TXD_L4CS_B 2 +#define HNSV2_TXD_L3CS_B 3 +#define HNSV2_TXD_FE_B 4 +#define HNSV2_TXD_VLD_B 5 + +#define HNSV2_TXD_TSE_B 0 +#define HNSV2_TXD_VLAN_EN_B 1 +#define HNSV2_TXD_SNAP_B 2 +#define HNSV2_TXD_IPV6_B 3 +#define HNSV2_TXD_SCTP_B 4 + /* hardware spec ring buffer format */ struct __packed hnae_desc { __le64 addr; union { struct { - __le16 asid_bufnum_pid; + union { + __le16 asid_bufnum_pid; + __le16 asid; + }; __le16 send_size; - __le32 flag_ipoffset; - __le32 reserved_3[4]; + union { + __le32 flag_ipoffset; + struct { + __u8 bn_pid; + __u8 ra_ri_cs_fe_vld; + __u8 ip_offset; + __u8 tse_vlan_snap_v6_sctp_nth; + }; + }; + __le16 mss; + __u8 l4_len; + __u8 reserved1; + __le16 paylen; + __u8 vmid; + __u8 qid; + __le32 reserved2[2]; } tx; struct { __le32 ipoff_bnum_pid_flag; __le16 pkt_len; __le16 size; - __le32 vlan_pri_asid; - __le32 reserved_2[3]; + union { + __le32 vlan_pri_asid; + struct { + __le16 asid; + __le16 vlan_cfi_pri; + }; + }; + __le32 rss_hash; + __le32 reserved_1[2]; } rx; }; }; @@ -302,7 +341,8 @@ struct hnae_queue { void __iomem *io_base; phys_addr_t phy_base; struct hnae_ae_dev *dev; /* the device who use this queue */ - struct hnae_ring rx_ring, tx_ring; + struct hnae_ring rx_ring ____cacheline_internodealigned_in_smp; + struct hnae_ring tx_ring ____cacheline_internodealigned_in_smp; struct hnae_handle *handle; }; @@ -435,6 +475,7 @@ struct hnae_ae_ops { int (*set_mac_addr)(struct hnae_handle *handle, void *p); int (*set_mc_addr)(struct hnae_handle *handle, void *addr); int (*set_mtu)(struct hnae_handle *handle, int new_mtu); + void (*set_tso_stats)(struct hnae_handle *handle, int enable); void (*update_stats)(struct hnae_handle *handle, struct net_device_stats *net_stats); void (*get_stats)(struct hnae_handle *handle, u64 *data); @@ -446,6 +487,12 @@ struct hnae_ae_ops { enum hnae_led_state status); void (*get_regs)(struct hnae_handle *handle, void *data); int (*get_regs_len)(struct hnae_handle *handle); + u32 (*get_rss_key_size)(struct hnae_handle *handle); + u32 (*get_rss_indir_size)(struct hnae_handle *handle); + int (*get_rss)(struct hnae_handle *handle, u32 *indir, u8 *key, + u8 *hfunc); + int (*set_rss)(struct hnae_handle *handle, const u32 *indir, + const u8 *key, const u8 hfunc); }; struct hnae_ae_dev { @@ -551,11 +598,9 @@ static inline void hnae_replace_buffer(struct hnae_ring *ring, int i, struct hnae_desc_cb *res_cb) { struct hnae_buf_ops *bops = ring->q->handle->bops; - struct hnae_desc_cb tmp_cb = ring->desc_cb[i]; bops->unmap_buffer(ring, &ring->desc_cb[i]); ring->desc_cb[i] = *res_cb; - *res_cb = tmp_cb; ring->desc[i].addr = (__le64)ring->desc_cb[i].dma; ring->desc[i].rx.ipoff_bnum_pid_flag = 0; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index 1a16c0307b47..522b264866b4 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -252,7 +252,7 @@ static int hns_ae_set_multicast_one(struct hnae_handle *handle, void *addr) if (mac_cb->mac_type != HNAE_PORT_SERVICE) return 0; - ret = hns_mac_set_multi(mac_cb, mac_cb->mac_id, mac_addr, ENABLE); + ret = hns_mac_set_multi(mac_cb, mac_cb->mac_id, mac_addr, true); if (ret) { dev_err(handle->owner_dev, "mac add mul_mac:%pM port%d fail, ret = %#x!\n", @@ -261,7 +261,7 @@ static int hns_ae_set_multicast_one(struct hnae_handle *handle, void *addr) } ret = hns_mac_set_multi(mac_cb, DSAF_BASE_INNER_PORT_NUM, - mac_addr, ENABLE); + mac_addr, true); if (ret) dev_err(handle->owner_dev, "mac add mul_mac:%pM port%d fail, ret = %#x!\n", @@ -277,12 +277,19 @@ static int hns_ae_set_mtu(struct hnae_handle *handle, int new_mtu) return hns_mac_set_mtu(mac_cb, new_mtu); } +static void hns_ae_set_tso_stats(struct hnae_handle *handle, int enable) +{ + struct hns_ppe_cb *ppe_cb = hns_get_ppe_cb(handle); + + hns_ppe_set_tso_enable(ppe_cb, enable); +} + static int hns_ae_start(struct hnae_handle *handle) { int ret; struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); - ret = hns_mac_vm_config_bc_en(mac_cb, 0, ENABLE); + ret = hns_mac_vm_config_bc_en(mac_cb, 0, true); if (ret) return ret; @@ -309,7 +316,7 @@ void hns_ae_stop(struct hnae_handle *handle) hns_ae_ring_enable_all(handle, 0); - (void)hns_mac_vm_config_bc_en(mac_cb, 0, DISABLE); + (void)hns_mac_vm_config_bc_en(mac_cb, 0, false); } static void hns_ae_reset(struct hnae_handle *handle) @@ -334,12 +341,30 @@ void hns_ae_toggle_ring_irq(struct hnae_ring *ring, u32 mask) else flag = RCB_INT_FLAG_RX; - hns_rcb_int_clr_hw(ring->q, flag); hns_rcb_int_ctrl_hw(ring->q, flag, mask); } +static void hns_aev2_toggle_ring_irq(struct hnae_ring *ring, u32 mask) +{ + u32 flag; + + if (is_tx_ring(ring)) + flag = RCB_INT_FLAG_TX; + else + flag = RCB_INT_FLAG_RX; + + hns_rcbv2_int_ctrl_hw(ring->q, flag, mask); +} + static void hns_ae_toggle_queue_status(struct hnae_queue *queue, u32 val) { + struct dsaf_device *dsaf_dev = hns_ae_get_dsaf_dev(queue->dev); + + if (AE_IS_VER1(dsaf_dev->dsaf_ver)) + hns_rcb_int_clr_hw(queue, RCB_INT_FLAG_TX | RCB_INT_FLAG_RX); + else + hns_rcbv2_int_clr_hw(queue, RCB_INT_FLAG_TX | RCB_INT_FLAG_RX); + hns_rcb_start(queue, val); } @@ -730,6 +755,53 @@ int hns_ae_get_regs_len(struct hnae_handle *handle) return total_num; } +static u32 hns_ae_get_rss_key_size(struct hnae_handle *handle) +{ + return HNS_PPEV2_RSS_KEY_SIZE; +} + +static u32 hns_ae_get_rss_indir_size(struct hnae_handle *handle) +{ + return HNS_PPEV2_RSS_IND_TBL_SIZE; +} + +static int hns_ae_get_rss(struct hnae_handle *handle, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct hns_ppe_cb *ppe_cb = hns_get_ppe_cb(handle); + + /* currently we support only one type of hash function i.e. Toep hash */ + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + /* get the RSS Key required by the user */ + if (key) + memcpy(key, ppe_cb->rss_key, HNS_PPEV2_RSS_KEY_SIZE); + + /* update the current hash->queue mappings from the shadow RSS table */ + memcpy(indir, ppe_cb->rss_indir_table, HNS_PPEV2_RSS_IND_TBL_SIZE); + + return 0; +} + +static int hns_ae_set_rss(struct hnae_handle *handle, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct hns_ppe_cb *ppe_cb = hns_get_ppe_cb(handle); + + /* set the RSS Hash Key if specififed by the user */ + if (key) + hns_ppe_set_rss_key(ppe_cb, (int *)key); + + /* update the shadow RSS table with user specified qids */ + memcpy(ppe_cb->rss_indir_table, indir, HNS_PPEV2_RSS_IND_TBL_SIZE); + + /* now update the hardware */ + hns_ppe_set_indir_table(ppe_cb, ppe_cb->rss_indir_table); + + return 0; +} + static struct hnae_ae_ops hns_dsaf_ops = { .get_handle = hns_ae_get_handle, .put_handle = hns_ae_put_handle, @@ -758,19 +830,34 @@ static struct hnae_ae_ops hns_dsaf_ops = { .set_mc_addr = hns_ae_set_multicast_one, .set_mtu = hns_ae_set_mtu, .update_stats = hns_ae_update_stats, + .set_tso_stats = hns_ae_set_tso_stats, .get_stats = hns_ae_get_stats, .get_strings = hns_ae_get_strings, .get_sset_count = hns_ae_get_sset_count, .update_led_status = hns_ae_update_led_status, .set_led_id = hns_ae_cpld_set_led_id, .get_regs = hns_ae_get_regs, - .get_regs_len = hns_ae_get_regs_len + .get_regs_len = hns_ae_get_regs_len, + .get_rss_key_size = hns_ae_get_rss_key_size, + .get_rss_indir_size = hns_ae_get_rss_indir_size, + .get_rss = hns_ae_get_rss, + .set_rss = hns_ae_set_rss }; int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev) { struct hnae_ae_dev *ae_dev = &dsaf_dev->ae_dev; + switch (dsaf_dev->dsaf_ver) { + case AE_VERSION_1: + hns_dsaf_ops.toggle_ring_irq = hns_ae_toggle_ring_irq; + break; + case AE_VERSION_2: + hns_dsaf_ops.toggle_ring_irq = hns_aev2_toggle_ring_irq; + break; + default: + break; + } ae_dev->ops = &hns_dsaf_ops; ae_dev->dev = dsaf_dev->dev; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 026b38676cba..5ef0e96e918a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -283,7 +283,7 @@ int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, } int hns_mac_set_multi(struct hns_mac_cb *mac_cb, - u32 port_num, char *addr, u8 en) + u32 port_num, char *addr, bool enable) { int ret; struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; @@ -295,7 +295,7 @@ int hns_mac_set_multi(struct hns_mac_cb *mac_cb, mac_entry.in_port_num = mac_cb->mac_id; mac_entry.port_num = port_num; - if (en == DISABLE) + if (!enable) ret = hns_dsaf_del_mac_mc_port(dsaf_dev, &mac_entry); else ret = hns_dsaf_add_mac_mc_port(dsaf_dev, &mac_entry); @@ -368,7 +368,7 @@ static void hns_mac_param_get(struct mac_params *param, *retuen 0 - success , negative --fail */ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb, - u32 port_num, u16 vlan_id, u8 en) + u32 port_num, u16 vlan_id, bool enable) { int ret; struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; @@ -386,7 +386,7 @@ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb, mac_entry.in_port_num = mac_cb->mac_id; mac_entry.port_num = port_num; - if (en == DISABLE) + if (!enable) ret = hns_dsaf_del_mac_mc_port(dsaf_dev, &mac_entry); else ret = hns_dsaf_add_mac_mc_port(dsaf_dev, &mac_entry); @@ -403,7 +403,7 @@ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb, *@en:enable *retuen 0 - success , negative --fail */ -int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, u8 en) +int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, bool enable) { int ret; struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; @@ -427,7 +427,7 @@ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, u8 en) return ret; mac_entry.port_num = port_num; - if (en == DISABLE) + if (!enable) ret = hns_dsaf_del_mac_mc_port(dsaf_dev, &mac_entry); else ret = hns_dsaf_add_mac_mc_port(dsaf_dev, &mac_entry); @@ -648,7 +648,7 @@ static int hns_mac_init_ex(struct hns_mac_cb *mac_cb) hns_mac_adjust_link(mac_cb, mac_cb->speed, !mac_cb->half_duplex); - ret = hns_mac_port_config_bc_en(mac_cb, mac_cb->mac_id, 0, ENABLE); + ret = hns_mac_port_config_bc_en(mac_cb, mac_cb->mac_id, 0, true); if (ret) goto free_mac_drv; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index 7da95a7581f9..0b052191d751 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -425,8 +425,8 @@ void mac_adjust_link(struct net_device *net_dev); void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status); int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, u32 vmid, char *addr); int hns_mac_set_multi(struct hns_mac_cb *mac_cb, - u32 port_num, char *addr, u8 en); -int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vm, u8 en); + u32 port_num, char *addr, bool enable); +int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vm, bool enable); void hns_mac_start(struct hns_mac_cb *mac_cb); void hns_mac_stop(struct hns_mac_cb *mac_cb); int hns_mac_del_mac(struct hns_mac_cb *mac_cb, u32 vfn, char *mac); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 2a98eba660c0..636b205a2366 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -38,10 +38,10 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) const char *name, *mode_str; struct device_node *np = dsaf_dev->dev->of_node; - if (of_device_is_compatible(np, "hisilicon,hns-dsaf-v2")) - dsaf_dev->dsaf_ver = AE_VERSION_2; - else + if (of_device_is_compatible(np, "hisilicon,hns-dsaf-v1")) dsaf_dev->dsaf_ver = AE_VERSION_1; + else + dsaf_dev->dsaf_ver = AE_VERSION_2; ret = of_property_read_string(np, "dsa_name", &name); if (ret) { @@ -274,6 +274,8 @@ static void hns_dsaf_stp_port_type_cfg(struct dsaf_device *dsaf_dev, } } +#define HNS_DSAF_SBM_NUM(dev) \ + (AE_IS_VER1((dev)->dsaf_ver) ? DSAF_SBM_NUM : DSAFV2_SBM_NUM) /** * hns_dsaf_sbm_cfg - config sbm * @dsaf_id: dsa fabric id @@ -283,7 +285,7 @@ static void hns_dsaf_sbm_cfg(struct dsaf_device *dsaf_dev) u32 o_sbm_cfg; u32 i; - for (i = 0; i < DSAF_SBM_NUM; i++) { + for (i = 0; i < HNS_DSAF_SBM_NUM(dsaf_dev); i++) { o_sbm_cfg = dsaf_read_dev(dsaf_dev, DSAF_SBM_CFG_REG_0_REG + 0x80 * i); dsaf_set_bit(o_sbm_cfg, DSAF_SBM_CFG_EN_S, 1); @@ -304,13 +306,19 @@ static int hns_dsaf_sbm_cfg_mib_en(struct dsaf_device *dsaf_dev) u32 reg; u32 read_cnt; - for (i = 0; i < DSAF_SBM_NUM; i++) { + /* validate configure by setting SBM_CFG_MIB_EN bit from 0 to 1. */ + for (i = 0; i < HNS_DSAF_SBM_NUM(dsaf_dev); i++) { + reg = DSAF_SBM_CFG_REG_0_REG + 0x80 * i; + dsaf_set_dev_bit(dsaf_dev, reg, DSAF_SBM_CFG_MIB_EN_S, 0); + } + + for (i = 0; i < HNS_DSAF_SBM_NUM(dsaf_dev); i++) { reg = DSAF_SBM_CFG_REG_0_REG + 0x80 * i; dsaf_set_dev_bit(dsaf_dev, reg, DSAF_SBM_CFG_MIB_EN_S, 1); } /* waitint for all sbm enable finished */ - for (i = 0; i < DSAF_SBM_NUM; i++) { + for (i = 0; i < HNS_DSAF_SBM_NUM(dsaf_dev); i++) { read_cnt = 0; reg = DSAF_SBM_CFG_REG_0_REG + 0x80 * i; do { @@ -338,83 +346,156 @@ static int hns_dsaf_sbm_cfg_mib_en(struct dsaf_device *dsaf_dev) */ static void hns_dsaf_sbm_bp_wl_cfg(struct dsaf_device *dsaf_dev) { - u32 o_sbm_bp_cfg0; - u32 o_sbm_bp_cfg1; - u32 o_sbm_bp_cfg2; - u32 o_sbm_bp_cfg3; + u32 o_sbm_bp_cfg; u32 reg; u32 i; /* XGE */ for (i = 0; i < DSAF_XGE_NUM; i++) { reg = DSAF_SBM_BP_CFG_0_XGE_REG_0_REG + 0x80 * i; - o_sbm_bp_cfg0 = dsaf_read_dev(dsaf_dev, reg); - dsaf_set_field(o_sbm_bp_cfg0, DSAF_SBM_CFG0_COM_MAX_BUF_NUM_M, + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG0_COM_MAX_BUF_NUM_M, DSAF_SBM_CFG0_COM_MAX_BUF_NUM_S, 512); - dsaf_set_field(o_sbm_bp_cfg0, DSAF_SBM_CFG0_VC0_MAX_BUF_NUM_M, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG0_VC0_MAX_BUF_NUM_M, DSAF_SBM_CFG0_VC0_MAX_BUF_NUM_S, 0); - dsaf_set_field(o_sbm_bp_cfg0, DSAF_SBM_CFG0_VC1_MAX_BUF_NUM_M, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG0_VC1_MAX_BUF_NUM_M, DSAF_SBM_CFG0_VC1_MAX_BUF_NUM_S, 0); - dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg0); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); reg = DSAF_SBM_BP_CFG_1_REG_0_REG + 0x80 * i; - o_sbm_bp_cfg1 = dsaf_read_dev(dsaf_dev, reg); - dsaf_set_field(o_sbm_bp_cfg1, DSAF_SBM_CFG1_TC4_MAX_BUF_NUM_M, + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG1_TC4_MAX_BUF_NUM_M, DSAF_SBM_CFG1_TC4_MAX_BUF_NUM_S, 0); - dsaf_set_field(o_sbm_bp_cfg1, DSAF_SBM_CFG1_TC0_MAX_BUF_NUM_M, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG1_TC0_MAX_BUF_NUM_M, DSAF_SBM_CFG1_TC0_MAX_BUF_NUM_S, 0); - dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg1); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); reg = DSAF_SBM_BP_CFG_2_XGE_REG_0_REG + 0x80 * i; - o_sbm_bp_cfg2 = dsaf_read_dev(dsaf_dev, reg); - dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_SET_BUF_NUM_M, + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_SET_BUF_NUM_M, DSAF_SBM_CFG2_SET_BUF_NUM_S, 104); - dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_RESET_BUF_NUM_M, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_RESET_BUF_NUM_M, DSAF_SBM_CFG2_RESET_BUF_NUM_S, 128); - dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg2); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); reg = DSAF_SBM_BP_CFG_3_REG_0_REG + 0x80 * i; - o_sbm_bp_cfg3 = dsaf_read_dev(dsaf_dev, reg); - dsaf_set_field(o_sbm_bp_cfg3, + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG3_SET_BUF_NUM_NO_PFC_M, DSAF_SBM_CFG3_SET_BUF_NUM_NO_PFC_S, 110); - dsaf_set_field(o_sbm_bp_cfg3, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M, DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S, 160); - dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg3); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); /* for no enable pfc mode */ reg = DSAF_SBM_BP_CFG_4_REG_0_REG + 0x80 * i; - o_sbm_bp_cfg3 = dsaf_read_dev(dsaf_dev, reg); - dsaf_set_field(o_sbm_bp_cfg3, + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG3_SET_BUF_NUM_NO_PFC_M, DSAF_SBM_CFG3_SET_BUF_NUM_NO_PFC_S, 128); - dsaf_set_field(o_sbm_bp_cfg3, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M, DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S, 192); - dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg3); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); } /* PPE */ for (i = 0; i < DSAF_COMM_CHN; i++) { reg = DSAF_SBM_BP_CFG_2_PPE_REG_0_REG + 0x80 * i; - o_sbm_bp_cfg2 = dsaf_read_dev(dsaf_dev, reg); - dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_SET_BUF_NUM_M, + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_SET_BUF_NUM_M, DSAF_SBM_CFG2_SET_BUF_NUM_S, 10); - dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_RESET_BUF_NUM_M, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_RESET_BUF_NUM_M, DSAF_SBM_CFG2_RESET_BUF_NUM_S, 12); - dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg2); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); } /* RoCEE */ for (i = 0; i < DSAF_COMM_CHN; i++) { reg = DSAF_SBM_BP_CFG_2_ROCEE_REG_0_REG + 0x80 * i; - o_sbm_bp_cfg2 = dsaf_read_dev(dsaf_dev, reg); - dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_SET_BUF_NUM_M, + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_SET_BUF_NUM_M, DSAF_SBM_CFG2_SET_BUF_NUM_S, 2); - dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_RESET_BUF_NUM_M, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_RESET_BUF_NUM_M, DSAF_SBM_CFG2_RESET_BUF_NUM_S, 4); - dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg2); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); + } +} + +static void hns_dsafv2_sbm_bp_wl_cfg(struct dsaf_device *dsaf_dev) +{ + u32 o_sbm_bp_cfg; + u32 reg; + u32 i; + + /* XGE */ + for (i = 0; i < DSAFV2_SBM_XGE_CHN; i++) { + reg = DSAF_SBM_BP_CFG_0_XGE_REG_0_REG + 0x80 * i; + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG0_COM_MAX_BUF_NUM_M, + DSAFV2_SBM_CFG0_COM_MAX_BUF_NUM_S, 256); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG0_VC0_MAX_BUF_NUM_M, + DSAFV2_SBM_CFG0_VC0_MAX_BUF_NUM_S, 0); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG0_VC1_MAX_BUF_NUM_M, + DSAFV2_SBM_CFG0_VC1_MAX_BUF_NUM_S, 0); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); + + reg = DSAF_SBM_BP_CFG_1_REG_0_REG + 0x80 * i; + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG1_TC4_MAX_BUF_NUM_M, + DSAFV2_SBM_CFG1_TC4_MAX_BUF_NUM_S, 0); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG1_TC0_MAX_BUF_NUM_M, + DSAFV2_SBM_CFG1_TC0_MAX_BUF_NUM_S, 0); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); + + reg = DSAF_SBM_BP_CFG_2_XGE_REG_0_REG + 0x80 * i; + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_SET_BUF_NUM_M, + DSAFV2_SBM_CFG2_SET_BUF_NUM_S, 104); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_RESET_BUF_NUM_M, + DSAFV2_SBM_CFG2_RESET_BUF_NUM_S, 128); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); + + reg = DSAF_SBM_BP_CFG_3_REG_0_REG + 0x80 * i; + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, + DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_M, + DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_S, 110); + dsaf_set_field(o_sbm_bp_cfg, + DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M, + DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S, 160); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); + + /* for no enable pfc mode */ + reg = DSAF_SBM_BP_CFG_4_REG_0_REG + 0x80 * i; + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, + DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_M, + DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_S, 128); + dsaf_set_field(o_sbm_bp_cfg, + DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_M, + DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_S, 192); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); + } + + /* PPE */ + reg = DSAF_SBM_BP_CFG_2_PPE_REG_0_REG + 0x80 * i; + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_SET_BUF_NUM_M, + DSAFV2_SBM_CFG2_SET_BUF_NUM_S, 10); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_RESET_BUF_NUM_M, + DSAFV2_SBM_CFG2_RESET_BUF_NUM_S, 12); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); + /* RoCEE */ + for (i = 0; i < DASFV2_ROCEE_CRD_NUM; i++) { + reg = DSAFV2_SBM_BP_CFG_2_ROCEE_REG_0_REG + 0x80 * i; + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_SET_BUF_NUM_M, + DSAFV2_SBM_CFG2_SET_BUF_NUM_S, 2); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_RESET_BUF_NUM_M, + DSAFV2_SBM_CFG2_RESET_BUF_NUM_S, 4); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); } } @@ -985,11 +1066,38 @@ static void hns_dsaf_inode_init(struct dsaf_device *dsaf_dev) else tc_cfg = HNS_DSAF_I8TC_CFG; + if (AE_IS_VER1(dsaf_dev->dsaf_ver)) { + for (i = 0; i < DSAF_INODE_NUM; i++) { + reg = DSAF_INODE_IN_PORT_NUM_0_REG + 0x80 * i; + dsaf_set_dev_field(dsaf_dev, reg, + DSAF_INODE_IN_PORT_NUM_M, + DSAF_INODE_IN_PORT_NUM_S, + i % DSAF_XGE_NUM); + } + } else { + for (i = 0; i < DSAF_PORT_TYPE_NUM; i++) { + reg = DSAF_INODE_IN_PORT_NUM_0_REG + 0x80 * i; + dsaf_set_dev_field(dsaf_dev, reg, + DSAF_INODE_IN_PORT_NUM_M, + DSAF_INODE_IN_PORT_NUM_S, 0); + dsaf_set_dev_field(dsaf_dev, reg, + DSAFV2_INODE_IN_PORT1_NUM_M, + DSAFV2_INODE_IN_PORT1_NUM_S, 1); + dsaf_set_dev_field(dsaf_dev, reg, + DSAFV2_INODE_IN_PORT2_NUM_M, + DSAFV2_INODE_IN_PORT2_NUM_S, 2); + dsaf_set_dev_field(dsaf_dev, reg, + DSAFV2_INODE_IN_PORT3_NUM_M, + DSAFV2_INODE_IN_PORT3_NUM_S, 3); + dsaf_set_dev_field(dsaf_dev, reg, + DSAFV2_INODE_IN_PORT4_NUM_M, + DSAFV2_INODE_IN_PORT4_NUM_S, 4); + dsaf_set_dev_field(dsaf_dev, reg, + DSAFV2_INODE_IN_PORT5_NUM_M, + DSAFV2_INODE_IN_PORT5_NUM_S, 5); + } + } for (i = 0; i < DSAF_INODE_NUM; i++) { - reg = DSAF_INODE_IN_PORT_NUM_0_REG + 0x80 * i; - dsaf_set_dev_field(dsaf_dev, reg, DSAF_INODE_IN_PORT_NUM_M, - DSAF_INODE_IN_PORT_NUM_S, i % DSAF_XGE_NUM); - reg = DSAF_INODE_PRI_TC_CFG_0_REG + 0x80 * i; dsaf_write_dev(dsaf_dev, reg, tc_cfg); } @@ -1002,10 +1110,17 @@ static void hns_dsaf_inode_init(struct dsaf_device *dsaf_dev) static int hns_dsaf_sbm_init(struct dsaf_device *dsaf_dev) { u32 flag; + u32 finish_msk; u32 cnt = 0; int ret; - hns_dsaf_sbm_bp_wl_cfg(dsaf_dev); + if (AE_IS_VER1(dsaf_dev->dsaf_ver)) { + hns_dsaf_sbm_bp_wl_cfg(dsaf_dev); + finish_msk = DSAF_SRAM_INIT_OVER_M; + } else { + hns_dsafv2_sbm_bp_wl_cfg(dsaf_dev); + finish_msk = DSAFV2_SRAM_INIT_OVER_M; + } /* enable sbm chanel, disable sbm chanel shcut function*/ hns_dsaf_sbm_cfg(dsaf_dev); @@ -1024,11 +1139,13 @@ static int hns_dsaf_sbm_init(struct dsaf_device *dsaf_dev) do { usleep_range(200, 210);/*udelay(200);*/ - flag = dsaf_read_dev(dsaf_dev, DSAF_SRAM_INIT_OVER_0_REG); + flag = dsaf_get_dev_field(dsaf_dev, DSAF_SRAM_INIT_OVER_0_REG, + finish_msk, DSAF_SRAM_INIT_OVER_S); cnt++; - } while (flag != DSAF_SRAM_INIT_FINISH_FLAG && cnt < DSAF_CFG_READ_CNT); + } while (flag != (finish_msk >> DSAF_SRAM_INIT_OVER_S) && + cnt < DSAF_CFG_READ_CNT); - if (flag != DSAF_SRAM_INIT_FINISH_FLAG) { + if (flag != (finish_msk >> DSAF_SRAM_INIT_OVER_S)) { dev_err(dsaf_dev->dev, "hns_dsaf_sbm_init fail %s, flag=%d, cnt=%d\n", dsaf_dev->ae_dev.name, flag, cnt); @@ -2032,7 +2149,7 @@ void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data) DSAF_INODE_VC1_IN_PKT_NUM_0_REG + port * 4); /* dsaf inode registers */ - for (i = 0; i < DSAF_SBM_NUM / DSAF_COMM_CHN; i++) { + for (i = 0; i < HNS_DSAF_SBM_NUM(ddev) / DSAF_COMM_CHN; i++) { j = i * DSAF_COMM_CHN + port; p[232 + i] = dsaf_read_dev(ddev, DSAF_SBM_CFG_REG_0_REG + j * 0x80); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index b2b93484995c..31c312f9826e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -19,24 +19,20 @@ struct hns_mac_cb; #define DSAF_DRV_NAME "hns_dsaf" #define DSAF_MOD_VERSION "v1.0" -#define ENABLE (0x1) -#define DISABLE (0x0) +#define HNS_DSAF_DEBUG_NW_REG_OFFSET 0x100000 -#define HNS_DSAF_DEBUG_NW_REG_OFFSET (0x100000) +#define DSAF_BASE_INNER_PORT_NUM 127/* mac tbl qid*/ -#define DSAF_BASE_INNER_PORT_NUM (127) /* mac tbl qid*/ +#define DSAF_MAX_CHIP_NUM 2 /*max 2 chips */ -#define DSAF_MAX_CHIP_NUM (2) /*max 2 chips */ +#define DSAF_DEFAUTL_QUEUE_NUM_PER_PPE 22 -#define DSAF_DEFAUTL_QUEUE_NUM_PER_PPE (22) +#define HNS_DSAF_MAX_DESC_CNT 1024 +#define HNS_DSAF_MIN_DESC_CNT 16 -#define HNS_DSAF_MAX_DESC_CNT (1024) -#define HNS_DSAF_MIN_DESC_CNT (16) +#define DSAF_INVALID_ENTRY_IDX 0xffff -#define DSAF_INVALID_ENTRY_IDX (0xffff) - -#define DSAF_CFG_READ_CNT (30) -#define DSAF_SRAM_INIT_FINISH_FLAG (0xff) +#define DSAF_CFG_READ_CNT 30 #define MAC_NUM_OCTETS_PER_ADDR 6 @@ -274,10 +270,6 @@ struct dsaf_device { struct device *dev; struct hnae_ae_dev ae_dev; - void *priv; - - int virq[DSAF_IRQ_NUM]; - u8 __iomem *sc_base; u8 __iomem *sds_base; u8 __iomem *ppe_base; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 523e9b83d304..607c3be42241 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -149,7 +149,11 @@ void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, u32 val) if (port < DSAF_SERVICE_NW_NUM) { reg_val_1 = 0x1 << port; - reg_val_2 = 0x1041041 << port; + /* there is difference between V1 and V2 in register.*/ + if (AE_IS_VER1(dsaf_dev->dsaf_ver)) + reg_val_2 = 0x1041041 << port; + else + reg_val_2 = 0x2082082 << port; if (val == 0) { dsaf_write_reg(dsaf_dev->sc_base, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index 67f33f185a44..f302ef9073c6 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -19,6 +19,48 @@ #include "hns_dsaf_ppe.h" +void hns_ppe_set_tso_enable(struct hns_ppe_cb *ppe_cb, u32 value) +{ + dsaf_set_dev_bit(ppe_cb, PPEV2_CFG_TSO_EN_REG, 0, !!value); +} + +void hns_ppe_set_rss_key(struct hns_ppe_cb *ppe_cb, + const u32 rss_key[HNS_PPEV2_RSS_KEY_NUM]) +{ + int key_item = 0; + + for (key_item = 0; key_item < HNS_PPEV2_RSS_KEY_NUM; key_item++) + dsaf_write_dev(ppe_cb, PPEV2_RSS_KEY_REG + key_item * 0x4, + rss_key[key_item]); +} + +void hns_ppe_set_indir_table(struct hns_ppe_cb *ppe_cb, + const u32 rss_tab[HNS_PPEV2_RSS_IND_TBL_SIZE]) +{ + int i; + int reg_value; + + for (i = 0; i < (HNS_PPEV2_RSS_IND_TBL_SIZE / 4); i++) { + reg_value = dsaf_read_dev(ppe_cb, + PPEV2_INDRECTION_TBL_REG + i * 0x4); + + dsaf_set_field(reg_value, PPEV2_CFG_RSS_TBL_4N0_M, + PPEV2_CFG_RSS_TBL_4N0_S, + rss_tab[i * 4 + 0] & 0x1F); + dsaf_set_field(reg_value, PPEV2_CFG_RSS_TBL_4N1_M, + PPEV2_CFG_RSS_TBL_4N1_S, + rss_tab[i * 4 + 1] & 0x1F); + dsaf_set_field(reg_value, PPEV2_CFG_RSS_TBL_4N2_M, + PPEV2_CFG_RSS_TBL_4N2_S, + rss_tab[i * 4 + 2] & 0x1F); + dsaf_set_field(reg_value, PPEV2_CFG_RSS_TBL_4N3_M, + PPEV2_CFG_RSS_TBL_4N3_S, + rss_tab[i * 4 + 3] & 0x1F); + dsaf_write_dev( + ppe_cb, PPEV2_INDRECTION_TBL_REG + i * 0x4, reg_value); + } +} + static void __iomem *hns_ppe_common_get_ioaddr( struct ppe_common_cb *ppe_common) { @@ -134,6 +176,11 @@ static void hns_ppe_cnt_clr_ce(struct hns_ppe_cb *ppe_cb) PPE_CNT_CLR_CE_B, 1); } +static void hns_ppe_set_vlan_strip(struct hns_ppe_cb *ppe_cb, int en) +{ + dsaf_write_dev(ppe_cb, PPEV2_VLAN_STRIP_EN_REG, en); +} + /** * hns_ppe_checksum_hw - set ppe checksum caculate * @ppe_device: ppe device @@ -266,13 +313,17 @@ static void hns_ppe_exc_irq_en(struct hns_ppe_cb *ppe_cb, int en) /** * ppe_init_hw - init ppe - * @ppe_device: ppe device + * @ppe_cb: ppe device */ static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb) { struct ppe_common_cb *ppe_common_cb = ppe_cb->ppe_common_cb; u32 port = ppe_cb->port; struct dsaf_device *dsaf_dev = ppe_common_cb->dsaf_dev; + int i; + + /* get default RSS key */ + netdev_rss_key_fill(ppe_cb->rss_key, HNS_PPEV2_RSS_KEY_SIZE); hns_ppe_srst_by_port(dsaf_dev, port, 0); mdelay(10); @@ -285,8 +336,21 @@ static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb) hns_ppe_set_port_mode(ppe_cb, PPE_MODE_GE); else hns_ppe_set_port_mode(ppe_cb, PPE_MODE_XGE); + hns_ppe_checksum_hw(ppe_cb, 0xffffffff); hns_ppe_cnt_clr_ce(ppe_cb); + + if (!AE_IS_VER1(dsaf_dev->dsaf_ver)) { + hns_ppe_set_vlan_strip(ppe_cb, 0); + + /* set default RSS key in h/w */ + hns_ppe_set_rss_key(ppe_cb, ppe_cb->rss_key); + + /* Set default indrection table in h/w */ + for (i = 0; i < HNS_PPEV2_RSS_IND_TBL_SIZE; i++) + ppe_cb->rss_indir_table[i] = i; + hns_ppe_set_indir_table(ppe_cb, ppe_cb->rss_indir_table); + } } /** @@ -341,13 +405,13 @@ void hns_ppe_reset_common(struct dsaf_device *dsaf_dev, u8 ppe_common_index) if (ret) return; + for (i = 0; i < ppe_common->ppe_num; i++) + hns_ppe_init_hw(&ppe_common->ppe_cb[i]); + ret = hns_rcb_common_init_hw(dsaf_dev->rcb_common[ppe_common_index]); if (ret) return; - for (i = 0; i < ppe_common->ppe_num; i++) - hns_ppe_init_hw(&ppe_common->ppe_cb[i]); - hns_rcb_common_init_commit_hw(dsaf_dev->rcb_common[ppe_common_index]); } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h index 4894f9a0d39f..0f5cb6962acf 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h @@ -25,15 +25,24 @@ #define ETH_PPE_DUMP_NUM 576 #define ETH_PPE_STATIC_NUM 12 + +#define HNS_PPEV2_RSS_IND_TBL_SIZE 256 +#define HNS_PPEV2_RSS_KEY_SIZE 40 /* in bytes or 320 bits */ +#define HNS_PPEV2_RSS_KEY_NUM (HNS_PPEV2_RSS_KEY_SIZE / sizeof(u32)) + enum ppe_qid_mode { - PPE_QID_MODE0 = 0, /* fixed queue id mode */ - PPE_QID_MODE1, /* switch:128VM non switch:6Port/4VM/4TC */ - PPE_QID_MODE2, /* switch:32VM/4TC non switch:6Port/16VM */ - PPE_QID_MODE3, /* switch:4TC/8TAG non switch:2Port/64VM */ - PPE_QID_MODE4, /* switch:8VM/16TAG non switch:2Port/16VM/4TC */ - PPE_QID_MODE5, /* non switch:6Port/16TAG */ - PPE_QID_MODE6, /* non switch:6Port/2VM/8TC */ - PPE_QID_MODE7, /* non switch:2Port/8VM/8TC */ + PPE_QID_MODE0 = 0, /* fixed queue id mode */ + PPE_QID_MODE1, /* switch:128VM non switch:6Port/4VM/4TC */ + PPE_QID_MODE2, /* switch:32VM/4TC non switch:6Port/16VM */ + PPE_QID_MODE3, /* switch:4TC/8RSS non switch:2Port/64VM */ + PPE_QID_MODE4, /* switch:8VM/16RSS non switch:2Port/16VM/4TC */ + PPE_QID_MODE5, /* switch:16VM/8TC non switch:6Port/16RSS */ + PPE_QID_MODE6, /* switch:32VM/4RSS non switch:6Port/2VM/8TC */ + PPE_QID_MODE7, /* switch:32RSS non switch:2Port/8VM/8TC */ + PPE_QID_MODE8, /* switch:6VM/4TC/4RSS non switch:2Port/16VM/4RSS */ + PPE_QID_MODE9, /* non switch:2Port/32VM/2RSS */ + PPE_QID_MODE10, /* non switch:2Port/32RSS */ + PPE_QID_MODE11, /* non switch:2Port/4TC/16RSS */ }; enum ppe_port_mode { @@ -72,6 +81,8 @@ struct hns_ppe_cb { u8 port; /* port id in dsaf */ void __iomem *io_base; int virq; + u32 rss_indir_table[HNS_PPEV2_RSS_IND_TBL_SIZE]; /*shadow indir tab */ + u32 rss_key[HNS_PPEV2_RSS_KEY_NUM]; /* rss hash key */ }; struct ppe_common_cb { @@ -102,4 +113,9 @@ void hns_ppe_get_regs(struct hns_ppe_cb *ppe_cb, void *data); void hns_ppe_get_strings(struct hns_ppe_cb *ppe_cb, int stringset, u8 *data); void hns_ppe_get_stats(struct hns_ppe_cb *ppe_cb, u64 *data); +void hns_ppe_set_tso_enable(struct hns_ppe_cb *ppe_cb, u32 value); +void hns_ppe_set_rss_key(struct hns_ppe_cb *ppe_cb, + const u32 rss_key[HNS_PPEV2_RSS_KEY_NUM]); +void hns_ppe_set_indir_table(struct hns_ppe_cb *ppe_cb, + const u32 rss_tab[HNS_PPEV2_RSS_IND_TBL_SIZE]); #endif /* _HNS_DSAF_PPE_H */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c index 4db32c62f062..8c30cec8850a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -136,19 +136,37 @@ void hns_rcb_int_ctrl_hw(struct hnae_queue *q, u32 flag, u32 mask) void hns_rcb_int_clr_hw(struct hnae_queue *q, u32 flag) { - u32 clr = 1; - if (flag & RCB_INT_FLAG_TX) { - dsaf_write_dev(q, RCB_RING_INTSTS_TX_RING_REG, clr); - dsaf_write_dev(q, RCB_RING_INTSTS_TX_OVERTIME_REG, clr); + dsaf_write_dev(q, RCB_RING_INTSTS_TX_RING_REG, 1); + dsaf_write_dev(q, RCB_RING_INTSTS_TX_OVERTIME_REG, 1); } if (flag & RCB_INT_FLAG_RX) { - dsaf_write_dev(q, RCB_RING_INTSTS_RX_RING_REG, clr); - dsaf_write_dev(q, RCB_RING_INTSTS_RX_OVERTIME_REG, clr); + dsaf_write_dev(q, RCB_RING_INTSTS_RX_RING_REG, 1); + dsaf_write_dev(q, RCB_RING_INTSTS_RX_OVERTIME_REG, 1); } } +void hns_rcbv2_int_ctrl_hw(struct hnae_queue *q, u32 flag, u32 mask) +{ + u32 int_mask_en = !!mask; + + if (flag & RCB_INT_FLAG_TX) + dsaf_write_dev(q, RCB_RING_INTMSK_TXWL_REG, int_mask_en); + + if (flag & RCB_INT_FLAG_RX) + dsaf_write_dev(q, RCB_RING_INTMSK_RXWL_REG, int_mask_en); +} + +void hns_rcbv2_int_clr_hw(struct hnae_queue *q, u32 flag) +{ + if (flag & RCB_INT_FLAG_TX) + dsaf_write_dev(q, RCBV2_TX_RING_INT_STS_REG, 1); + + if (flag & RCB_INT_FLAG_RX) + dsaf_write_dev(q, RCBV2_RX_RING_INT_STS_REG, 1); +} + /** *hns_rcb_ring_enable_hw - enable ring *@ring: rcb ring @@ -193,6 +211,7 @@ static void hns_rcb_ring_init(struct ring_pair_cb *ring_pair, int ring_type) (u32)dma); dsaf_write_dev(q, RCB_RING_RX_RING_BASEADDR_H_REG, (u32)((dma >> 31) >> 1)); + dsaf_write_dev(q, RCB_RING_RX_RING_BD_LEN_REG, bd_size_type); dsaf_write_dev(q, RCB_RING_RX_RING_BD_NUM_REG, @@ -204,6 +223,7 @@ static void hns_rcb_ring_init(struct ring_pair_cb *ring_pair, int ring_type) (u32)dma); dsaf_write_dev(q, RCB_RING_TX_RING_BASEADDR_H_REG, (u32)((dma >> 31) >> 1)); + dsaf_write_dev(q, RCB_RING_TX_RING_BD_LEN_REG, bd_size_type); dsaf_write_dev(q, RCB_RING_TX_RING_BD_NUM_REG, @@ -232,9 +252,6 @@ void hns_rcb_init_hw(struct ring_pair_cb *ring) static void hns_rcb_set_port_desc_cnt(struct rcb_common_cb *rcb_common, u32 port_idx, u32 desc_cnt) { - if (port_idx >= HNS_RCB_SERVICE_NW_ENGINE_NUM) - port_idx = 0; - dsaf_write_dev(rcb_common, RCB_CFG_BD_NUM_REG + port_idx * 4, desc_cnt); } @@ -249,8 +266,6 @@ static int hns_rcb_set_port_coalesced_frames(struct rcb_common_cb *rcb_common, u32 port_idx, u32 coalesced_frames) { - if (port_idx >= HNS_RCB_SERVICE_NW_ENGINE_NUM) - port_idx = 0; if (coalesced_frames >= rcb_common->desc_num || coalesced_frames > HNS_RCB_MAX_COALESCED_FRAMES) return -EINVAL; @@ -354,6 +369,9 @@ int hns_rcb_common_init_hw(struct rcb_common_cb *rcb_common) dsaf_write_dev(rcb_common, RCB_COM_CFG_ENDIAN_REG, HNS_RCB_COMMON_ENDIAN); + dsaf_write_dev(rcb_common, RCB_COM_CFG_FNA_REG, 0x0); + dsaf_write_dev(rcb_common, RCB_COM_CFG_FA_REG, 0x1); + return 0; } @@ -387,19 +405,23 @@ static void hns_rcb_ring_get_cfg(struct hnae_queue *q, int ring_type) struct rcb_common_cb *rcb_common; struct ring_pair_cb *ring_pair_cb; u32 buf_size; - u16 desc_num; - int irq_idx; + u16 desc_num, mdnum_ppkt; + bool irq_idx, is_ver1; ring_pair_cb = container_of(q, struct ring_pair_cb, q); + is_ver1 = AE_IS_VER1(ring_pair_cb->rcb_common->dsaf_dev->dsaf_ver); if (ring_type == RX_RING) { ring = &q->rx_ring; ring->io_base = ring_pair_cb->q.io_base; irq_idx = HNS_RCB_IRQ_IDX_RX; + mdnum_ppkt = HNS_RCB_RING_MAX_BD_PER_PKT; } else { ring = &q->tx_ring; ring->io_base = (u8 __iomem *)ring_pair_cb->q.io_base + HNS_RCB_TX_REG_OFFSET; irq_idx = HNS_RCB_IRQ_IDX_TX; + mdnum_ppkt = is_ver1 ? HNS_RCB_RING_MAX_TXBD_PER_PKT : + HNS_RCBV2_RING_MAX_TXBD_PER_PKT; } rcb_common = ring_pair_cb->rcb_common; @@ -414,7 +436,7 @@ static void hns_rcb_ring_get_cfg(struct hnae_queue *q, int ring_type) ring->buf_size = buf_size; ring->desc_num = desc_num; - ring->max_desc_num_per_pkt = HNS_RCB_RING_MAX_BD_PER_PKT; + ring->max_desc_num_per_pkt = mdnum_ppkt; ring->max_raw_data_sz_per_desc = HNS_RCB_MAX_PKT_SIZE; ring->max_pkt_size = HNS_RCB_MAX_PKT_SIZE; ring->next_to_use = 0; @@ -445,14 +467,22 @@ static int hns_rcb_get_port(struct rcb_common_cb *rcb_common, int ring_idx) return port; } +#define SERVICE_RING_IRQ_IDX(v1) \ + ((v1) ? HNS_SERVICE_RING_IRQ_IDX : HNSV2_SERVICE_RING_IRQ_IDX) +#define DEBUG_RING_IRQ_IDX(v1) \ + ((v1) ? HNS_DEBUG_RING_IRQ_IDX : HNSV2_DEBUG_RING_IRQ_IDX) +#define DEBUG_RING_IRQ_OFFSET(v1) \ + ((v1) ? HNS_DEBUG_RING_IRQ_OFFSET : HNSV2_DEBUG_RING_IRQ_OFFSET) static int hns_rcb_get_base_irq_idx(struct rcb_common_cb *rcb_common) { int comm_index = rcb_common->comm_index; + bool is_ver1 = AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver); if (comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) - return HNS_SERVICE_RING_IRQ_IDX; + return SERVICE_RING_IRQ_IDX(is_ver1); else - return HNS_DEBUG_RING_IRQ_IDX + (comm_index - 1) * 2; + return DEBUG_RING_IRQ_IDX(is_ver1) + + (comm_index - 1) * DEBUG_RING_IRQ_OFFSET(is_ver1); } #define RCB_COMM_BASE_TO_RING_BASE(base, ringid)\ @@ -468,6 +498,10 @@ void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common) u32 ring_num = rcb_common->ring_num; int base_irq_idx = hns_rcb_get_base_irq_idx(rcb_common); struct device_node *np = rcb_common->dsaf_dev->dev->of_node; + struct platform_device *pdev = + container_of(rcb_common->dsaf_dev->dev, + struct platform_device, dev); + bool is_ver1 = AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver); for (i = 0; i < ring_num; i++) { ring_pair_cb = &rcb_common->ring_pair_cb[i]; @@ -477,10 +511,12 @@ void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common) ring_pair_cb->q.io_base = RCB_COMM_BASE_TO_RING_BASE(rcb_common->io_base, i); ring_pair_cb->port_id_in_dsa = hns_rcb_get_port(rcb_common, i); - ring_pair_cb->virq[HNS_RCB_IRQ_IDX_TX] - = irq_of_parse_and_map(np, base_irq_idx + i * 2); - ring_pair_cb->virq[HNS_RCB_IRQ_IDX_RX] - = irq_of_parse_and_map(np, base_irq_idx + i * 2 + 1); + ring_pair_cb->virq[HNS_RCB_IRQ_IDX_TX] = + is_ver1 ? irq_of_parse_and_map(np, base_irq_idx + i * 2) : + platform_get_irq(pdev, base_irq_idx + i * 3 + 1); + ring_pair_cb->virq[HNS_RCB_IRQ_IDX_RX] = + is_ver1 ? irq_of_parse_and_map(np, base_irq_idx + i * 2 + 1) : + platform_get_irq(pdev, base_irq_idx + i * 3); ring_pair_cb->q.phy_base = RCB_COMM_BASE_TO_RING_BASE(rcb_common->phy_base, i); hns_rcb_ring_pair_get_cfg(ring_pair_cb); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h index 3a2afe2dd8bb..29041b18741a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h @@ -26,6 +26,8 @@ struct rcb_common_cb; #define HNS_RCB_SERVICE_NW_ENGINE_NUM DSAF_COMM_CHN #define HNS_RCB_DEBUG_NW_ENGINE_NUM 1 #define HNS_RCB_RING_MAX_BD_PER_PKT 3 +#define HNS_RCB_RING_MAX_TXBD_PER_PKT 3 +#define HNS_RCBV2_RING_MAX_TXBD_PER_PKT 8 #define HNS_RCB_MAX_PKT_SIZE MAC_MAX_MTU #define HNS_RCB_RING_MAX_PENDING_BD 1024 @@ -106,13 +108,17 @@ void hns_rcb_common_free_cfg(struct dsaf_device *dsaf_dev, u32 comm_index); int hns_rcb_common_init_hw(struct rcb_common_cb *rcb_common); void hns_rcb_start(struct hnae_queue *q, u32 val); void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common); -void hns_rcb_common_init_commit_hw(struct rcb_common_cb *rcb_common); void hns_rcb_get_queue_mode(enum dsaf_mode dsaf_mode, int comm_index, u16 *max_vfn, u16 *max_q_per_vf); +void hns_rcb_common_init_commit_hw(struct rcb_common_cb *rcb_common); + void hns_rcb_ring_enable_hw(struct hnae_queue *q, u32 val); void hns_rcb_int_clr_hw(struct hnae_queue *q, u32 flag); void hns_rcb_int_ctrl_hw(struct hnae_queue *q, u32 flag, u32 enable); +void hns_rcbv2_int_ctrl_hw(struct hnae_queue *q, u32 flag, u32 mask); +void hns_rcbv2_int_clr_hw(struct hnae_queue *q, u32 flag); + void hns_rcb_init_hw(struct ring_pair_cb *ring); void hns_rcb_reset_ring_hw(struct hnae_queue *q); void hns_rcb_wait_fbd_clean(struct hnae_queue **qs, int q_num, u32 flag); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index b475e1bf2e6f..c4d7c26952c4 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -10,21 +10,12 @@ #ifndef _DSAF_REG_H_ #define _DSAF_REG_H_ -#define HNS_GE_FIFO_ERR_INTNUM 8 -#define HNS_XGE_ERR_INTNUM 6 -#define HNS_RCB_COMM_ERR_INTNUM 12 -#define HNS_PPE_TNL_ERR_INTNUM 8 -#define HNS_DSAF_EVENT_INTNUM 21 -#define HNS_DEBUG_RING_INTNUM 4 -#define HNS_SERVICE_RING_INTNUM 256 - -#define HNS_DEBUG_RING_IRQ_IDX (HNS_GE_FIFO_ERR_INTNUM + HNS_XGE_ERR_INTNUM +\ - HNS_RCB_COMM_ERR_INTNUM + HNS_PPE_TNL_ERR_INTNUM +\ - HNS_DSAF_EVENT_INTNUM) -#define HNS_SERVICE_RING_IRQ_IDX (HNS_DEBUG_RING_IRQ_IDX +\ - HNS_DEBUG_RING_INTNUM) - -#define DSAF_IRQ_NUM 18 +#define HNS_DEBUG_RING_IRQ_IDX 55 +#define HNS_SERVICE_RING_IRQ_IDX 59 +#define HNS_DEBUG_RING_IRQ_OFFSET 2 +#define HNSV2_DEBUG_RING_IRQ_IDX 409 +#define HNSV2_SERVICE_RING_IRQ_IDX 25 +#define HNSV2_DEBUG_RING_IRQ_OFFSET 9 #define DSAF_MAX_PORT_NUM_PER_CHIP 8 #define DSAF_SERVICE_PORT_NUM_PER_DSAF 6 @@ -39,9 +30,15 @@ #define DSAF_GE_NUM ((DSAF_SERVICE_NW_NUM) + (DSAF_DEBUG_NW_NUM)) #define DSAF_PORT_NUM ((DSAF_SERVICE_NW_NUM) + (DSAF_DEBUG_NW_NUM)) #define DSAF_XGE_NUM DSAF_SERVICE_NW_NUM +#define DSAF_PORT_TYPE_NUM 3 #define DSAF_NODE_NUM 18 #define DSAF_XOD_BIG_NUM DSAF_NODE_NUM #define DSAF_SBM_NUM DSAF_NODE_NUM +#define DSAFV2_SBM_NUM 8 +#define DSAFV2_SBM_XGE_CHN 6 +#define DSAFV2_SBM_PPE_CHN 1 +#define DASFV2_ROCEE_CRD_NUM 8 + #define DSAF_VOQ_NUM DSAF_NODE_NUM #define DSAF_INODE_NUM DSAF_NODE_NUM #define DSAF_XOD_NUM 8 @@ -52,56 +49,56 @@ #define DSAF_TCAM_SUM 512 #define DSAF_LINE_SUM (2048 * 14) -#define DSAF_SUB_SC_NT_SRAM_CLK_SEL_REG 0x100 -#define DSAF_SUB_SC_HILINK3_CRG_CTRL0_REG 0x180 -#define DSAF_SUB_SC_HILINK3_CRG_CTRL1_REG 0x184 -#define DSAF_SUB_SC_HILINK3_CRG_CTRL2_REG 0x188 -#define DSAF_SUB_SC_HILINK3_CRG_CTRL3_REG 0x18C -#define DSAF_SUB_SC_HILINK4_CRG_CTRL0_REG 0x190 -#define DSAF_SUB_SC_HILINK4_CRG_CTRL1_REG 0x194 -#define DSAF_SUB_SC_DSAF_CLK_EN_REG 0x300 -#define DSAF_SUB_SC_DSAF_CLK_DIS_REG 0x304 -#define DSAF_SUB_SC_NT_CLK_EN_REG 0x308 -#define DSAF_SUB_SC_NT_CLK_DIS_REG 0x30C -#define DSAF_SUB_SC_XGE_CLK_EN_REG 0x310 -#define DSAF_SUB_SC_XGE_CLK_DIS_REG 0x314 -#define DSAF_SUB_SC_GE_CLK_EN_REG 0x318 -#define DSAF_SUB_SC_GE_CLK_DIS_REG 0x31C -#define DSAF_SUB_SC_PPE_CLK_EN_REG 0x320 -#define DSAF_SUB_SC_PPE_CLK_DIS_REG 0x324 -#define DSAF_SUB_SC_RCB_PPE_COM_CLK_EN_REG 0x350 -#define DSAF_SUB_SC_RCB_PPE_COM_CLK_DIS_REG 0x354 -#define DSAF_SUB_SC_XBAR_RESET_REQ_REG 0xA00 -#define DSAF_SUB_SC_XBAR_RESET_DREQ_REG 0xA04 -#define DSAF_SUB_SC_NT_RESET_REQ_REG 0xA08 -#define DSAF_SUB_SC_NT_RESET_DREQ_REG 0xA0C -#define DSAF_SUB_SC_XGE_RESET_REQ_REG 0xA10 -#define DSAF_SUB_SC_XGE_RESET_DREQ_REG 0xA14 -#define DSAF_SUB_SC_GE_RESET_REQ0_REG 0xA18 -#define DSAF_SUB_SC_GE_RESET_DREQ0_REG 0xA1C -#define DSAF_SUB_SC_GE_RESET_REQ1_REG 0xA20 -#define DSAF_SUB_SC_GE_RESET_DREQ1_REG 0xA24 -#define DSAF_SUB_SC_PPE_RESET_REQ_REG 0xA48 -#define DSAF_SUB_SC_PPE_RESET_DREQ_REG 0xA4C -#define DSAF_SUB_SC_RCB_PPE_COM_RESET_REQ_REG 0xA88 -#define DSAF_SUB_SC_RCB_PPE_COM_RESET_DREQ_REG 0xA8C -#define DSAF_SUB_SC_LIGHT_MODULE_DETECT_EN_REG 0x2060 -#define DSAF_SUB_SC_TCAM_MBIST_EN_REG 0x2300 -#define DSAF_SUB_SC_DSAF_CLK_ST_REG 0x5300 -#define DSAF_SUB_SC_NT_CLK_ST_REG 0x5304 -#define DSAF_SUB_SC_XGE_CLK_ST_REG 0x5308 -#define DSAF_SUB_SC_GE_CLK_ST_REG 0x530C -#define DSAF_SUB_SC_PPE_CLK_ST_REG 0x5310 -#define DSAF_SUB_SC_ROCEE_CLK_ST_REG 0x5314 -#define DSAF_SUB_SC_CPU_CLK_ST_REG 0x5318 -#define DSAF_SUB_SC_RCB_PPE_COM_CLK_ST_REG 0x5328 -#define DSAF_SUB_SC_XBAR_RESET_ST_REG 0x5A00 -#define DSAF_SUB_SC_NT_RESET_ST_REG 0x5A04 -#define DSAF_SUB_SC_XGE_RESET_ST_REG 0x5A08 -#define DSAF_SUB_SC_GE_RESET_ST0_REG 0x5A0C -#define DSAF_SUB_SC_GE_RESET_ST1_REG 0x5A10 -#define DSAF_SUB_SC_PPE_RESET_ST_REG 0x5A24 -#define DSAF_SUB_SC_RCB_PPE_COM_RESET_ST_REG 0x5A44 +#define DSAF_SUB_SC_NT_SRAM_CLK_SEL_REG 0x100 +#define DSAF_SUB_SC_HILINK3_CRG_CTRL0_REG 0x180 +#define DSAF_SUB_SC_HILINK3_CRG_CTRL1_REG 0x184 +#define DSAF_SUB_SC_HILINK3_CRG_CTRL2_REG 0x188 +#define DSAF_SUB_SC_HILINK3_CRG_CTRL3_REG 0x18C +#define DSAF_SUB_SC_HILINK4_CRG_CTRL0_REG 0x190 +#define DSAF_SUB_SC_HILINK4_CRG_CTRL1_REG 0x194 +#define DSAF_SUB_SC_DSAF_CLK_EN_REG 0x300 +#define DSAF_SUB_SC_DSAF_CLK_DIS_REG 0x304 +#define DSAF_SUB_SC_NT_CLK_EN_REG 0x308 +#define DSAF_SUB_SC_NT_CLK_DIS_REG 0x30C +#define DSAF_SUB_SC_XGE_CLK_EN_REG 0x310 +#define DSAF_SUB_SC_XGE_CLK_DIS_REG 0x314 +#define DSAF_SUB_SC_GE_CLK_EN_REG 0x318 +#define DSAF_SUB_SC_GE_CLK_DIS_REG 0x31C +#define DSAF_SUB_SC_PPE_CLK_EN_REG 0x320 +#define DSAF_SUB_SC_PPE_CLK_DIS_REG 0x324 +#define DSAF_SUB_SC_RCB_PPE_COM_CLK_EN_REG 0x350 +#define DSAF_SUB_SC_RCB_PPE_COM_CLK_DIS_REG 0x354 +#define DSAF_SUB_SC_XBAR_RESET_REQ_REG 0xA00 +#define DSAF_SUB_SC_XBAR_RESET_DREQ_REG 0xA04 +#define DSAF_SUB_SC_NT_RESET_REQ_REG 0xA08 +#define DSAF_SUB_SC_NT_RESET_DREQ_REG 0xA0C +#define DSAF_SUB_SC_XGE_RESET_REQ_REG 0xA10 +#define DSAF_SUB_SC_XGE_RESET_DREQ_REG 0xA14 +#define DSAF_SUB_SC_GE_RESET_REQ0_REG 0xA18 +#define DSAF_SUB_SC_GE_RESET_DREQ0_REG 0xA1C +#define DSAF_SUB_SC_GE_RESET_REQ1_REG 0xA20 +#define DSAF_SUB_SC_GE_RESET_DREQ1_REG 0xA24 +#define DSAF_SUB_SC_PPE_RESET_REQ_REG 0xA48 +#define DSAF_SUB_SC_PPE_RESET_DREQ_REG 0xA4C +#define DSAF_SUB_SC_RCB_PPE_COM_RESET_REQ_REG 0xA88 +#define DSAF_SUB_SC_RCB_PPE_COM_RESET_DREQ_REG 0xA8C +#define DSAF_SUB_SC_LIGHT_MODULE_DETECT_EN_REG 0x2060 +#define DSAF_SUB_SC_TCAM_MBIST_EN_REG 0x2300 +#define DSAF_SUB_SC_DSAF_CLK_ST_REG 0x5300 +#define DSAF_SUB_SC_NT_CLK_ST_REG 0x5304 +#define DSAF_SUB_SC_XGE_CLK_ST_REG 0x5308 +#define DSAF_SUB_SC_GE_CLK_ST_REG 0x530C +#define DSAF_SUB_SC_PPE_CLK_ST_REG 0x5310 +#define DSAF_SUB_SC_ROCEE_CLK_ST_REG 0x5314 +#define DSAF_SUB_SC_CPU_CLK_ST_REG 0x5318 +#define DSAF_SUB_SC_RCB_PPE_COM_CLK_ST_REG 0x5328 +#define DSAF_SUB_SC_XBAR_RESET_ST_REG 0x5A00 +#define DSAF_SUB_SC_NT_RESET_ST_REG 0x5A04 +#define DSAF_SUB_SC_XGE_RESET_ST_REG 0x5A08 +#define DSAF_SUB_SC_GE_RESET_ST0_REG 0x5A0C +#define DSAF_SUB_SC_GE_RESET_ST1_REG 0x5A10 +#define DSAF_SUB_SC_PPE_RESET_ST_REG 0x5A24 +#define DSAF_SUB_SC_RCB_PPE_COM_RESET_ST_REG 0x5A44 /*serdes offset**/ #define HNS_MAC_HILINK3_REG DSAF_SUB_SC_HILINK3_CRG_CTRL0_REG @@ -178,6 +175,7 @@ #define DSAF_SBM_BP_CFG_2_XGE_REG_0_REG 0x200C #define DSAF_SBM_BP_CFG_2_PPE_REG_0_REG 0x230C #define DSAF_SBM_BP_CFG_2_ROCEE_REG_0_REG 0x260C +#define DSAFV2_SBM_BP_CFG_2_ROCEE_REG_0_REG 0x238C #define DSAF_SBM_FREE_CNT_0_0_REG 0x2010 #define DSAF_SBM_FREE_CNT_1_0_REG 0x2014 #define DSAF_SBM_BP_CNT_0_0_REG 0x2018 @@ -319,6 +317,8 @@ #define PPE_CFG_TAG_GEN_REG 0x90 #define PPE_CFG_PARSE_TAG_REG 0x94 #define PPE_CFG_PRO_CHECK_EN_REG 0x98 +#define PPEV2_CFG_TSO_EN_REG 0xA0 +#define PPEV2_VLAN_STRIP_EN_REG 0xAC #define PPE_INTEN_REG 0x100 #define PPE_RINT_REG 0x104 #define PPE_INTSTS_REG 0x108 @@ -351,6 +351,8 @@ #define PPE_ECO0_REG 0x32C #define PPE_ECO1_REG 0x330 #define PPE_ECO2_REG 0x334 +#define PPEV2_INDRECTION_TBL_REG 0x800 +#define PPEV2_RSS_KEY_REG 0x900 #define RCB_COM_CFG_ENDIAN_REG 0x0 #define RCB_COM_CFG_SYS_FSH_REG 0xC @@ -431,8 +433,10 @@ #define RCB_RING_INTMSK_RXWL_REG 0x000A0 #define RCB_RING_INTSTS_RX_RING_REG 0x000A4 +#define RCBV2_RX_RING_INT_STS_REG 0x000A8 #define RCB_RING_INTMSK_TXWL_REG 0x000AC #define RCB_RING_INTSTS_TX_RING_REG 0x000B0 +#define RCBV2_TX_RING_INT_STS_REG 0x000B4 #define RCB_RING_INTMSK_RX_OVERTIME_REG 0x000B8 #define RCB_RING_INTSTS_RX_OVERTIME_REG 0x000BC #define RCB_RING_INTMSK_TX_OVERTIME_REG 0x000C4 @@ -678,6 +682,10 @@ #define XGMAC_TRX_CORE_SRST_M 0x2080 +#define DSAF_SRAM_INIT_OVER_M 0xff +#define DSAFV2_SRAM_INIT_OVER_M 0x3ff +#define DSAF_SRAM_INIT_OVER_S 0 + #define DSAF_CFG_EN_S 0 #define DSAF_CFG_TC_MODE_S 1 #define DSAF_CFG_CRC_EN_S 2 @@ -685,6 +693,7 @@ #define DSAF_CFG_MIX_MODE_S 4 #define DSAF_CFG_STP_MODE_S 5 #define DSAF_CFG_LOCA_ADDR_EN_S 6 +#define DSAFV2_CFG_VLAN_TAG_MODE_S 17 #define DSAF_CNT_CLR_CE_S 0 #define DSAF_SNAP_EN_S 1 @@ -707,6 +716,16 @@ #define DSAF_INODE_IN_PORT_NUM_M 7 #define DSAF_INODE_IN_PORT_NUM_S 0 +#define DSAFV2_INODE_IN_PORT1_NUM_M (7ULL << 3) +#define DSAFV2_INODE_IN_PORT1_NUM_S 3 +#define DSAFV2_INODE_IN_PORT2_NUM_M (7ULL << 6) +#define DSAFV2_INODE_IN_PORT2_NUM_S 6 +#define DSAFV2_INODE_IN_PORT3_NUM_M (7ULL << 9) +#define DSAFV2_INODE_IN_PORT3_NUM_S 9 +#define DSAFV2_INODE_IN_PORT4_NUM_M (7ULL << 12) +#define DSAFV2_INODE_IN_PORT4_NUM_S 12 +#define DSAFV2_INODE_IN_PORT5_NUM_M (7ULL << 15) +#define DSAFV2_INODE_IN_PORT5_NUM_S 15 #define HNS_DSAF_I4TC_CFG 0x18688688 #define HNS_DSAF_I8TC_CFG 0x18FAC688 @@ -738,6 +757,33 @@ #define DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S 10 #define DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M (((1ULL << 10) - 1) << 10) +#define DSAFV2_SBM_CFG0_VC1_MAX_BUF_NUM_S 0 +#define DSAFV2_SBM_CFG0_VC1_MAX_BUF_NUM_M (((1ULL << 9) - 1) << 0) +#define DSAFV2_SBM_CFG0_VC0_MAX_BUF_NUM_S 9 +#define DSAFV2_SBM_CFG0_VC0_MAX_BUF_NUM_M (((1ULL << 9) - 1) << 9) +#define DSAFV2_SBM_CFG0_COM_MAX_BUF_NUM_S 18 +#define DSAFV2_SBM_CFG0_COM_MAX_BUF_NUM_M (((1ULL << 10) - 1) << 18) + +#define DSAFV2_SBM_CFG1_TC4_MAX_BUF_NUM_S 0 +#define DSAFV2_SBM_CFG1_TC4_MAX_BUF_NUM_M (((1ULL << 9) - 1) << 0) +#define DSAFV2_SBM_CFG1_TC0_MAX_BUF_NUM_S 9 +#define DSAFV2_SBM_CFG1_TC0_MAX_BUF_NUM_M (((1ULL << 9) - 1) << 9) + +#define DSAFV2_SBM_CFG2_SET_BUF_NUM_S 0 +#define DSAFV2_SBM_CFG2_SET_BUF_NUM_M (((1ULL << 9) - 1) << 0) +#define DSAFV2_SBM_CFG2_RESET_BUF_NUM_S 9 +#define DSAFV2_SBM_CFG2_RESET_BUF_NUM_M (((1ULL << 9) - 1) << 9) + +#define DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_S 0 +#define DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 0) +#define DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S 9 +#define DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 9) + +#define DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_S 0 +#define DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 0) +#define DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_S 9 +#define DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 9) + #define DSAF_TBL_TCAM_ADDR_S 0 #define DSAF_TBL_TCAM_ADDR_M ((1ULL << 9) - 1) @@ -797,6 +843,18 @@ #define PPE_CFG_QID_MODE_CF_QID_MODE_S 8 #define PPE_CFG_QID_MODE_CF_QID_MODE_M (0x7 << PPE_CFG_QID_MODE_CF_QID_MODE_S) +#define PPEV2_CFG_RSS_TBL_4N0_S 0 +#define PPEV2_CFG_RSS_TBL_4N0_M (((1UL << 5) - 1) << PPEV2_CFG_RSS_TBL_4N0_S) + +#define PPEV2_CFG_RSS_TBL_4N1_S 8 +#define PPEV2_CFG_RSS_TBL_4N1_M (((1UL << 5) - 1) << PPEV2_CFG_RSS_TBL_4N1_S) + +#define PPEV2_CFG_RSS_TBL_4N2_S 16 +#define PPEV2_CFG_RSS_TBL_4N2_M (((1UL << 5) - 1) << PPEV2_CFG_RSS_TBL_4N2_S) + +#define PPEV2_CFG_RSS_TBL_4N3_S 24 +#define PPEV2_CFG_RSS_TBL_4N3_M (((1UL << 5) - 1) << PPEV2_CFG_RSS_TBL_4N3_S) + #define PPE_CNT_CLR_CE_B 0 #define PPE_CNT_CLR_SNAP_EN_B 1 diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 08cef0dfb5db..5a81dafd725e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -33,10 +33,105 @@ #define RCB_IRQ_NOT_INITED 0 #define RCB_IRQ_INITED 1 +#define HNS_BUFFER_SIZE_2048 2048 + +#define BD_MAX_SEND_SIZE 8191 +#define SKB_TMP_LEN(SKB) \ + (((SKB)->transport_header - (SKB)->mac_header) + tcp_hdrlen(SKB)) + +static void fill_v2_desc(struct hnae_ring *ring, void *priv, + int size, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu) +{ + struct hnae_desc *desc = &ring->desc[ring->next_to_use]; + struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; + struct iphdr *iphdr; + struct ipv6hdr *ipv6hdr; + struct sk_buff *skb; + int skb_tmp_len; + __be16 protocol; + u8 bn_pid = 0; + u8 rrcfv = 0; + u8 ip_offset = 0; + u8 tvsvsn = 0; + u16 mss = 0; + u8 l4_len = 0; + u16 paylen = 0; + + desc_cb->priv = priv; + desc_cb->length = size; + desc_cb->dma = dma; + desc_cb->type = type; + + desc->addr = cpu_to_le64(dma); + desc->tx.send_size = cpu_to_le16((u16)size); + + /*config bd buffer end */ + hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1); + hnae_set_field(bn_pid, HNSV2_TXD_BUFNUM_M, 0, buf_num - 1); + + if (type == DESC_TYPE_SKB) { + skb = (struct sk_buff *)priv; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + skb_reset_mac_len(skb); + protocol = skb->protocol; + ip_offset = ETH_HLEN; + + if (protocol == htons(ETH_P_8021Q)) { + ip_offset += VLAN_HLEN; + protocol = vlan_get_protocol(skb); + skb->protocol = protocol; + } + + if (skb->protocol == htons(ETH_P_IP)) { + iphdr = ip_hdr(skb); + hnae_set_bit(rrcfv, HNSV2_TXD_L3CS_B, 1); + hnae_set_bit(rrcfv, HNSV2_TXD_L4CS_B, 1); + + /* check for tcp/udp header */ + if (iphdr->protocol == IPPROTO_TCP) { + hnae_set_bit(tvsvsn, + HNSV2_TXD_TSE_B, 1); + skb_tmp_len = SKB_TMP_LEN(skb); + l4_len = tcp_hdrlen(skb); + mss = mtu - skb_tmp_len - ETH_FCS_LEN; + paylen = skb->len - skb_tmp_len; + } + } else if (skb->protocol == htons(ETH_P_IPV6)) { + hnae_set_bit(tvsvsn, HNSV2_TXD_IPV6_B, 1); + ipv6hdr = ipv6_hdr(skb); + hnae_set_bit(rrcfv, HNSV2_TXD_L4CS_B, 1); + + /* check for tcp/udp header */ + if (ipv6hdr->nexthdr == IPPROTO_TCP) { + hnae_set_bit(tvsvsn, + HNSV2_TXD_TSE_B, 1); + skb_tmp_len = SKB_TMP_LEN(skb); + l4_len = tcp_hdrlen(skb); + mss = mtu - skb_tmp_len - ETH_FCS_LEN; + paylen = skb->len - skb_tmp_len; + } + } + desc->tx.ip_offset = ip_offset; + desc->tx.tse_vlan_snap_v6_sctp_nth = tvsvsn; + desc->tx.mss = cpu_to_le16(mss); + desc->tx.l4_len = l4_len; + desc->tx.paylen = cpu_to_le16(paylen); + } + } + + hnae_set_bit(rrcfv, HNSV2_TXD_FE_B, frag_end); + + desc->tx.bn_pid = bn_pid; + desc->tx.ra_ri_cs_fe_vld = rrcfv; + + ring_ptr_move_fw(ring, next_to_use); +} static void fill_desc(struct hnae_ring *ring, void *priv, int size, dma_addr_t dma, int frag_end, - int buf_num, enum hns_desc_type type) + int buf_num, enum hns_desc_type type, int mtu) { struct hnae_desc *desc = &ring->desc[ring->next_to_use]; struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; @@ -100,47 +195,129 @@ static void unfill_desc(struct hnae_ring *ring) ring_ptr_move_bw(ring, next_to_use); } -int hns_nic_net_xmit_hw(struct net_device *ndev, - struct sk_buff *skb, - struct hns_nic_ring_data *ring_data) +static int hns_nic_maybe_stop_tx( + struct sk_buff **out_skb, int *bnum, struct hnae_ring *ring) { - struct hns_nic_priv *priv = netdev_priv(ndev); - struct device *dev = priv->dev; - struct hnae_ring *ring = ring_data->ring; - struct netdev_queue *dev_queue; - struct skb_frag_struct *frag; + struct sk_buff *skb = *out_skb; + struct sk_buff *new_skb = NULL; int buf_num; - dma_addr_t dma; - int size, next_to_use; - int i, j; - struct sk_buff *new_skb; - - assert(ring->max_desc_num_per_pkt <= ring->desc_num); /* no. of segments (plus a header) */ buf_num = skb_shinfo(skb)->nr_frags + 1; if (unlikely(buf_num > ring->max_desc_num_per_pkt)) { - if (ring_space(ring) < 1) { - ring->stats.tx_busy++; - goto out_net_tx_busy; - } + if (ring_space(ring) < 1) + return -EBUSY; new_skb = skb_copy(skb, GFP_ATOMIC); - if (!new_skb) { - ring->stats.sw_err_cnt++; - netdev_err(ndev, "no memory to xmit!\n"); - goto out_err_tx_ok; - } + if (!new_skb) + return -ENOMEM; dev_kfree_skb_any(skb); - skb = new_skb; + *out_skb = new_skb; buf_num = 1; - assert(skb_shinfo(skb)->nr_frags == 1); } else if (buf_num > ring_space(ring)) { + return -EBUSY; + } + + *bnum = buf_num; + return 0; +} + +static int hns_nic_maybe_stop_tso( + struct sk_buff **out_skb, int *bnum, struct hnae_ring *ring) +{ + int i; + int size; + int buf_num; + int frag_num; + struct sk_buff *skb = *out_skb; + struct sk_buff *new_skb = NULL; + struct skb_frag_struct *frag; + + size = skb_headlen(skb); + buf_num = (size + BD_MAX_SEND_SIZE - 1) / BD_MAX_SEND_SIZE; + + frag_num = skb_shinfo(skb)->nr_frags; + for (i = 0; i < frag_num; i++) { + frag = &skb_shinfo(skb)->frags[i]; + size = skb_frag_size(frag); + buf_num += (size + BD_MAX_SEND_SIZE - 1) / BD_MAX_SEND_SIZE; + } + + if (unlikely(buf_num > ring->max_desc_num_per_pkt)) { + buf_num = (skb->len + BD_MAX_SEND_SIZE - 1) / BD_MAX_SEND_SIZE; + if (ring_space(ring) < buf_num) + return -EBUSY; + /* manual split the send packet */ + new_skb = skb_copy(skb, GFP_ATOMIC); + if (!new_skb) + return -ENOMEM; + dev_kfree_skb_any(skb); + *out_skb = new_skb; + + } else if (ring_space(ring) < buf_num) { + return -EBUSY; + } + + *bnum = buf_num; + return 0; +} + +static void fill_tso_desc(struct hnae_ring *ring, void *priv, + int size, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu) +{ + int frag_buf_num; + int sizeoflast; + int k; + + frag_buf_num = (size + BD_MAX_SEND_SIZE - 1) / BD_MAX_SEND_SIZE; + sizeoflast = size % BD_MAX_SEND_SIZE; + sizeoflast = sizeoflast ? sizeoflast : BD_MAX_SEND_SIZE; + + /* when the frag size is bigger than hardware, split this frag */ + for (k = 0; k < frag_buf_num; k++) + fill_v2_desc(ring, priv, + (k == frag_buf_num - 1) ? + sizeoflast : BD_MAX_SEND_SIZE, + dma + BD_MAX_SEND_SIZE * k, + frag_end && (k == frag_buf_num - 1) ? 1 : 0, + buf_num, + (type == DESC_TYPE_SKB && !k) ? + DESC_TYPE_SKB : DESC_TYPE_PAGE, + mtu); +} + +int hns_nic_net_xmit_hw(struct net_device *ndev, + struct sk_buff *skb, + struct hns_nic_ring_data *ring_data) +{ + struct hns_nic_priv *priv = netdev_priv(ndev); + struct device *dev = priv->dev; + struct hnae_ring *ring = ring_data->ring; + struct netdev_queue *dev_queue; + struct skb_frag_struct *frag; + int buf_num; + int seg_num; + dma_addr_t dma; + int size, next_to_use; + int i; + + switch (priv->ops.maybe_stop_tx(&skb, &buf_num, ring)) { + case -EBUSY: ring->stats.tx_busy++; goto out_net_tx_busy; + case -ENOMEM: + ring->stats.sw_err_cnt++; + netdev_err(ndev, "no memory to xmit!\n"); + goto out_err_tx_ok; + default: + break; } + + /* no. of segments (plus a header) */ + seg_num = skb_shinfo(skb)->nr_frags + 1; next_to_use = ring->next_to_use; /* fill the first part */ @@ -151,11 +328,11 @@ int hns_nic_net_xmit_hw(struct net_device *ndev, ring->stats.sw_err_cnt++; goto out_err_tx_ok; } - fill_desc(ring, skb, size, dma, buf_num == 1 ? 1 : 0, buf_num, - DESC_TYPE_SKB); + priv->ops.fill_desc(ring, skb, size, dma, seg_num == 1 ? 1 : 0, + buf_num, DESC_TYPE_SKB, ndev->mtu); /* fill the fragments */ - for (i = 1; i < buf_num; i++) { + for (i = 1; i < seg_num; i++) { frag = &skb_shinfo(skb)->frags[i - 1]; size = skb_frag_size(frag); dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); @@ -164,8 +341,9 @@ int hns_nic_net_xmit_hw(struct net_device *ndev, ring->stats.sw_err_cnt++; goto out_map_frag_fail; } - fill_desc(ring, skb_frag_page(frag), size, dma, - buf_num - 1 == i ? 1 : 0, buf_num, DESC_TYPE_PAGE); + priv->ops.fill_desc(ring, skb_frag_page(frag), size, dma, + seg_num - 1 == i ? 1 : 0, buf_num, + DESC_TYPE_PAGE, ndev->mtu); } /*complete translate all packets*/ @@ -182,19 +360,20 @@ int hns_nic_net_xmit_hw(struct net_device *ndev, out_map_frag_fail: - for (j = i - 1; j > 0; j--) { + while (ring->next_to_use != next_to_use) { unfill_desc(ring); - next_to_use = ring->next_to_use; - dma_unmap_page(dev, ring->desc_cb[next_to_use].dma, - ring->desc_cb[next_to_use].length, - DMA_TO_DEVICE); + if (ring->next_to_use != next_to_use) + dma_unmap_page(dev, + ring->desc_cb[ring->next_to_use].dma, + ring->desc_cb[ring->next_to_use].length, + DMA_TO_DEVICE); + else + dma_unmap_single(dev, + ring->desc_cb[next_to_use].dma, + ring->desc_cb[next_to_use].length, + DMA_TO_DEVICE); } - unfill_desc(ring); - next_to_use = ring->next_to_use; - dma_unmap_single(dev, ring->desc_cb[next_to_use].dma, - ring->desc_cb[next_to_use].length, DMA_TO_DEVICE); - out_err_tx_ok: dev_kfree_skb_any(skb); @@ -313,13 +492,51 @@ static unsigned int hns_nic_get_headlen(unsigned char *data, u32 flag, return max_size; } -static void -hns_nic_reuse_page(struct hnae_desc_cb *desc_cb, int tsize, int last_offset) +static void hns_nic_reuse_page(struct sk_buff *skb, int i, + struct hnae_ring *ring, int pull_len, + struct hnae_desc_cb *desc_cb) { + struct hnae_desc *desc; + int truesize, size; + int last_offset; + + desc = &ring->desc[ring->next_to_clean]; + size = le16_to_cpu(desc->rx.size); + +#if (PAGE_SIZE < 8192) + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { + truesize = hnae_buf_size(ring); + } else { + truesize = ALIGN(size, L1_CACHE_BYTES); + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); + } + +#else + truesize = ALIGN(size, L1_CACHE_BYTES); + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); +#endif + + skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len, + size - pull_len, truesize - pull_len); + /* avoid re-using remote pages,flag default unreuse */ if (likely(page_to_nid(desc_cb->priv) == numa_node_id())) { +#if (PAGE_SIZE < 8192) + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { + /* if we are only owner of page we can reuse it */ + if (likely(page_count(desc_cb->priv) == 1)) { + /* flip page offset to other buffer */ + desc_cb->page_offset ^= truesize; + + desc_cb->reuse_flag = 1; + /* bump ref count on page before it is given*/ + get_page(desc_cb->priv); + } + return; + } +#endif /* move offset up to the next cache line */ - desc_cb->page_offset += tsize; + desc_cb->page_offset += truesize; if (desc_cb->page_offset <= last_offset) { desc_cb->reuse_flag = 1; @@ -329,35 +546,59 @@ hns_nic_reuse_page(struct hnae_desc_cb *desc_cb, int tsize, int last_offset) } } +static void get_v2rx_desc_bnum(u32 bnum_flag, int *out_bnum) +{ + *out_bnum = hnae_get_field(bnum_flag, + HNS_RXD_BUFNUM_M, HNS_RXD_BUFNUM_S) + 1; +} + +static void get_rx_desc_bnum(u32 bnum_flag, int *out_bnum) +{ + *out_bnum = hnae_get_field(bnum_flag, + HNS_RXD_BUFNUM_M, HNS_RXD_BUFNUM_S); +} + static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, struct sk_buff **out_skb, int *out_bnum) { struct hnae_ring *ring = ring_data->ring; struct net_device *ndev = ring_data->napi.dev; + struct hns_nic_priv *priv = netdev_priv(ndev); struct sk_buff *skb; struct hnae_desc *desc; struct hnae_desc_cb *desc_cb; unsigned char *va; - int bnum, length, size, i, truesize, last_offset; + int bnum, length, i; int pull_len; u32 bnum_flag; - last_offset = hnae_page_size(ring) - hnae_buf_size(ring); desc = &ring->desc[ring->next_to_clean]; desc_cb = &ring->desc_cb[ring->next_to_clean]; - length = le16_to_cpu(desc->rx.pkt_len); - bnum_flag = le32_to_cpu(desc->rx.ipoff_bnum_pid_flag); - bnum = hnae_get_field(bnum_flag, HNS_RXD_BUFNUM_M, HNS_RXD_BUFNUM_S); - *out_bnum = bnum; + + prefetch(desc); + va = (unsigned char *)desc_cb->buf + desc_cb->page_offset; - skb = *out_skb = napi_alloc_skb(&ring_data->napi, HNS_RX_HEAD_SIZE); + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif + + skb = *out_skb = napi_alloc_skb(&ring_data->napi, + HNS_RX_HEAD_SIZE); if (unlikely(!skb)) { netdev_err(ndev, "alloc rx skb fail\n"); ring->stats.sw_err_cnt++; return -ENOMEM; } + prefetchw(skb->data); + length = le16_to_cpu(desc->rx.pkt_len); + bnum_flag = le32_to_cpu(desc->rx.ipoff_bnum_pid_flag); + priv->ops.get_rxd_bnum(bnum_flag, &bnum); + *out_bnum = bnum; + if (length <= HNS_RX_HEAD_SIZE) { memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long))); @@ -380,13 +621,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); - size = le16_to_cpu(desc->rx.size); - truesize = ALIGN(size, L1_CACHE_BYTES); - skb_add_rx_frag(skb, 0, desc_cb->priv, - desc_cb->page_offset + pull_len, - size - pull_len, truesize - pull_len); - - hns_nic_reuse_page(desc_cb, truesize, last_offset); + hns_nic_reuse_page(skb, 0, ring, pull_len, desc_cb); ring_ptr_move_fw(ring, next_to_clean); if (unlikely(bnum >= (int)MAX_SKB_FRAGS)) { /* check err*/ @@ -396,13 +631,8 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, for (i = 1; i < bnum; i++) { desc = &ring->desc[ring->next_to_clean]; desc_cb = &ring->desc_cb[ring->next_to_clean]; - size = le16_to_cpu(desc->rx.size); - truesize = ALIGN(size, L1_CACHE_BYTES); - skb_add_rx_frag(skb, i, desc_cb->priv, - desc_cb->page_offset, - size, truesize); - hns_nic_reuse_page(desc_cb, truesize, last_offset); + hns_nic_reuse_page(skb, i, ring, 0, desc_cb); ring_ptr_move_fw(ring, next_to_clean); } } @@ -540,20 +770,20 @@ recv: } /* make all data has been write before submit */ - if (clean_count > 0) { - hns_nic_alloc_rx_buffers(ring_data, clean_count); - clean_count = 0; - } - if (recv_pkts < budget) { ex_num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM); - rmb(); /*complete read rx ring bd number*/ - if (ex_num > 0) { - num += ex_num; + + if (ex_num > clean_count) { + num += ex_num - clean_count; + rmb(); /*complete read rx ring bd number*/ goto recv; } } + /* make all data has been write before submit */ + if (clean_count > 0) + hns_nic_alloc_rx_buffers(ring_data, clean_count); + return recv_pkts; } @@ -642,14 +872,20 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data, bytes = 0; pkts = 0; - while (head != ring->next_to_clean) + while (head != ring->next_to_clean) { hns_nic_reclaim_one_desc(ring, &bytes, &pkts); + /* issue prefetch for next Tx descriptor */ + prefetch(&ring->desc_cb[ring->next_to_clean]); + } NETIF_TX_UNLOCK(ndev); dev_queue = netdev_get_tx_queue(ndev, ring_data->queue_index); netdev_tx_completed_queue(dev_queue, pkts, bytes); + if (unlikely(priv->link && !netif_carrier_ok(ndev))) + netif_carrier_on(ndev); + if (unlikely(pkts && netif_carrier_ok(ndev) && (ring_space(ring) >= ring->max_desc_num_per_pkt * 2))) { /* Make sure that anybody stopping the queue after this @@ -716,6 +952,7 @@ static int hns_nic_common_poll(struct napi_struct *napi, int budget) ring_data->ring, 0); ring_data->fini_process(ring_data); + return 0; } return clean_complete; @@ -848,15 +1085,58 @@ static void hns_nic_ring_close(struct net_device *netdev, int idx) napi_disable(&priv->ring_data[idx].napi); } -static int hns_nic_init_irq(struct hns_nic_priv *priv) +static void hns_set_irq_affinity(struct hns_nic_priv *priv) { struct hnae_handle *h = priv->ae_handle; struct hns_nic_ring_data *rd; int i; - int ret; int cpu; cpumask_t mask; + /*diffrent irq banlance for 16core and 32core*/ + if (h->q_num == num_possible_cpus()) { + for (i = 0; i < h->q_num * 2; i++) { + rd = &priv->ring_data[i]; + if (cpu_online(rd->queue_index)) { + cpumask_clear(&mask); + cpu = rd->queue_index; + cpumask_set_cpu(cpu, &mask); + (void)irq_set_affinity_hint(rd->ring->irq, + &mask); + } + } + } else { + for (i = 0; i < h->q_num; i++) { + rd = &priv->ring_data[i]; + if (cpu_online(rd->queue_index * 2)) { + cpumask_clear(&mask); + cpu = rd->queue_index * 2; + cpumask_set_cpu(cpu, &mask); + (void)irq_set_affinity_hint(rd->ring->irq, + &mask); + } + } + + for (i = h->q_num; i < h->q_num * 2; i++) { + rd = &priv->ring_data[i]; + if (cpu_online(rd->queue_index * 2 + 1)) { + cpumask_clear(&mask); + cpu = rd->queue_index * 2 + 1; + cpumask_set_cpu(cpu, &mask); + (void)irq_set_affinity_hint(rd->ring->irq, + &mask); + } + } + } +} + +static int hns_nic_init_irq(struct hns_nic_priv *priv) +{ + struct hnae_handle *h = priv->ae_handle; + struct hns_nic_ring_data *rd; + int i; + int ret; + for (i = 0; i < h->q_num * 2; i++) { rd = &priv->ring_data[i]; @@ -878,16 +1158,11 @@ static int hns_nic_init_irq(struct hns_nic_priv *priv) } disable_irq(rd->ring->irq); rd->ring->irq_init_flag = RCB_IRQ_INITED; - - /*set cpu affinity*/ - if (cpu_online(rd->queue_index)) { - cpumask_clear(&mask); - cpu = rd->queue_index; - cpumask_set_cpu(cpu, &mask); - irq_set_affinity_hint(rd->ring->irq, &mask); - } } + /*set cpu affinity*/ + hns_set_irq_affinity(priv); + return 0; } @@ -1136,6 +1411,51 @@ static int hns_nic_change_mtu(struct net_device *ndev, int new_mtu) return ret; } +static int hns_nic_set_features(struct net_device *netdev, + netdev_features_t features) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_handle *h = priv->ae_handle; + + switch (priv->enet_ver) { + case AE_VERSION_1: + if (features & (NETIF_F_TSO | NETIF_F_TSO6)) + netdev_info(netdev, "enet v1 do not support tso!\n"); + break; + default: + if (features & (NETIF_F_TSO | NETIF_F_TSO6)) { + priv->ops.fill_desc = fill_tso_desc; + priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tso; + /* The chip only support 7*4096 */ + netif_set_gso_max_size(netdev, 7 * 4096); + h->dev->ops->set_tso_stats(h, 1); + } else { + priv->ops.fill_desc = fill_v2_desc; + priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx; + h->dev->ops->set_tso_stats(h, 0); + } + break; + } + netdev->features = features; + return 0; +} + +static netdev_features_t hns_nic_fix_features( + struct net_device *netdev, netdev_features_t features) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + + switch (priv->enet_ver) { + case AE_VERSION_1: + features &= ~(NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_HW_VLAN_CTAG_FILTER); + break; + default: + break; + } + return features; +} + /** * nic_set_multicast_list - set mutl mac address * @netdev: net device @@ -1231,6 +1551,8 @@ static const struct net_device_ops hns_nic_netdev_ops = { .ndo_set_mac_address = hns_nic_net_set_mac_address, .ndo_change_mtu = hns_nic_change_mtu, .ndo_do_ioctl = hns_nic_do_ioctl, + .ndo_set_features = hns_nic_set_features, + .ndo_fix_features = hns_nic_fix_features, .ndo_get_stats64 = hns_nic_get_stats64, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = hns_nic_poll_controller, @@ -1315,22 +1637,26 @@ static void hns_nic_reset_subtask(struct hns_nic_priv *priv) return; hns_nic_dump(priv); - netdev_info(priv->netdev, "Reset %s port\n", - (type == HNAE_PORT_DEBUG ? "debug" : "business")); + netdev_info(priv->netdev, "try to reset %s port!\n", + (type == HNAE_PORT_DEBUG ? "debug" : "service")); rtnl_lock(); /* put off any impending NetWatchDogTimeout */ priv->netdev->trans_start = jiffies; - if (type == HNAE_PORT_DEBUG) + if (type == HNAE_PORT_DEBUG) { hns_nic_net_reinit(priv->netdev); + } else { + netif_carrier_off(priv->netdev); + netif_tx_disable(priv->netdev); + } rtnl_unlock(); } /* for doing service complete*/ static void hns_nic_service_event_complete(struct hns_nic_priv *priv) { - assert(!test_bit(NIC_STATE_SERVICE_SCHED, &priv->state)); + WARN_ON(!test_bit(NIC_STATE_SERVICE_SCHED, &priv->state)); smp_mb__before_atomic(); clear_bit(NIC_STATE_SERVICE_SCHED, &priv->state); @@ -1435,8 +1761,9 @@ static void hns_nic_uninit_ring_data(struct hns_nic_priv *priv) for (i = 0; i < h->q_num * 2; i++) { netif_napi_del(&priv->ring_data[i].napi); if (priv->ring_data[i].ring->irq_init_flag == RCB_IRQ_INITED) { - irq_set_affinity_hint(priv->ring_data[i].ring->irq, - NULL); + (void)irq_set_affinity_hint( + priv->ring_data[i].ring->irq, + NULL); free_irq(priv->ring_data[i].ring->irq, &priv->ring_data[i]); } @@ -1446,6 +1773,31 @@ static void hns_nic_uninit_ring_data(struct hns_nic_priv *priv) kfree(priv->ring_data); } +static void hns_nic_set_priv_ops(struct net_device *netdev) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_handle *h = priv->ae_handle; + + if (AE_IS_VER1(priv->enet_ver)) { + priv->ops.fill_desc = fill_desc; + priv->ops.get_rxd_bnum = get_rx_desc_bnum; + priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx; + } else { + priv->ops.get_rxd_bnum = get_v2rx_desc_bnum; + if ((netdev->features & NETIF_F_TSO) || + (netdev->features & NETIF_F_TSO6)) { + priv->ops.fill_desc = fill_tso_desc; + priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tso; + /* This chip only support 7*4096 */ + netif_set_gso_max_size(netdev, 7 * 4096); + h->dev->ops->set_tso_stats(h, 1); + } else { + priv->ops.fill_desc = fill_v2_desc; + priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx; + } + } +} + static int hns_nic_try_get_ae(struct net_device *ndev) { struct hns_nic_priv *priv = netdev_priv(ndev); @@ -1473,6 +1825,8 @@ static int hns_nic_try_get_ae(struct net_device *ndev) goto out_init_ring_data; } + hns_nic_set_priv_ops(ndev); + ret = register_netdev(ndev); if (ret) { dev_err(priv->dev, "probe register netdev fail!\n"); @@ -1524,10 +1878,10 @@ static int hns_nic_dev_probe(struct platform_device *pdev) priv->dev = dev; priv->netdev = ndev; - if (of_device_is_compatible(node, "hisilicon,hns-nic-v2")) - priv->enet_ver = AE_VERSION_2; - else + if (of_device_is_compatible(node, "hisilicon,hns-nic-v1")) priv->enet_ver = AE_VERSION_1; + else + priv->enet_ver = AE_VERSION_2; ret = of_property_read_string(node, "ae-name", &priv->ae_name); if (ret) @@ -1543,6 +1897,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev) ndev->priv_flags |= IFF_UNICAST_FLT; ndev->netdev_ops = &hns_nic_netdev_ops; hns_ethtool_set_ops(ndev); + ndev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; @@ -1550,6 +1905,17 @@ static int hns_nic_dev_probe(struct platform_device *pdev) NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; ndev->vlan_features |= NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; + switch (priv->enet_ver) { + case AE_VERSION_2: + ndev->features |= NETIF_F_TSO | NETIF_F_TSO6; + ndev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | + NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6; + break; + default: + break; + } + SET_NETDEV_DEV(ndev, dev); if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h index dae0ed19ac6d..4b75270f014e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h @@ -40,6 +40,16 @@ struct hns_nic_ring_data { void (*fini_process)(struct hns_nic_ring_data *); }; +/* compatible the difference between two versions */ +struct hns_nic_ops { + void (*fill_desc)(struct hnae_ring *ring, void *priv, + int size, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu); + int (*maybe_stop_tx)(struct sk_buff **out_skb, + int *bnum, struct hnae_ring *ring); + void (*get_rxd_bnum)(u32 bnum_flag, int *out_bnum); +}; + struct hns_nic_priv { const char *ae_name; u32 enet_ver; @@ -51,6 +61,8 @@ struct hns_nic_priv { struct device *dev; struct hnae_handle *ae_handle; + struct hns_nic_ops ops; + /* the cb for nic to manage the ring buffer, the first half of the * array is for tx_ring and vice versa for the second half */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index a0332129970b..3b234176dd36 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -11,7 +11,6 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/platform_device.h> - #include "hns_enet.h" #define HNS_PHY_PAGE_MDIX 0 @@ -667,6 +666,7 @@ static void hns_nic_get_drvinfo(struct net_device *net_dev, drvinfo->bus_info[ETHTOOL_BUSINFO_LEN - 1] = '\0'; strncpy(drvinfo->fw_version, "N/A", ETHTOOL_FWVERS_LEN); + drvinfo->eedump_len = 0; } /** @@ -1187,6 +1187,95 @@ static int hns_nic_nway_reset(struct net_device *netdev) return ret; } +static u32 +hns_get_rss_key_size(struct net_device *netdev) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_ae_ops *ops; + u32 ret; + + if (AE_IS_VER1(priv->enet_ver)) { + netdev_err(netdev, + "RSS feature is not supported on this hardware\n"); + return -EOPNOTSUPP; + } + + ops = priv->ae_handle->dev->ops; + ret = ops->get_rss_key_size(priv->ae_handle); + + return ret; +} + +static u32 +hns_get_rss_indir_size(struct net_device *netdev) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_ae_ops *ops; + u32 ret; + + if (AE_IS_VER1(priv->enet_ver)) { + netdev_err(netdev, + "RSS feature is not supported on this hardware\n"); + return -EOPNOTSUPP; + } + + ops = priv->ae_handle->dev->ops; + ret = ops->get_rss_indir_size(priv->ae_handle); + + return ret; +} + +static int +hns_get_rss(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_ae_ops *ops; + int ret; + + if (AE_IS_VER1(priv->enet_ver)) { + netdev_err(netdev, + "RSS feature is not supported on this hardware\n"); + return -EOPNOTSUPP; + } + + ops = priv->ae_handle->dev->ops; + + if (!indir) + return 0; + + ret = ops->get_rss(priv->ae_handle, indir, key, hfunc); + + return 0; +} + +static int +hns_set_rss(struct net_device *netdev, const u32 *indir, const u8 *key, + const u8 hfunc) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_ae_ops *ops; + int ret; + + if (AE_IS_VER1(priv->enet_ver)) { + netdev_err(netdev, + "RSS feature is not supported on this hardware\n"); + return -EOPNOTSUPP; + } + + ops = priv->ae_handle->dev->ops; + + /* currently hfunc can only be Toeplitz hash */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + if (!indir) + return 0; + + ret = ops->set_rss(priv->ae_handle, indir, key, hfunc); + + return 0; +} + static struct ethtool_ops hns_ethtool_ops = { .get_drvinfo = hns_nic_get_drvinfo, .get_link = hns_nic_get_link, @@ -1206,6 +1295,10 @@ static struct ethtool_ops hns_ethtool_ops = { .get_regs_len = hns_get_regs_len, .get_regs = hns_get_regs, .nway_reset = hns_nic_nway_reset, + .get_rxfh_key_size = hns_get_rss_key_size, + .get_rxfh_indir_size = hns_get_rss_indir_size, + .get_rxfh = hns_get_rss, + .set_rxfh = hns_set_rss, }; void hns_ethtool_set_ops(struct net_device *ndev) diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c index ae6e30d39f0f..1d5c3e16d8f4 100644 --- a/drivers/net/ethernet/hp/hp100.c +++ b/drivers/net/ethernet/hp/hp100.c @@ -2843,7 +2843,7 @@ static void cleanup_dev(struct net_device *d) } #ifdef CONFIG_EISA -static int __init hp100_eisa_probe (struct device *gendev) +static int hp100_eisa_probe(struct device *gendev) { struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private)); struct eisa_device *edev = to_eisa_device(gendev); diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h index 69707108d23c..98fe5a2cd6e3 100644 --- a/drivers/net/ethernet/intel/e1000/e1000.h +++ b/drivers/net/ethernet/intel/e1000/e1000.h @@ -213,8 +213,11 @@ struct e1000_rx_ring { }; #define E1000_DESC_UNUSED(R) \ - ((((R)->next_to_clean > (R)->next_to_use) \ - ? 0 : (R)->count) + (R)->next_to_clean - (R)->next_to_use - 1) +({ \ + unsigned int clean = smp_load_acquire(&(R)->next_to_clean); \ + unsigned int use = READ_ONCE((R)->next_to_use); \ + (clean > use ? 0 : (R)->count) + clean - use - 1; \ +}) #define E1000_RX_DESC_EXT(R, i) \ (&(((union e1000_rx_desc_extended *)((R).desc))[i])) diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c index b1af0d613caa..8172cf08cc33 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_hw.c +++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c @@ -1,5 +1,5 @@ /******************************************************************************* - +* Intel PRO/1000 Linux driver Copyright(c) 1999 - 2006 Intel Corporation. @@ -106,7 +106,7 @@ u16 e1000_igp_cable_length_table[IGP01E1000_AGC_LENGTH_TABLE_SIZE] = { 120, 120 }; -static DEFINE_SPINLOCK(e1000_eeprom_lock); +static DEFINE_MUTEX(e1000_eeprom_lock); static DEFINE_SPINLOCK(e1000_phy_lock); /** @@ -624,8 +624,8 @@ s32 e1000_init_hw(struct e1000_hw *hw) /* Workaround for PCI-X problem when BIOS sets MMRBC * incorrectly. */ - if (hw->bus_type == e1000_bus_type_pcix - && e1000_pcix_get_mmrbc(hw) > 2048) + if (hw->bus_type == e1000_bus_type_pcix && + e1000_pcix_get_mmrbc(hw) > 2048) e1000_pcix_set_mmrbc(hw, 2048); break; } @@ -683,10 +683,9 @@ static s32 e1000_adjust_serdes_amplitude(struct e1000_hw *hw) } ret_val = e1000_read_eeprom(hw, EEPROM_SERDES_AMPLITUDE, 1, - &eeprom_data); - if (ret_val) { + &eeprom_data); + if (ret_val) return ret_val; - } if (eeprom_data != EEPROM_RESERVED_WORD) { /* Adjust SERDES output amplitude only. */ @@ -1074,8 +1073,8 @@ static s32 e1000_copper_link_preconfig(struct e1000_hw *hw) if (hw->mac_type <= e1000_82543 || hw->mac_type == e1000_82541 || hw->mac_type == e1000_82547 || - hw->mac_type == e1000_82541_rev_2 - || hw->mac_type == e1000_82547_rev_2) + hw->mac_type == e1000_82541_rev_2 || + hw->mac_type == e1000_82547_rev_2) hw->phy_reset_disable = false; return E1000_SUCCESS; @@ -1652,7 +1651,7 @@ s32 e1000_phy_setup_autoneg(struct e1000_hw *hw) mii_1000t_ctrl_reg = 0; } else { ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, - mii_1000t_ctrl_reg); + mii_1000t_ctrl_reg); if (ret_val) return ret_val; } @@ -1881,10 +1880,11 @@ static s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw) if (ret_val) return ret_val; - if ((hw->mac_type == e1000_82544 || hw->mac_type == e1000_82543) - && (!hw->autoneg) - && (hw->forced_speed_duplex == e1000_10_full - || hw->forced_speed_duplex == e1000_10_half)) { + if ((hw->mac_type == e1000_82544 || + hw->mac_type == e1000_82543) && + (!hw->autoneg) && + (hw->forced_speed_duplex == e1000_10_full || + hw->forced_speed_duplex == e1000_10_half)) { ret_val = e1000_polarity_reversal_workaround(hw); if (ret_val) return ret_val; @@ -2084,11 +2084,12 @@ static s32 e1000_config_fc_after_link_up(struct e1000_hw *hw) * so we had to force link. In this case, we need to force the * configuration of the MAC to match the "fc" parameter. */ - if (((hw->media_type == e1000_media_type_fiber) && (hw->autoneg_failed)) - || ((hw->media_type == e1000_media_type_internal_serdes) - && (hw->autoneg_failed)) - || ((hw->media_type == e1000_media_type_copper) - && (!hw->autoneg))) { + if (((hw->media_type == e1000_media_type_fiber) && + (hw->autoneg_failed)) || + ((hw->media_type == e1000_media_type_internal_serdes) && + (hw->autoneg_failed)) || + ((hw->media_type == e1000_media_type_copper) && + (!hw->autoneg))) { ret_val = e1000_force_mac_fc(hw); if (ret_val) { e_dbg("Error forcing flow control settings\n"); @@ -2193,8 +2194,7 @@ static s32 e1000_config_fc_after_link_up(struct e1000_hw *hw) else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && - (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) - { + (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { hw->fc = E1000_FC_TX_PAUSE; e_dbg ("Flow Control = TX PAUSE frames only.\n"); @@ -2210,8 +2210,7 @@ static s32 e1000_config_fc_after_link_up(struct e1000_hw *hw) else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && - (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) - { + (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { hw->fc = E1000_FC_RX_PAUSE; e_dbg ("Flow Control = RX PAUSE frames only.\n"); @@ -2460,10 +2459,11 @@ s32 e1000_check_for_link(struct e1000_hw *hw) * happen due to the execution of this workaround. */ - if ((hw->mac_type == e1000_82544 - || hw->mac_type == e1000_82543) && (!hw->autoneg) - && (hw->forced_speed_duplex == e1000_10_full - || hw->forced_speed_duplex == e1000_10_half)) { + if ((hw->mac_type == e1000_82544 || + hw->mac_type == e1000_82543) && + (!hw->autoneg) && + (hw->forced_speed_duplex == e1000_10_full || + hw->forced_speed_duplex == e1000_10_half)) { ew32(IMC, 0xffffffff); ret_val = e1000_polarity_reversal_workaround(hw); @@ -2528,8 +2528,10 @@ s32 e1000_check_for_link(struct e1000_hw *hw) */ if (hw->tbi_compatibility_en) { u16 speed, duplex; + ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); + if (ret_val) { e_dbg ("Error getting link speed and duplex\n"); @@ -2628,10 +2630,10 @@ s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex) e1000_read_phy_reg(hw, PHY_LP_ABILITY, &phy_data); if (ret_val) return ret_val; - if ((*speed == SPEED_100 - && !(phy_data & NWAY_LPAR_100TX_FD_CAPS)) - || (*speed == SPEED_10 - && !(phy_data & NWAY_LPAR_10T_FD_CAPS))) + if ((*speed == SPEED_100 && + !(phy_data & NWAY_LPAR_100TX_FD_CAPS)) || + (*speed == SPEED_10 && + !(phy_data & NWAY_LPAR_10T_FD_CAPS))) *duplex = HALF_DUPLEX; } } @@ -2664,9 +2666,9 @@ static s32 e1000_wait_autoneg(struct e1000_hw *hw) ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); if (ret_val) return ret_val; - if (phy_data & MII_SR_AUTONEG_COMPLETE) { + if (phy_data & MII_SR_AUTONEG_COMPLETE) return E1000_SUCCESS; - } + msleep(100); } return E1000_SUCCESS; @@ -2803,11 +2805,11 @@ static u16 e1000_shift_in_mdi_bits(struct e1000_hw *hw) return data; } - /** * e1000_read_phy_reg - read a phy register * @hw: Struct containing variables accessed by shared code * @reg_addr: address of the PHY register to read + * @phy_data: pointer to the value on the PHY register * * Reads the value from a PHY register, if the value is on a specific non zero * page, sets the page first. @@ -2823,14 +2825,13 @@ s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 reg_addr, u16 *phy_data) (reg_addr > MAX_PHY_MULTI_PAGE_REG)) { ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT, (u16) reg_addr); - if (ret_val) { - spin_unlock_irqrestore(&e1000_phy_lock, flags); - return ret_val; - } + if (ret_val) + goto out; } ret_val = e1000_read_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr, phy_data); +out: spin_unlock_irqrestore(&e1000_phy_lock, flags); return ret_val; @@ -2881,7 +2882,7 @@ static s32 e1000_read_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr, e_dbg("MDI Read Error\n"); return -E1000_ERR_PHY; } - *phy_data = (u16) mdic; + *phy_data = (u16)mdic; } else { mdic = ((reg_addr << E1000_MDIC_REG_SHIFT) | (phy_addr << E1000_MDIC_PHY_SHIFT) | @@ -2906,7 +2907,7 @@ static s32 e1000_read_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr, e_dbg("MDI Error\n"); return -E1000_ERR_PHY; } - *phy_data = (u16) mdic; + *phy_data = (u16)mdic; } } else { /* We must first send a preamble through the MDIO pin to signal @@ -2960,7 +2961,7 @@ s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 reg_addr, u16 phy_data) if ((hw->phy_type == e1000_phy_igp) && (reg_addr > MAX_PHY_MULTI_PAGE_REG)) { ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT, - (u16) reg_addr); + (u16)reg_addr); if (ret_val) { spin_unlock_irqrestore(&e1000_phy_lock, flags); return ret_val; @@ -2993,7 +2994,7 @@ static s32 e1000_write_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr, * the desired data. */ if (hw->mac_type == e1000_ce4100) { - mdic = (((u32) phy_data) | + mdic = (((u32)phy_data) | (reg_addr << E1000_MDIC_REG_SHIFT) | (phy_addr << E1000_MDIC_PHY_SHIFT) | (INTEL_CE_GBE_MDIC_OP_WRITE) | @@ -3015,7 +3016,7 @@ static s32 e1000_write_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr, return -E1000_ERR_PHY; } } else { - mdic = (((u32) phy_data) | + mdic = (((u32)phy_data) | (reg_addr << E1000_MDIC_REG_SHIFT) | (phy_addr << E1000_MDIC_PHY_SHIFT) | (E1000_MDIC_OP_WRITE)); @@ -3053,7 +3054,7 @@ static s32 e1000_write_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr, mdic = ((PHY_TURNAROUND) | (reg_addr << 2) | (phy_addr << 7) | (PHY_OP_WRITE << 12) | (PHY_SOF << 14)); mdic <<= 16; - mdic |= (u32) phy_data; + mdic |= (u32)phy_data; e1000_shift_out_mdi_bits(hw, mdic, 32); } @@ -3176,14 +3177,14 @@ static s32 e1000_detect_gig_phy(struct e1000_hw *hw) if (ret_val) return ret_val; - hw->phy_id = (u32) (phy_id_high << 16); + hw->phy_id = (u32)(phy_id_high << 16); udelay(20); ret_val = e1000_read_phy_reg(hw, PHY_ID2, &phy_id_low); if (ret_val) return ret_val; - hw->phy_id |= (u32) (phy_id_low & PHY_REVISION_MASK); - hw->phy_revision = (u32) phy_id_low & ~PHY_REVISION_MASK; + hw->phy_id |= (u32)(phy_id_low & PHY_REVISION_MASK); + hw->phy_revision = (u32)phy_id_low & ~PHY_REVISION_MASK; switch (hw->mac_type) { case e1000_82543: @@ -3401,7 +3402,6 @@ static s32 e1000_phy_m88_get_info(struct e1000_hw *hw, phy_info->remote_rx = ((phy_data & SR_1000T_REMOTE_RX_STATUS) >> SR_1000T_REMOTE_RX_STATUS_SHIFT) ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; - } return E1000_SUCCESS; @@ -3449,7 +3449,7 @@ s32 e1000_phy_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info) if (hw->phy_type == e1000_phy_igp) return e1000_phy_igp_get_info(hw, phy_info); else if ((hw->phy_type == e1000_phy_8211) || - (hw->phy_type == e1000_phy_8201)) + (hw->phy_type == e1000_phy_8201)) return E1000_SUCCESS; else return e1000_phy_m88_get_info(hw, phy_info); @@ -3611,11 +3611,11 @@ static void e1000_shift_out_ee_bits(struct e1000_hw *hw, u16 data, u16 count) */ mask = 0x01 << (count - 1); eecd = er32(EECD); - if (eeprom->type == e1000_eeprom_microwire) { + if (eeprom->type == e1000_eeprom_microwire) eecd &= ~E1000_EECD_DO; - } else if (eeprom->type == e1000_eeprom_spi) { + else if (eeprom->type == e1000_eeprom_spi) eecd |= E1000_EECD_DO; - } + do { /* A "1" is shifted out to the EEPROM by setting bit "DI" to a * "1", and then raising and then lowering the clock (the SK bit @@ -3851,7 +3851,7 @@ static s32 e1000_spi_eeprom_ready(struct e1000_hw *hw) do { e1000_shift_out_ee_bits(hw, EEPROM_RDSR_OPCODE_SPI, hw->eeprom.opcode_bits); - spi_stat_reg = (u8) e1000_shift_in_ee_bits(hw, 8); + spi_stat_reg = (u8)e1000_shift_in_ee_bits(hw, 8); if (!(spi_stat_reg & EEPROM_STATUS_RDY_SPI)) break; @@ -3882,9 +3882,10 @@ static s32 e1000_spi_eeprom_ready(struct e1000_hw *hw) s32 e1000_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { s32 ret; - spin_lock(&e1000_eeprom_lock); + + mutex_lock(&e1000_eeprom_lock); ret = e1000_do_read_eeprom(hw, offset, words, data); - spin_unlock(&e1000_eeprom_lock); + mutex_unlock(&e1000_eeprom_lock); return ret; } @@ -3896,15 +3897,16 @@ static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words, if (hw->mac_type == e1000_ce4100) { GBE_CONFIG_FLASH_READ(GBE_CONFIG_BASE_VIRT, offset, words, - data); + data); return E1000_SUCCESS; } /* A check for invalid values: offset too large, too many words, and * not enough words. */ - if ((offset >= eeprom->word_size) - || (words > eeprom->word_size - offset) || (words == 0)) { + if ((offset >= eeprom->word_size) || + (words > eeprom->word_size - offset) || + (words == 0)) { e_dbg("\"words\" parameter out of bounds. Words = %d," "size = %d\n", offset, eeprom->word_size); return -E1000_ERR_EEPROM; @@ -3940,7 +3942,7 @@ static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words, /* Send the READ command (opcode + addr) */ e1000_shift_out_ee_bits(hw, read_opcode, eeprom->opcode_bits); - e1000_shift_out_ee_bits(hw, (u16) (offset * 2), + e1000_shift_out_ee_bits(hw, (u16)(offset * 2), eeprom->address_bits); /* Read the data. The address of the eeprom internally @@ -3960,7 +3962,7 @@ static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words, e1000_shift_out_ee_bits(hw, EEPROM_READ_OPCODE_MICROWIRE, eeprom->opcode_bits); - e1000_shift_out_ee_bits(hw, (u16) (offset + i), + e1000_shift_out_ee_bits(hw, (u16)(offset + i), eeprom->address_bits); /* Read the data. For microwire, each word requires the @@ -3968,6 +3970,7 @@ static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words, */ data[i] = e1000_shift_in_ee_bits(hw, 16); e1000_standby_eeprom(hw); + cond_resched(); } } @@ -4004,7 +4007,7 @@ s32 e1000_validate_eeprom_checksum(struct e1000_hw *hw) return E1000_SUCCESS; #endif - if (checksum == (u16) EEPROM_SUM) + if (checksum == (u16)EEPROM_SUM) return E1000_SUCCESS; else { e_dbg("EEPROM Checksum Invalid\n"); @@ -4031,7 +4034,7 @@ s32 e1000_update_eeprom_checksum(struct e1000_hw *hw) } checksum += eeprom_data; } - checksum = (u16) EEPROM_SUM - checksum; + checksum = (u16)EEPROM_SUM - checksum; if (e1000_write_eeprom(hw, EEPROM_CHECKSUM_REG, 1, &checksum) < 0) { e_dbg("EEPROM Write Error\n"); return -E1000_ERR_EEPROM; @@ -4052,9 +4055,10 @@ s32 e1000_update_eeprom_checksum(struct e1000_hw *hw) s32 e1000_write_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { s32 ret; - spin_lock(&e1000_eeprom_lock); + + mutex_lock(&e1000_eeprom_lock); ret = e1000_do_write_eeprom(hw, offset, words, data); - spin_unlock(&e1000_eeprom_lock); + mutex_unlock(&e1000_eeprom_lock); return ret; } @@ -4066,15 +4070,16 @@ static s32 e1000_do_write_eeprom(struct e1000_hw *hw, u16 offset, u16 words, if (hw->mac_type == e1000_ce4100) { GBE_CONFIG_FLASH_WRITE(GBE_CONFIG_BASE_VIRT, offset, words, - data); + data); return E1000_SUCCESS; } /* A check for invalid values: offset too large, too many words, and * not enough words. */ - if ((offset >= eeprom->word_size) - || (words > eeprom->word_size - offset) || (words == 0)) { + if ((offset >= eeprom->word_size) || + (words > eeprom->word_size - offset) || + (words == 0)) { e_dbg("\"words\" parameter out of bounds\n"); return -E1000_ERR_EEPROM; } @@ -4116,6 +4121,7 @@ static s32 e1000_write_eeprom_spi(struct e1000_hw *hw, u16 offset, u16 words, return -E1000_ERR_EEPROM; e1000_standby_eeprom(hw); + cond_resched(); /* Send the WRITE ENABLE command (8 bit opcode ) */ e1000_shift_out_ee_bits(hw, EEPROM_WREN_OPCODE_SPI, @@ -4132,7 +4138,7 @@ static s32 e1000_write_eeprom_spi(struct e1000_hw *hw, u16 offset, u16 words, /* Send the Write command (8-bit opcode + addr) */ e1000_shift_out_ee_bits(hw, write_opcode, eeprom->opcode_bits); - e1000_shift_out_ee_bits(hw, (u16) ((offset + widx) * 2), + e1000_shift_out_ee_bits(hw, (u16)((offset + widx) * 2), eeprom->address_bits); /* Send the data */ @@ -4142,6 +4148,7 @@ static s32 e1000_write_eeprom_spi(struct e1000_hw *hw, u16 offset, u16 words, */ while (widx < words) { u16 word_out = data[widx]; + word_out = (word_out >> 8) | (word_out << 8); e1000_shift_out_ee_bits(hw, word_out, 16); widx++; @@ -4183,9 +4190,9 @@ static s32 e1000_write_eeprom_microwire(struct e1000_hw *hw, u16 offset, * EEPROM into write/erase mode. */ e1000_shift_out_ee_bits(hw, EEPROM_EWEN_OPCODE_MICROWIRE, - (u16) (eeprom->opcode_bits + 2)); + (u16)(eeprom->opcode_bits + 2)); - e1000_shift_out_ee_bits(hw, 0, (u16) (eeprom->address_bits - 2)); + e1000_shift_out_ee_bits(hw, 0, (u16)(eeprom->address_bits - 2)); /* Prepare the EEPROM */ e1000_standby_eeprom(hw); @@ -4195,7 +4202,7 @@ static s32 e1000_write_eeprom_microwire(struct e1000_hw *hw, u16 offset, e1000_shift_out_ee_bits(hw, EEPROM_WRITE_OPCODE_MICROWIRE, eeprom->opcode_bits); - e1000_shift_out_ee_bits(hw, (u16) (offset + words_written), + e1000_shift_out_ee_bits(hw, (u16)(offset + words_written), eeprom->address_bits); /* Send the data */ @@ -4224,6 +4231,7 @@ static s32 e1000_write_eeprom_microwire(struct e1000_hw *hw, u16 offset, /* Recover from write */ e1000_standby_eeprom(hw); + cond_resched(); words_written++; } @@ -4235,9 +4243,9 @@ static s32 e1000_write_eeprom_microwire(struct e1000_hw *hw, u16 offset, * EEPROM out of write/erase mode. */ e1000_shift_out_ee_bits(hw, EEPROM_EWDS_OPCODE_MICROWIRE, - (u16) (eeprom->opcode_bits + 2)); + (u16)(eeprom->opcode_bits + 2)); - e1000_shift_out_ee_bits(hw, 0, (u16) (eeprom->address_bits - 2)); + e1000_shift_out_ee_bits(hw, 0, (u16)(eeprom->address_bits - 2)); return E1000_SUCCESS; } @@ -4260,8 +4268,8 @@ s32 e1000_read_mac_addr(struct e1000_hw *hw) e_dbg("EEPROM Read Error\n"); return -E1000_ERR_EEPROM; } - hw->perm_mac_addr[i] = (u8) (eeprom_data & 0x00FF); - hw->perm_mac_addr[i + 1] = (u8) (eeprom_data >> 8); + hw->perm_mac_addr[i] = (u8)(eeprom_data & 0x00FF); + hw->perm_mac_addr[i + 1] = (u8)(eeprom_data >> 8); } switch (hw->mac_type) { @@ -4328,19 +4336,19 @@ u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) */ case 0: /* [47:36] i.e. 0x563 for above example address */ - hash_value = ((mc_addr[4] >> 4) | (((u16) mc_addr[5]) << 4)); + hash_value = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4)); break; case 1: /* [46:35] i.e. 0xAC6 for above example address */ - hash_value = ((mc_addr[4] >> 3) | (((u16) mc_addr[5]) << 5)); + hash_value = ((mc_addr[4] >> 3) | (((u16)mc_addr[5]) << 5)); break; case 2: /* [45:34] i.e. 0x5D8 for above example address */ - hash_value = ((mc_addr[4] >> 2) | (((u16) mc_addr[5]) << 6)); + hash_value = ((mc_addr[4] >> 2) | (((u16)mc_addr[5]) << 6)); break; case 3: /* [43:32] i.e. 0x634 for above example address */ - hash_value = ((mc_addr[4]) | (((u16) mc_addr[5]) << 8)); + hash_value = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8)); break; } @@ -4361,9 +4369,9 @@ void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) /* HW expects these in little endian so we reverse the byte order * from network order (big endian) to little endian */ - rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | - ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); - rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); + rar_low = ((u32)addr[0] | ((u32)addr[1] << 8) | + ((u32)addr[2] << 16) | ((u32)addr[3] << 24)); + rar_high = ((u32)addr[4] | ((u32)addr[5] << 8)); /* Disable Rx and flush all Rx frames before enabling RSS to avoid Rx * unit hang. @@ -4537,7 +4545,7 @@ s32 e1000_setup_led(struct e1000_hw *hw) if (ret_val) return ret_val; ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, - (u16) (hw->phy_spd_default & + (u16)(hw->phy_spd_default & ~IGP01E1000_GMII_SPD)); if (ret_val) return ret_val; @@ -4802,7 +4810,7 @@ void e1000_reset_adaptive(struct e1000_hw *hw) void e1000_update_adaptive(struct e1000_hw *hw) { if (hw->adaptive_ifs) { - if ((hw->collision_delta *hw->ifs_ratio) > hw->tx_packet_delta) { + if ((hw->collision_delta * hw->ifs_ratio) > hw->tx_packet_delta) { if (hw->tx_packet_delta > MIN_NUM_XMITS) { hw->in_ifs_mode = true; if (hw->current_ifs_val < hw->ifs_max_val) { @@ -4816,8 +4824,8 @@ void e1000_update_adaptive(struct e1000_hw *hw) } } } else { - if (hw->in_ifs_mode - && (hw->tx_packet_delta <= MIN_NUM_XMITS)) { + if (hw->in_ifs_mode && + (hw->tx_packet_delta <= MIN_NUM_XMITS)) { hw->current_ifs_val = 0; hw->in_ifs_mode = false; ew32(AIT, 0); @@ -4922,7 +4930,6 @@ static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length, /* Use old method for Phy older than IGP */ if (hw->phy_type == e1000_phy_m88) { - ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); if (ret_val) @@ -4966,7 +4973,6 @@ static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length, }; /* Read the AGC registers for all channels */ for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { - ret_val = e1000_read_phy_reg(hw, agc_reg_array[i], &phy_data); if (ret_val) @@ -4976,8 +4982,8 @@ static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length, /* Value bound check. */ if ((cur_agc_value >= - IGP01E1000_AGC_LENGTH_TABLE_SIZE - 1) - || (cur_agc_value == 0)) + IGP01E1000_AGC_LENGTH_TABLE_SIZE - 1) || + (cur_agc_value == 0)) return -E1000_ERR_PHY; agc_value += cur_agc_value; @@ -5054,7 +5060,6 @@ static s32 e1000_check_polarity(struct e1000_hw *hw, */ if ((phy_data & IGP01E1000_PSSR_SPEED_MASK) == IGP01E1000_PSSR_SPEED_1000MBPS) { - /* Read the GIG initialization PCS register (0x00B4) */ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PCS_INIT_REG, @@ -5175,8 +5180,8 @@ static s32 e1000_1000Mb_check_cable_length(struct e1000_hw *hw) hw->ffe_config_state = e1000_ffe_config_active; ret_val = e1000_write_phy_reg(hw, - IGP01E1000_PHY_DSP_FFE, - IGP01E1000_PHY_DSP_FFE_CM_CP); + IGP01E1000_PHY_DSP_FFE, + IGP01E1000_PHY_DSP_FFE_CM_CP); if (ret_val) return ret_val; break; @@ -5243,7 +5248,7 @@ static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw, bool link_up) msleep(20); ret_val = e1000_write_phy_reg(hw, 0x0000, - IGP01E1000_IEEE_FORCE_GIGA); + IGP01E1000_IEEE_FORCE_GIGA); if (ret_val) return ret_val; for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { @@ -5264,7 +5269,7 @@ static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw, bool link_up) } ret_val = e1000_write_phy_reg(hw, 0x0000, - IGP01E1000_IEEE_RESTART_AUTONEG); + IGP01E1000_IEEE_RESTART_AUTONEG); if (ret_val) return ret_val; @@ -5299,7 +5304,7 @@ static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw, bool link_up) msleep(20); ret_val = e1000_write_phy_reg(hw, 0x0000, - IGP01E1000_IEEE_FORCE_GIGA); + IGP01E1000_IEEE_FORCE_GIGA); if (ret_val) return ret_val; ret_val = @@ -5309,7 +5314,7 @@ static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw, bool link_up) return ret_val; ret_val = e1000_write_phy_reg(hw, 0x0000, - IGP01E1000_IEEE_RESTART_AUTONEG); + IGP01E1000_IEEE_RESTART_AUTONEG); if (ret_val) return ret_val; @@ -5346,9 +5351,8 @@ static s32 e1000_set_phy_mode(struct e1000_hw *hw) ret_val = e1000_read_eeprom(hw, EEPROM_PHY_CLASS_WORD, 1, &eeprom_data); - if (ret_val) { + if (ret_val) return ret_val; - } if ((eeprom_data != EEPROM_RESERVED_WORD) && (eeprom_data & EEPROM_PHY_CLASS_A)) { @@ -5395,8 +5399,8 @@ static s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active) * from the lowest speeds starting from 10Mbps. The capability is used * for Dx transitions and states */ - if (hw->mac_type == e1000_82541_rev_2 - || hw->mac_type == e1000_82547_rev_2) { + if (hw->mac_type == e1000_82541_rev_2 || + hw->mac_type == e1000_82547_rev_2) { ret_val = e1000_read_phy_reg(hw, IGP01E1000_GMII_FIFO, &phy_data); if (ret_val) @@ -5446,11 +5450,9 @@ static s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active) if (ret_val) return ret_val; } - } else if ((hw->autoneg_advertised == AUTONEG_ADVERTISE_SPEED_DEFAULT) - || (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_ALL) - || (hw->autoneg_advertised == - AUTONEG_ADVERTISE_10_100_ALL)) { - + } else if ((hw->autoneg_advertised == AUTONEG_ADVERTISE_SPEED_DEFAULT) || + (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_ALL) || + (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_100_ALL)) { if (hw->mac_type == e1000_82541_rev_2 || hw->mac_type == e1000_82547_rev_2) { phy_data |= IGP01E1000_GMII_FLEX_SPD; @@ -5474,7 +5476,6 @@ static s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active) phy_data); if (ret_val) return ret_val; - } return E1000_SUCCESS; } @@ -5542,7 +5543,6 @@ static s32 e1000_set_vco_speed(struct e1000_hw *hw) return E1000_SUCCESS; } - /** * e1000_enable_mng_pass_thru - check for bmc pass through * @hw: Struct containing variables accessed by shared code diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index fd7be860c201..3fc7bde699ba 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -99,13 +99,13 @@ int e1000_setup_all_rx_resources(struct e1000_adapter *adapter); void e1000_free_all_tx_resources(struct e1000_adapter *adapter); void e1000_free_all_rx_resources(struct e1000_adapter *adapter); static int e1000_setup_tx_resources(struct e1000_adapter *adapter, - struct e1000_tx_ring *txdr); + struct e1000_tx_ring *txdr); static int e1000_setup_rx_resources(struct e1000_adapter *adapter, - struct e1000_rx_ring *rxdr); + struct e1000_rx_ring *rxdr); static void e1000_free_tx_resources(struct e1000_adapter *adapter, - struct e1000_tx_ring *tx_ring); + struct e1000_tx_ring *tx_ring); static void e1000_free_rx_resources(struct e1000_adapter *adapter, - struct e1000_rx_ring *rx_ring); + struct e1000_rx_ring *rx_ring); void e1000_update_stats(struct e1000_adapter *adapter); static int e1000_init_module(void); @@ -122,16 +122,16 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter); static void e1000_clean_all_tx_rings(struct e1000_adapter *adapter); static void e1000_clean_all_rx_rings(struct e1000_adapter *adapter); static void e1000_clean_tx_ring(struct e1000_adapter *adapter, - struct e1000_tx_ring *tx_ring); + struct e1000_tx_ring *tx_ring); static void e1000_clean_rx_ring(struct e1000_adapter *adapter, - struct e1000_rx_ring *rx_ring); + struct e1000_rx_ring *rx_ring); static void e1000_set_rx_mode(struct net_device *netdev); static void e1000_update_phy_info_task(struct work_struct *work); static void e1000_watchdog(struct work_struct *work); static void e1000_82547_tx_fifo_stall_task(struct work_struct *work); static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev); -static struct net_device_stats * e1000_get_stats(struct net_device *netdev); +static struct net_device_stats *e1000_get_stats(struct net_device *netdev); static int e1000_change_mtu(struct net_device *netdev, int new_mtu); static int e1000_set_mac(struct net_device *netdev, void *p); static irqreturn_t e1000_intr(int irq, void *data); @@ -164,7 +164,7 @@ static void e1000_tx_timeout(struct net_device *dev); static void e1000_reset_task(struct work_struct *work); static void e1000_smartspeed(struct e1000_adapter *adapter); static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter, - struct sk_buff *skb); + struct sk_buff *skb); static bool e1000_vlan_used(struct e1000_adapter *adapter); static void e1000_vlan_mode(struct net_device *netdev, @@ -195,7 +195,7 @@ MODULE_PARM_DESC(copybreak, "Maximum size of packet that is copied to a new buffer on receive"); static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, - pci_channel_state_t state); + pci_channel_state_t state); static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev); static void e1000_io_resume(struct pci_dev *pdev); @@ -287,7 +287,7 @@ static int e1000_request_irq(struct e1000_adapter *adapter) int err; err = request_irq(adapter->pdev->irq, handler, irq_flags, netdev->name, - netdev); + netdev); if (err) { e_err(probe, "Unable to allocate interrupt Error: %d\n", err); } @@ -636,8 +636,8 @@ void e1000_reset(struct e1000_adapter *adapter) * but don't include ethernet FCS because hardware appends it */ min_tx_space = (hw->max_frame_size + - sizeof(struct e1000_tx_desc) - - ETH_FCS_LEN) * 2; + sizeof(struct e1000_tx_desc) - + ETH_FCS_LEN) * 2; min_tx_space = ALIGN(min_tx_space, 1024); min_tx_space >>= 10; /* software strips receive CRC, so leave room for it */ @@ -943,8 +943,8 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct e1000_adapter *adapter; struct e1000_hw *hw; - static int cards_found = 0; - static int global_quad_port_a = 0; /* global ksp3 port a indication */ + static int cards_found; + static int global_quad_port_a; /* global ksp3 port a indication */ int i, err, pci_using_dac; u16 eeprom_data = 0; u16 tmp = 0; @@ -1046,7 +1046,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (hw->mac_type == e1000_ce4100) { hw->ce4100_gbe_mdio_base_virt = ioremap(pci_resource_start(pdev, BAR_1), - pci_resource_len(pdev, BAR_1)); + pci_resource_len(pdev, BAR_1)); if (!hw->ce4100_gbe_mdio_base_virt) goto err_mdio_ioremap; @@ -1148,7 +1148,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) break; case e1000_82546: case e1000_82546_rev_3: - if (er32(STATUS) & E1000_STATUS_FUNC_1){ + if (er32(STATUS) & E1000_STATUS_FUNC_1) { e1000_read_eeprom(hw, EEPROM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; @@ -1199,13 +1199,13 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) for (i = 0; i < 32; i++) { hw->phy_addr = i; e1000_read_phy_reg(hw, PHY_ID2, &tmp); - if (tmp == 0 || tmp == 0xFF) { - if (i == 31) - goto err_eeprom; - continue; - } else + + if (tmp != 0 && tmp != 0xFF) break; } + + if (i >= 32) + goto err_eeprom; } /* reset the hardware with the new settings */ @@ -1263,7 +1263,7 @@ err_pci_reg: * @pdev: PCI device information struct * * e1000_remove is called by the PCI subsystem to alert the driver - * that it should release a PCI device. The could be caused by a + * that it should release a PCI device. That could be caused by a * Hot-Plug event, or because the driver is going to be removed from * memory. **/ @@ -1334,12 +1334,12 @@ static int e1000_sw_init(struct e1000_adapter *adapter) static int e1000_alloc_queues(struct e1000_adapter *adapter) { adapter->tx_ring = kcalloc(adapter->num_tx_queues, - sizeof(struct e1000_tx_ring), GFP_KERNEL); + sizeof(struct e1000_tx_ring), GFP_KERNEL); if (!adapter->tx_ring) return -ENOMEM; adapter->rx_ring = kcalloc(adapter->num_rx_queues, - sizeof(struct e1000_rx_ring), GFP_KERNEL); + sizeof(struct e1000_rx_ring), GFP_KERNEL); if (!adapter->rx_ring) { kfree(adapter->tx_ring); return -ENOMEM; @@ -1811,20 +1811,20 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter) rctl &= ~E1000_RCTL_SZ_4096; rctl |= E1000_RCTL_BSEX; switch (adapter->rx_buffer_len) { - case E1000_RXBUFFER_2048: - default: - rctl |= E1000_RCTL_SZ_2048; - rctl &= ~E1000_RCTL_BSEX; - break; - case E1000_RXBUFFER_4096: - rctl |= E1000_RCTL_SZ_4096; - break; - case E1000_RXBUFFER_8192: - rctl |= E1000_RCTL_SZ_8192; - break; - case E1000_RXBUFFER_16384: - rctl |= E1000_RCTL_SZ_16384; - break; + case E1000_RXBUFFER_2048: + default: + rctl |= E1000_RCTL_SZ_2048; + rctl &= ~E1000_RCTL_BSEX; + break; + case E1000_RXBUFFER_4096: + rctl |= E1000_RCTL_SZ_4096; + break; + case E1000_RXBUFFER_8192: + rctl |= E1000_RCTL_SZ_8192; + break; + case E1000_RXBUFFER_16384: + rctl |= E1000_RCTL_SZ_16384; + break; } /* This is useful for sniffing bad packets. */ @@ -1861,12 +1861,12 @@ static void e1000_configure_rx(struct e1000_adapter *adapter) if (adapter->netdev->mtu > ETH_DATA_LEN) { rdlen = adapter->rx_ring[0].count * - sizeof(struct e1000_rx_desc); + sizeof(struct e1000_rx_desc); adapter->clean_rx = e1000_clean_jumbo_rx_irq; adapter->alloc_rx_buf = e1000_alloc_jumbo_rx_buffers; } else { rdlen = adapter->rx_ring[0].count * - sizeof(struct e1000_rx_desc); + sizeof(struct e1000_rx_desc); adapter->clean_rx = e1000_clean_rx_irq; adapter->alloc_rx_buf = e1000_alloc_rx_buffers; } @@ -2761,7 +2761,9 @@ static int e1000_tso(struct e1000_adapter *adapter, buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; - if (++i == tx_ring->count) i = 0; + if (++i == tx_ring->count) + i = 0; + tx_ring->next_to_use = i; return true; @@ -2816,7 +2818,9 @@ static bool e1000_tx_csum(struct e1000_adapter *adapter, buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; - if (unlikely(++i == tx_ring->count)) i = 0; + if (unlikely(++i == tx_ring->count)) + i = 0; + tx_ring->next_to_use = i; return true; @@ -2865,8 +2869,8 @@ static int e1000_tx_map(struct e1000_adapter *adapter, * packet is smaller than 2048 - 16 - 16 (or 2016) bytes */ if (unlikely((hw->bus_type == e1000_bus_type_pcix) && - (size > 2015) && count == 0)) - size = 2015; + (size > 2015) && count == 0)) + size = 2015; /* Workaround for potential 82544 hang in PCI-X. Avoid * terminating buffers within evenly-aligned dwords. @@ -2963,7 +2967,7 @@ dma_error: count--; while (count--) { - if (i==0) + if (i == 0) i += tx_ring->count; i--; buffer_info = &tx_ring->buffer_info[i]; @@ -3013,7 +3017,8 @@ static void e1000_tx_queue(struct e1000_adapter *adapter, tx_desc->lower.data = cpu_to_le32(txd_lower | buffer_info->length); tx_desc->upper.data = cpu_to_le32(txd_upper); - if (unlikely(++i == tx_ring->count)) i = 0; + if (unlikely(++i == tx_ring->count)) + i = 0; } tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd); @@ -3101,7 +3106,7 @@ static int e1000_maybe_stop_tx(struct net_device *netdev, return __e1000_maybe_stop_tx(netdev, size); } -#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 ) +#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1) static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { @@ -3841,7 +3846,7 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, struct e1000_tx_buffer *buffer_info; unsigned int i, eop; unsigned int count = 0; - unsigned int total_tx_bytes=0, total_tx_packets=0; + unsigned int total_tx_bytes = 0, total_tx_packets = 0; unsigned int bytes_compl = 0, pkts_compl = 0; i = tx_ring->next_to_clean; @@ -3869,14 +3874,18 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, e1000_unmap_and_free_tx_resource(adapter, buffer_info); tx_desc->upper.data = 0; - if (unlikely(++i == tx_ring->count)) i = 0; + if (unlikely(++i == tx_ring->count)) + i = 0; } eop = tx_ring->buffer_info[i].next_to_watch; eop_desc = E1000_TX_DESC(*tx_ring, eop); } - tx_ring->next_to_clean = i; + /* Synchronize with E1000_DESC_UNUSED called from e1000_xmit_frame, + * which will reuse the cleaned buffers. + */ + smp_store_release(&tx_ring->next_to_clean, i); netdev_completed_queue(netdev, pkts_compl, bytes_compl); @@ -3954,9 +3963,11 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, skb_checksum_none_assert(skb); /* 82543 or newer only */ - if (unlikely(hw->mac_type < e1000_82543)) return; + if (unlikely(hw->mac_type < e1000_82543)) + return; /* Ignore Checksum bit is set */ - if (unlikely(status & E1000_RXD_STAT_IXSM)) return; + if (unlikely(status & E1000_RXD_STAT_IXSM)) + return; /* TCP/UDP checksum error bit is set */ if (unlikely(errors & E1000_RXD_ERR_TCPE)) { /* let the stack verify checksum errors */ @@ -4136,7 +4147,7 @@ static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter, unsigned int i; int cleaned_count = 0; bool cleaned = false; - unsigned int total_rx_bytes=0, total_rx_packets=0; + unsigned int total_rx_bytes = 0, total_rx_packets = 0; i = rx_ring->next_to_clean; rx_desc = E1000_RX_DESC(*rx_ring, i); @@ -4153,7 +4164,9 @@ static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter, status = rx_desc->status; - if (++i == rx_ring->count) i = 0; + if (++i == rx_ring->count) + i = 0; + next_rxd = E1000_RX_DESC(*rx_ring, i); prefetch(next_rxd); @@ -4356,7 +4369,7 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, unsigned int i; int cleaned_count = 0; bool cleaned = false; - unsigned int total_rx_bytes=0, total_rx_packets=0; + unsigned int total_rx_bytes = 0, total_rx_packets = 0; i = rx_ring->next_to_clean; rx_desc = E1000_RX_DESC(*rx_ring, i); @@ -4395,7 +4408,9 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, buffer_info->rxbuf.data = NULL; } - if (++i == rx_ring->count) i = 0; + if (++i == rx_ring->count) + i = 0; + next_rxd = E1000_RX_DESC(*rx_ring, i); prefetch(next_rxd); @@ -4683,9 +4698,11 @@ static void e1000_smartspeed(struct e1000_adapter *adapter) * we assume back-to-back */ e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_status); - if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return; + if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) + return; e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_status); - if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return; + if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) + return; e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_ctrl); if (phy_ctrl & CR_1000T_MS_ENABLE) { phy_ctrl &= ~CR_1000T_MS_ENABLE; diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index c9da4654e9ca..b3949d5bef5c 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -91,6 +91,7 @@ struct e1000_hw; #define E1000_DEV_ID_PCH_SPT_I219_V 0x1570 /* SPT PCH */ #define E1000_DEV_ID_PCH_SPT_I219_LM2 0x15B7 /* SPT-H PCH */ #define E1000_DEV_ID_PCH_SPT_I219_V2 0x15B8 /* SPT-H PCH */ +#define E1000_DEV_ID_PCH_LBG_I219_LM3 0x15B9 /* LBG PCH */ #define E1000_REVISION_4 4 diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 91a5a0ae9cd7..a049e30639a1 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1984,7 +1984,7 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) int i = 0; while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) && - (i++ < 10)) + (i++ < 30)) usleep_range(10000, 20000); return blocked ? E1000_BLK_PHY_RESET : 0; } @@ -3093,24 +3093,45 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) struct e1000_nvm_info *nvm = &hw->nvm; u32 bank1_offset = nvm->flash_bank_size * sizeof(u16); u32 act_offset = E1000_ICH_NVM_SIG_WORD * 2 + 1; + u32 nvm_dword = 0; u8 sig_byte = 0; s32 ret_val; switch (hw->mac.type) { - /* In SPT, read from the CTRL_EXT reg instead of - * accessing the sector valid bits from the nvm - */ case e1000_pch_spt: - *bank = er32(CTRL_EXT) - & E1000_CTRL_EXT_NVMVS; - if ((*bank == 0) || (*bank == 1)) { - e_dbg("ERROR: No valid NVM bank present\n"); - return -E1000_ERR_NVM; - } else { - *bank = *bank - 2; + bank1_offset = nvm->flash_bank_size; + act_offset = E1000_ICH_NVM_SIG_WORD; + + /* set bank to 0 in case flash read fails */ + *bank = 0; + + /* Check bank 0 */ + ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, + &nvm_dword); + if (ret_val) + return ret_val; + sig_byte = (u8)((nvm_dword & 0xFF00) >> 8); + if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == + E1000_ICH_NVM_SIG_VALUE) { + *bank = 0; return 0; } - break; + + /* Check bank 1 */ + ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset + + bank1_offset, + &nvm_dword); + if (ret_val) + return ret_val; + sig_byte = (u8)((nvm_dword & 0xFF00) >> 8); + if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == + E1000_ICH_NVM_SIG_VALUE) { + *bank = 1; + return 0; + } + + e_dbg("ERROR: No valid NVM bank present\n"); + return -E1000_ERR_NVM; case e1000_ich8lan: case e1000_ich9lan: eecd = er32(EECD); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 0a854a47d31a..775e38910681 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1959,8 +1959,10 @@ static irqreturn_t e1000_intr_msix_rx(int __always_unused irq, void *data) * previous interrupt. */ if (rx_ring->set_itr) { - writel(1000000000 / (rx_ring->itr_val * 256), - rx_ring->itr_register); + u32 itr = rx_ring->itr_val ? + 1000000000 / (rx_ring->itr_val * 256) : 0; + + writel(itr, rx_ring->itr_register); rx_ring->set_itr = 0; } @@ -7465,6 +7467,7 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V), board_pch_spt }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_LM2), board_pch_spt }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V2), board_pch_spt }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LBG_I219_LM3), board_pch_spt }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; @@ -7504,14 +7507,11 @@ static struct pci_driver e1000_driver = { **/ static int __init e1000_init_module(void) { - int ret; - pr_info("Intel(R) PRO/1000 Network Driver - %s\n", e1000e_driver_version); pr_info("Copyright(c) 1999 - 2015 Intel Corporation.\n"); - ret = pci_register_driver(&e1000_driver); - return ret; + return pci_register_driver(&e1000_driver); } module_init(e1000_init_module); diff --git a/drivers/net/ethernet/intel/fm10k/Makefile b/drivers/net/ethernet/intel/fm10k/Makefile index 08859dd220a8..b006ff66d028 100644 --- a/drivers/net/ethernet/intel/fm10k/Makefile +++ b/drivers/net/ethernet/intel/fm10k/Makefile @@ -1,7 +1,7 @@ ################################################################################ # # Intel Ethernet Switch Host Interface Driver -# Copyright(c) 2013 - 2014 Intel Corporation. +# Copyright(c) 2013 - 2015 Intel Corporation. # # This program is free software; you can redistribute it and/or modify it # under the terms and conditions of the GNU General Public License, @@ -27,7 +27,17 @@ obj-$(CONFIG_FM10K) += fm10k.o -fm10k-objs := fm10k_main.o fm10k_common.o fm10k_pci.o \ - fm10k_netdev.o fm10k_ethtool.o fm10k_pf.o fm10k_vf.o \ - fm10k_mbx.o fm10k_iov.o fm10k_tlv.o \ - fm10k_debugfs.o fm10k_ptp.o fm10k_dcbnl.o +fm10k-y := fm10k_main.o \ + fm10k_common.o \ + fm10k_pci.o \ + fm10k_ptp.o \ + fm10k_netdev.o \ + fm10k_ethtool.o \ + fm10k_pf.o \ + fm10k_vf.o \ + fm10k_mbx.o \ + fm10k_iov.o \ + fm10k_tlv.o + +fm10k-$(CONFIG_DEBUG_FS) += fm10k_debugfs.o +fm10k-$(CONFIG_DCB) += fm10k_dcbnl.o diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 14440200499b..b34bb008b104 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -23,6 +23,7 @@ #include <linux/types.h> #include <linux/etherdevice.h> +#include <linux/cpumask.h> #include <linux/rtnetlink.h> #include <linux/if_vlan.h> #include <linux/pci.h> @@ -33,7 +34,7 @@ #include "fm10k_pf.h" #include "fm10k_vf.h" -#define FM10K_MAX_JUMBO_FRAME_SIZE 15358 /* Maximum supported size 15K */ +#define FM10K_MAX_JUMBO_FRAME_SIZE 15342 /* Maximum supported size 15K */ #define MAX_QUEUES FM10K_MAX_QUEUES_PF @@ -66,6 +67,7 @@ struct fm10k_l2_accel { enum fm10k_ring_state_t { __FM10K_TX_DETECT_HANG, __FM10K_HANG_CHECK_ARMED, + __FM10K_TX_XPS_INIT_DONE, }; #define check_for_tx_hang(ring) \ @@ -138,7 +140,7 @@ struct fm10k_ring { * different for DCB and RSS modes */ u8 qos_pc; /* priority class of queue */ - u16 vid; /* default vlan ID of queue */ + u16 vid; /* default VLAN ID of queue */ u16 count; /* amount of descriptors */ u16 next_to_alloc; @@ -164,14 +166,20 @@ struct fm10k_ring_container { unsigned int total_packets; /* total packets processed this int */ u16 work_limit; /* total work allowed per interrupt */ u16 itr; /* interrupt throttle rate value */ + u8 itr_scale; /* ITR adjustment based on PCI speed */ u8 count; /* total number of rings in vector */ }; #define FM10K_ITR_MAX 0x0FFF /* maximum value for ITR */ #define FM10K_ITR_10K 100 /* 100us */ #define FM10K_ITR_20K 50 /* 50us */ +#define FM10K_ITR_40K 25 /* 25us */ #define FM10K_ITR_ADAPTIVE 0x8000 /* adaptive interrupt moderation flag */ +#define ITR_IS_ADAPTIVE(itr) (!!(itr & FM10K_ITR_ADAPTIVE)) + +#define FM10K_TX_ITR_DEFAULT FM10K_ITR_40K +#define FM10K_RX_ITR_DEFAULT FM10K_ITR_20K #define FM10K_ITR_ENABLE (FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR) static inline struct netdev_queue *txring_txq(const struct fm10k_ring *ring) @@ -203,6 +211,7 @@ struct fm10k_q_vector { struct fm10k_ring_container rx, tx; struct napi_struct napi; + cpumask_t affinity_mask; char name[IFNAMSIZ + 9]; #ifdef CONFIG_DEBUG_FS @@ -413,7 +422,7 @@ static inline u16 fm10k_desc_unused(struct fm10k_ring *ring) (&(((union fm10k_rx_desc *)((R)->desc))[i])) #define FM10K_MAX_TXD_PWR 14 -#define FM10K_MAX_DATA_PER_TXD (1 << FM10K_MAX_TXD_PWR) +#define FM10K_MAX_DATA_PER_TXD BIT(FM10K_MAX_TXD_PWR) /* Tx Descriptors needed, worst case */ #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), FM10K_MAX_DATA_PER_TXD) @@ -434,7 +443,7 @@ union fm10k_ftag_info { struct { /* dglort and sglort combined into a single 32bit desc read */ __le32 glort; - /* upper 16 bits of vlan are reserved 0 for swpri_type_user */ + /* upper 16 bits of VLAN are reserved 0 for swpri_type_user */ __le32 vlan; } d; struct { @@ -484,7 +493,7 @@ void fm10k_netpoll(struct net_device *netdev); #endif /* Netdev */ -struct net_device *fm10k_alloc_netdev(void); +struct net_device *fm10k_alloc_netdev(const struct fm10k_info *info); int fm10k_setup_rx_resources(struct fm10k_ring *); int fm10k_setup_tx_resources(struct fm10k_ring *); void fm10k_free_rx_resources(struct fm10k_ring *); @@ -551,5 +560,9 @@ int fm10k_get_ts_config(struct net_device *netdev, struct ifreq *ifr); int fm10k_set_ts_config(struct net_device *netdev, struct ifreq *ifr); /* DCB */ +#ifdef CONFIG_DCB void fm10k_dcbnl_set_ops(struct net_device *dev); +#else +static inline void fm10k_dcbnl_set_ops(struct net_device *dev) {} +#endif #endif /* _FM10K_H_ */ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c index 5c7a4d7662d8..2be4361839db 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c @@ -20,7 +20,6 @@ #include "fm10k.h" -#ifdef CONFIG_DCB /** * fm10k_dcbnl_ieee_getets - get the ETS configuration for the device * @dev: netdev interface for the device @@ -155,7 +154,6 @@ static const struct dcbnl_rtnl_ops fm10k_dcbnl_ops = { .setdcbx = fm10k_dcbnl_setdcbx, }; -#endif /* CONFIG_DCB */ /** * fm10k_dcbnl_set_ops - Configures dcbnl ops pointer for netdev * @dev: netdev interface for the device @@ -164,11 +162,9 @@ static const struct dcbnl_rtnl_ops fm10k_dcbnl_ops = { **/ void fm10k_dcbnl_set_ops(struct net_device *dev) { -#ifdef CONFIG_DCB struct fm10k_intfc *interface = netdev_priv(dev); struct fm10k_hw *hw = &interface->hw; if (hw->mac.type == fm10k_mac_pf) dev->dcbnl_ops = &fm10k_dcbnl_ops; -#endif /* CONFIG_DCB */ } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c index 5304bc1fbecd..5d6137faf7d1 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c @@ -18,8 +18,6 @@ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 */ -#ifdef CONFIG_DEBUG_FS - #include "fm10k.h" #include <linux/debugfs.h> @@ -258,5 +256,3 @@ void fm10k_dbg_exit(void) debugfs_remove_recursive(dbg_root); dbg_root = NULL; } - -#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c index 2ce0eba5e040..2f6a05b57228 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c @@ -111,12 +111,14 @@ static const struct fm10k_stats fm10k_gstrings_pf_stats[] = { static const struct fm10k_stats fm10k_gstrings_mbx_stats[] = { FM10K_MBX_STAT("mbx_tx_busy", tx_busy), - FM10K_MBX_STAT("mbx_tx_oversized", tx_dropped), + FM10K_MBX_STAT("mbx_tx_dropped", tx_dropped), FM10K_MBX_STAT("mbx_tx_messages", tx_messages), FM10K_MBX_STAT("mbx_tx_dwords", tx_dwords), + FM10K_MBX_STAT("mbx_tx_mbmem_pulled", tx_mbmem_pulled), FM10K_MBX_STAT("mbx_rx_messages", rx_messages), FM10K_MBX_STAT("mbx_rx_dwords", rx_dwords), FM10K_MBX_STAT("mbx_rx_parse_err", rx_parse_err), + FM10K_MBX_STAT("mbx_rx_mbmem_pushed", rx_mbmem_pushed), }; #define FM10K_GLOBAL_STATS_LEN ARRAY_SIZE(fm10k_gstrings_global_stats) @@ -125,7 +127,7 @@ static const struct fm10k_stats fm10k_gstrings_mbx_stats[] = { #define FM10K_MBX_STATS_LEN ARRAY_SIZE(fm10k_gstrings_mbx_stats) #define FM10K_QUEUE_STATS_LEN(_n) \ - ( (_n) * 2 * (sizeof(struct fm10k_queue_stats) / sizeof(u64))) + ((_n) * 2 * (sizeof(struct fm10k_queue_stats) / sizeof(u64))) #define FM10K_STATIC_STATS_LEN (FM10K_GLOBAL_STATS_LEN + \ FM10K_NETDEV_STATS_LEN + \ @@ -257,7 +259,8 @@ static int fm10k_get_sset_count(struct net_device *dev, int sset) stats_len += FM10K_DEBUG_STATS_LEN; if (iov_data) - stats_len += FM10K_MBX_STATS_LEN * iov_data->num_vfs; + stats_len += FM10K_MBX_STATS_LEN * + iov_data->num_vfs; } return stats_len; @@ -296,14 +299,16 @@ static void fm10k_get_ethtool_stats(struct net_device *netdev, if (interface->flags & FM10K_FLAG_DEBUG_STATS) { for (i = 0; i < FM10K_DEBUG_STATS_LEN; i++) { - p = (char *)interface + fm10k_gstrings_debug_stats[i].stat_offset; + p = (char *)interface + + fm10k_gstrings_debug_stats[i].stat_offset; *(data++) = (fm10k_gstrings_debug_stats[i].sizeof_stat == sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } } for (i = 0; i < FM10K_MBX_STATS_LEN; i++) { - p = (char *)&interface->hw.mbx + fm10k_gstrings_mbx_stats[i].stat_offset; + p = (char *)&interface->hw.mbx + + fm10k_gstrings_mbx_stats[i].stat_offset; *(data++) = (fm10k_gstrings_mbx_stats[i].sizeof_stat == sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } @@ -320,6 +325,7 @@ static void fm10k_get_ethtool_stats(struct net_device *netdev, if ((interface->flags & FM10K_FLAG_DEBUG_STATS) && iov_data) { for (i = 0; i < iov_data->num_vfs; i++) { struct fm10k_vf_info *vf_info; + vf_info = &iov_data->vf_info[i]; /* skip stats if we don't have a vf info */ @@ -329,7 +335,8 @@ static void fm10k_get_ethtool_stats(struct net_device *netdev, } for (j = 0; j < FM10K_MBX_STATS_LEN; j++) { - p = (char *)&vf_info->mbx + fm10k_gstrings_mbx_stats[j].stat_offset; + p = (char *)&vf_info->mbx + + fm10k_gstrings_mbx_stats[j].stat_offset; *(data++) = (fm10k_gstrings_mbx_stats[j].sizeof_stat == sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } @@ -699,12 +706,10 @@ static int fm10k_get_coalesce(struct net_device *dev, { struct fm10k_intfc *interface = netdev_priv(dev); - ec->use_adaptive_tx_coalesce = - !!(interface->tx_itr & FM10K_ITR_ADAPTIVE); + ec->use_adaptive_tx_coalesce = ITR_IS_ADAPTIVE(interface->tx_itr); ec->tx_coalesce_usecs = interface->tx_itr & ~FM10K_ITR_ADAPTIVE; - ec->use_adaptive_rx_coalesce = - !!(interface->rx_itr & FM10K_ITR_ADAPTIVE); + ec->use_adaptive_rx_coalesce = ITR_IS_ADAPTIVE(interface->rx_itr); ec->rx_coalesce_usecs = interface->rx_itr & ~FM10K_ITR_ADAPTIVE; return 0; @@ -729,10 +734,10 @@ static int fm10k_set_coalesce(struct net_device *dev, /* set initial values for adaptive ITR */ if (ec->use_adaptive_tx_coalesce) - tx_itr = FM10K_ITR_ADAPTIVE | FM10K_ITR_10K; + tx_itr = FM10K_ITR_ADAPTIVE | FM10K_TX_ITR_DEFAULT; if (ec->use_adaptive_rx_coalesce) - rx_itr = FM10K_ITR_ADAPTIVE | FM10K_ITR_20K; + rx_itr = FM10K_ITR_ADAPTIVE | FM10K_RX_ITR_DEFAULT; /* update interface */ interface->tx_itr = tx_itr; @@ -1020,7 +1025,6 @@ static int fm10k_set_priv_flags(struct net_device *netdev, u32 priv_flags) return 0; } - static u32 fm10k_get_reta_size(struct net_device __always_unused *netdev) { return FM10K_RETA_SIZE * FM10K_RETA_ENTRIES_PER_REG; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index e76a44cf330c..75ff1092b7ee 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -28,7 +28,7 @@ #include "fm10k.h" -#define DRV_VERSION "0.15.2-k" +#define DRV_VERSION "0.19.3-k" const char fm10k_driver_version[] = DRV_VERSION; char fm10k_driver_name[] = "fm10k"; static const char fm10k_driver_string[] = @@ -917,7 +917,7 @@ static u8 fm10k_tx_desc_flags(struct sk_buff *skb, u32 tx_flags) /* set timestamping bits */ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && likely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) - desc_flags |= FM10K_TXD_FLAG_TIME; + desc_flags |= FM10K_TXD_FLAG_TIME; /* set checksum offload bits */ desc_flags |= FM10K_SET_FLAG(tx_flags, FM10K_TX_FLAGS_CSUM, @@ -1094,11 +1094,11 @@ dma_error: netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb, struct fm10k_ring *tx_ring) { + u16 count = TXD_USE_COUNT(skb_headlen(skb)); struct fm10k_tx_buffer *first; - int tso; - u32 tx_flags = 0; unsigned short f; - u16 count = TXD_USE_COUNT(skb_headlen(skb)); + u32 tx_flags = 0; + int tso; /* need: 1 descriptor per page * PAGE_SIZE/FM10K_MAX_DATA_PER_TXD, * + 1 desc for skb_headlen/FM10K_MAX_DATA_PER_TXD, @@ -1363,10 +1363,10 @@ static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector, **/ static void fm10k_update_itr(struct fm10k_ring_container *ring_container) { - unsigned int avg_wire_size, packets; + unsigned int avg_wire_size, packets, itr_round; /* Only update ITR if we are using adaptive setting */ - if (!(ring_container->itr & FM10K_ITR_ADAPTIVE)) + if (!ITR_IS_ADAPTIVE(ring_container->itr)) goto clear_counts; packets = ring_container->total_packets; @@ -1375,18 +1375,44 @@ static void fm10k_update_itr(struct fm10k_ring_container *ring_container) avg_wire_size = ring_container->total_bytes / packets; - /* Add 24 bytes to size to account for CRC, preamble, and gap */ - avg_wire_size += 24; - - /* Don't starve jumbo frames */ - if (avg_wire_size > 3000) - avg_wire_size = 3000; + /* The following is a crude approximation of: + * wmem_default / (size + overhead) = desired_pkts_per_int + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate + * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value + * + * Assuming wmem_default is 212992 and overhead is 640 bytes per + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the + * formula down to + * + * (34 * (size + 24)) / (size + 640) = ITR + * + * We first do some math on the packet size and then finally bitshift + * by 8 after rounding up. We also have to account for PCIe link speed + * difference as ITR scales based on this. + */ + if (avg_wire_size <= 360) { + /* Start at 250K ints/sec and gradually drop to 77K ints/sec */ + avg_wire_size *= 8; + avg_wire_size += 376; + } else if (avg_wire_size <= 1152) { + /* 77K ints/sec to 45K ints/sec */ + avg_wire_size *= 3; + avg_wire_size += 2176; + } else if (avg_wire_size <= 1920) { + /* 45K ints/sec to 38K ints/sec */ + avg_wire_size += 4480; + } else { + /* plateau at a limit of 38K ints/sec */ + avg_wire_size = 6656; + } - /* Give a little boost to mid-size frames */ - if ((avg_wire_size > 300) && (avg_wire_size < 1200)) - avg_wire_size /= 3; - else - avg_wire_size /= 2; + /* Perform final bitshift for division after rounding up to ensure + * that the calculation will never get below a 1. The bit shift + * accounts for changes in the ITR due to PCIe link speed. + */ + itr_round = ACCESS_ONCE(ring_container->itr_scale) + 8; + avg_wire_size += (1 << itr_round) - 1; + avg_wire_size >>= itr_round; /* write back value and retain adaptive flag */ ring_container->itr = avg_wire_size | FM10K_ITR_ADAPTIVE; @@ -1428,11 +1454,15 @@ static int fm10k_poll(struct napi_struct *napi, int budget) fm10k_for_each_ring(ring, q_vector->tx) clean_complete &= fm10k_clean_tx_irq(q_vector, ring); + /* Handle case where we are called by netpoll with a budget of 0 */ + if (budget <= 0) + return budget; + /* attempt to distribute budget to each queue fairly, but don't * allow the budget to go below 1 because we'll exit polling */ if (q_vector->rx.count > 1) - per_ring_budget = max(budget/q_vector->rx.count, 1); + per_ring_budget = max(budget / q_vector->rx.count, 1); else per_ring_budget = budget; @@ -1600,6 +1630,7 @@ static int fm10k_alloc_q_vector(struct fm10k_intfc *interface, q_vector->tx.ring = ring; q_vector->tx.work_limit = FM10K_DEFAULT_TX_WORK; q_vector->tx.itr = interface->tx_itr; + q_vector->tx.itr_scale = interface->hw.mac.itr_scale; q_vector->tx.count = txr_count; while (txr_count) { @@ -1628,6 +1659,7 @@ static int fm10k_alloc_q_vector(struct fm10k_intfc *interface, /* save Rx ring container info */ q_vector->rx.ring = ring; q_vector->rx.itr = interface->rx_itr; + q_vector->rx.itr_scale = interface->hw.mac.itr_scale; q_vector->rx.count = rxr_count; while (rxr_count) { @@ -1966,8 +1998,10 @@ int fm10k_init_queueing_scheme(struct fm10k_intfc *interface) /* Allocate memory for queues */ err = fm10k_alloc_q_vectors(interface); - if (err) + if (err) { + fm10k_reset_msix_capability(interface); return err; + } /* Map rings to devices, and map devices to physical queues */ fm10k_assign_rings(interface); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c index af09a1b272e6..c7fea47b8909 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c @@ -375,6 +375,8 @@ static void fm10k_mbx_write_copy(struct fm10k_hw *hw, if (!tail) tail++; + mbx->tx_mbmem_pulled++; + /* write message to hardware FIFO */ fm10k_write_reg(hw, mbmem + tail++, *(head++)); } while (--len && --end); @@ -459,6 +461,8 @@ static void fm10k_mbx_read_copy(struct fm10k_hw *hw, if (!head) head++; + mbx->rx_mbmem_pushed++; + /* read message from hardware FIFO */ *(tail++) = fm10k_read_reg(hw, mbmem + head++); } while (--len && --end); @@ -899,7 +903,7 @@ static void fm10k_mbx_create_disconnect_hdr(struct fm10k_mbx_info *mbx) } /** - * fm10k_mbx_create_fake_disconnect_hdr - Generate a false disconnect mailbox header + * fm10k_mbx_create_fake_disconnect_hdr - Generate a false disconnect mbox hdr * @mbx: pointer to mailbox * * This function creates a fake disconnect header for loading into remote @@ -2136,6 +2140,7 @@ s32 fm10k_sm_mbx_init(struct fm10k_hw *hw, struct fm10k_mbx_info *mbx, { mbx->mbx_reg = FM10K_GMBX; mbx->mbmem_reg = FM10K_MBMEM_PF(0); + /* start out in closed state */ mbx->state = FM10K_STATE_CLOSED; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h index 0419a7f0035e..c4f18a8f176c 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h @@ -1,5 +1,5 @@ /* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2015 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -291,8 +291,10 @@ struct fm10k_mbx_info { u64 tx_dropped; u64 tx_messages; u64 tx_dwords; + u64 tx_mbmem_pulled; u64 rx_messages; u64 rx_dwords; + u64 rx_mbmem_pushed; u64 rx_parse_err; /* Buffer to store messages */ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 639263d5e833..d9854d39576d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -608,7 +608,7 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) unsigned int r_idx = skb->queue_mapping; int err; - if ((skb->protocol == htons(ETH_P_8021Q)) && + if ((skb->protocol == htons(ETH_P_8021Q)) && !skb_vlan_tag_present(skb)) { /* FM10K only supports hardware tagging, any tags in frame * are considered 2nd level or "outer" tags @@ -627,10 +627,12 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) /* verify the skb head is not shared */ err = skb_cow_head(skb, 0); - if (err) + if (err) { + dev_kfree_skb(skb); return NETDEV_TX_OK; + } - /* locate vlan header */ + /* locate VLAN header */ vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); /* pull the 2 key pieces of data out of it */ @@ -703,7 +705,7 @@ static void fm10k_tx_timeout(struct net_device *netdev) } else { netif_info(interface, drv, netdev, "Fake Tx hang detected with timeout of %d seconds\n", - netdev->watchdog_timeo/HZ); + netdev->watchdog_timeo / HZ); /* fake Tx hang - increase the kernel timeout */ if (netdev->watchdog_timeo < TX_TIMEO_LIMIT) @@ -776,7 +778,7 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set) if (!set) clear_bit(vid, interface->active_vlans); - /* disable the default VID on ring if we have an active VLAN */ + /* disable the default VLAN ID on ring if we have an active VLAN */ for (i = 0; i < interface->num_rx_queues; i++) { struct fm10k_ring *rx_ring = interface->rx_ring[i]; u16 rx_vid = rx_ring->vid & (VLAN_N_VID - 1); @@ -787,7 +789,9 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set) rx_ring->vid &= ~FM10K_VLAN_CLEAR; } - /* Do not remove default VID related entries from VLAN and MAC tables */ + /* Do not remove default VLAN ID related entries from VLAN and MAC + * tables + */ if (!set && vid == hw->mac.default_vid) return 0; @@ -812,7 +816,7 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set) if (err) goto err_out; - /* set vid prior to syncing/unsyncing the VLAN */ + /* set VLAN ID prior to syncing/unsyncing the VLAN */ interface->vid = vid + (set ? VLAN_N_VID : 0); /* Update the unicast and multicast address list to add/drop VLAN */ @@ -1386,8 +1390,9 @@ static const struct net_device_ops fm10k_netdev_ops = { #define DEFAULT_DEBUG_LEVEL_SHIFT 3 -struct net_device *fm10k_alloc_netdev(void) +struct net_device *fm10k_alloc_netdev(const struct fm10k_info *info) { + netdev_features_t hw_features; struct fm10k_intfc *interface; struct net_device *dev; @@ -1410,27 +1415,31 @@ struct net_device *fm10k_alloc_netdev(void) NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN | - NETIF_F_GSO_UDP_TUNNEL | NETIF_F_RXHASH | NETIF_F_RXCSUM; + /* Only the PF can support VXLAN and NVGRE tunnel offloads */ + if (info->mac == fm10k_mac_pf) { + dev->hw_enc_features = NETIF_F_IP_CSUM | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_TSO_ECN | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_IPV6_CSUM | + NETIF_F_SG; + + dev->features |= NETIF_F_GSO_UDP_TUNNEL; + } + /* all features defined to this point should be changeable */ - dev->hw_features |= dev->features; + hw_features = dev->features; /* allow user to enable L2 forwarding acceleration */ - dev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD; + hw_features |= NETIF_F_HW_L2FW_DOFFLOAD; /* configure VLAN features */ dev->vlan_features |= dev->features; - /* configure tunnel offloads */ - dev->hw_enc_features |= NETIF_F_IP_CSUM | - NETIF_F_TSO | - NETIF_F_TSO6 | - NETIF_F_TSO_ECN | - NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_IPV6_CSUM; - /* we want to leave these both on as we cannot disable VLAN tag * insertion or stripping on the hardware since it is contained * in the FTAG and not in the frame itself. @@ -1441,5 +1450,7 @@ struct net_device *fm10k_alloc_netdev(void) dev->priv_flags |= IFF_UNICAST_FLT; + dev->hw_features |= hw_features; + return dev; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 74be792f3f1b..020f6dce4154 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -159,13 +159,31 @@ static void fm10k_reinit(struct fm10k_intfc *interface) fm10k_mbx_free_irq(interface); + /* free interrupts */ + fm10k_clear_queueing_scheme(interface); + /* delay any future reset requests */ interface->last_reset = jiffies + (10 * HZ); /* reset and initialize the hardware so it is in a known state */ - err = hw->mac.ops.reset_hw(hw) ? : hw->mac.ops.init_hw(hw); - if (err) + err = hw->mac.ops.reset_hw(hw); + if (err) { + dev_err(&interface->pdev->dev, "reset_hw failed: %d\n", err); + goto reinit_err; + } + + err = hw->mac.ops.init_hw(hw); + if (err) { dev_err(&interface->pdev->dev, "init_hw failed: %d\n", err); + goto reinit_err; + } + + err = fm10k_init_queueing_scheme(interface); + if (err) { + dev_err(&interface->pdev->dev, + "init_queueing_scheme failed: %d\n", err); + goto reinit_err; + } /* reassociate interrupts */ fm10k_mbx_request_irq(interface); @@ -193,6 +211,10 @@ static void fm10k_reinit(struct fm10k_intfc *interface) fm10k_iov_resume(interface->pdev); +reinit_err: + if (err) + netif_device_detach(netdev); + rtnl_unlock(); clear_bit(__FM10K_RESETTING, &interface->state); @@ -563,7 +585,7 @@ static void fm10k_configure_tx_ring(struct fm10k_intfc *interface, /* store tail pointer */ ring->tail = &interface->uc_addr[FM10K_TDT(reg_idx)]; - /* reset ntu and ntc to place SW in sync with hardwdare */ + /* reset ntu and ntc to place SW in sync with hardware */ ring->next_to_clean = 0; ring->next_to_use = 0; @@ -579,6 +601,13 @@ static void fm10k_configure_tx_ring(struct fm10k_intfc *interface, fm10k_write_reg(hw, FM10K_PFVTCTL(reg_idx), FM10K_PFVTCTL_FTAG_DESC_ENABLE); + /* Initialize XPS */ + if (!test_and_set_bit(__FM10K_TX_XPS_INIT_DONE, &ring->state) && + ring->q_vector) + netif_set_xps_queue(ring->netdev, + &ring->q_vector->affinity_mask, + ring->queue_index); + /* enable queue */ fm10k_write_reg(hw, FM10K_TXDCTL(reg_idx), txdctl); } @@ -669,7 +698,7 @@ static void fm10k_configure_rx_ring(struct fm10k_intfc *interface, /* store tail pointer */ ring->tail = &interface->uc_addr[FM10K_RDT(reg_idx)]; - /* reset ntu and ntc to place SW in sync with hardwdare */ + /* reset ntu and ntc to place SW in sync with hardware */ ring->next_to_clean = 0; ring->next_to_use = 0; ring->next_to_alloc = 0; @@ -694,7 +723,7 @@ static void fm10k_configure_rx_ring(struct fm10k_intfc *interface, /* assign default VLAN to queue */ ring->vid = hw->mac.default_vid; - /* if we have an active VLAN, disable default VID */ + /* if we have an active VLAN, disable default VLAN ID */ if (test_bit(hw->mac.default_vid, interface->active_vlans)) ring->vid |= FM10K_VLAN_CLEAR; @@ -846,7 +875,7 @@ static irqreturn_t fm10k_msix_clean_rings(int __always_unused irq, void *data) struct fm10k_q_vector *q_vector = data; if (q_vector->rx.count || q_vector->tx.count) - napi_schedule(&q_vector->napi); + napi_schedule_irqoff(&q_vector->napi); return IRQ_HANDLED; } @@ -859,7 +888,8 @@ static irqreturn_t fm10k_msix_mbx_vf(int __always_unused irq, void *data) /* re-enable mailbox interrupt and indicate 20us delay */ fm10k_write_reg(hw, FM10K_VFITR(FM10K_MBX_VECTOR), - FM10K_ITR_ENABLE | FM10K_MBX_INT_DELAY); + FM10K_ITR_ENABLE | (FM10K_MBX_INT_DELAY >> + hw->mac.itr_scale)); /* service upstream mailbox */ if (fm10k_mbx_trylock(interface)) { @@ -897,7 +927,7 @@ void fm10k_netpoll(struct net_device *netdev) #endif #define FM10K_ERR_MSG(type) case (type): error = #type; break static void fm10k_handle_fault(struct fm10k_intfc *interface, int type, - struct fm10k_fault *fault) + struct fm10k_fault *fault) { struct pci_dev *pdev = interface->pdev; struct fm10k_hw *hw = &interface->hw; @@ -1090,7 +1120,8 @@ static irqreturn_t fm10k_msix_mbx_pf(int __always_unused irq, void *data) /* re-enable mailbox interrupt and indicate 20us delay */ fm10k_write_reg(hw, FM10K_ITR(FM10K_MBX_VECTOR), - FM10K_ITR_ENABLE | FM10K_MBX_INT_DELAY); + FM10K_ITR_ENABLE | (FM10K_MBX_INT_DELAY >> + hw->mac.itr_scale)); return IRQ_HANDLED; } @@ -1101,6 +1132,10 @@ void fm10k_mbx_free_irq(struct fm10k_intfc *interface) struct fm10k_hw *hw = &interface->hw; int itr_reg; + /* no mailbox IRQ to free if MSI-X is not enabled */ + if (!interface->msix_entries) + return; + /* disconnect the mailbox */ hw->mbx.ops.disconnect(hw, &hw->mbx); @@ -1269,7 +1304,7 @@ static s32 fm10k_update_pvid(struct fm10k_hw *hw, u32 **results, if (!fm10k_glort_valid_pf(hw, glort)) return FM10K_ERR_PARAM; - /* verify VID is valid */ + /* verify VLAN ID is valid */ if (pvid >= FM10K_VLAN_TABLE_VID_MAX) return FM10K_ERR_PARAM; @@ -1388,14 +1423,14 @@ static int fm10k_mbx_request_irq_pf(struct fm10k_intfc *interface) } /* Enable interrupts w/ no moderation for "other" interrupts */ - fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_PCIeFault), other_itr); - fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_SwitchUpDown), other_itr); - fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_SRAM), other_itr); - fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_MaxHoldTime), other_itr); - fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_VFLR), other_itr); + fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_pcie_fault), other_itr); + fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_switch_up_down), other_itr); + fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_sram), other_itr); + fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_max_hold_time), other_itr); + fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_vflr), other_itr); /* Enable interrupts w/ moderation for mailbox */ - fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_Mailbox), mbx_itr); + fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_mailbox), mbx_itr); /* Enable individual interrupt causes */ fm10k_write_reg(hw, FM10K_EIMR, FM10K_EIMR_ENABLE(PCA_FAULT) | @@ -1423,10 +1458,15 @@ int fm10k_mbx_request_irq(struct fm10k_intfc *interface) err = fm10k_mbx_request_irq_pf(interface); else err = fm10k_mbx_request_irq_vf(interface); + if (err) + return err; /* connect mailbox */ - if (!err) - err = hw->mbx.ops.connect(hw, &hw->mbx); + err = hw->mbx.ops.connect(hw, &hw->mbx); + + /* if the mailbox failed to connect, then free IRQ */ + if (err) + fm10k_mbx_free_irq(interface); return err; } @@ -1455,8 +1495,10 @@ void fm10k_qv_free_irq(struct fm10k_intfc *interface) if (!q_vector->tx.count && !q_vector->rx.count) continue; - /* disable interrupts */ + /* clear the affinity_mask in the IRQ descriptor */ + irq_set_affinity_hint(entry->vector, NULL); + /* disable interrupts */ writel(FM10K_ITR_MASK_SET, q_vector->itr); free_irq(entry->vector, q_vector); @@ -1514,6 +1556,9 @@ int fm10k_qv_request_irq(struct fm10k_intfc *interface) goto err_out; } + /* assign the mask for this irq */ + irq_set_affinity_hint(entry->vector, &q_vector->affinity_mask); + /* Enable q_vector */ writel(FM10K_ITR_ENABLE, q_vector->itr); @@ -1534,8 +1579,10 @@ err_out: if (!q_vector->tx.count && !q_vector->rx.count) continue; - /* disable interrupts */ + /* clear the affinity_mask in the IRQ descriptor */ + irq_set_affinity_hint(entry->vector, NULL); + /* disable interrupts */ writel(FM10K_ITR_MASK_SET, q_vector->itr); free_irq(entry->vector, q_vector); @@ -1684,7 +1731,13 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, interface->last_reset = jiffies + (10 * HZ); /* reset and initialize the hardware so it is in a known state */ - err = hw->mac.ops.reset_hw(hw) ? : hw->mac.ops.init_hw(hw); + err = hw->mac.ops.reset_hw(hw); + if (err) { + dev_err(&pdev->dev, "reset_hw failed: %d\n", err); + return err; + } + + err = hw->mac.ops.init_hw(hw); if (err) { dev_err(&pdev->dev, "init_hw failed: %d\n", err); return err; @@ -1722,13 +1775,6 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, pci_resource_len(pdev, 4)); hw->sw_addr = interface->sw_addr; - /* Only the PF can support VXLAN and NVGRE offloads */ - if (hw->mac.type != fm10k_mac_pf) { - netdev->hw_enc_features = 0; - netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL; - } - /* initialize DCBNL interface */ fm10k_dcbnl_set_ops(netdev); @@ -1749,8 +1795,8 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, interface->rx_ring_count = FM10K_DEFAULT_RXD; /* set default interrupt moderation */ - interface->tx_itr = FM10K_ITR_10K; - interface->rx_itr = FM10K_ITR_ADAPTIVE | FM10K_ITR_20K; + interface->tx_itr = FM10K_TX_ITR_DEFAULT; + interface->rx_itr = FM10K_ITR_ADAPTIVE | FM10K_RX_ITR_DEFAULT; /* initialize vxlan_port list */ INIT_LIST_HEAD(&interface->vxlan_port); @@ -1835,17 +1881,18 @@ static void fm10k_slot_warn(struct fm10k_intfc *interface) return; } - if (max_gts < expected_gts) { - dev_warn(&interface->pdev->dev, - "This device requires %dGT/s of bandwidth for optimal performance.\n", - expected_gts); - dev_warn(&interface->pdev->dev, - "A %sslot with x%d lanes is suggested.\n", - (hw->bus_caps.speed == fm10k_bus_speed_2500 ? "2.5GT/s " : - hw->bus_caps.speed == fm10k_bus_speed_5000 ? "5.0GT/s " : - hw->bus_caps.speed == fm10k_bus_speed_8000 ? "8.0GT/s " : ""), - hw->bus_caps.width); - } + if (max_gts >= expected_gts) + return; + + dev_warn(&interface->pdev->dev, + "This device requires %dGT/s of bandwidth for optimal performance.\n", + expected_gts); + dev_warn(&interface->pdev->dev, + "A %sslot with x%d lanes is suggested.\n", + (hw->bus_caps.speed == fm10k_bus_speed_2500 ? "2.5GT/s " : + hw->bus_caps.speed == fm10k_bus_speed_5000 ? "5.0GT/s " : + hw->bus_caps.speed == fm10k_bus_speed_8000 ? "8.0GT/s " : ""), + hw->bus_caps.width); } /** @@ -1859,8 +1906,7 @@ static void fm10k_slot_warn(struct fm10k_intfc *interface) * The OS initialization, configuring of the interface private structure, * and a hardware reset occur. **/ -static int fm10k_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *netdev; struct fm10k_intfc *interface; @@ -1894,7 +1940,7 @@ static int fm10k_probe(struct pci_dev *pdev, pci_set_master(pdev); pci_save_state(pdev); - netdev = fm10k_alloc_netdev(); + netdev = fm10k_alloc_netdev(fm10k_info_tbl[ent->driver_data]); if (!netdev) { err = -ENOMEM; goto err_alloc_netdev; @@ -2071,8 +2117,10 @@ static int fm10k_resume(struct pci_dev *pdev) /* reset hardware to known state */ err = hw->mac.ops.init_hw(&interface->hw); - if (err) + if (err) { + dev_err(&pdev->dev, "init_hw failed: %d\n", err); return err; + } /* reset statistics starting values */ hw->mac.ops.rebind_hw_stats(hw, &interface->stats); @@ -2185,6 +2233,9 @@ static pci_ers_result_t fm10k_io_error_detected(struct pci_dev *pdev, if (netif_running(netdev)) fm10k_close(netdev); + /* free interrupts */ + fm10k_clear_queueing_scheme(interface); + fm10k_mbx_free_irq(interface); pci_disable_device(pdev); @@ -2248,11 +2299,22 @@ static void fm10k_io_resume(struct pci_dev *pdev) int err = 0; /* reset hardware to known state */ - hw->mac.ops.init_hw(&interface->hw); + err = hw->mac.ops.init_hw(&interface->hw); + if (err) { + dev_err(&pdev->dev, "init_hw failed: %d\n", err); + return; + } /* reset statistics starting values */ hw->mac.ops.rebind_hw_stats(hw, &interface->stats); + err = fm10k_init_queueing_scheme(interface); + if (err) { + dev_err(&interface->pdev->dev, + "init_queueing_scheme failed: %d\n", err); + return; + } + /* reassociate interrupts */ fm10k_mbx_request_irq(interface); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index 8c0bdc4e4edd..808307e67718 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1,5 +1,5 @@ /* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2015 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -150,19 +150,26 @@ static s32 fm10k_init_hw_pf(struct fm10k_hw *hw) FM10K_TPH_RXCTRL_HDR_WROEN); } - /* set max hold interval to align with 1.024 usec in all modes */ + /* set max hold interval to align with 1.024 usec in all modes and + * store ITR scale + */ switch (hw->bus.speed) { case fm10k_bus_speed_2500: dma_ctrl = FM10K_DMA_CTRL_MAX_HOLD_1US_GEN1; + hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN1; break; case fm10k_bus_speed_5000: dma_ctrl = FM10K_DMA_CTRL_MAX_HOLD_1US_GEN2; + hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN2; break; case fm10k_bus_speed_8000: dma_ctrl = FM10K_DMA_CTRL_MAX_HOLD_1US_GEN3; + hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN3; break; default: dma_ctrl = 0; + /* just in case, assume Gen3 ITR scale */ + hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN3; break; } @@ -259,7 +266,6 @@ static s32 fm10k_read_mac_addr_pf(struct fm10k_hw *hw) { u8 perm_addr[ETH_ALEN]; u32 serial_num; - int i; serial_num = fm10k_read_reg(hw, FM10K_SM_AREA(1)); @@ -281,10 +287,8 @@ static s32 fm10k_read_mac_addr_pf(struct fm10k_hw *hw) perm_addr[4] = (u8)(serial_num >> 8); perm_addr[5] = (u8)(serial_num); - for (i = 0; i < ETH_ALEN; i++) { - hw->mac.perm_addr[i] = perm_addr[i]; - hw->mac.addr[i] = perm_addr[i]; - } + ether_addr_copy(hw->mac.perm_addr, perm_addr); + ether_addr_copy(hw->mac.addr, perm_addr); return 0; } @@ -325,7 +329,7 @@ static s32 fm10k_update_xc_addr_pf(struct fm10k_hw *hw, u16 glort, /* clear set bit from VLAN ID */ vid &= ~FM10K_VLAN_CLEAR; - /* if glort or vlan are not valid return error */ + /* if glort or VLAN are not valid return error */ if (!fm10k_glort_valid_pf(hw, glort) || vid >= FM10K_VLAN_TABLE_VID_MAX) return FM10K_ERR_PARAM; @@ -334,8 +338,8 @@ static s32 fm10k_update_xc_addr_pf(struct fm10k_hw *hw, u16 glort, ((u32)mac[3] << 16) | ((u32)mac[4] << 8) | ((u32)mac[5])); - mac_update.mac_upper = cpu_to_le16(((u32)mac[0] << 8) | - ((u32)mac[1])); + mac_update.mac_upper = cpu_to_le16(((u16)mac[0] << 8) | + ((u16)mac[1])); mac_update.vlan = cpu_to_le16(vid); mac_update.glort = cpu_to_le16(glort); mac_update.action = add ? 0 : 1; @@ -410,6 +414,7 @@ static s32 fm10k_update_xcast_mode_pf(struct fm10k_hw *hw, u16 glort, u8 mode) if (mode > FM10K_XCAST_MODE_NONE) return FM10K_ERR_PARAM; + /* if glort is not valid return error */ if (!fm10k_glort_valid_pf(hw, glort)) return FM10K_ERR_PARAM; @@ -903,6 +908,13 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw, fm10k_write_reg(hw, FM10K_TDBAL(vf_q_idx), tdbal); fm10k_write_reg(hw, FM10K_TDBAH(vf_q_idx), tdbah); + /* Provide the VF the ITR scale, using software-defined fields in TDLEN + * to pass the information during VF initialization. See definition of + * FM10K_TDLEN_ITR_SCALE_SHIFT for more details. + */ + fm10k_write_reg(hw, FM10K_TDLEN(vf_q_idx), hw->mac.itr_scale << + FM10K_TDLEN_ITR_SCALE_SHIFT); + err_out: /* configure Queue control register */ txqctl = ((u32)vf_vid << FM10K_TXQCTL_VID_SHIFT) & @@ -910,7 +922,7 @@ err_out: txqctl |= (vf_idx << FM10K_TXQCTL_TC_SHIFT) | FM10K_TXQCTL_VF | vf_idx; - /* assign VID */ + /* assign VLAN ID */ for (i = 0; i < queues_per_pool; i++) fm10k_write_reg(hw, FM10K_TXQCTL(vf_q_idx + i), txqctl); @@ -1035,6 +1047,12 @@ static s32 fm10k_iov_reset_resources_pf(struct fm10k_hw *hw, for (i = queues_per_pool; i--;) { fm10k_write_reg(hw, FM10K_TDBAL(vf_q_idx + i), tdbal); fm10k_write_reg(hw, FM10K_TDBAH(vf_q_idx + i), tdbah); + /* See definition of FM10K_TDLEN_ITR_SCALE_SHIFT for an + * explanation of how TDLEN is used. + */ + fm10k_write_reg(hw, FM10K_TDLEN(vf_q_idx + i), + hw->mac.itr_scale << + FM10K_TDLEN_ITR_SCALE_SHIFT); fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx + i), vf_q_idx + i); fm10k_write_reg(hw, FM10K_RQMAP(qmap_idx + i), vf_q_idx + i); } @@ -1155,14 +1173,14 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results, } /** - * fm10k_iov_select_vid - Select correct default VID + * fm10k_iov_select_vid - Select correct default VLAN ID * @hw: Pointer to hardware structure - * @vid: VID to correct + * @vid: VLAN ID to correct * - * Will report an error if VID is out of range. For VID = 0, it will return - * either the pf_vid or sw_vid depending on which one is set. + * Will report an error if the VLAN ID is out of range. For VID = 0, it will + * return either the pf_vid or sw_vid depending on which one is set. */ -static inline s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid) +static s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid) { if (!vid) return vf_info->pf_vid ? vf_info->pf_vid : vf_info->sw_vid; @@ -1212,11 +1230,11 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 **results, set = !(vid & FM10K_VLAN_CLEAR); vid &= ~FM10K_VLAN_CLEAR; - err = fm10k_iov_select_vid(vf_info, vid); + err = fm10k_iov_select_vid(vf_info, (u16)vid); if (err < 0) return err; - else - vid = err; + + vid = err; /* update VSI info for VF in regards to VLAN table */ err = hw->mac.ops.update_vlan(hw, vid, vf_info->vsi, set); @@ -1241,8 +1259,8 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 **results, err = fm10k_iov_select_vid(vf_info, vlan); if (err < 0) return err; - else - vlan = err; + + vlan = (u16)err; /* notify switch of request for new unicast address */ err = hw->mac.ops.update_uc_addr(hw, vf_info->glort, @@ -1267,8 +1285,8 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 **results, err = fm10k_iov_select_vid(vf_info, vlan); if (err < 0) return err; - else - vlan = err; + + vlan = (u16)err; /* notify switch of request for new multicast address */ err = hw->mac.ops.update_mc_addr(hw, vf_info->glort, @@ -1396,14 +1414,6 @@ s32 fm10k_iov_msg_lport_state_pf(struct fm10k_hw *hw, u32 **results, return err; } -const struct fm10k_msg_data fm10k_iov_msg_data_pf[] = { - FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test), - FM10K_VF_MSG_MSIX_HANDLER(fm10k_iov_msg_msix_pf), - FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_iov_msg_mac_vlan_pf), - FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_iov_msg_lport_state_pf), - FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error), -}; - /** * fm10k_update_stats_hw_pf - Updates hardware related statistics of PF * @hw: pointer to hardware structure @@ -1431,9 +1441,10 @@ static void fm10k_update_hw_stats_pf(struct fm10k_hw *hw, xec = fm10k_read_hw_stats_32b(hw, FM10K_STATS_XEC, &stats->xec); vlan_drop = fm10k_read_hw_stats_32b(hw, FM10K_STATS_VLAN_DROP, &stats->vlan_drop); - loopback_drop = fm10k_read_hw_stats_32b(hw, - FM10K_STATS_LOOPBACK_DROP, - &stats->loopback_drop); + loopback_drop = + fm10k_read_hw_stats_32b(hw, + FM10K_STATS_LOOPBACK_DROP, + &stats->loopback_drop); nodesc_drop = fm10k_read_hw_stats_32b(hw, FM10K_STATS_NODESC_DROP, &stats->nodesc_drop); @@ -1678,8 +1689,8 @@ const struct fm10k_tlv_attr fm10k_update_pvid_msg_attr[] = { * * This handler configures the default VLAN for the PF **/ -s32 fm10k_msg_update_pvid_pf(struct fm10k_hw *hw, u32 **results, - struct fm10k_mbx_info *mbx) +static s32 fm10k_msg_update_pvid_pf(struct fm10k_hw *hw, u32 **results, + struct fm10k_mbx_info *mbx) { u16 glort, pvid; u32 pvid_update; @@ -1698,7 +1709,7 @@ s32 fm10k_msg_update_pvid_pf(struct fm10k_hw *hw, u32 **results, if (!fm10k_glort_valid_pf(hw, glort)) return FM10K_ERR_PARAM; - /* verify VID is valid */ + /* verify VLAN ID is valid */ if (pvid >= FM10K_VLAN_TABLE_VID_MAX) return FM10K_ERR_PARAM; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h index 40a0dbc62a04..a8fc512a2416 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h @@ -1,5 +1,5 @@ /* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2015 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -107,8 +107,6 @@ extern const struct fm10k_tlv_attr fm10k_lport_map_msg_attr[]; #define FM10K_PF_MSG_LPORT_MAP_HANDLER(func) \ FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_LPORT_MAP, \ fm10k_lport_map_msg_attr, func) -s32 fm10k_msg_update_pvid_pf(struct fm10k_hw *, u32 **, - struct fm10k_mbx_info *); extern const struct fm10k_tlv_attr fm10k_update_pvid_msg_attr[]; #define FM10K_PF_MSG_UPDATE_PVID_HANDLER(func) \ FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_UPDATE_PVID, \ @@ -129,7 +127,6 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *); s32 fm10k_iov_msg_lport_state_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *); -extern const struct fm10k_msg_data fm10k_iov_msg_data_pf[]; extern struct fm10k_info fm10k_pf_info; #endif /* _FM10K_PF_H */ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c index 9b29d7b0377a..95afb5c0c9c4 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c @@ -1,5 +1,5 @@ /* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2015 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -48,8 +48,8 @@ s32 fm10k_tlv_msg_init(u32 *msg, u16 msg_id) * the attribute buffer. It will return success if provided with a valid * pointers. **/ -s32 fm10k_tlv_attr_put_null_string(u32 *msg, u16 attr_id, - const unsigned char *string) +static s32 fm10k_tlv_attr_put_null_string(u32 *msg, u16 attr_id, + const unsigned char *string) { u32 attr_data = 0, len = 0; u32 *attr; @@ -98,7 +98,7 @@ s32 fm10k_tlv_attr_put_null_string(u32 *msg, u16 attr_id, * it in the array pointed by by string. It will return success if provided * with a valid pointers. **/ -s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string) +static s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string) { u32 len; @@ -353,7 +353,7 @@ s32 fm10k_tlv_attr_get_le_struct(u32 *attr, void *le_struct, u32 len) * function will return NULL on failure, and a pointer to the start * of the nested attributes on success. **/ -u32 *fm10k_tlv_attr_nest_start(u32 *msg, u16 attr_id) +static u32 *fm10k_tlv_attr_nest_start(u32 *msg, u16 attr_id) { u32 *attr; @@ -370,7 +370,7 @@ u32 *fm10k_tlv_attr_nest_start(u32 *msg, u16 attr_id) } /** - * fm10k_tlv_attr_nest_start - Start a set of nested attributes + * fm10k_tlv_attr_nest_stop - Stop a set of nested attributes * @msg: Pointer to message block * * This function closes off an existing set of nested attributes. The @@ -378,7 +378,7 @@ u32 *fm10k_tlv_attr_nest_start(u32 *msg, u16 attr_id) * the case of a nest within the nest this would be the outer nest pointer. * This function will return success provided all pointers are valid. **/ -s32 fm10k_tlv_attr_nest_stop(u32 *msg) +static s32 fm10k_tlv_attr_nest_stop(u32 *msg) { u32 *attr; u32 len; @@ -483,8 +483,8 @@ static s32 fm10k_tlv_attr_validate(u32 *attr, * FM10K_NOT_IMPLEMENTED for any attribute that is outside of the array * and 0 on success. **/ -s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results, - const struct fm10k_tlv_attr *tlv_attr) +static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results, + const struct fm10k_tlv_attr *tlv_attr) { u32 i, attr_id, offset = 0; s32 err = 0; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h index 7e045e8bf1eb..d5ad359c1d54 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h @@ -1,5 +1,5 @@ /* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2015 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -106,8 +106,6 @@ struct fm10k_msg_data { #define FM10K_MSG_HANDLER(id, attr, func) { id, attr, func } s32 fm10k_tlv_msg_init(u32 *, u16); -s32 fm10k_tlv_attr_put_null_string(u32 *, u16, const unsigned char *); -s32 fm10k_tlv_attr_get_null_string(u32 *, unsigned char *); s32 fm10k_tlv_attr_put_mac_vlan(u32 *, u16, const u8 *, u16); s32 fm10k_tlv_attr_get_mac_vlan(u32 *, u8 *, u16 *); s32 fm10k_tlv_attr_put_bool(u32 *, u16); @@ -147,9 +145,6 @@ s32 fm10k_tlv_attr_get_value(u32 *, void *, u32); fm10k_tlv_attr_get_value(attr, ptr, sizeof(s64)) s32 fm10k_tlv_attr_put_le_struct(u32 *, u16, const void *, u32); s32 fm10k_tlv_attr_get_le_struct(u32 *, void *, u32); -u32 *fm10k_tlv_attr_nest_start(u32 *, u16); -s32 fm10k_tlv_attr_nest_stop(u32 *); -s32 fm10k_tlv_attr_parse(u32 *, u32 **, const struct fm10k_tlv_attr *); s32 fm10k_tlv_msg_parse(struct fm10k_hw *, u32 *, struct fm10k_mbx_info *, const struct fm10k_msg_data *); s32 fm10k_tlv_msg_error(struct fm10k_hw *hw, u32 **results, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h index 318a212f0a78..098883d2875f 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h @@ -1,5 +1,5 @@ /* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2015 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -77,6 +77,7 @@ struct fm10k_hw; #define FM10K_PCIE_SRIOV_CTRL_VFARI 0x10 #define FM10K_ERR_PARAM -2 +#define FM10K_ERR_NO_RESOURCES -3 #define FM10K_ERR_REQUESTS_PENDING -4 #define FM10K_ERR_RESET_REQUESTED -5 #define FM10K_ERR_DMA_PENDING -6 @@ -271,6 +272,20 @@ struct fm10k_hw; #define FM10K_TDBAL(_n) ((0x40 * (_n)) + 0x8000) #define FM10K_TDBAH(_n) ((0x40 * (_n)) + 0x8001) #define FM10K_TDLEN(_n) ((0x40 * (_n)) + 0x8002) +/* When fist initialized, VFs need to know the Interrupt Throttle Rate (ITR) + * scale which is based on the PCIe speed but the speed information in the PCI + * configuration space may not be accurate. The PF already knows the ITR scale + * but there is no defined method to pass that information from the PF to the + * VF. This is accomplished during VF initialization by temporarily co-opting + * the yet-to-be-used TDLEN register to have the PF store the ITR shift for + * the VF to retrieve before the VF needs to use the TDLEN register for its + * intended purpose, i.e. before the Tx resources are allocated. + */ +#define FM10K_TDLEN_ITR_SCALE_SHIFT 9 +#define FM10K_TDLEN_ITR_SCALE_MASK 0x00000E00 +#define FM10K_TDLEN_ITR_SCALE_GEN1 2 +#define FM10K_TDLEN_ITR_SCALE_GEN2 1 +#define FM10K_TDLEN_ITR_SCALE_GEN3 0 #define FM10K_TPH_TXCTRL(_n) ((0x40 * (_n)) + 0x8003) #define FM10K_TPH_TXCTRL_DESC_TPHEN 0x00000020 #define FM10K_TPH_TXCTRL_DESC_RROEN 0x00000200 @@ -339,7 +354,7 @@ struct fm10k_hw; #define FM10K_VLAN_TABLE_VID_MAX 4096 #define FM10K_VLAN_TABLE_VSI_MAX 64 #define FM10K_VLAN_LENGTH_SHIFT 16 -#define FM10K_VLAN_CLEAR (1 << 15) +#define FM10K_VLAN_CLEAR BIT(15) #define FM10K_VLAN_ALL \ ((FM10K_VLAN_TABLE_VID_MAX - 1) << FM10K_VLAN_LENGTH_SHIFT) @@ -373,13 +388,13 @@ struct fm10k_hw; #define FM10K_SW_SYSTIME_PULSE(_n) ((_n) + 0x02252) enum fm10k_int_source { - fm10k_int_Mailbox = 0, - fm10k_int_PCIeFault = 1, - fm10k_int_SwitchUpDown = 2, - fm10k_int_SwitchEvent = 3, - fm10k_int_SRAM = 4, - fm10k_int_VFLR = 5, - fm10k_int_MaxHoldTime = 6, + fm10k_int_mailbox = 0, + fm10k_int_pcie_fault = 1, + fm10k_int_switch_up_down = 2, + fm10k_int_switch_event = 3, + fm10k_int_sram = 4, + fm10k_int_vflr = 5, + fm10k_int_max_hold_time = 6, fm10k_int_sources_max_pf }; @@ -559,6 +574,7 @@ struct fm10k_mac_info { bool get_host_state; bool tx_ready; u32 dglort_map; + u8 itr_scale; }; struct fm10k_swapi_table_info { diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c index 36c8b0aa08fd..5445c0fab49f 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c @@ -28,7 +28,7 @@ static s32 fm10k_stop_hw_vf(struct fm10k_hw *hw) { u8 *perm_addr = hw->mac.perm_addr; - u32 bal = 0, bah = 0; + u32 bal = 0, bah = 0, tdlen; s32 err; u16 i; @@ -48,6 +48,9 @@ static s32 fm10k_stop_hw_vf(struct fm10k_hw *hw) ((u32)perm_addr[2]); } + /* restore default itr_scale for next VF initialization */ + tdlen = hw->mac.itr_scale << FM10K_TDLEN_ITR_SCALE_SHIFT; + /* The queues have already been disabled so we just need to * update their base address registers */ @@ -56,6 +59,12 @@ static s32 fm10k_stop_hw_vf(struct fm10k_hw *hw) fm10k_write_reg(hw, FM10K_TDBAH(i), bah); fm10k_write_reg(hw, FM10K_RDBAL(i), bal); fm10k_write_reg(hw, FM10K_RDBAH(i), bah); + /* Restore ITR scale in software-defined mechanism in TDLEN + * for next VF initialization. See definition of + * FM10K_TDLEN_ITR_SCALE_SHIFT for more details on the use of + * TDLEN here. + */ + fm10k_write_reg(hw, FM10K_TDLEN(i), tdlen); } return 0; @@ -103,7 +112,14 @@ static s32 fm10k_init_hw_vf(struct fm10k_hw *hw) s32 err; u16 i; - /* assume we always have at least 1 queue */ + /* verify we have at least 1 queue */ + if (!~fm10k_read_reg(hw, FM10K_TXQCTL(0)) || + !~fm10k_read_reg(hw, FM10K_RXQCTL(0))) { + err = FM10K_ERR_NO_RESOURCES; + goto reset_max_queues; + } + + /* determine how many queues we have */ for (i = 1; tqdloc0 && (i < FM10K_MAX_QUEUES_POOL); i++) { /* verify the Descriptor cache offsets are increasing */ tqdloc = ~fm10k_read_reg(hw, FM10K_TQDLOC(i)); @@ -119,16 +135,28 @@ static s32 fm10k_init_hw_vf(struct fm10k_hw *hw) /* shut down queues we own and reset DMA configuration */ err = fm10k_disable_queues_generic(hw, i); if (err) - return err; + goto reset_max_queues; /* record maximum queue count */ hw->mac.max_queues = i; - /* fetch default VLAN */ + /* fetch default VLAN and ITR scale */ hw->mac.default_vid = (fm10k_read_reg(hw, FM10K_TXQCTL(0)) & FM10K_TXQCTL_VID_MASK) >> FM10K_TXQCTL_VID_SHIFT; + /* Read the ITR scale from TDLEN. See the definition of + * FM10K_TDLEN_ITR_SCALE_SHIFT for more information about how TDLEN is + * used here. + */ + hw->mac.itr_scale = (fm10k_read_reg(hw, FM10K_TDLEN(0)) & + FM10K_TDLEN_ITR_SCALE_MASK) >> + FM10K_TDLEN_ITR_SCALE_SHIFT; return 0; + +reset_max_queues: + hw->mac.max_queues = 0; + + return err; } /* This structure defines the attibutes to be parsed below */ @@ -414,6 +442,7 @@ static s32 fm10k_update_xcast_mode_vf(struct fm10k_hw *hw, u16 glort, u8 mode) if (mode > FM10K_XCAST_MODE_NONE) return FM10K_ERR_PARAM; + /* generate message requesting to change xcast mode */ fm10k_tlv_msg_init(msg, FM10K_VF_MSG_ID_LPORT_STATE); fm10k_tlv_attr_put_u8(msg, FM10K_LPORT_STATE_MSG_XCAST_MODE, mode); diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 4dd3e26129b4..b7bc014ae00b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -42,7 +42,6 @@ #include <linux/string.h> #include <linux/in.h> #include <linux/ip.h> -#include <linux/tcp.h> #include <linux/sctp.h> #include <linux/pkt_sched.h> #include <linux/ipv6.h> @@ -104,6 +103,7 @@ #define I40E_PRIV_FLAGS_LINKPOLL_FLAG BIT(1) #define I40E_PRIV_FLAGS_FD_ATR BIT(2) #define I40E_PRIV_FLAGS_VEB_STATS BIT(3) +#define I40E_PRIV_FLAGS_PS BIT(4) #define I40E_NVM_VERSION_LO_SHIFT 0 #define I40E_NVM_VERSION_LO_MASK (0xff << I40E_NVM_VERSION_LO_SHIFT) @@ -187,6 +187,7 @@ struct i40e_lump_tracking { #define I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR (I40E_FDIR_BUFFER_HEAD_ROOM * 4) #define I40E_HKEY_ARRAY_SIZE ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4) +#define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4) enum i40e_fd_stat_idx { I40E_FD_STAT_ATR, @@ -265,7 +266,7 @@ struct i40e_pf { u16 num_lan_qps; /* num lan queues this PF has set up */ u16 num_lan_msix; /* num queue vectors for the base PF vsi */ int queues_left; /* queues left unclaimed */ - u16 rss_size; /* num queues in the RSS array */ + u16 alloc_rss_size; /* allocated RSS queues */ u16 rss_size_max; /* HW defined max RSS queues */ u16 fdir_pf_filter_count; /* num of guaranteed filters for this PF */ u16 num_alloc_vsi; /* num VSIs this driver supports */ @@ -412,7 +413,7 @@ struct i40e_pf { u32 rx_hwtstamp_cleared; bool ptp_tx; bool ptp_rx; - u16 rss_table_size; + u16 rss_table_size; /* HW RSS table size */ /* These are only valid in NPAR modes */ u32 npar_max_bw; u32 npar_min_bw; @@ -487,6 +488,7 @@ struct i40e_vsi { u32 tx_restart; u32 tx_busy; u64 tx_linearize; + u64 tx_force_wb; u32 rx_buf_failed; u32 rx_page_failed; @@ -504,8 +506,10 @@ struct i40e_vsi { u16 tx_itr_setting; u16 int_rate_limit; /* value in usecs */ - u16 rss_table_size; - u16 rss_size; + u16 rss_table_size; /* HW RSS table size */ + u16 rss_size; /* Allocated RSS queues */ + u8 *rss_hkey_user; /* User configured hash keys */ + u8 *rss_lut_user; /* User configured lookup table entries */ u16 max_frame; u16 rx_hdr_len; @@ -575,6 +579,9 @@ struct i40e_q_vector { u8 num_ringpairs; /* total number of ring pairs in vector */ +#define I40E_Q_VECTOR_HUNG_DETECT 0 /* Bit Index for hung detection logic */ + unsigned long hung_detected; /* Set/Reset for hung_detection logic */ + cpumask_t affinity_mask; struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; @@ -602,8 +609,8 @@ static inline char *i40e_nvm_version_str(struct i40e_hw *hw) full_ver = hw->nvm.oem_ver; ver = (u8)(full_ver >> I40E_OEM_VER_SHIFT); - build = (u16)((full_ver >> I40E_OEM_VER_BUILD_SHIFT) - & I40E_OEM_VER_BUILD_MASK); + build = (u16)((full_ver >> I40E_OEM_VER_BUILD_SHIFT) & + I40E_OEM_VER_BUILD_MASK); patch = (u8)(full_ver & I40E_OEM_VER_PATCH_MASK); snprintf(buf, sizeof(buf), @@ -668,6 +675,8 @@ extern const char i40e_driver_name[]; extern const char i40e_driver_version_str[]; void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags); void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags); +int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); +int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id); void i40e_update_stats(struct i40e_vsi *vsi); void i40e_update_eth_stats(struct i40e_vsi *vsi); @@ -691,7 +700,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, bool is_vf, bool is_netdev); void i40e_del_filter(struct i40e_vsi *vsi, u8 *macaddr, s16 vlan, bool is_vf, bool is_netdev); -int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl); +int i40e_sync_vsi_filters(struct i40e_vsi *vsi); struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, u16 uplink, u32 param1); int i40e_vsi_release(struct i40e_vsi *vsi); @@ -709,7 +718,7 @@ struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, u16 uplink_seid, void i40e_veb_release(struct i40e_veb *veb); int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc); -i40e_status i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid); +int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid); void i40e_vsi_remove_pvid(struct i40e_vsi *vsi); void i40e_vsi_reset_stats(struct i40e_vsi *vsi); void i40e_pf_reset_stats(struct i40e_pf *pf); @@ -767,6 +776,8 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid); int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid); struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, u8 *macaddr, bool is_vf, bool is_netdev); +int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, u8 *macaddr, + bool is_vf, bool is_netdev); bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi); struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr, bool is_vf, bool is_netdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 6584b6cd73fd..b22012a446a6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -227,6 +227,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_nvm_update = 0x0703, i40e_aqc_opc_nvm_config_read = 0x0704, i40e_aqc_opc_nvm_config_write = 0x0705, + i40e_aqc_opc_oem_post_update = 0x0720, /* virtualization commands */ i40e_aqc_opc_send_msg_to_pf = 0x0801, @@ -1891,6 +1892,26 @@ struct i40e_aqc_nvm_config_data_immediate_field { I40E_CHECK_STRUCT_LEN(0xc, i40e_aqc_nvm_config_data_immediate_field); +/* OEM Post Update (indirect 0x0720) + * no command data struct used + */ +struct i40e_aqc_nvm_oem_post_update { +#define I40E_AQ_NVM_OEM_POST_UPDATE_EXTERNAL_DATA 0x01 + u8 sel_data; + u8 reserved[7]; +}; + +I40E_CHECK_STRUCT_LEN(0x8, i40e_aqc_nvm_oem_post_update); + +struct i40e_aqc_nvm_oem_post_update_buffer { + u8 str_len; + u8 dev_addr; + __le16 eeprom_addr; + u8 data[36]; +}; + +I40E_CHECK_STRUCT_LEN(0x28, i40e_aqc_nvm_oem_post_update_buffer); + /* Send to PF command (indirect 0x0801) id is only used by PF * Send to VF command (indirect 0x0802) id is only used by PF * Send to Peer PF command (indirect 0x0803) @@ -2403,4 +2424,4 @@ struct i40e_aqc_debug_modify_internals { I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_internals); -#endif +#endif /* _I40E_ADMINQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index d4b7af9a2fc8..10744a698d6f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -103,8 +103,8 @@ static ssize_t i40e_dbg_dump_read(struct file *filp, char __user *buffer, len = min_t(int, count, (i40e_dbg_dump_data_len - *ppos)); bytes_not_copied = copy_to_user(buffer, &i40e_dbg_dump_buf[*ppos], len); - if (bytes_not_copied < 0) - return bytes_not_copied; + if (bytes_not_copied) + return -EFAULT; *ppos += len; return len; @@ -353,8 +353,8 @@ static ssize_t i40e_dbg_command_read(struct file *filp, char __user *buffer, bytes_not_copied = copy_to_user(buffer, buf, len); kfree(buf); - if (bytes_not_copied < 0) - return bytes_not_copied; + if (bytes_not_copied) + return -EFAULT; *ppos = len; return len; @@ -981,12 +981,10 @@ static ssize_t i40e_dbg_command_write(struct file *filp, if (!cmd_buf) return count; bytes_not_copied = copy_from_user(cmd_buf, buffer, count); - if (bytes_not_copied < 0) { + if (bytes_not_copied) { kfree(cmd_buf); - return bytes_not_copied; + return -EFAULT; } - if (bytes_not_copied > 0) - count -= bytes_not_copied; cmd_buf[count] = '\0'; cmd_buf_tmp = strchr(cmd_buf, '\n'); @@ -1140,7 +1138,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, spin_lock_bh(&vsi->mac_filter_list_lock); f = i40e_add_filter(vsi, ma, vlan, false, false); spin_unlock_bh(&vsi->mac_filter_list_lock); - ret = i40e_sync_vsi_filters(vsi, true); + ret = i40e_sync_vsi_filters(vsi); if (f && !ret) dev_info(&pf->pdev->dev, "add macaddr: %pM vlan=%d added to VSI %d\n", @@ -1179,7 +1177,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, spin_lock_bh(&vsi->mac_filter_list_lock); i40e_del_filter(vsi, ma, vlan, false, false); spin_unlock_bh(&vsi->mac_filter_list_lock); - ret = i40e_sync_vsi_filters(vsi, true); + ret = i40e_sync_vsi_filters(vsi); if (!ret) dev_info(&pf->pdev->dev, "del macaddr: %pM vlan=%d removed from VSI %d\n", @@ -2034,8 +2032,8 @@ static ssize_t i40e_dbg_netdev_ops_read(struct file *filp, char __user *buffer, bytes_not_copied = copy_to_user(buffer, buf, len); kfree(buf); - if (bytes_not_copied < 0) - return bytes_not_copied; + if (bytes_not_copied) + return -EFAULT; *ppos = len; return len; @@ -2068,10 +2066,8 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, memset(i40e_dbg_netdev_ops_buf, 0, sizeof(i40e_dbg_netdev_ops_buf)); bytes_not_copied = copy_from_user(i40e_dbg_netdev_ops_buf, buffer, count); - if (bytes_not_copied < 0) - return bytes_not_copied; - else if (bytes_not_copied > 0) - count -= bytes_not_copied; + if (bytes_not_copied) + return -EFAULT; i40e_dbg_netdev_ops_buf[count] = '\0'; buf_tmp = strchr(i40e_dbg_netdev_ops_buf, '\n'); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 3f385ffe420f..29d5833e24a3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -88,6 +88,7 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = { I40E_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast), I40E_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol), I40E_VSI_STAT("tx_linearize", tx_linearize), + I40E_VSI_STAT("tx_force_wb", tx_force_wb), }; /* These PF_STATs might look like duplicates of some NETDEV_STATs, @@ -230,6 +231,7 @@ static const char i40e_priv_flags_strings[][ETH_GSTRING_LEN] = { "LinkPolling", "flow-director-atr", "veb-stats", + "packet-split", }; #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_priv_flags_strings) @@ -2110,7 +2112,7 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, switch (cmd->cmd) { case ETHTOOL_GRXRINGS: - cmd->data = vsi->alloc_queue_pairs; + cmd->data = vsi->num_queue_pairs; ret = 0; break; case ETHTOOL_GRXFH: @@ -2583,7 +2585,6 @@ static int i40e_set_channels(struct net_device *dev, return -EINVAL; } -#define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4) /** * i40e_get_rxfh_key_size - get the RSS hash key size * @netdev: network interface device structure @@ -2611,10 +2612,9 @@ static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - u32 reg_val; - int i, j; + u8 *lut, *seed = NULL; + int ret; + u16 i; if (hfunc) *hfunc = ETH_RSS_HASH_TOP; @@ -2622,24 +2622,20 @@ static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, if (!indir) return 0; - for (i = 0, j = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) { - reg_val = rd32(hw, I40E_PFQF_HLUT(i)); - indir[j++] = reg_val & 0xff; - indir[j++] = (reg_val >> 8) & 0xff; - indir[j++] = (reg_val >> 16) & 0xff; - indir[j++] = (reg_val >> 24) & 0xff; - } + seed = key; + lut = kzalloc(I40E_HLUT_ARRAY_SIZE, GFP_KERNEL); + if (!lut) + return -ENOMEM; + ret = i40e_get_rss(vsi, seed, lut, I40E_HLUT_ARRAY_SIZE); + if (ret) + goto out; + for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++) + indir[i] = (u32)(lut[i]); - if (key) { - for (i = 0, j = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) { - reg_val = rd32(hw, I40E_PFQF_HKEY(i)); - key[j++] = (u8)(reg_val & 0xff); - key[j++] = (u8)((reg_val >> 8) & 0xff); - key[j++] = (u8)((reg_val >> 16) & 0xff); - key[j++] = (u8)((reg_val >> 24) & 0xff); - } - } - return 0; +out: + kfree(lut); + + return ret; } /** @@ -2656,10 +2652,8 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir, { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - u32 reg_val; - int i, j; + u8 *seed = NULL; + u16 i; if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; @@ -2667,24 +2661,28 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir, if (!indir) return 0; - for (i = 0, j = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) { - reg_val = indir[j++]; - reg_val |= indir[j++] << 8; - reg_val |= indir[j++] << 16; - reg_val |= indir[j++] << 24; - wr32(hw, I40E_PFQF_HLUT(i), reg_val); - } - if (key) { - for (i = 0, j = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) { - reg_val = key[j++]; - reg_val |= key[j++] << 8; - reg_val |= key[j++] << 16; - reg_val |= key[j++] << 24; - wr32(hw, I40E_PFQF_HKEY(i), reg_val); + if (!vsi->rss_hkey_user) { + vsi->rss_hkey_user = kzalloc(I40E_HKEY_ARRAY_SIZE, + GFP_KERNEL); + if (!vsi->rss_hkey_user) + return -ENOMEM; } + memcpy(vsi->rss_hkey_user, key, I40E_HKEY_ARRAY_SIZE); + seed = vsi->rss_hkey_user; } - return 0; + if (!vsi->rss_lut_user) { + vsi->rss_lut_user = kzalloc(I40E_HLUT_ARRAY_SIZE, GFP_KERNEL); + if (!vsi->rss_lut_user) + return -ENOMEM; + } + + /* Each 32 bits pointed by 'indir' is stored with a lut entry */ + for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++) + vsi->rss_lut_user[i] = (u8)(indir[i]); + + return i40e_config_rss(vsi, seed, vsi->rss_lut_user, + I40E_HLUT_ARRAY_SIZE); } /** @@ -2712,6 +2710,8 @@ static u32 i40e_get_priv_flags(struct net_device *dev) I40E_PRIV_FLAGS_FD_ATR : 0; ret_flags |= pf->flags & I40E_FLAG_VEB_STATS_ENABLED ? I40E_PRIV_FLAGS_VEB_STATS : 0; + ret_flags |= pf->flags & I40E_FLAG_RX_PS_ENABLED ? + I40E_PRIV_FLAGS_PS : 0; return ret_flags; } @@ -2726,6 +2726,26 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; + bool reset_required = false; + + /* NOTE: MFP is not settable */ + + /* allow the user to control the method of receive + * buffer DMA, whether the packet is split at header + * boundaries into two separate buffers. In some cases + * one routine or the other will perform better. + */ + if ((flags & I40E_PRIV_FLAGS_PS) && + !(pf->flags & I40E_FLAG_RX_PS_ENABLED)) { + pf->flags |= I40E_FLAG_RX_PS_ENABLED; + pf->flags &= ~I40E_FLAG_RX_1BUF_ENABLED; + reset_required = true; + } else if (!(flags & I40E_PRIV_FLAGS_PS) && + (pf->flags & I40E_FLAG_RX_PS_ENABLED)) { + pf->flags &= ~I40E_FLAG_RX_PS_ENABLED; + pf->flags |= I40E_FLAG_RX_1BUF_ENABLED; + reset_required = true; + } if (flags & I40E_PRIV_FLAGS_LINKPOLL_FLAG) pf->flags |= I40E_FLAG_LINK_POLLING_ENABLED; @@ -2748,6 +2768,10 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) else pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED; + /* if needed, issue reset to cause things to take effect */ + if (reset_required) + i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED)); + return 0; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c index fe5d9bf3ed6d..579a46ca82df 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c +++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c @@ -1544,8 +1544,6 @@ void i40e_fcoe_vsi_setup(struct i40e_pf *pf) if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) return; - BUG_ON(!pf->vsi[pf->lan_vsi]); - for (i = 0; i < pf->num_alloc_vsi; i++) { vsi = pf->vsi[i]; if (vsi && vsi->type == I40E_VSI_FCOE) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c index 79ae7beeafe5..daa9204426d4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c +++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c @@ -762,7 +762,7 @@ static void i40e_write_byte(u8 *hmc_bits, /* prepare the bits and mask */ shift_width = ce_info->lsb % 8; - mask = BIT(ce_info->width) - 1; + mask = (u8)(BIT(ce_info->width) - 1); src_byte = *from; src_byte &= mask; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b825f978d441..b118deb08ce6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -38,8 +38,8 @@ static const char i40e_driver_string[] = #define DRV_KERN "-k" #define DRV_VERSION_MAJOR 1 -#define DRV_VERSION_MINOR 3 -#define DRV_VERSION_BUILD 46 +#define DRV_VERSION_MINOR 4 +#define DRV_VERSION_BUILD 8 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -55,6 +55,8 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit); static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); +static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, + u16 rss_table_size, u16 rss_size); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); @@ -790,75 +792,6 @@ static void i40e_update_fcoe_stats(struct i40e_vsi *vsi) #endif /** - * i40e_update_link_xoff_rx - Update XOFF received in link flow control mode - * @pf: the corresponding PF - * - * Update the Rx XOFF counter (PAUSE frames) in link flow control mode - **/ -static void i40e_update_link_xoff_rx(struct i40e_pf *pf) -{ - struct i40e_hw_port_stats *osd = &pf->stats_offsets; - struct i40e_hw_port_stats *nsd = &pf->stats; - struct i40e_hw *hw = &pf->hw; - u64 xoff = 0; - - if ((hw->fc.current_mode != I40E_FC_FULL) && - (hw->fc.current_mode != I40E_FC_RX_PAUSE)) - return; - - xoff = nsd->link_xoff_rx; - i40e_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port), - pf->stat_offsets_loaded, - &osd->link_xoff_rx, &nsd->link_xoff_rx); - - /* No new LFC xoff rx */ - if (!(nsd->link_xoff_rx - xoff)) - return; - -} - -/** - * i40e_update_prio_xoff_rx - Update XOFF received in PFC mode - * @pf: the corresponding PF - * - * Update the Rx XOFF counter (PAUSE frames) in PFC mode - **/ -static void i40e_update_prio_xoff_rx(struct i40e_pf *pf) -{ - struct i40e_hw_port_stats *osd = &pf->stats_offsets; - struct i40e_hw_port_stats *nsd = &pf->stats; - bool xoff[I40E_MAX_TRAFFIC_CLASS] = {false}; - struct i40e_dcbx_config *dcb_cfg; - struct i40e_hw *hw = &pf->hw; - u16 i; - u8 tc; - - dcb_cfg = &hw->local_dcbx_config; - - /* Collect Link XOFF stats when PFC is disabled */ - if (!dcb_cfg->pfc.pfcenable) { - i40e_update_link_xoff_rx(pf); - return; - } - - for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { - u64 prio_xoff = nsd->priority_xoff_rx[i]; - - i40e_stat_update32(hw, I40E_GLPRT_PXOFFRXC(hw->port, i), - pf->stat_offsets_loaded, - &osd->priority_xoff_rx[i], - &nsd->priority_xoff_rx[i]); - - /* No new PFC xoff rx */ - if (!(nsd->priority_xoff_rx[i] - prio_xoff)) - continue; - /* Get the TC for given priority */ - tc = dcb_cfg->etscfg.prioritytable[i]; - xoff[tc] = true; - } -} - -/** * i40e_update_vsi_stats - Update the vsi statistics counters. * @vsi: the VSI to be updated * @@ -881,6 +814,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) u64 bytes, packets; unsigned int start; u64 tx_linearize; + u64 tx_force_wb; u64 rx_p, rx_b; u64 tx_p, tx_b; u16 q; @@ -899,7 +833,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) */ rx_b = rx_p = 0; tx_b = tx_p = 0; - tx_restart = tx_busy = tx_linearize = 0; + tx_restart = tx_busy = tx_linearize = tx_force_wb = 0; rx_page = 0; rx_buf = 0; rcu_read_lock(); @@ -917,6 +851,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) tx_restart += p->tx_stats.restart_queue; tx_busy += p->tx_stats.tx_busy; tx_linearize += p->tx_stats.tx_linearize; + tx_force_wb += p->tx_stats.tx_force_wb; /* Rx queue is part of the same block as Tx queue */ p = &p[1]; @@ -934,6 +869,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) vsi->tx_restart = tx_restart; vsi->tx_busy = tx_busy; vsi->tx_linearize = tx_linearize; + vsi->tx_force_wb = tx_force_wb; vsi->rx_page_failed = rx_page; vsi->rx_buf_failed = rx_buf; @@ -1049,12 +985,18 @@ static void i40e_update_pf_stats(struct i40e_pf *pf) i40e_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_tx, &nsd->link_xon_tx); - i40e_update_prio_xoff_rx(pf); /* handles I40E_GLPRT_LXOFFRXC */ + i40e_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port), + pf->stat_offsets_loaded, + &osd->link_xoff_rx, &nsd->link_xoff_rx); i40e_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_tx, &nsd->link_xoff_tx); for (i = 0; i < 8; i++) { + i40e_stat_update32(hw, I40E_GLPRT_PXOFFRXC(hw->port, i), + pf->stat_offsets_loaded, + &osd->priority_xoff_rx[i], + &nsd->priority_xoff_rx[i]); i40e_stat_update32(hw, I40E_GLPRT_PXONRXC(hw->port, i), pf->stat_offsets_loaded, &osd->priority_xon_rx[i], @@ -1317,6 +1259,42 @@ struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, u8 *macaddr, } /** + * i40e_del_mac_all_vlan - Remove a MAC filter from all VLANS + * @vsi: the VSI to be searched + * @macaddr: the mac address to be removed + * @is_vf: true if it is a VF + * @is_netdev: true if it is a netdev + * + * Removes a given MAC address from a VSI, regardless of VLAN + * + * Returns 0 for success, or error + **/ +int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, u8 *macaddr, + bool is_vf, bool is_netdev) +{ + struct i40e_mac_filter *f = NULL; + int changed = 0; + + WARN(!spin_is_locked(&vsi->mac_filter_list_lock), + "Missing mac_filter_list_lock\n"); + list_for_each_entry(f, &vsi->mac_filter_list, list) { + if ((ether_addr_equal(macaddr, f->macaddr)) && + (is_vf == f->is_vf) && + (is_netdev == f->is_netdev)) { + f->counter--; + f->changed = true; + changed = 1; + } + } + if (changed) { + vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; + vsi->back->flags |= I40E_FLAG_FILTER_SYNC; + return 0; + } + return -ENOENT; +} + +/** * i40e_rm_default_mac_filter - Remove the default MAC filter set by NVM * @vsi: the PF Main VSI - inappropriate for any other VSI * @macaddr: the MAC address @@ -1547,10 +1525,9 @@ static int i40e_set_mac(struct net_device *netdev, void *p) spin_unlock_bh(&vsi->mac_filter_list_lock); } - i40e_sync_vsi_filters(vsi, false); ether_addr_copy(netdev->dev_addr, addr->sa_data); - return 0; + return i40e_sync_vsi_filters(vsi); } /** @@ -1590,7 +1567,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) { /* Find numtc from enabled TC bitmap */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { - if (enabled_tc & BIT_ULL(i)) /* TC is enabled */ + if (enabled_tc & BIT(i)) /* TC is enabled */ numtc++; } if (!numtc) { @@ -1619,13 +1596,14 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, /* Setup queue offset/count for all TCs for given VSI */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { /* See if the given TC is enabled for the given VSI */ - if (vsi->tc_config.enabled_tc & BIT_ULL(i)) { + if (vsi->tc_config.enabled_tc & BIT(i)) { /* TC is enabled */ int pow, num_qps; switch (vsi->type) { case I40E_VSI_MAIN: - qcount = min_t(int, pf->rss_size, num_tc_qps); + qcount = min_t(int, pf->alloc_rss_size, + num_tc_qps); break; #ifdef I40E_FCOE case I40E_VSI_FCOE: @@ -1851,13 +1829,12 @@ static void i40e_cleanup_add_list(struct list_head *add_list) /** * i40e_sync_vsi_filters - Update the VSI filter list to the HW * @vsi: ptr to the VSI - * @grab_rtnl: whether RTNL needs to be grabbed * * Push any outstanding VSI filter changes through the AdminQ. * * Returns 0 or error value **/ -int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) +int i40e_sync_vsi_filters(struct i40e_vsi *vsi) { struct list_head tmp_del_list, tmp_add_list; struct i40e_mac_filter *f, *ftmp, *fclone; @@ -1865,8 +1842,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) bool add_happened = false; int filter_list_len = 0; u32 changed_flags = 0; + i40e_status aq_ret = 0; bool err_cond = false; - i40e_status ret = 0; + int retval = 0; struct i40e_pf *pf; int num_add = 0; int num_del = 0; @@ -1929,17 +1907,22 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) } spin_unlock_bh(&vsi->mac_filter_list_lock); - if (err_cond) + if (err_cond) { i40e_cleanup_add_list(&tmp_add_list); + retval = -ENOMEM; + goto out; + } } /* Now process 'del_list' outside the lock */ if (!list_empty(&tmp_del_list)) { + int del_list_size; + filter_list_len = pf->hw.aq.asq_buf_size / sizeof(struct i40e_aqc_remove_macvlan_element_data); - del_list = kcalloc(filter_list_len, - sizeof(struct i40e_aqc_remove_macvlan_element_data), - GFP_KERNEL); + del_list_size = filter_list_len * + sizeof(struct i40e_aqc_remove_macvlan_element_data); + del_list = kzalloc(del_list_size, GFP_KERNEL); if (!del_list) { i40e_cleanup_add_list(&tmp_add_list); @@ -1948,7 +1931,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) i40e_undo_del_filter_entries(vsi, &tmp_del_list); i40e_undo_add_filter_entries(vsi); spin_unlock_bh(&vsi->mac_filter_list_lock); - return -ENOMEM; + retval = -ENOMEM; + goto out; } list_for_each_entry_safe(f, ftmp, &tmp_del_list, list) { @@ -1966,18 +1950,22 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) /* flush a full buffer */ if (num_del == filter_list_len) { - ret = i40e_aq_remove_macvlan(&pf->hw, - vsi->seid, del_list, num_del, - NULL); + aq_ret = i40e_aq_remove_macvlan(&pf->hw, + vsi->seid, + del_list, + num_del, + NULL); aq_err = pf->hw.aq.asq_last_status; num_del = 0; - memset(del_list, 0, sizeof(*del_list)); + memset(del_list, 0, del_list_size); - if (ret && aq_err != I40E_AQ_RC_ENOENT) + if (aq_ret && aq_err != I40E_AQ_RC_ENOENT) { + retval = -EIO; dev_err(&pf->pdev->dev, "ignoring delete macvlan error, err %s, aq_err %s while flushing a full buffer\n", - i40e_stat_str(&pf->hw, ret), + i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, aq_err)); + } } /* Release memory for MAC filter entries which were * synced up with HW. @@ -1987,15 +1975,16 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) } if (num_del) { - ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid, - del_list, num_del, NULL); + aq_ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid, + del_list, num_del, + NULL); aq_err = pf->hw.aq.asq_last_status; num_del = 0; - if (ret && aq_err != I40E_AQ_RC_ENOENT) + if (aq_ret && aq_err != I40E_AQ_RC_ENOENT) dev_info(&pf->pdev->dev, "ignoring delete macvlan error, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), + i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, aq_err)); } @@ -2004,13 +1993,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) } if (!list_empty(&tmp_add_list)) { + int add_list_size; /* do all the adds now */ filter_list_len = pf->hw.aq.asq_buf_size / sizeof(struct i40e_aqc_add_macvlan_element_data), - add_list = kcalloc(filter_list_len, - sizeof(struct i40e_aqc_add_macvlan_element_data), - GFP_KERNEL); + add_list_size = filter_list_len * + sizeof(struct i40e_aqc_add_macvlan_element_data); + add_list = kzalloc(add_list_size, GFP_KERNEL); if (!add_list) { /* Purge element from temporary lists */ i40e_cleanup_add_list(&tmp_add_list); @@ -2019,7 +2009,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) spin_lock_bh(&vsi->mac_filter_list_lock); i40e_undo_add_filter_entries(vsi); spin_unlock_bh(&vsi->mac_filter_list_lock); - return -ENOMEM; + retval = -ENOMEM; + goto out; } list_for_each_entry_safe(f, ftmp, &tmp_add_list, list) { @@ -2040,15 +2031,15 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) /* flush a full buffer */ if (num_add == filter_list_len) { - ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid, - add_list, num_add, - NULL); + aq_ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid, + add_list, num_add, + NULL); aq_err = pf->hw.aq.asq_last_status; num_add = 0; - if (ret) + if (aq_ret) break; - memset(add_list, 0, sizeof(*add_list)); + memset(add_list, 0, add_list_size); } /* Entries from tmp_add_list were cloned from MAC * filter list, hence clean those cloned entries @@ -2058,18 +2049,19 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) } if (num_add) { - ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid, - add_list, num_add, NULL); + aq_ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid, + add_list, num_add, NULL); aq_err = pf->hw.aq.asq_last_status; num_add = 0; } kfree(add_list); add_list = NULL; - if (add_happened && ret && aq_err != I40E_AQ_RC_EINVAL) { + if (add_happened && aq_ret && aq_err != I40E_AQ_RC_EINVAL) { + retval = i40e_aq_rc_to_posix(aq_ret, aq_err); dev_info(&pf->pdev->dev, "add filter failed, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), + i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, aq_err)); if ((pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOSPC) && !test_bit(__I40E_FILTER_OVERFLOW_PROMISC, @@ -2087,16 +2079,19 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) bool cur_multipromisc; cur_multipromisc = !!(vsi->current_netdev_flags & IFF_ALLMULTI); - ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw, - vsi->seid, - cur_multipromisc, - NULL); - if (ret) + aq_ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw, + vsi->seid, + cur_multipromisc, + NULL); + if (aq_ret) { + retval = i40e_aq_rc_to_posix(aq_ret, + pf->hw.aq.asq_last_status); dev_info(&pf->pdev->dev, "set multi promisc failed, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), + i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + } } if ((changed_flags & IFF_PROMISC) || promisc_forced_on) { bool cur_promisc; @@ -2112,44 +2107,50 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) */ if (pf->cur_promisc != cur_promisc) { pf->cur_promisc = cur_promisc; - if (grab_rtnl) - i40e_do_reset_safe(pf, - BIT(__I40E_PF_RESET_REQUESTED)); - else - i40e_do_reset(pf, - BIT(__I40E_PF_RESET_REQUESTED)); + set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); } } else { - ret = i40e_aq_set_vsi_unicast_promiscuous( + aq_ret = i40e_aq_set_vsi_unicast_promiscuous( &vsi->back->hw, vsi->seid, cur_promisc, NULL); - if (ret) + if (aq_ret) { + retval = + i40e_aq_rc_to_posix(aq_ret, + pf->hw.aq.asq_last_status); dev_info(&pf->pdev->dev, "set unicast promisc failed, err %d, aq_err %d\n", - ret, pf->hw.aq.asq_last_status); - ret = i40e_aq_set_vsi_multicast_promiscuous( + aq_ret, pf->hw.aq.asq_last_status); + } + aq_ret = i40e_aq_set_vsi_multicast_promiscuous( &vsi->back->hw, vsi->seid, cur_promisc, NULL); - if (ret) + if (aq_ret) { + retval = + i40e_aq_rc_to_posix(aq_ret, + pf->hw.aq.asq_last_status); dev_info(&pf->pdev->dev, "set multicast promisc failed, err %d, aq_err %d\n", - ret, pf->hw.aq.asq_last_status); + aq_ret, pf->hw.aq.asq_last_status); + } } - ret = i40e_aq_set_vsi_broadcast(&vsi->back->hw, - vsi->seid, - cur_promisc, NULL); - if (ret) + aq_ret = i40e_aq_set_vsi_broadcast(&vsi->back->hw, + vsi->seid, + cur_promisc, NULL); + if (aq_ret) { + retval = i40e_aq_rc_to_posix(aq_ret, + pf->hw.aq.asq_last_status); dev_info(&pf->pdev->dev, "set brdcast promisc failed, err %s, aq_err %s\n", - i40e_stat_str(&pf->hw, ret), + i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + } } - +out: clear_bit(__I40E_CONFIG_BUSY, &vsi->state); - return 0; + return retval; } /** @@ -2166,8 +2167,15 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) for (v = 0; v < pf->num_alloc_vsi; v++) { if (pf->vsi[v] && - (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) - i40e_sync_vsi_filters(pf->vsi[v], true); + (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) { + int ret = i40e_sync_vsi_filters(pf->vsi[v]); + + if (ret) { + /* come back and try again later */ + pf->flags |= I40E_FLAG_FILTER_SYNC; + break; + } + } } } @@ -2377,16 +2385,13 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid) } } - /* Make sure to release before sync_vsi_filter because that - * function will lock/unlock as necessary - */ spin_unlock_bh(&vsi->mac_filter_list_lock); - if (test_bit(__I40E_DOWN, &vsi->back->state) || - test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state)) - return 0; - - return i40e_sync_vsi_filters(vsi, false); + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + i40e_service_event_schedule(vsi->back); + return 0; } /** @@ -2459,16 +2464,13 @@ int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid) } } - /* Make sure to release before sync_vsi_filter because that - * function with lock/unlock as necessary - */ spin_unlock_bh(&vsi->mac_filter_list_lock); - if (test_bit(__I40E_DOWN, &vsi->back->state) || - test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state)) - return 0; - - return i40e_sync_vsi_filters(vsi, false); + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + i40e_service_event_schedule(vsi->back); + return 0; } /** @@ -2711,6 +2713,11 @@ static void i40e_config_xps_tx_ring(struct i40e_ring *ring) netif_set_xps_queue(ring->netdev, mask, ring->queue_index); free_cpumask_var(mask); } + + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + i40e_service_event_schedule(vsi->back); } /** @@ -4360,17 +4367,41 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) else val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); + /* Bail out if interrupts are disabled because napi_poll + * execution in-progress or will get scheduled soon. + * napi_poll cleans TX and RX queues and updates 'next_to_clean'. + */ + if (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK)) + return; + head = i40e_get_head(tx_ring); tx_pending = i40e_get_tx_pending(tx_ring); - /* Interrupts are disabled and TX pending is non-zero, - * trigger the SW interrupt (don't wait). Worst case - * there will be one extra interrupt which may result - * into not cleaning any queues because queues are cleaned. + /* HW is done executing descriptors, updated HEAD write back, + * but SW hasn't processed those descriptors. If interrupt is + * not generated from this point ON, it could result into + * dev_watchdog detecting timeout on those netdev_queue, + * hence proactively trigger SW interrupt. */ - if (tx_pending && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) - i40e_force_wb(vsi, tx_ring->q_vector); + if (tx_pending) { + /* NAPI Poll didn't run and clear since it was set */ + if (test_and_clear_bit(I40E_Q_VECTOR_HUNG_DETECT, + &tx_ring->q_vector->hung_detected)) { + netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n", + vsi->seid, q_idx, tx_pending, + tx_ring->next_to_clean, head, + tx_ring->next_to_use, + readl(tx_ring->tail)); + netdev_info(vsi->netdev, "VSI_seid %d, Issuing force_wb for TX queue %d, Interrupt Reg: 0x%x\n", + vsi->seid, q_idx, val); + i40e_force_wb(vsi, tx_ring->q_vector); + } else { + /* First Chance - detected possible hung */ + set_bit(I40E_Q_VECTOR_HUNG_DETECT, + &tx_ring->q_vector->hung_detected); + } + } } /** @@ -4441,7 +4472,7 @@ static u8 i40e_get_iscsi_tc_map(struct i40e_pf *pf) if (app.selector == I40E_APP_SEL_TCPIP && app.protocolid == I40E_APP_PROTOID_ISCSI) { tc = dcbcfg->etscfg.prioritytable[app.priority]; - enabled_tc |= BIT_ULL(tc); + enabled_tc |= BIT(tc); break; } } @@ -4525,7 +4556,7 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) /* At least have TC0 */ enabled_tc = (enabled_tc ? enabled_tc : 0x1); for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { - if (enabled_tc & BIT_ULL(i)) + if (enabled_tc & BIT(i)) num_tc++; } return num_tc; @@ -4547,7 +4578,7 @@ static u8 i40e_pf_get_default_tc(struct i40e_pf *pf) /* Find the first enabled TC */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { - if (enabled_tc & BIT_ULL(i)) + if (enabled_tc & BIT(i)) break; } @@ -4707,7 +4738,7 @@ static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc) * will set the numtc for netdev as 2 that will be * referenced by the netdev layer as TC 0 and 1. */ - if (vsi->tc_config.enabled_tc & BIT_ULL(i)) + if (vsi->tc_config.enabled_tc & BIT(i)) netdev_set_tc_queue(netdev, vsi->tc_config.tc_info[i].netdev_tc, vsi->tc_config.tc_info[i].qcount, @@ -4769,7 +4800,7 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) /* Enable ETS TCs with equal BW Share for now across all VSIs */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { - if (enabled_tc & BIT_ULL(i)) + if (enabled_tc & BIT(i)) bw_share[i] = 1; } @@ -4843,7 +4874,7 @@ int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc) /* Enable ETS TCs with equal BW Share for now */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { - if (enabled_tc & BIT_ULL(i)) + if (enabled_tc & BIT(i)) bw_data.tc_bw_share_credits[i] = 1; } @@ -5240,7 +5271,7 @@ static int i40e_setup_tc(struct net_device *netdev, u8 tc) /* Generate TC map for number of tc requested */ for (i = 0; i < tc; i++) - enabled_tc |= BIT_ULL(i); + enabled_tc |= BIT(i); /* Requesting same TC configuration as already enabled */ if (enabled_tc == vsi->tc_config.enabled_tc) @@ -5738,7 +5769,7 @@ static void i40e_handle_lan_overflow_event(struct i40e_pf *pf, **/ static void i40e_service_event_complete(struct i40e_pf *pf) { - BUG_ON(!test_bit(__I40E_SERVICE_SCHED, &pf->state)); + WARN_ON(!test_bit(__I40E_SERVICE_SCHED, &pf->state)); /* flush memory to make sure state is correct before next watchog */ smp_mb__before_atomic(); @@ -6013,6 +6044,9 @@ static void i40e_link_event(struct i40e_pf *pf) i40e_status status; bool new_link, old_link; + /* save off old link status information */ + pf->hw.phy.link_info_old = pf->hw.phy.link_info; + /* set this to force the get_link_status call to refresh state */ pf->hw.phy.get_link_info = true; @@ -6101,23 +6135,23 @@ static void i40e_reset_subtask(struct i40e_pf *pf) rtnl_lock(); if (test_bit(__I40E_REINIT_REQUESTED, &pf->state)) { - reset_flags |= BIT_ULL(__I40E_REINIT_REQUESTED); + reset_flags |= BIT(__I40E_REINIT_REQUESTED); clear_bit(__I40E_REINIT_REQUESTED, &pf->state); } if (test_bit(__I40E_PF_RESET_REQUESTED, &pf->state)) { - reset_flags |= BIT_ULL(__I40E_PF_RESET_REQUESTED); + reset_flags |= BIT(__I40E_PF_RESET_REQUESTED); clear_bit(__I40E_PF_RESET_REQUESTED, &pf->state); } if (test_bit(__I40E_CORE_RESET_REQUESTED, &pf->state)) { - reset_flags |= BIT_ULL(__I40E_CORE_RESET_REQUESTED); + reset_flags |= BIT(__I40E_CORE_RESET_REQUESTED); clear_bit(__I40E_CORE_RESET_REQUESTED, &pf->state); } if (test_bit(__I40E_GLOBAL_RESET_REQUESTED, &pf->state)) { - reset_flags |= BIT_ULL(__I40E_GLOBAL_RESET_REQUESTED); + reset_flags |= BIT(__I40E_GLOBAL_RESET_REQUESTED); clear_bit(__I40E_GLOBAL_RESET_REQUESTED, &pf->state); } if (test_bit(__I40E_DOWN_REQUESTED, &pf->state)) { - reset_flags |= BIT_ULL(__I40E_DOWN_REQUESTED); + reset_flags |= BIT(__I40E_DOWN_REQUESTED); clear_bit(__I40E_DOWN_REQUESTED, &pf->state); } @@ -6147,13 +6181,9 @@ unlock: static void i40e_handle_link_event(struct i40e_pf *pf, struct i40e_arq_event_info *e) { - struct i40e_hw *hw = &pf->hw; struct i40e_aqc_get_link_status *status = (struct i40e_aqc_get_link_status *)&e->desc.params.raw; - /* save off old link status information */ - hw->phy.link_info_old = hw->phy.link_info; - /* Do a new status request to re-enable LSE reporting * and load new status information into the hw struct * This completely ignores any state information @@ -6192,15 +6222,18 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) val = rd32(&pf->hw, pf->hw.aq.arq.len); oldval = val; if (val & I40E_PF_ARQLEN_ARQVFE_MASK) { - dev_info(&pf->pdev->dev, "ARQ VF Error detected\n"); + if (hw->debug_mask & I40E_DEBUG_AQ) + dev_info(&pf->pdev->dev, "ARQ VF Error detected\n"); val &= ~I40E_PF_ARQLEN_ARQVFE_MASK; } if (val & I40E_PF_ARQLEN_ARQOVFL_MASK) { - dev_info(&pf->pdev->dev, "ARQ Overflow Error detected\n"); + if (hw->debug_mask & I40E_DEBUG_AQ) + dev_info(&pf->pdev->dev, "ARQ Overflow Error detected\n"); val &= ~I40E_PF_ARQLEN_ARQOVFL_MASK; } if (val & I40E_PF_ARQLEN_ARQCRIT_MASK) { - dev_info(&pf->pdev->dev, "ARQ Critical Error detected\n"); + if (hw->debug_mask & I40E_DEBUG_AQ) + dev_info(&pf->pdev->dev, "ARQ Critical Error detected\n"); val &= ~I40E_PF_ARQLEN_ARQCRIT_MASK; } if (oldval != val) @@ -6209,15 +6242,18 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) val = rd32(&pf->hw, pf->hw.aq.asq.len); oldval = val; if (val & I40E_PF_ATQLEN_ATQVFE_MASK) { - dev_info(&pf->pdev->dev, "ASQ VF Error detected\n"); + if (pf->hw.debug_mask & I40E_DEBUG_AQ) + dev_info(&pf->pdev->dev, "ASQ VF Error detected\n"); val &= ~I40E_PF_ATQLEN_ATQVFE_MASK; } if (val & I40E_PF_ATQLEN_ATQOVFL_MASK) { - dev_info(&pf->pdev->dev, "ASQ Overflow Error detected\n"); + if (pf->hw.debug_mask & I40E_DEBUG_AQ) + dev_info(&pf->pdev->dev, "ASQ Overflow Error detected\n"); val &= ~I40E_PF_ATQLEN_ATQOVFL_MASK; } if (val & I40E_PF_ATQLEN_ATQCRIT_MASK) { - dev_info(&pf->pdev->dev, "ASQ Critical Error detected\n"); + if (pf->hw.debug_mask & I40E_DEBUG_AQ) + dev_info(&pf->pdev->dev, "ASQ Critical Error detected\n"); val &= ~I40E_PF_ATQLEN_ATQCRIT_MASK; } if (oldval != val) @@ -6268,6 +6304,7 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) break; case i40e_aqc_opc_nvm_erase: case i40e_aqc_opc_nvm_update: + case i40e_aqc_opc_oem_post_update: i40e_debug(&pf->hw, I40E_DEBUG_NVM, "ARQ NVM operation completed\n"); break; default: @@ -6685,6 +6722,7 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) struct i40e_hw *hw = &pf->hw; u8 set_fc_aq_fail = 0; i40e_status ret; + u32 val; u32 v; /* Now we wait for GRST to settle out. @@ -6823,6 +6861,20 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) } } + /* Reconfigure hardware for allowing smaller MSS in the case + * of TSO, so that we avoid the MDD being fired and causing + * a reset in the case of small MSS+TSO. + */ +#define I40E_REG_MSS 0x000E64DC +#define I40E_REG_MSS_MIN_MASK 0x3FF0000 +#define I40E_64BYTE_MSS 0x400000 + val = rd32(hw, I40E_REG_MSS); + if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) { + val &= ~I40E_REG_MSS_MIN_MASK; + val |= I40E_64BYTE_MSS; + wr32(hw, I40E_REG_MSS, val); + } + if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || (pf->hw.aq.fw_maj_ver < 4)) { msleep(75); @@ -7282,6 +7334,23 @@ static void i40e_vsi_free_arrays(struct i40e_vsi *vsi, bool free_qvectors) } /** + * i40e_clear_rss_config_user - clear the user configured RSS hash keys + * and lookup table + * @vsi: Pointer to VSI structure + */ +static void i40e_clear_rss_config_user(struct i40e_vsi *vsi) +{ + if (!vsi) + return; + + kfree(vsi->rss_hkey_user); + vsi->rss_hkey_user = NULL; + + kfree(vsi->rss_lut_user); + vsi->rss_lut_user = NULL; +} + +/** * i40e_vsi_clear - Deallocate the VSI provided * @vsi: the VSI being un-configured **/ @@ -7318,6 +7387,7 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi) i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx); i40e_vsi_free_arrays(vsi, true); + i40e_clear_rss_config_user(vsi); pf->vsi[vsi->idx] = NULL; if (vsi->idx < pf->next_vsi) @@ -7780,7 +7850,8 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) * @vsi: vsi structure * @seed: RSS hash seed **/ -static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed) +static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) { struct i40e_aqc_get_set_rss_key_data rss_key; struct i40e_pf *pf = vsi->back; @@ -7833,43 +7904,57 @@ static int i40e_vsi_config_rss(struct i40e_vsi *vsi) { u8 seed[I40E_HKEY_ARRAY_SIZE]; struct i40e_pf *pf = vsi->back; + u8 *lut; + int ret; - netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); - vsi->rss_size = min_t(int, pf->rss_size, vsi->num_queue_pairs); + if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE)) + return 0; - if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) - return i40e_config_rss_aq(vsi, seed); + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; - return 0; + i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); + netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs); + ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size); + kfree(lut); + + return ret; } /** - * i40e_config_rss_reg - Prepare for RSS if used - * @pf: board private structure + * i40e_config_rss_reg - Configure RSS keys and lut by writing registers + * @vsi: Pointer to vsi structure * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure **/ -static int i40e_config_rss_reg(struct i40e_pf *pf, const u8 *seed) +static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed, + const u8 *lut, u16 lut_size) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - u32 *seed_dw = (u32 *)seed; - u32 current_queue = 0; - u32 lut = 0; - int i, j; + u8 i; /* Fill out hash function seed */ - for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) - wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]); + if (seed) { + u32 *seed_dw = (u32 *)seed; - for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) { - lut = 0; - for (j = 0; j < 4; j++) { - if (current_queue == vsi->rss_size) - current_queue = 0; - lut |= ((current_queue) << (8 * j)); - current_queue++; - } - wr32(&pf->hw, I40E_PFQF_HLUT(i), lut); + for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) + wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]); + } + + if (lut) { + u32 *lut_dw = (u32 *)lut; + + if (lut_size != I40E_HLUT_ARRAY_SIZE) + return -EINVAL; + + for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) + wr32(hw, I40E_PFQF_HLUT(i), lut_dw[i]); } i40e_flush(hw); @@ -7877,18 +7962,101 @@ static int i40e_config_rss_reg(struct i40e_pf *pf, const u8 *seed) } /** - * i40e_config_rss - Prepare for RSS if used + * i40e_get_rss_reg - Get the RSS keys and lut by reading registers + * @vsi: Pointer to VSI structure + * @seed: Buffer to store the keys + * @lut: Buffer to store the lookup table entries + * @lut_size: Size of buffer to store the lookup table entries + * + * Returns 0 on success, negative on failure + */ +static int i40e_get_rss_reg(struct i40e_vsi *vsi, u8 *seed, + u8 *lut, u16 lut_size) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + u16 i; + + if (seed) { + u32 *seed_dw = (u32 *)seed; + + for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) + seed_dw[i] = rd32(hw, I40E_PFQF_HKEY(i)); + } + if (lut) { + u32 *lut_dw = (u32 *)lut; + + if (lut_size != I40E_HLUT_ARRAY_SIZE) + return -EINVAL; + for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) + lut_dw[i] = rd32(hw, I40E_PFQF_HLUT(i)); + } + + return 0; +} + +/** + * i40e_config_rss - Configure RSS keys and lut + * @vsi: Pointer to VSI structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + */ +int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +{ + struct i40e_pf *pf = vsi->back; + + if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) + return i40e_config_rss_aq(vsi, seed, lut, lut_size); + else + return i40e_config_rss_reg(vsi, seed, lut, lut_size); +} + +/** + * i40e_get_rss - Get RSS keys and lut + * @vsi: Pointer to VSI structure + * @seed: Buffer to store the keys + * @lut: Buffer to store the lookup table entries + * lut_size: Size of buffer to store the lookup table entries + * + * Returns 0 on success, negative on failure + */ +int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +{ + return i40e_get_rss_reg(vsi, seed, lut, lut_size); +} + +/** + * i40e_fill_rss_lut - Fill the RSS lookup table with default values + * @pf: Pointer to board private structure + * @lut: Lookup table + * @rss_table_size: Lookup table size + * @rss_size: Range of queue number for hashing + */ +static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, + u16 rss_table_size, u16 rss_size) +{ + u16 i; + + for (i = 0; i < rss_table_size; i++) + lut[i] = i % rss_size; +} + +/** + * i40e_pf_config_rss - Prepare for RSS if used * @pf: board private structure **/ -static int i40e_config_rss(struct i40e_pf *pf) +static int i40e_pf_config_rss(struct i40e_pf *pf) { struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; u8 seed[I40E_HKEY_ARRAY_SIZE]; + u8 *lut; struct i40e_hw *hw = &pf->hw; u32 reg_val; u64 hena; - - netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + int ret; /* By default we enable TCP/UDP with IPv4/IPv6 ptypes */ hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) | @@ -7898,8 +8066,6 @@ static int i40e_config_rss(struct i40e_pf *pf) wr32(hw, I40E_PFQF_HENA(0), (u32)hena); wr32(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); - vsi->rss_size = min_t(int, pf->rss_size, vsi->num_queue_pairs); - /* Determine the RSS table size based on the hardware capabilities */ reg_val = rd32(hw, I40E_PFQF_CTL_0); reg_val = (pf->rss_table_size == 512) ? @@ -7907,10 +8073,32 @@ static int i40e_config_rss(struct i40e_pf *pf) (reg_val & ~I40E_PFQF_CTL_0_HASHLUTSIZE_512); wr32(hw, I40E_PFQF_CTL_0, reg_val); - if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) - return i40e_config_rss_aq(pf->vsi[pf->lan_vsi], seed); + /* Determine the RSS size of the VSI */ + if (!vsi->rss_size) + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + /* Use user configured lut if there is one, otherwise use default */ + if (vsi->rss_lut_user) + memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); else - return i40e_config_rss_reg(pf, seed); + i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); + + /* Use user configured hash key if there is one, otherwise + * use default. + */ + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); + else + netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size); + kfree(lut); + + return ret; } /** @@ -7935,13 +8123,28 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) vsi->req_queue_pairs = queue_count; i40e_prep_for_reset(pf); - pf->rss_size = new_rss_size; + pf->alloc_rss_size = new_rss_size; i40e_reset_and_rebuild(pf, true); - i40e_config_rss(pf); + + /* Discard the user configured hash keys and lut, if less + * queues are enabled. + */ + if (queue_count < vsi->rss_size) { + i40e_clear_rss_config_user(vsi); + dev_dbg(&pf->pdev->dev, + "discard user configured hash keys and lut\n"); + } + + /* Reset vsi->rss_size, as number of enabled queues changed */ + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + + i40e_pf_config_rss(pf); } - dev_info(&pf->pdev->dev, "RSS count: %d\n", pf->rss_size); - return pf->rss_size; + dev_info(&pf->pdev->dev, "RSS count/HW max RSS count: %d/%d\n", + pf->alloc_rss_size, pf->rss_size_max); + return pf->alloc_rss_size; } /** @@ -8112,13 +8315,14 @@ static int i40e_sw_init(struct i40e_pf *pf) * maximum might end up larger than the available queues */ pf->rss_size_max = BIT(pf->hw.func_caps.rss_table_entry_width); - pf->rss_size = 1; + pf->alloc_rss_size = 1; pf->rss_table_size = pf->hw.func_caps.rss_table_size; pf->rss_size_max = min_t(int, pf->rss_size_max, pf->hw.func_caps.num_tx_qp); if (pf->hw.func_caps.rss) { pf->flags |= I40E_FLAG_RSS_ENABLED; - pf->rss_size = min_t(int, pf->rss_size_max, num_online_cpus()); + pf->alloc_rss_size = min_t(int, pf->rss_size_max, + num_online_cpus()); } /* MFP mode enabled */ @@ -9051,7 +9255,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi) f->is_vf, f->is_netdev); spin_unlock_bh(&vsi->mac_filter_list_lock); - i40e_sync_vsi_filters(vsi, false); + i40e_sync_vsi_filters(vsi); i40e_vsi_delete(vsi); i40e_vsi_free_q_vectors(vsi); @@ -9947,7 +10151,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) * the hash */ if ((pf->flags & I40E_FLAG_RSS_ENABLED)) - i40e_config_rss(pf); + i40e_pf_config_rss(pf); /* fill in link information and enable LSE reporting */ i40e_update_link_info(&pf->hw); @@ -9985,7 +10189,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) !(pf->flags & I40E_FLAG_MSIX_ENABLED)) { /* one qp for PF, no queues for anything else */ queues_left = 0; - pf->rss_size = pf->num_lan_qps = 1; + pf->alloc_rss_size = pf->num_lan_qps = 1; /* make sure all the fancies are disabled */ pf->flags &= ~(I40E_FLAG_RSS_ENABLED | @@ -10002,7 +10206,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) I40E_FLAG_FD_ATR_ENABLED | I40E_FLAG_DCB_CAPABLE))) { /* one qp for PF */ - pf->rss_size = pf->num_lan_qps = 1; + pf->alloc_rss_size = pf->num_lan_qps = 1; queues_left -= pf->num_lan_qps; pf->flags &= ~(I40E_FLAG_RSS_ENABLED | @@ -10072,8 +10276,9 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) "qs_avail=%d FD SB=%d lan_qs=%d lan_tc0=%d vf=%d*%d vmdq=%d*%d, remaining=%d\n", pf->hw.func_caps.num_tx_qp, !!(pf->flags & I40E_FLAG_FD_SB_ENABLED), - pf->num_lan_qps, pf->rss_size, pf->num_req_vfs, pf->num_vf_qps, - pf->num_vmdq_vsis, pf->num_vmdq_qps, queues_left); + pf->num_lan_qps, pf->alloc_rss_size, pf->num_req_vfs, + pf->num_vf_qps, pf->num_vmdq_vsis, pf->num_vmdq_qps, + queues_left); #ifdef I40E_FCOE dev_dbg(&pf->pdev->dev, "fcoe queues = %d\n", pf->num_fcoe_qps); #endif @@ -10111,55 +10316,53 @@ static int i40e_setup_pf_filter_control(struct i40e_pf *pf) } #define INFO_STRING_LEN 255 +#define REMAIN(__x) (INFO_STRING_LEN - (__x)) static void i40e_print_features(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; - char *buf, *string; + char *buf; + int i; - string = kzalloc(INFO_STRING_LEN, GFP_KERNEL); - if (!string) { - dev_err(&pf->pdev->dev, "Features string allocation failed\n"); + buf = kmalloc(INFO_STRING_LEN, GFP_KERNEL); + if (!buf) return; - } - - buf = string; - buf += sprintf(string, "Features: PF-id[%d] ", hw->pf_id); + i = snprintf(buf, INFO_STRING_LEN, "Features: PF-id[%d]", hw->pf_id); #ifdef CONFIG_PCI_IOV - buf += sprintf(buf, "VFs: %d ", pf->num_req_vfs); + i += snprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs); #endif - buf += sprintf(buf, "VSIs: %d QP: %d RX: %s ", - pf->hw.func_caps.num_vsis, - pf->vsi[pf->lan_vsi]->num_queue_pairs, - pf->flags & I40E_FLAG_RX_PS_ENABLED ? "PS" : "1BUF"); + i += snprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d RX: %s", + pf->hw.func_caps.num_vsis, + pf->vsi[pf->lan_vsi]->num_queue_pairs, + pf->flags & I40E_FLAG_RX_PS_ENABLED ? "PS" : "1BUF"); if (pf->flags & I40E_FLAG_RSS_ENABLED) - buf += sprintf(buf, "RSS "); + i += snprintf(&buf[i], REMAIN(i), " RSS"); if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) - buf += sprintf(buf, "FD_ATR "); + i += snprintf(&buf[i], REMAIN(i), " FD_ATR"); if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { - buf += sprintf(buf, "FD_SB "); - buf += sprintf(buf, "NTUPLE "); + i += snprintf(&buf[i], REMAIN(i), " FD_SB"); + i += snprintf(&buf[i], REMAIN(i), " NTUPLE"); } if (pf->flags & I40E_FLAG_DCB_CAPABLE) - buf += sprintf(buf, "DCB "); + i += snprintf(&buf[i], REMAIN(i), " DCB"); #if IS_ENABLED(CONFIG_VXLAN) - buf += sprintf(buf, "VxLAN "); + i += snprintf(&buf[i], REMAIN(i), " VxLAN"); #endif if (pf->flags & I40E_FLAG_PTP) - buf += sprintf(buf, "PTP "); + i += snprintf(&buf[i], REMAIN(i), " PTP"); #ifdef I40E_FCOE if (pf->flags & I40E_FLAG_FCOE_ENABLED) - buf += sprintf(buf, "FCOE "); + i += snprintf(&buf[i], REMAIN(i), " FCOE"); #endif if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) - buf += sprintf(buf, "VEB "); + i += snprintf(&buf[i], REMAIN(i), " VEB"); else - buf += sprintf(buf, "VEPA "); + i += snprintf(&buf[i], REMAIN(i), " VEPA"); - BUG_ON(buf > (string + INFO_STRING_LEN)); - dev_info(&pf->pdev->dev, "%s\n", string); - kfree(string); + dev_info(&pf->pdev->dev, "%s\n", buf); + kfree(buf); + WARN_ON(i > INFO_STRING_LEN); } /** @@ -10183,6 +10386,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) u16 link_status; int err; u32 len; + u32 val; u32 i; u8 set_fc_aq_fail; @@ -10296,6 +10500,16 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->hw.fc.requested_mode = I40E_FC_NONE; err = i40e_init_adminq(hw); + if (err) { + if (err == I40E_ERR_FIRMWARE_API_VERSION) + dev_info(&pdev->dev, + "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n"); + else + dev_info(&pdev->dev, + "The driver for the device stopped because the device firmware failed to init. Try updating your NVM image.\n"); + + goto err_pf_reset; + } /* provide nvm, fw, api versions */ dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s\n", @@ -10303,12 +10517,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->aq.api_maj_ver, hw->aq.api_min_ver, i40e_nvm_version_str(hw)); - if (err) { - dev_info(&pdev->dev, - "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n"); - goto err_pf_reset; - } - if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR) dev_info(&pdev->dev, @@ -10405,7 +10613,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* NVM bit on means WoL disabled for the port */ i40e_read_nvm_word(hw, I40E_SR_NVM_WAKE_ON_LAN, &wol_nvm_bits); - if ((1 << hw->port) & wol_nvm_bits || hw->partition_id != 1) + if (BIT (hw->port) & wol_nvm_bits || hw->partition_id != 1) pf->wol_en = false; else pf->wol_en = true; @@ -10487,6 +10695,17 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) i40e_stat_str(&pf->hw, err), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + /* Reconfigure hardware for allowing smaller MSS in the case + * of TSO, so that we avoid the MDD being fired and causing + * a reset in the case of small MSS+TSO. + */ + val = rd32(hw, I40E_REG_MSS); + if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) { + val &= ~I40E_REG_MSS_MIN_MASK; + val |= I40E_64BYTE_MSS; + wr32(hw, I40E_REG_MSS, val); + } + if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || (pf->hw.aq.fw_maj_ver < 4)) { msleep(75); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 6649ce4ba2de..b0ae3e695783 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -235,6 +235,9 @@ static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi, "Filter deleted for PCTYPE %d loc = %d\n", fd_data->pctype, fd_data->fd_id); } + if (err) + kfree(raw_packet); + return err ? -EOPNOTSUPP : 0; } @@ -312,6 +315,9 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, fd_data->pctype, fd_data->fd_id); } + if (err) + kfree(raw_packet); + return err ? -EOPNOTSUPP : 0; } @@ -322,7 +328,7 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, * @fd_data: the flow director data required for the FDir descriptor * @add: true adds a filter, false removes it * - * Always returns -EOPNOTSUPP + * Returns 0 if the filters were successfully added or removed **/ static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi, struct i40e_fdir_filter *fd_data, @@ -387,6 +393,9 @@ static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi, } } + if (err) + kfree(raw_packet); + return err ? -EOPNOTSUPP : 0; } @@ -506,9 +515,6 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, pf->auto_disable_flags |= I40E_FLAG_FD_SB_ENABLED; } - } else { - dev_info(&pdev->dev, - "FD filter programming failed due to incorrect filter parameters\n"); } } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) { if (I40E_DEBUG_FD & pf->hw.debug_mask) @@ -526,11 +532,7 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer) { if (tx_buffer->skb) { - if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - else - dev_kfree_skb_any(tx_buffer->skb); - + dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buffer, dma), @@ -542,6 +544,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); } + + if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) + kfree(tx_buffer->raw_buf); + tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); @@ -1863,7 +1869,6 @@ enable_int: q_vector->itr_countdown--; else q_vector->itr_countdown = ITR_COUNTDOWN_START; - } /** @@ -1891,12 +1896,14 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) return 0; } + /* Clear hung_detected bit */ + clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected); /* Since the actual Tx work is minimal, we can give the Tx a larger * budget and be more aggressive about cleaning up the Tx descriptors. */ i40e_for_each_ring(ring, q_vector->tx) { clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); - arm_wb |= ring->arm_wb; + arm_wb = arm_wb || ring->arm_wb; ring->arm_wb = false; } @@ -1925,8 +1932,10 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) /* If work not completed, return budget and polling will return */ if (!clean_complete) { tx_only: - if (arm_wb) + if (arm_wb) { + q_vector->tx.ring[0].tx_stats.tx_force_wb++; i40e_force_wb(vsi, q_vector); + } return budget; } @@ -2186,14 +2195,12 @@ out: * @tx_ring: ptr to the ring to send * @skb: ptr to the skb we're sending * @hdr_len: ptr to the size of the packet header - * @cd_type_cmd_tso_mss: ptr to u64 object - * @cd_tunneling: ptr to context descriptor bits + * @cd_type_cmd_tso_mss: Quad Word 1 * * Returns 0 if no TSO can happen, 1 if tso is going, or error **/ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, - u8 *hdr_len, u64 *cd_type_cmd_tso_mss, - u32 *cd_tunneling) + u8 *hdr_len, u64 *cd_type_cmd_tso_mss) { u32 cd_cmd, cd_tso_len, cd_mss; struct ipv6hdr *ipv6h; @@ -2246,7 +2253,7 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, * @tx_ring: ptr to the ring to send * @skb: ptr to the skb we're sending * @tx_flags: the collected send information - * @cd_type_cmd_tso_mss: ptr to u64 object + * @cd_type_cmd_tso_mss: Quad Word 1 * * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen **/ @@ -2806,6 +2813,9 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, int tsyn; int tso; + /* prefetch the data, we'll need it later */ + prefetch(skb->data); + if (0 == i40e_xmit_descriptor_count(skb, tx_ring)) return NETDEV_TX_BUSY; @@ -2825,8 +2835,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (protocol == htons(ETH_P_IPV6)) tx_flags |= I40E_TX_FLAGS_IPV6; - tso = i40e_tso(tx_ring, skb, &hdr_len, - &cd_type_cmd_tso_mss, &cd_tunneling); + tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss); if (tso < 0) goto out_drop; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 6779fb771d6a..dccc1eb576f2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -202,6 +202,7 @@ struct i40e_tx_queue_stats { u64 tx_busy; u64 tx_done_old; u64 tx_linearize; + u64 tx_force_wb; }; struct i40e_rx_queue_stats { diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h index ae879826084b..3226946bf3d4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h @@ -153,6 +153,7 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR 0x00000020 #define I40E_VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000 #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 +#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 struct i40e_virtchnl_vf_resource { u16 num_vsis; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 44462b40f2d7..aa58a498c239 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -290,8 +290,8 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, next_q = find_first_bit(&linklistmap, (I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES)); - vsi_queue_id = next_q/I40E_VIRTCHNL_SUPPORTED_QTYPES; - qtype = next_q%I40E_VIRTCHNL_SUPPORTED_QTYPES; + vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; + qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES; pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id); @@ -549,12 +549,15 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) i40e_vsi_add_pvid(vsi, vf->port_vlan_id); spin_lock_bh(&vsi->mac_filter_list_lock); - f = i40e_add_filter(vsi, vf->default_lan_addr.addr, - vf->port_vlan_id ? vf->port_vlan_id : -1, - true, false); - if (!f) - dev_info(&pf->pdev->dev, - "Could not allocate VF MAC addr\n"); + if (is_valid_ether_addr(vf->default_lan_addr.addr)) { + f = i40e_add_filter(vsi, vf->default_lan_addr.addr, + vf->port_vlan_id ? vf->port_vlan_id : -1, + true, false); + if (!f) + dev_info(&pf->pdev->dev, + "Could not add MAC filter %pM for VF %d\n", + vf->default_lan_addr.addr, vf->vf_id); + } f = i40e_add_filter(vsi, brdcast, vf->port_vlan_id ? vf->port_vlan_id : -1, true, false); @@ -565,7 +568,7 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) } /* program mac filter */ - ret = i40e_sync_vsi_filters(vsi, false); + ret = i40e_sync_vsi_filters(vsi); if (ret) dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); @@ -1094,8 +1097,8 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode, /* single place to detect unsuccessful return values */ if (v_retval) { vf->num_invalid_msgs++; - dev_err(&pf->pdev->dev, "Failed opcode %d Error: %d\n", - v_opcode, v_retval); + dev_err(&pf->pdev->dev, "VF %d failed opcode %d, error: %d\n", + vf->vf_id, v_opcode, v_retval); if (vf->num_invalid_msgs > I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED) { dev_err(&pf->pdev->dev, @@ -1623,7 +1626,8 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) if (!f) { dev_err(&pf->pdev->dev, - "Unable to add VF MAC filter\n"); + "Unable to add MAC filter %pM for VF %d\n", + al->list[i].addr, vf->vf_id); ret = I40E_ERR_PARAM; spin_unlock_bh(&vsi->mac_filter_list_lock); goto error_param; @@ -1632,8 +1636,10 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) spin_unlock_bh(&vsi->mac_filter_list_lock); /* program the updated filter list */ - if (i40e_sync_vsi_filters(vsi, false)) - dev_err(&pf->pdev->dev, "Unable to program VF MAC filters\n"); + ret = i40e_sync_vsi_filters(vsi); + if (ret) + dev_err(&pf->pdev->dev, "Unable to program VF %d MAC filters, error %d\n", + vf->vf_id, ret); error_param: /* send the response to the VF */ @@ -1669,8 +1675,8 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) for (i = 0; i < al->num_elements; i++) { if (is_broadcast_ether_addr(al->list[i].addr) || is_zero_ether_addr(al->list[i].addr)) { - dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n", - al->list[i].addr); + dev_err(&pf->pdev->dev, "Invalid MAC addr %pM for VF %d\n", + al->list[i].addr, vf->vf_id); ret = I40E_ERR_INVALID_MAC_ADDR; goto error_param; } @@ -1680,13 +1686,19 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) spin_lock_bh(&vsi->mac_filter_list_lock); /* delete addresses from the list */ for (i = 0; i < al->num_elements; i++) - i40e_del_filter(vsi, al->list[i].addr, - I40E_VLAN_ANY, true, false); + if (i40e_del_mac_all_vlan(vsi, al->list[i].addr, true, false)) { + ret = I40E_ERR_INVALID_MAC_ADDR; + spin_unlock_bh(&vsi->mac_filter_list_lock); + goto error_param; + } + spin_unlock_bh(&vsi->mac_filter_list_lock); /* program the updated filter list */ - if (i40e_sync_vsi_filters(vsi, false)) - dev_err(&pf->pdev->dev, "Unable to program VF MAC filters\n"); + ret = i40e_sync_vsi_filters(vsi); + if (ret) + dev_err(&pf->pdev->dev, "Unable to program VF %d MAC filters, error %d\n", + vf->vf_id, ret); error_param: /* send the response to the VF */ @@ -1740,8 +1752,8 @@ static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) if (ret) dev_err(&pf->pdev->dev, - "Unable to add VF vlan filter %d, error %d\n", - vfl->vlan_id[i], ret); + "Unable to add VLAN filter %d for VF %d, error %d\n", + vfl->vlan_id[i], vf->vf_id, ret); } error_param: @@ -1792,8 +1804,8 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) if (ret) dev_err(&pf->pdev->dev, - "Unable to delete VF vlan filter %d, error %d\n", - vfl->vlan_id[i], ret); + "Unable to delete VLAN filter %d for VF %d, error %d\n", + vfl->vlan_id[i], vf->vf_id, ret); } error_param: @@ -2099,7 +2111,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n", mac, vf_id); /* program mac filter */ - if (i40e_sync_vsi_filters(vsi, false)) { + if (i40e_sync_vsi_filters(vsi)) { dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); ret = -EIO; goto error_param; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index fcb9ef34cc7a..f5b2b369dc7c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -227,6 +227,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_nvm_update = 0x0703, i40e_aqc_opc_nvm_config_read = 0x0704, i40e_aqc_opc_nvm_config_write = 0x0705, + i40e_aqc_opc_oem_post_update = 0x0720, /* virtualization commands */ i40e_aqc_opc_send_msg_to_pf = 0x0801, @@ -1888,6 +1889,26 @@ struct i40e_aqc_nvm_config_data_immediate_field { I40E_CHECK_STRUCT_LEN(0xc, i40e_aqc_nvm_config_data_immediate_field); +/* OEM Post Update (indirect 0x0720) + * no command data struct used + */ + struct i40e_aqc_nvm_oem_post_update { +#define I40E_AQ_NVM_OEM_POST_UPDATE_EXTERNAL_DATA 0x01 + u8 sel_data; + u8 reserved[7]; +}; + +I40E_CHECK_STRUCT_LEN(0x8, i40e_aqc_nvm_oem_post_update); + +struct i40e_aqc_nvm_oem_post_update_buffer { + u8 str_len; + u8 dev_addr; + __le16 eeprom_addr; + u8 data[36]; +}; + +I40E_CHECK_STRUCT_LEN(0x28, i40e_aqc_nvm_oem_post_update_buffer); + /* Send to PF command (indirect 0x0801) id is only used by PF * Send to VF command (indirect 0x0802) id is only used by PF * Send to Peer PF command (indirect 0x0803) @@ -2311,4 +2332,4 @@ struct i40e_aqc_debug_modify_internals { I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_internals); -#endif +#endif /* _I40E_ADMINQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 77968b184b1f..4ca40651a228 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -51,11 +51,7 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer) { if (tx_buffer->skb) { - if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - else - dev_kfree_skb_any(tx_buffer->skb); - + dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buffer, dma), @@ -67,6 +63,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); } + + if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) + kfree(tx_buffer->raw_buf); + tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); @@ -127,17 +127,24 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring) } /** - * i40e_get_head - Retrieve head from head writeback - * @tx_ring: tx ring to fetch head of + * i40evf_get_tx_pending - how many Tx descriptors not processed + * @tx_ring: the ring of descriptors * - * Returns value of Tx ring head based on value stored - * in head write-back location + * Since there is no access to the ring head register + * in XL710, we need to use our local copies **/ -static inline u32 i40e_get_head(struct i40e_ring *tx_ring) +u32 i40evf_get_tx_pending(struct i40e_ring *ring) { - void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; + u32 head, tail; - return le32_to_cpu(*(volatile __le32 *)head); + head = i40e_get_head(ring); + tail = readl(ring->tail); + + if (head != tail) + return (head < tail) ? + tail - head : (tail + ring->count - head); + + return 0; } #define WB_STRIDE 0x3 @@ -245,16 +252,6 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) tx_ring->q_vector->tx.total_bytes += total_bytes; tx_ring->q_vector->tx.total_packets += total_packets; - /* check to see if there are any non-cache aligned descriptors - * waiting to be written back, and kick the hardware to force - * them to be written back in case of napi polling - */ - if (budget && - !((i & WB_STRIDE) == WB_STRIDE) && - !test_bit(__I40E_DOWN, &tx_ring->vsi->state) && - (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) - tx_ring->arm_wb = true; - netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index), total_packets, total_bytes); @@ -414,7 +411,7 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) return false; } -/* +/** * i40evf_setup_tx_descriptors - Allocate the Tx descriptors * @tx_ring: the tx ring to set up * @@ -1262,10 +1259,12 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } + if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } + if (rx || tx) { /* get the higher of the two ITR adjustments and * use the same value for both ITR registers @@ -1301,7 +1300,6 @@ enable_int: q_vector->itr_countdown--; else q_vector->itr_countdown = ITR_COUNTDOWN_START; - } /** @@ -1334,7 +1332,7 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) */ i40e_for_each_ring(ring, q_vector->tx) { clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); - arm_wb |= ring->arm_wb; + arm_wb = arm_wb || ring->arm_wb; ring->arm_wb = false; } @@ -1363,8 +1361,10 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) /* If work not completed, return budget and polling will return */ if (!clean_complete) { tx_only: - if (arm_wb) + if (arm_wb) { + q_vector->tx.ring[0].tx_stats.tx_force_wb++; i40evf_force_wb(vsi, q_vector); + } return budget; } @@ -1436,13 +1436,12 @@ out: * @tx_ring: ptr to the ring to send * @skb: ptr to the skb we're sending * @hdr_len: ptr to the size of the packet header - * @cd_tunneling: ptr to context descriptor bits + * @cd_type_cmd_tso_mss: Quad Word 1 * * Returns 0 if no TSO can happen, 1 if tso is going, or error **/ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, - u8 *hdr_len, u64 *cd_type_cmd_tso_mss, - u32 *cd_tunneling) + u8 *hdr_len, u64 *cd_type_cmd_tso_mss) { u32 cd_cmd, cd_tso_len, cd_mss; struct ipv6hdr *ipv6h; @@ -1554,7 +1553,6 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, *tx_flags |= I40E_TX_FLAGS_IPV6; } - if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) && (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) && (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) { @@ -1653,7 +1651,7 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss); } - /** +/** * i40e_chk_linearize - Check if there are more than 8 fragments per packet * @skb: send buffer * @tx_flags: collected send information @@ -1769,6 +1767,9 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, u32 td_tag = 0; dma_addr_t dma; u16 gso_segs; + u16 desc_count = 0; + bool tail_bump = true; + bool do_rs = false; if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; @@ -1809,6 +1810,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_desc++; i++; + desc_count++; + if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); i = 0; @@ -1828,6 +1831,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_desc++; i++; + desc_count++; + if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); i = 0; @@ -1842,35 +1847,6 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_bi = &tx_ring->tx_bi[i]; } - /* Place RS bit on last descriptor of any packet that spans across the - * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline. - */ -#define WB_STRIDE 0x3 - if (((i & WB_STRIDE) != WB_STRIDE) && - (first <= &tx_ring->tx_bi[i]) && - (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) { - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP << - I40E_TXD_QW1_CMD_SHIFT); - } else { - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)I40E_TXD_CMD << - I40E_TXD_QW1_CMD_SHIFT); - } - - netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), - first->bytecount); - - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); - /* set next_to_watch value indicating a packet is present */ first->next_to_watch = tx_desc; @@ -1880,15 +1856,72 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; + netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index), + first->bytecount); i40evf_maybe_stop_tx(tx_ring, DESC_NEEDED); + + /* Algorithm to optimize tail and RS bit setting: + * if xmit_more is supported + * if xmit_more is true + * do not update tail and do not mark RS bit. + * if xmit_more is false and last xmit_more was false + * if every packet spanned less than 4 desc + * then set RS bit on 4th packet and update tail + * on every packet + * else + * update tail and set RS bit on every packet. + * if xmit_more is false and last_xmit_more was true + * update tail and set RS bit. + * + * Optimization: wmb to be issued only in case of tail update. + * Also optimize the Descriptor WB path for RS bit with the same + * algorithm. + * + * Note: If there are less than 4 packets + * pending and interrupts were disabled the service task will + * trigger a force WB. + */ + if (skb->xmit_more && + !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index))) { + tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; + tail_bump = false; + } else if (!skb->xmit_more && + !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index)) && + (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && + (tx_ring->packet_stride < WB_STRIDE) && + (desc_count < WB_STRIDE)) { + tx_ring->packet_stride++; + } else { + tx_ring->packet_stride = 0; + tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; + do_rs = true; + } + if (do_rs) + tx_ring->packet_stride = 0; + + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, size, td_tag) | + cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD : + I40E_TX_DESC_CMD_EOP) << + I40E_TXD_QW1_CMD_SHIFT); + /* notify HW of packet */ - if (!skb->xmit_more || - netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index))) - writel(i, tx_ring->tail); - else + if (!tail_bump) prefetchw(tx_desc + 1); + if (tail_bump) { + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + writel(i, tx_ring->tail); + } + return; dma_error: @@ -1960,6 +1993,9 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, u8 hdr_len = 0; int tso; + /* prefetch the data, we'll need it later */ + prefetch(skb->data); + if (0 == i40evf_xmit_descriptor_count(skb, tx_ring)) return NETDEV_TX_BUSY; @@ -1979,8 +2015,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (protocol == htons(ETH_P_IPV6)) tx_flags |= I40E_TX_FLAGS_IPV6; - tso = i40e_tso(tx_ring, skb, &hdr_len, - &cd_type_cmd_tso_mss, &cd_tunneling); + tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss); if (tso < 0) goto out_drop; @@ -2028,7 +2063,7 @@ out_drop: netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40e_ring *tx_ring = adapter->tx_rings[skb->queue_mapping]; + struct i40e_ring *tx_ring = &adapter->tx_rings[skb->queue_mapping]; /* hardware can't handle really short frames, hardware padding works * beyond this point diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index ebc1bf77f036..e29bb3e86cfd 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -201,6 +201,7 @@ struct i40e_tx_queue_stats { u64 tx_busy; u64 tx_done_old; u64 tx_linearize; + u64 tx_force_wb; }; struct i40e_rx_queue_stats { @@ -267,6 +268,8 @@ struct i40e_ring { bool ring_active; /* is ring online or not */ bool arm_wb; /* do something to arm write back */ + u8 packet_stride; +#define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2) u16 flags; #define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) @@ -321,4 +324,19 @@ int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring); void i40evf_free_tx_resources(struct i40e_ring *tx_ring); void i40evf_free_rx_resources(struct i40e_ring *rx_ring); int i40evf_napi_poll(struct napi_struct *napi, int budget); +u32 i40evf_get_tx_pending(struct i40e_ring *ring); + +/** + * i40e_get_head - Retrieve head from head writeback + * @tx_ring: Tx ring to fetch head of + * + * Returns value of Tx ring head based on value stored + * in head write-back location + **/ +static inline u32 i40e_get_head(struct i40e_ring *tx_ring) +{ + void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; + + return le32_to_cpu(*(volatile __le32 *)head); +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h index 9f7b279b9d9c..3b9d2037456c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h @@ -153,6 +153,7 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR 0x00000020 #define I40E_VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000 #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 +#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 struct i40e_virtchnl_vf_resource { u16 num_vsis; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 22fc3d49c4b9..be1b72b93888 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -67,6 +67,8 @@ struct i40e_vsi { u16 rx_itr_setting; u16 tx_itr_setting; u16 qs_handle; + u8 *rss_hkey_user; /* User configured hash keys */ + u8 *rss_lut_user; /* User configured lookup table entries */ }; /* How many Rx Buffers do we bundle into one write to the hardware ? */ @@ -95,10 +97,10 @@ struct i40e_vsi { #define I40E_TX_DESC(R, i) (&(((struct i40e_tx_desc *)((R)->desc))[i])) #define I40E_TX_CTXTDESC(R, i) \ (&(((struct i40e_tx_context_desc *)((R)->desc))[i])) -#define MAX_RX_QUEUES 8 -#define MAX_TX_QUEUES MAX_RX_QUEUES +#define MAX_QUEUES 16 #define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4) +#define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4) /* MAX_MSIX_Q_VECTORS of these are allocated, * but we only use one per queue-specific vector. @@ -142,9 +144,6 @@ struct i40e_q_vector { #define OTHER_VECTOR 1 #define NONQ_VECS (OTHER_VECTOR) -#define MAX_MSIX_Q_VECTORS 4 -#define MAX_MSIX_COUNT 5 - #define MIN_MSIX_Q_VECTORS 1 #define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NONQ_VECS) @@ -190,19 +189,19 @@ struct i40evf_adapter { struct work_struct reset_task; struct work_struct adminq_task; struct delayed_work init_task; - struct i40e_q_vector *q_vector[MAX_MSIX_Q_VECTORS]; + struct i40e_q_vector *q_vectors; struct list_head vlan_filter_list; char misc_vector_name[IFNAMSIZ + 9]; int num_active_queues; /* TX */ - struct i40e_ring *tx_rings[I40E_MAX_VSI_QP]; + struct i40e_ring *tx_rings; u32 tx_timeout_count; struct list_head mac_filter_list; u32 tx_desc_count; /* RX */ - struct i40e_ring *rx_rings[I40E_MAX_VSI_QP]; + struct i40e_ring *rx_rings; u64 hw_csum_rx_error; u32 rx_desc_count; int num_msix_vectors; @@ -313,4 +312,8 @@ void i40evf_request_reset(struct i40evf_adapter *adapter); void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, enum i40e_virtchnl_ops v_opcode, i40e_status v_retval, u8 *msg, u16 msglen); +int i40evf_config_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, + u16 lut_size); +int i40evf_get_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, + u16 lut_size); #endif /* _I40EVF_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index 4790437a50ac..a4c9feb589e7 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -121,12 +121,12 @@ static void i40evf_get_ethtool_stats(struct net_device *netdev, data[i] = *(u64 *)p; } for (j = 0; j < adapter->num_active_queues; j++) { - data[i++] = adapter->tx_rings[j]->stats.packets; - data[i++] = adapter->tx_rings[j]->stats.bytes; + data[i++] = adapter->tx_rings[j].stats.packets; + data[i++] = adapter->tx_rings[j].stats.bytes; } for (j = 0; j < adapter->num_active_queues; j++) { - data[i++] = adapter->rx_rings[j]->stats.packets; - data[i++] = adapter->rx_rings[j]->stats.bytes; + data[i++] = adapter->rx_rings[j].stats.packets; + data[i++] = adapter->rx_rings[j].stats.bytes; } } @@ -351,7 +351,7 @@ static int i40evf_set_coalesce(struct net_device *netdev, vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC; for (i = 0; i < adapter->num_msix_vectors - NONQ_VECS; i++) { - q_vector = adapter->q_vector[i]; + q_vector = &adapter->q_vectors[i]; q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); wr32(hw, I40E_VFINT_ITRN1(0, i), q_vector->rx.itr); q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); @@ -634,25 +634,34 @@ static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40e_hw *hw = &adapter->hw; - u32 hlut_val; - int i, j; + struct i40e_vsi *vsi = &adapter->vsi; + u8 *seed = NULL, *lut; + int ret; + u16 i; if (hfunc) *hfunc = ETH_RSS_HASH_TOP; if (!indir) return 0; - if (indir) { - for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) { - hlut_val = rd32(hw, I40E_VFQF_HLUT(i)); - indir[j++] = hlut_val & 0xff; - indir[j++] = (hlut_val >> 8) & 0xff; - indir[j++] = (hlut_val >> 16) & 0xff; - indir[j++] = (hlut_val >> 24) & 0xff; - } - } - return 0; + seed = key; + + lut = kzalloc(I40EVF_HLUT_ARRAY_SIZE, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + ret = i40evf_get_rss(vsi, seed, lut, I40EVF_HLUT_ARRAY_SIZE); + if (ret) + goto out; + + /* Each 32 bits pointed by 'indir' is stored with a lut entry */ + for (i = 0; i < I40EVF_HLUT_ARRAY_SIZE; i++) + indir[i] = (u32)lut[i]; + +out: + kfree(lut); + + return ret; } /** @@ -668,9 +677,9 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, const u8 hfunc) { struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40e_hw *hw = &adapter->hw; - u32 hlut_val; - int i, j; + struct i40e_vsi *vsi = &adapter->vsi; + u8 *seed = NULL; + u16 i; /* We do not allow change in unsupported parameters */ if (key || @@ -679,15 +688,29 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir, if (!indir) return 0; - for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) { - hlut_val = indir[j++]; - hlut_val |= indir[j++] << 8; - hlut_val |= indir[j++] << 16; - hlut_val |= indir[j++] << 24; - wr32(hw, I40E_VFQF_HLUT(i), hlut_val); + if (key) { + if (!vsi->rss_hkey_user) { + vsi->rss_hkey_user = kzalloc(I40EVF_HKEY_ARRAY_SIZE, + GFP_KERNEL); + if (!vsi->rss_hkey_user) + return -ENOMEM; + } + memcpy(vsi->rss_hkey_user, key, I40EVF_HKEY_ARRAY_SIZE); + seed = vsi->rss_hkey_user; + } + if (!vsi->rss_lut_user) { + vsi->rss_lut_user = kzalloc(I40EVF_HLUT_ARRAY_SIZE, + GFP_KERNEL); + if (!vsi->rss_lut_user) + return -ENOMEM; } - return 0; + /* Each 32 bits pointed by 'indir' is stored with a lut entry */ + for (i = 0; i < I40EVF_HLUT_ARRAY_SIZE; i++) + vsi->rss_lut_user[i] = (u8)(indir[i]); + + return i40evf_config_rss(vsi, seed, vsi->rss_lut_user, + I40EVF_HLUT_ARRAY_SIZE); } static const struct ethtool_ops i40evf_ethtool_ops = { diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index d962164dfb0f..455394cf7f80 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -34,7 +34,15 @@ char i40evf_driver_name[] = "i40evf"; static const char i40evf_driver_string[] = "Intel(R) XL710/X710 Virtual Function Network Driver"; -#define DRV_VERSION "1.3.33" +#define DRV_KERN "-k" + +#define DRV_VERSION_MAJOR 1 +#define DRV_VERSION_MINOR 4 +#define DRV_VERSION_BUILD 4 +#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ + __stringify(DRV_VERSION_MINOR) "." \ + __stringify(DRV_VERSION_BUILD) \ + DRV_KERN const char i40evf_driver_version[] = DRV_VERSION; static const char i40evf_copyright[] = "Copyright (c) 2013 - 2015 Intel Corporation."; @@ -259,7 +267,7 @@ static void i40evf_fire_sw_int(struct i40evf_adapter *adapter, u32 mask) { struct i40e_hw *hw = &adapter->hw; int i; - uint32_t dyn_ctl; + u32 dyn_ctl; if (mask & 1) { dyn_ctl = rd32(hw, I40E_VFINT_DYN_CTL01); @@ -307,10 +315,9 @@ static irqreturn_t i40evf_msix_aq(int irq, void *data) struct i40e_hw *hw = &adapter->hw; u32 val; - /* handle non-queue interrupts */ - rd32(hw, I40E_VFINT_ICR01); - rd32(hw, I40E_VFINT_ICR0_ENA1); - + /* handle non-queue interrupts, these reads clear the registers */ + val = rd32(hw, I40E_VFINT_ICR01); + val = rd32(hw, I40E_VFINT_ICR0_ENA1); val = rd32(hw, I40E_VFINT_DYN_CTL01) | I40E_VFINT_DYN_CTL01_CLEARPBA_MASK; @@ -348,8 +355,8 @@ static irqreturn_t i40evf_msix_clean_rings(int irq, void *data) static void i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) { - struct i40e_q_vector *q_vector = adapter->q_vector[v_idx]; - struct i40e_ring *rx_ring = adapter->rx_rings[r_idx]; + struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; + struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx]; rx_ring->q_vector = q_vector; rx_ring->next = q_vector->rx.ring; @@ -369,8 +376,8 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) static void i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) { - struct i40e_q_vector *q_vector = adapter->q_vector[v_idx]; - struct i40e_ring *tx_ring = adapter->tx_rings[t_idx]; + struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; + struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx]; tx_ring->q_vector = q_vector; tx_ring->next = q_vector->tx.ring; @@ -465,7 +472,7 @@ static void i40evf_netpoll(struct net_device *netdev) return; for (i = 0; i < q_vectors; i++) - i40evf_msix_clean_rings(0, adapter->q_vector[i]); + i40evf_msix_clean_rings(0, &adapter->q_vectors[i]); } #endif @@ -487,7 +494,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename) q_vectors = adapter->num_msix_vectors - NONQ_VECS; for (vector = 0; vector < q_vectors; vector++) { - struct i40e_q_vector *q_vector = adapter->q_vector[vector]; + struct i40e_q_vector *q_vector = &adapter->q_vectors[vector]; if (q_vector->tx.ring && q_vector->rx.ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, @@ -532,7 +539,7 @@ free_queue_irqs: adapter->msix_entries[vector + NONQ_VECS].vector, NULL); free_irq(adapter->msix_entries[vector + NONQ_VECS].vector, - adapter->q_vector[vector]); + &adapter->q_vectors[vector]); } return err; } @@ -582,7 +589,7 @@ static void i40evf_free_traffic_irqs(struct i40evf_adapter *adapter) irq_set_affinity_hint(adapter->msix_entries[i+1].vector, NULL); free_irq(adapter->msix_entries[i+1].vector, - adapter->q_vector[i]); + &adapter->q_vectors[i]); } } @@ -611,7 +618,7 @@ static void i40evf_configure_tx(struct i40evf_adapter *adapter) int i; for (i = 0; i < adapter->num_active_queues; i++) - adapter->tx_rings[i]->tail = hw->hw_addr + I40E_QTX_TAIL1(i); + adapter->tx_rings[i].tail = hw->hw_addr + I40E_QTX_TAIL1(i); } /** @@ -656,8 +663,8 @@ static void i40evf_configure_rx(struct i40evf_adapter *adapter) } for (i = 0; i < adapter->num_active_queues; i++) { - adapter->rx_rings[i]->tail = hw->hw_addr + I40E_QRX_TAIL1(i); - adapter->rx_rings[i]->rx_buf_len = rx_buf_len; + adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i); + adapter->rx_rings[i].rx_buf_len = rx_buf_len; } } @@ -954,7 +961,7 @@ static void i40evf_napi_enable_all(struct i40evf_adapter *adapter) for (q_idx = 0; q_idx < q_vectors; q_idx++) { struct napi_struct *napi; - q_vector = adapter->q_vector[q_idx]; + q_vector = &adapter->q_vectors[q_idx]; napi = &q_vector->napi; napi_enable(napi); } @@ -971,7 +978,7 @@ static void i40evf_napi_disable_all(struct i40evf_adapter *adapter) int q_vectors = adapter->num_msix_vectors - NONQ_VECS; for (q_idx = 0; q_idx < q_vectors; q_idx++) { - q_vector = adapter->q_vector[q_idx]; + q_vector = &adapter->q_vectors[q_idx]; napi_disable(&q_vector->napi); } } @@ -992,7 +999,7 @@ static void i40evf_configure(struct i40evf_adapter *adapter) adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_QUEUES; for (i = 0; i < adapter->num_active_queues; i++) { - struct i40e_ring *ring = adapter->rx_rings[i]; + struct i40e_ring *ring = &adapter->rx_rings[i]; i40evf_alloc_rx_buffers_1buf(ring, ring->count); ring->next_to_use = ring->count - 1; @@ -1112,16 +1119,10 @@ i40evf_acquire_msix_vectors(struct i40evf_adapter *adapter, int vectors) **/ static void i40evf_free_queues(struct i40evf_adapter *adapter) { - int i; - if (!adapter->vsi_res) return; - for (i = 0; i < adapter->num_active_queues; i++) { - if (adapter->tx_rings[i]) - kfree_rcu(adapter->tx_rings[i], rcu); - adapter->tx_rings[i] = NULL; - adapter->rx_rings[i] = NULL; - } + kfree(adapter->tx_rings); + kfree(adapter->rx_rings); } /** @@ -1136,13 +1137,20 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) { int i; + adapter->tx_rings = kcalloc(adapter->num_active_queues, + sizeof(struct i40e_ring), GFP_KERNEL); + if (!adapter->tx_rings) + goto err_out; + adapter->rx_rings = kcalloc(adapter->num_active_queues, + sizeof(struct i40e_ring), GFP_KERNEL); + if (!adapter->rx_rings) + goto err_out; + for (i = 0; i < adapter->num_active_queues; i++) { struct i40e_ring *tx_ring; struct i40e_ring *rx_ring; - tx_ring = kzalloc(sizeof(*tx_ring) * 2, GFP_KERNEL); - if (!tx_ring) - goto err_out; + tx_ring = &adapter->tx_rings[i]; tx_ring->queue_index = i; tx_ring->netdev = adapter->netdev; @@ -1150,14 +1158,12 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) tx_ring->count = adapter->tx_desc_count; if (adapter->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR; - adapter->tx_rings[i] = tx_ring; - rx_ring = &tx_ring[1]; + rx_ring = &adapter->rx_rings[i]; rx_ring->queue_index = i; rx_ring->netdev = adapter->netdev; rx_ring->dev = &adapter->pdev->dev; rx_ring->count = adapter->rx_desc_count; - adapter->rx_rings[i] = rx_ring; } return 0; @@ -1207,115 +1213,273 @@ static int i40evf_set_interrupt_capability(struct i40evf_adapter *adapter) err = i40evf_acquire_msix_vectors(adapter, v_budget); out: - adapter->netdev->real_num_tx_queues = pairs; + netif_set_real_num_rx_queues(adapter->netdev, pairs); + netif_set_real_num_tx_queues(adapter->netdev, pairs); return err; } /** - * i40e_configure_rss_aq - Prepare for RSS using AQ commands + * i40e_config_rss_aq - Prepare for RSS using AQ commands * @vsi: vsi structure * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Return 0 on success, negative on failure **/ -static void i40evf_configure_rss_aq(struct i40e_vsi *vsi, const u8 *seed) +static int i40evf_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) { - struct i40e_aqc_get_set_rss_key_data rss_key; struct i40evf_adapter *adapter = vsi->back; struct i40e_hw *hw = &adapter->hw; - int ret = 0, i; - u8 *rss_lut; + int ret = 0; if (!vsi->id) - return; + return -EINVAL; if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ dev_err(&adapter->pdev->dev, "Cannot confiure RSS, command %d pending\n", adapter->current_op); - return; + return -EBUSY; } - memset(&rss_key, 0, sizeof(rss_key)); - memcpy(&rss_key, seed, sizeof(rss_key)); + if (seed) { + struct i40e_aqc_get_set_rss_key_data *rss_key = + (struct i40e_aqc_get_set_rss_key_data *)seed; + ret = i40evf_aq_set_rss_key(hw, vsi->id, rss_key); + if (ret) { + dev_err(&adapter->pdev->dev, "Cannot set RSS key, err %s aq_err %s\n", + i40evf_stat_str(hw, ret), + i40evf_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + } - rss_lut = kzalloc(((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4), GFP_KERNEL); - if (!rss_lut) - return; + if (lut) { + ret = i40evf_aq_set_rss_lut(hw, vsi->id, false, lut, lut_size); + if (ret) { + dev_err(&adapter->pdev->dev, + "Cannot set RSS lut, err %s aq_err %s\n", + i40evf_stat_str(hw, ret), + i40evf_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + } - /* Populate the LUT with max no. PF queues in round robin fashion */ - for (i = 0; i <= (I40E_VFQF_HLUT_MAX_INDEX * 4); i++) - rss_lut[i] = i % adapter->num_active_queues; + return ret; +} - ret = i40evf_aq_set_rss_key(hw, vsi->id, &rss_key); - if (ret) { - dev_err(&adapter->pdev->dev, - "Cannot set RSS key, err %s aq_err %s\n", - i40evf_stat_str(hw, ret), - i40evf_aq_str(hw, hw->aq.asq_last_status)); - return; +/** + * i40evf_config_rss_reg - Configure RSS keys and lut by writing registers + * @vsi: Pointer to vsi structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + **/ +static int i40evf_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed, + const u8 *lut, u16 lut_size) +{ + struct i40evf_adapter *adapter = vsi->back; + struct i40e_hw *hw = &adapter->hw; + u16 i; + + if (seed) { + u32 *seed_dw = (u32 *)seed; + + for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++) + wr32(hw, I40E_VFQF_HKEY(i), seed_dw[i]); } - ret = i40evf_aq_set_rss_lut(hw, vsi->id, false, rss_lut, - (I40E_VFQF_HLUT_MAX_INDEX + 1) * 4); - if (ret) - dev_err(&adapter->pdev->dev, - "Cannot set RSS lut, err %s aq_err %s\n", - i40evf_stat_str(hw, ret), - i40evf_aq_str(hw, hw->aq.asq_last_status)); + if (lut) { + u32 *lut_dw = (u32 *)lut; + + if (lut_size != I40EVF_HLUT_ARRAY_SIZE) + return -EINVAL; + + for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) + wr32(hw, I40E_VFQF_HLUT(i), lut_dw[i]); + } + i40e_flush(hw); + + return 0; } /** - * i40e_configure_rss_reg - Prepare for RSS if used - * @adapter: board private structure - * @seed: RSS hash seed + * * i40evf_get_rss_aq - Get RSS keys and lut by using AQ commands + * @vsi: Pointer to vsi structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Return 0 on success, negative on failure **/ -static void i40evf_configure_rss_reg(struct i40evf_adapter *adapter, - const u8 *seed) +static int i40evf_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) { + struct i40evf_adapter *adapter = vsi->back; struct i40e_hw *hw = &adapter->hw; - u32 *seed_dw = (u32 *)seed; - u32 cqueue = 0; - u32 lut = 0; - int i, j; + int ret = 0; - /* Fill out hash function seed */ - for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++) - wr32(hw, I40E_VFQF_HKEY(i), seed_dw[i]); - - /* Populate the LUT with max no. PF queues in round robin fashion */ - for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) { - lut = 0; - for (j = 0; j < 4; j++) { - if (cqueue == adapter->num_active_queues) - cqueue = 0; - lut |= ((cqueue) << (8 * j)); - cqueue++; + if (seed) { + ret = i40evf_aq_get_rss_key(hw, vsi->id, + (struct i40e_aqc_get_set_rss_key_data *)seed); + if (ret) { + dev_err(&adapter->pdev->dev, + "Cannot get RSS key, err %s aq_err %s\n", + i40evf_stat_str(hw, ret), + i40evf_aq_str(hw, hw->aq.asq_last_status)); + return ret; } - wr32(hw, I40E_VFQF_HLUT(i), lut); } - i40e_flush(hw); + + if (lut) { + ret = i40evf_aq_get_rss_lut(hw, vsi->id, seed, lut, lut_size); + if (ret) { + dev_err(&adapter->pdev->dev, + "Cannot get RSS lut, err %s aq_err %s\n", + i40evf_stat_str(hw, ret), + i40evf_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + } + + return ret; } /** - * i40evf_configure_rss - Prepare for RSS + * * i40evf_get_rss_reg - Get RSS keys and lut by reading registers + * @vsi: Pointer to vsi structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + **/ +static int i40evf_get_rss_reg(struct i40e_vsi *vsi, const u8 *seed, + const u8 *lut, u16 lut_size) +{ + struct i40evf_adapter *adapter = vsi->back; + struct i40e_hw *hw = &adapter->hw; + u16 i; + + if (seed) { + u32 *seed_dw = (u32 *)seed; + + for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++) + seed_dw[i] = rd32(hw, I40E_VFQF_HKEY(i)); + } + + if (lut) { + u32 *lut_dw = (u32 *)lut; + + if (lut_size != I40EVF_HLUT_ARRAY_SIZE) + return -EINVAL; + + for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) + lut_dw[i] = rd32(hw, I40E_VFQF_HLUT(i)); + } + + return 0; +} + +/** + * i40evf_config_rss - Configure RSS keys and lut + * @vsi: Pointer to vsi structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + **/ +int i40evf_config_rss(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) +{ + struct i40evf_adapter *adapter = vsi->back; + + if (RSS_AQ(adapter)) + return i40evf_config_rss_aq(vsi, seed, lut, lut_size); + else + return i40evf_config_rss_reg(vsi, seed, lut, lut_size); +} + +/** + * i40evf_get_rss - Get RSS keys and lut + * @vsi: Pointer to vsi structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + **/ +int i40evf_get_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, u16 lut_size) +{ + struct i40evf_adapter *adapter = vsi->back; + + if (RSS_AQ(adapter)) + return i40evf_get_rss_aq(vsi, seed, lut, lut_size); + else + return i40evf_get_rss_reg(vsi, seed, lut, lut_size); +} + +/** + * i40evf_fill_rss_lut - Fill the lut with default values + * @lut: Lookup table to be filled with + * @rss_table_size: Lookup table size + * @rss_size: Range of queue number for hashing + **/ +static void i40evf_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) +{ + u16 i; + + for (i = 0; i < rss_table_size; i++) + lut[i] = i % rss_size; +} + +/** + * i40evf_init_rss - Prepare for RSS * @adapter: board private structure + * + * Return 0 on success, negative on failure **/ -static void i40evf_configure_rss(struct i40evf_adapter *adapter) +static int i40evf_init_rss(struct i40evf_adapter *adapter) { + struct i40e_vsi *vsi = &adapter->vsi; struct i40e_hw *hw = &adapter->hw; u8 seed[I40EVF_HKEY_ARRAY_SIZE]; u64 hena; - - netdev_rss_key_fill((void *)seed, I40EVF_HKEY_ARRAY_SIZE); + u8 *lut; + int ret; /* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */ hena = I40E_DEFAULT_RSS_HENA; wr32(hw, I40E_VFQF_HENA(0), (u32)hena); wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32)); - if (RSS_AQ(adapter)) - i40evf_configure_rss_aq(&adapter->vsi, seed); + lut = kzalloc(I40EVF_HLUT_ARRAY_SIZE, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + /* Use user configured lut if there is one, otherwise use default */ + if (vsi->rss_lut_user) + memcpy(lut, vsi->rss_lut_user, I40EVF_HLUT_ARRAY_SIZE); + else + i40evf_fill_rss_lut(lut, I40EVF_HLUT_ARRAY_SIZE, + adapter->num_active_queues); + + /* Use user configured hash key if there is one, otherwise + * user default. + */ + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, I40EVF_HKEY_ARRAY_SIZE); else - i40evf_configure_rss_reg(adapter, seed); + netdev_rss_key_fill((void *)seed, I40EVF_HKEY_ARRAY_SIZE); + ret = i40evf_config_rss(vsi, seed, lut, I40EVF_HLUT_ARRAY_SIZE); + kfree(lut); + + return ret; } /** @@ -1327,21 +1491,22 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter) **/ static int i40evf_alloc_q_vectors(struct i40evf_adapter *adapter) { - int q_idx, num_q_vectors; + int q_idx = 0, num_q_vectors; struct i40e_q_vector *q_vector; num_q_vectors = adapter->num_msix_vectors - NONQ_VECS; + adapter->q_vectors = kcalloc(num_q_vectors, sizeof(*q_vector), + GFP_KERNEL); + if (!adapter->q_vectors) + goto err_out; for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { - q_vector = kzalloc(sizeof(*q_vector), GFP_KERNEL); - if (!q_vector) - goto err_out; + q_vector = &adapter->q_vectors[q_idx]; q_vector->adapter = adapter; q_vector->vsi = &adapter->vsi; q_vector->v_idx = q_idx; netif_napi_add(adapter->netdev, &q_vector->napi, i40evf_napi_poll, NAPI_POLL_WEIGHT); - adapter->q_vector[q_idx] = q_vector; } return 0; @@ -1349,11 +1514,10 @@ static int i40evf_alloc_q_vectors(struct i40evf_adapter *adapter) err_out: while (q_idx) { q_idx--; - q_vector = adapter->q_vector[q_idx]; + q_vector = &adapter->q_vectors[q_idx]; netif_napi_del(&q_vector->napi); - kfree(q_vector); - adapter->q_vector[q_idx] = NULL; } + kfree(adapter->q_vectors); return -ENOMEM; } @@ -1374,13 +1538,11 @@ static void i40evf_free_q_vectors(struct i40evf_adapter *adapter) napi_vectors = adapter->num_active_queues; for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { - struct i40e_q_vector *q_vector = adapter->q_vector[q_idx]; - - adapter->q_vector[q_idx] = NULL; + struct i40e_q_vector *q_vector = &adapter->q_vectors[q_idx]; if (q_idx < napi_vectors) netif_napi_del(&q_vector->napi); - kfree(q_vector); } + kfree(adapter->q_vectors); } /** @@ -1439,6 +1601,22 @@ err_set_interrupt: } /** + * i40evf_clear_rss_config_user - Clear user configurations of RSS + * @vsi: Pointer to VSI structure + **/ +static void i40evf_clear_rss_config_user(struct i40e_vsi *vsi) +{ + if (!vsi) + return; + + kfree(vsi->rss_hkey_user); + vsi->rss_hkey_user = NULL; + + kfree(vsi->rss_lut_user); + vsi->rss_lut_user = NULL; +} + +/** * i40evf_watchdog_timer - Periodic call-back timer * @data: pointer to adapter disguised as unsigned long **/ @@ -1565,7 +1743,7 @@ static void i40evf_watchdog_task(struct work_struct *work) * PF, so we don't have to set current_op as we will * not get a response through the ARQ. */ - i40evf_configure_rss(adapter); + i40evf_init_rss(adapter); adapter->aq_required &= ~I40EVF_FLAG_AQ_CONFIGURE_RSS; goto watchdog_done; } @@ -1864,9 +2042,12 @@ void i40evf_free_all_tx_resources(struct i40evf_adapter *adapter) { int i; + if (!adapter->tx_rings) + return; + for (i = 0; i < adapter->num_active_queues; i++) - if (adapter->tx_rings[i]->desc) - i40evf_free_tx_resources(adapter->tx_rings[i]); + if (adapter->tx_rings[i].desc) + i40evf_free_tx_resources(&adapter->tx_rings[i]); } /** @@ -1884,8 +2065,8 @@ static int i40evf_setup_all_tx_resources(struct i40evf_adapter *adapter) int i, err = 0; for (i = 0; i < adapter->num_active_queues; i++) { - adapter->tx_rings[i]->count = adapter->tx_desc_count; - err = i40evf_setup_tx_descriptors(adapter->tx_rings[i]); + adapter->tx_rings[i].count = adapter->tx_desc_count; + err = i40evf_setup_tx_descriptors(&adapter->tx_rings[i]); if (!err) continue; dev_err(&adapter->pdev->dev, @@ -1911,8 +2092,8 @@ static int i40evf_setup_all_rx_resources(struct i40evf_adapter *adapter) int i, err = 0; for (i = 0; i < adapter->num_active_queues; i++) { - adapter->rx_rings[i]->count = adapter->rx_desc_count; - err = i40evf_setup_rx_descriptors(adapter->rx_rings[i]); + adapter->rx_rings[i].count = adapter->rx_desc_count; + err = i40evf_setup_rx_descriptors(&adapter->rx_rings[i]); if (!err) continue; dev_err(&adapter->pdev->dev, @@ -1932,9 +2113,12 @@ void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter) { int i; + if (!adapter->rx_rings) + return; + for (i = 0; i < adapter->num_active_queues; i++) - if (adapter->rx_rings[i]->desc) - i40evf_free_rx_resources(adapter->rx_rings[i]); + if (adapter->rx_rings[i].desc) + i40evf_free_rx_resources(&adapter->rx_rings[i]); } /** @@ -2263,6 +2447,14 @@ static void i40evf_init_task(struct work_struct *work) if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK) { err = i40evf_send_vf_config_msg(adapter); goto err; + } else if (err == I40E_ERR_PARAM) { + /* We only get ERR_PARAM if the device is in a very bad + * state or if we've been disabled for previous bad + * behavior. Either way, we're done now. + */ + i40evf_shutdown_adminq(hw); + dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n"); + return; } if (err) { dev_err(&pdev->dev, "Unable to get VF config (%d)\n", @@ -2313,7 +2505,7 @@ static void i40evf_init_task(struct work_struct *work) I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) adapter->flags |= I40EVF_FLAG_WB_ON_ITR_CAPABLE; if (!RSS_AQ(adapter)) - i40evf_configure_rss(adapter); + i40evf_init_rss(adapter); err = i40evf_request_misc_irq(adapter); if (err) goto err_sw_init; @@ -2334,7 +2526,6 @@ static void i40evf_init_task(struct work_struct *work) if (netdev->features & NETIF_F_GRO) dev_info(&pdev->dev, "GRO is enabled\n"); - dev_info(&pdev->dev, "%s\n", i40evf_driver_string); adapter->state = __I40EVF_DOWN; set_bit(__I40E_DOWN, &adapter->vsi.state); i40evf_misc_irq_enable(adapter); @@ -2343,7 +2534,7 @@ static void i40evf_init_task(struct work_struct *work) adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_RSS; mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); } else { - i40evf_configure_rss(adapter); + i40evf_init_rss(adapter); } return; restart: @@ -2438,8 +2629,7 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); - netdev = alloc_etherdev_mq(sizeof(struct i40evf_adapter), - MAX_TX_QUEUES); + netdev = alloc_etherdev_mq(sizeof(struct i40evf_adapter), MAX_QUEUES); if (!netdev) { err = -ENOMEM; goto err_alloc_etherdev; @@ -2626,6 +2816,9 @@ static void i40evf_remove(struct pci_dev *pdev) flush_scheduled_work(); + /* Clear user configurations for RSS */ + i40evf_clear_rss_config_user(&adapter->vsi); + if (hw->aq.asq.count) i40evf_shutdown_adminq(hw); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 32e620e1eb5c..c1c526283757 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -157,7 +157,9 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter) I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ | I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG | I40E_VIRTCHNL_VF_OFFLOAD_VLAN | - I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR; + I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR | + I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2; + adapter->current_op = I40E_VIRTCHNL_OP_GET_VF_RESOURCES; adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG; if (PF_IS_V11(adapter)) @@ -242,7 +244,7 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter) adapter->current_op = I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES; len = sizeof(struct i40e_virtchnl_vsi_queue_config_info) + (sizeof(struct i40e_virtchnl_queue_pair_info) * pairs); - vqci = kzalloc(len, GFP_ATOMIC); + vqci = kzalloc(len, GFP_KERNEL); if (!vqci) return; @@ -255,19 +257,19 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter) for (i = 0; i < pairs; i++) { vqpi->txq.vsi_id = vqci->vsi_id; vqpi->txq.queue_id = i; - vqpi->txq.ring_len = adapter->tx_rings[i]->count; - vqpi->txq.dma_ring_addr = adapter->tx_rings[i]->dma; + vqpi->txq.ring_len = adapter->tx_rings[i].count; + vqpi->txq.dma_ring_addr = adapter->tx_rings[i].dma; vqpi->txq.headwb_enabled = 1; vqpi->txq.dma_headwb_addr = vqpi->txq.dma_ring_addr + (vqpi->txq.ring_len * sizeof(struct i40e_tx_desc)); vqpi->rxq.vsi_id = vqci->vsi_id; vqpi->rxq.queue_id = i; - vqpi->rxq.ring_len = adapter->rx_rings[i]->count; - vqpi->rxq.dma_ring_addr = adapter->rx_rings[i]->dma; + vqpi->rxq.ring_len = adapter->rx_rings[i].count; + vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma; vqpi->rxq.max_pkt_size = adapter->netdev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; - vqpi->rxq.databuffer_size = adapter->rx_rings[i]->rx_buf_len; + vqpi->rxq.databuffer_size = adapter->rx_rings[i].rx_buf_len; vqpi++; } @@ -353,14 +355,14 @@ void i40evf_map_queues(struct i40evf_adapter *adapter) len = sizeof(struct i40e_virtchnl_irq_map_info) + (adapter->num_msix_vectors * sizeof(struct i40e_virtchnl_vector_map)); - vimi = kzalloc(len, GFP_ATOMIC); + vimi = kzalloc(len, GFP_KERNEL); if (!vimi) return; vimi->num_vectors = adapter->num_msix_vectors; /* Queue vectors first */ for (v_idx = 0; v_idx < q_vectors; v_idx++) { - q_vector = adapter->q_vector[v_idx]; + q_vector = adapter->q_vectors + v_idx; vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id; vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS; vimi->vecmap[v_idx].txq_map = q_vector->ring_mask; @@ -391,6 +393,7 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter) struct i40e_virtchnl_ether_addr_list *veal; int len, i = 0, count = 0; struct i40evf_mac_filter *f; + bool more = false; if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -415,10 +418,12 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter) count = (I40EVF_MAX_AQ_BUF_SIZE - sizeof(struct i40e_virtchnl_ether_addr_list)) / sizeof(struct i40e_virtchnl_ether_addr); - len = I40EVF_MAX_AQ_BUF_SIZE; + len = sizeof(struct i40e_virtchnl_ether_addr_list) + + (count * sizeof(struct i40e_virtchnl_ether_addr)); + more = true; } - veal = kzalloc(len, GFP_ATOMIC); + veal = kzalloc(len, GFP_KERNEL); if (!veal) return; @@ -431,7 +436,8 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter) f->add = false; } } - adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_MAC_FILTER; + if (!more) + adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_MAC_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, (u8 *)veal, len); kfree(veal); @@ -450,6 +456,7 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter) struct i40e_virtchnl_ether_addr_list *veal; struct i40evf_mac_filter *f, *ftmp; int len, i = 0, count = 0; + bool more = false; if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -474,9 +481,11 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter) count = (I40EVF_MAX_AQ_BUF_SIZE - sizeof(struct i40e_virtchnl_ether_addr_list)) / sizeof(struct i40e_virtchnl_ether_addr); - len = I40EVF_MAX_AQ_BUF_SIZE; + len = sizeof(struct i40e_virtchnl_ether_addr_list) + + (count * sizeof(struct i40e_virtchnl_ether_addr)); + more = true; } - veal = kzalloc(len, GFP_ATOMIC); + veal = kzalloc(len, GFP_KERNEL); if (!veal) return; @@ -490,7 +499,8 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter) kfree(f); } } - adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_MAC_FILTER; + if (!more) + adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_MAC_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS, (u8 *)veal, len); kfree(veal); @@ -509,6 +519,7 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter) struct i40e_virtchnl_vlan_filter_list *vvfl; int len, i = 0, count = 0; struct i40evf_vlan_filter *f; + bool more = false; if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -534,9 +545,11 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter) count = (I40EVF_MAX_AQ_BUF_SIZE - sizeof(struct i40e_virtchnl_vlan_filter_list)) / sizeof(u16); - len = I40EVF_MAX_AQ_BUF_SIZE; + len = sizeof(struct i40e_virtchnl_vlan_filter_list) + + (count * sizeof(u16)); + more = true; } - vvfl = kzalloc(len, GFP_ATOMIC); + vvfl = kzalloc(len, GFP_KERNEL); if (!vvfl) return; @@ -549,7 +562,8 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter) f->add = false; } } - adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_VLAN_FILTER; + if (!more) + adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_VLAN_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len); kfree(vvfl); } @@ -567,6 +581,7 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter) struct i40e_virtchnl_vlan_filter_list *vvfl; struct i40evf_vlan_filter *f, *ftmp; int len, i = 0, count = 0; + bool more = false; if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -592,9 +607,11 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter) count = (I40EVF_MAX_AQ_BUF_SIZE - sizeof(struct i40e_virtchnl_vlan_filter_list)) / sizeof(u16); - len = I40EVF_MAX_AQ_BUF_SIZE; + len = sizeof(struct i40e_virtchnl_vlan_filter_list) + + (count * sizeof(u16)); + more = true; } - vvfl = kzalloc(len, GFP_ATOMIC); + vvfl = kzalloc(len, GFP_KERNEL); if (!vvfl) return; @@ -608,7 +625,8 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter) kfree(f); } } - adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_VLAN_FILTER; + if (!more) + adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_VLAN_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_DEL_VLAN, (u8 *)vvfl, len); kfree(vvfl); } @@ -724,9 +742,29 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, return; } if (v_retval) { - dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", - v_retval, i40evf_stat_str(&adapter->hw, v_retval), - v_opcode); + switch (v_opcode) { + case I40E_VIRTCHNL_OP_ADD_VLAN: + dev_err(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + break; + case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS: + dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + break; + case I40E_VIRTCHNL_OP_DEL_VLAN: + dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + break; + case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS: + dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + break; + default: + dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", + v_retval, + i40evf_stat_str(&adapter->hw, v_retval), + v_opcode); + } } switch (v_opcode) { case I40E_VIRTCHNL_OP_GET_STATS: { diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index 7a73510e547c..362911d024b5 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -272,6 +272,11 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw) if (ret_val) goto out; } + if (phy->id == M88E1543_E_PHY_ID) { + ret_val = igb_initialize_M88E1543_phy(hw); + if (ret_val) + goto out; + } break; case IGP03E1000_E_PHY_ID: phy->type = e1000_phy_igp_3; @@ -294,6 +299,7 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw) case I210_I_PHY_ID: phy->type = e1000_phy_i210; phy->ops.check_polarity = igb_check_polarity_m88; + phy->ops.get_cfg_done = igb_get_cfg_done_i210; phy->ops.get_phy_info = igb_get_phy_info_m88; phy->ops.get_cable_length = igb_get_cable_length_m88_gen2; phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82580; @@ -925,6 +931,8 @@ static s32 igb_phy_hw_reset_sgmii_82575(struct e1000_hw *hw) if (phy->id == M88E1512_E_PHY_ID) ret_val = igb_initialize_M88E1512_phy(hw); + if (phy->id == M88E1543_E_PHY_ID) + ret_val = igb_initialize_M88E1543_phy(hw); out: return ret_val; } diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index b1915043bc0c..a61ee9462dd4 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -990,6 +990,7 @@ #define E1000_M88E1543_PAGE_ADDR 0x16 /* Page Offset Register */ #define E1000_M88E1543_EEE_CTRL_1 0x0 #define E1000_M88E1543_EEE_CTRL_1_MS 0x0001 /* EEE Master/Slave */ +#define E1000_M88E1543_FIBER_CTRL 0x0 #define E1000_EEE_ADV_DEV_I354 7 #define E1000_EEE_ADV_ADDR_I354 60 #define E1000_EEE_ADV_100_SUPPORTED (1 << 1) /* 100BaseTx EEE Supported */ diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c index 65d931669f81..29f59c76878a 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.c +++ b/drivers/net/ethernet/intel/igb/e1000_i210.c @@ -900,3 +900,30 @@ s32 igb_pll_workaround_i210(struct e1000_hw *hw) wr32(E1000_MDICNFG, mdicnfg); return ret_val; } + +/** + * igb_get_cfg_done_i210 - Read config done bit + * @hw: pointer to the HW structure + * + * Read the management control register for the config done bit for + * completion status. NOTE: silicon which is EEPROM-less will fail trying + * to read the config done bit, so an error is *ONLY* logged and returns + * 0. If we were to return with error, EEPROM-less silicon + * would not be able to be reset or change link. + **/ +s32 igb_get_cfg_done_i210(struct e1000_hw *hw) +{ + s32 timeout = PHY_CFG_TIMEOUT; + u32 mask = E1000_NVM_CFG_DONE_PORT_0; + + while (timeout) { + if (rd32(E1000_EEMNGCTL_I210) & mask) + break; + usleep_range(1000, 2000); + timeout--; + } + if (!timeout) + hw_dbg("MNG configuration cycle has not completed.\n"); + + return 0; +} diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h index 3442b6357d01..eaa68a50cb3b 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.h +++ b/drivers/net/ethernet/intel/igb/e1000_i210.h @@ -34,6 +34,7 @@ s32 igb_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data); s32 igb_init_nvm_params_i210(struct e1000_hw *hw); bool igb_get_flash_presence_i210(struct e1000_hw *hw); s32 igb_pll_workaround_i210(struct e1000_hw *hw); +s32 igb_get_cfg_done_i210(struct e1000_hw *hw); #define E1000_STM_OPCODE 0xDB00 #define E1000_EEPROM_FLASH_SIZE_WORD 0x11 diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index 23ec28f43f6d..c0df40f2b295 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -2278,6 +2278,100 @@ out: } /** + * igb_initialize_M88E1543_phy - Initialize M88E1512 PHY + * @hw: pointer to the HW structure + * + * Initialize Marvell 1543 to work correctly with Avoton. + **/ +s32 igb_initialize_M88E1543_phy(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = 0; + + /* Switch to PHY page 0xFF. */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FF); + if (ret_val) + goto out; + + ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x214B); + if (ret_val) + goto out; + + ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2144); + if (ret_val) + goto out; + + ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x0C28); + if (ret_val) + goto out; + + ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2146); + if (ret_val) + goto out; + + ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xB233); + if (ret_val) + goto out; + + ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x214D); + if (ret_val) + goto out; + + ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xDC0C); + if (ret_val) + goto out; + + ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2159); + if (ret_val) + goto out; + + /* Switch to PHY page 0xFB. */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FB); + if (ret_val) + goto out; + + ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_3, 0x0C0D); + if (ret_val) + goto out; + + /* Switch to PHY page 0x12. */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x12); + if (ret_val) + goto out; + + /* Change mode to SGMII-to-Copper */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1512_MODE, 0x8001); + if (ret_val) + goto out; + + /* Switch to PHY page 1. */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x1); + if (ret_val) + goto out; + + /* Change mode to 1000BASE-X/SGMII and autoneg enable */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1543_FIBER_CTRL, 0x9140); + if (ret_val) + goto out; + + /* Return the PHY to page 0. */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0); + if (ret_val) + goto out; + + ret_val = igb_phy_sw_reset(hw); + if (ret_val) { + hw_dbg("Error committing the PHY changes\n"); + return ret_val; + } + + /* msec_delay(1000); */ + usleep_range(1000, 2000); +out: + return ret_val; +} + +/** * igb_power_up_phy_copper - Restore copper link in case of PHY power down * @hw: pointer to the HW structure * diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h index 24d55edbb0e3..aa1ae61a61d8 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.h +++ b/drivers/net/ethernet/intel/igb/e1000_phy.h @@ -62,6 +62,7 @@ void igb_power_up_phy_copper(struct e1000_hw *hw); void igb_power_down_phy_copper(struct e1000_hw *hw); s32 igb_phy_init_script_igp3(struct e1000_hw *hw); s32 igb_initialize_M88E1512_phy(struct e1000_hw *hw); +s32 igb_initialize_M88E1543_phy(struct e1000_hw *hw); s32 igb_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); s32 igb_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); s32 igb_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data); diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 4af2870e49f8..21d9d02885cb 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -66,6 +66,7 @@ #define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ #define E1000_PBS 0x01008 /* Packet Buffer Size */ #define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ +#define E1000_EEMNGCTL_I210 0x12030 /* MNG EEprom Control */ #define E1000_EEARBC_I210 0x12024 /* EEPROM Auto Read Bus Control */ #define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */ #define E1000_I2CCMD 0x01028 /* SFPI2C Command Register - RW */ @@ -385,8 +386,7 @@ do { \ #define array_wr32(reg, offset, value) \ wr32((reg) + ((offset) << 2), (value)) -#define array_rd32(reg, offset) \ - (readl(hw->hw_addr + reg + ((offset) << 2))) +#define array_rd32(reg, offset) (igb_rd32(hw, reg + ((offset) << 2))) /* DMA Coalescing registers */ #define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */ diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 1a2f1cc44b28..e3cb93bdb21a 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -389,6 +389,8 @@ struct igb_adapter { u16 link_speed; u16 link_duplex; + u8 __iomem *io_addr; /* Mainly for iounmap use */ + struct work_struct reset_task; struct work_struct watchdog_task; bool fc_autoneg; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index ea7b09887245..7afde455326d 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -946,7 +946,6 @@ static void igb_configure_msix(struct igb_adapter *adapter) static int igb_request_msix(struct igb_adapter *adapter) { struct net_device *netdev = adapter->netdev; - struct e1000_hw *hw = &adapter->hw; int i, err = 0, vector = 0, free_vector = 0; err = request_irq(adapter->msix_entries[vector].vector, @@ -959,7 +958,7 @@ static int igb_request_msix(struct igb_adapter *adapter) vector++; - q_vector->itr_register = hw->hw_addr + E1000_EITR(vector); + q_vector->itr_register = adapter->io_addr + E1000_EITR(vector); if (q_vector->rx.ring && q_vector->tx.ring) sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, @@ -1230,7 +1229,7 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, q_vector->tx.work_limit = adapter->tx_work_limit; /* initialize ITR configuration */ - q_vector->itr_register = adapter->hw.hw_addr + E1000_EITR(0); + q_vector->itr_register = adapter->io_addr + E1000_EITR(0); q_vector->itr_val = IGB_START_ITR; /* initialize pointer to rings */ @@ -2294,9 +2293,11 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); err = -EIO; - hw->hw_addr = pci_iomap(pdev, 0, 0); - if (!hw->hw_addr) + adapter->io_addr = pci_iomap(pdev, 0, 0); + if (!adapter->io_addr) goto err_ioremap; + /* hw->hw_addr can be altered, we'll use adapter->io_addr for unmap */ + hw->hw_addr = adapter->io_addr; netdev->netdev_ops = &igb_netdev_ops; igb_set_ethtool_ops(netdev); @@ -2656,7 +2657,7 @@ err_sw_init: #ifdef CONFIG_PCI_IOV igb_disable_sriov(pdev); #endif - pci_iounmap(pdev, hw->hw_addr); + pci_iounmap(pdev, adapter->io_addr); err_ioremap: free_netdev(netdev); err_alloc_etherdev: @@ -2823,7 +2824,7 @@ static void igb_remove(struct pci_dev *pdev) igb_clear_interrupt_scheme(adapter); - pci_iounmap(pdev, hw->hw_addr); + pci_iounmap(pdev, adapter->io_addr); if (hw->flash_address) iounmap(hw->flash_address); pci_release_selected_regions(pdev, @@ -2856,6 +2857,13 @@ static void igb_probe_vfs(struct igb_adapter *adapter) if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) return; + /* Of the below we really only want the effect of getting + * IGB_FLAG_HAS_MSIX set (if available), without which + * igb_enable_sriov() has no effect. + */ + igb_set_interrupt_capability(adapter, true); + igb_reset_interrupt_capability(adapter); + pci_sriov_set_totalvfs(pdev, 7); igb_enable_sriov(pdev, max_vfs); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 1d2174526a4c..f4c9a42dafcf 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -139,6 +139,7 @@ enum ixgbe_tx_flags { #define IXGBE_X540_VF_DEVICE_ID 0x1515 struct vf_data_storage { + struct pci_dev *vfdev; unsigned char vf_mac_addresses[ETH_ALEN]; u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES]; u16 num_vf_mc_hashes; @@ -224,6 +225,8 @@ struct ixgbe_rx_queue_stats { u64 csum_err; }; +#define IXGBE_TS_HDR_LEN 8 + enum ixgbe_ring_state_t { __IXGBE_TX_FDIR_INIT_DONE, __IXGBE_TX_XPS_INIT_DONE, @@ -282,6 +285,8 @@ struct ixgbe_ring { u16 next_to_use; u16 next_to_clean; + unsigned long last_rx_timestamp; + union { u16 next_to_alloc; struct { @@ -587,9 +592,10 @@ static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring) struct ixgbe_mac_addr { u8 addr[ETH_ALEN]; - u16 queue; + u16 pool; u16 state; /* bitmask */ }; + #define IXGBE_MAC_STATE_DEFAULT 0x1 #define IXGBE_MAC_STATE_MODIFIED 0x2 #define IXGBE_MAC_STATE_IN_USE 0x4 @@ -639,6 +645,8 @@ struct ixgbe_adapter { #define IXGBE_FLAG_SRIOV_CAPABLE (u32)(1 << 22) #define IXGBE_FLAG_SRIOV_ENABLED (u32)(1 << 23) #define IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE BIT(24) +#define IXGBE_FLAG_RX_HWTSTAMP_ENABLED BIT(25) +#define IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER BIT(26) u32 flags2; #define IXGBE_FLAG2_RSC_CAPABLE (u32)(1 << 0) @@ -656,6 +664,7 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_VXLAN #define IXGBE_FLAG2_VXLAN_REREG_NEEDED BIT(12) #endif +#define IXGBE_FLAG2_VLAN_PROMISC BIT(13) /* Tx fast path data */ int num_tx_queues; @@ -755,9 +764,12 @@ struct ixgbe_adapter { unsigned long last_rx_ptp_check; unsigned long last_rx_timestamp; spinlock_t tmreg_lock; - struct cyclecounter cc; - struct timecounter tc; + struct cyclecounter hw_cc; + struct timecounter hw_tc; u32 base_incval; + u32 tx_hwtstamp_timeouts; + u32 rx_hwtstamp_cleared; + void (*ptp_setup_sdp)(struct ixgbe_adapter *); /* SR-IOV */ DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS); @@ -883,9 +895,10 @@ int ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id, void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter); #endif int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, - u8 *addr, u16 queue); + const u8 *addr, u16 queue); int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, - u8 *addr, u16 queue); + const u8 *addr, u16 queue); +void ixgbe_update_pf_promisc_vlvf(struct ixgbe_adapter *adapter, u32 vid); void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter); netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *, struct ixgbe_adapter *, struct ixgbe_ring *); @@ -968,12 +981,33 @@ void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter); void ixgbe_ptp_stop(struct ixgbe_adapter *adapter); void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter); void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter); -void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb); +void ixgbe_ptp_rx_pktstamp(struct ixgbe_q_vector *, struct sk_buff *); +void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *, struct sk_buff *skb); +static inline void ixgbe_ptp_rx_hwtstamp(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + if (unlikely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_TSIP))) { + ixgbe_ptp_rx_pktstamp(rx_ring->q_vector, skb); + return; + } + + if (unlikely(!ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS))) + return; + + ixgbe_ptp_rx_rgtstamp(rx_ring->q_vector, skb); + + /* Update the last_rx_timestamp timer in order to enable watchdog check + * for error case of latched timestamp on a dropped packet. + */ + rx_ring->last_rx_timestamp = jiffies; +} + int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr); int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr); void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter); void ixgbe_ptp_reset(struct ixgbe_adapter *adapter); -void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr); +void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter); #ifdef CONFIG_PCI_IOV void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter); #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index 65db69b862fb..d8a9fb8a59e2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2014 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -765,13 +765,14 @@ mac_reset_top: ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL) | IXGBE_CTRL_RST; IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear indicating reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST) { status = IXGBE_ERR_RESET_FAILED; @@ -879,11 +880,12 @@ static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq) * @vlan: VLAN id to write to VLAN filter * @vind: VMDq output index that maps queue to VLAN id in VFTA * @vlan_on: boolean flag to turn on/off VLAN in VFTA + * @vlvf_bypass: boolean flag - unused * * Turn on/off specified VLAN in the VLAN filter table. **/ static s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, - bool vlan_on) + bool vlan_on, bool vlvf_bypass) { u32 regindex; u32 bitindex; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index a39afcf03e2c..fa8d4f40ac2a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -990,13 +990,14 @@ mac_reset_top: ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear indicating reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST_MASK)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST_MASK) { @@ -1082,12 +1083,16 @@ mac_reset_top: /* Add the SAN MAC address to the RAR only if it's a valid address */ if (is_valid_ether_addr(hw->mac.san_addr)) { - hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1, - hw->mac.san_addr, 0, IXGBE_RAH_AV); - /* Save the SAN MAC RAR index */ hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1; + hw->mac.ops.set_rar(hw, hw->mac.san_mac_rar_index, + hw->mac.san_addr, 0, IXGBE_RAH_AV); + + /* clear VMDq pool/queue selection for this RAR */ + hw->mac.ops.clear_vmdq(hw, hw->mac.san_mac_rar_index, + IXGBE_CLEAR_VMDQ_ALL); + /* Reserve the last RAR for the SAN MAC address */ hw->mac.num_rar_entries--; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index ce61b36b94f1..64045053e874 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2014 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -1884,10 +1884,11 @@ s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw) hw_dbg(hw, " New MAC Addr =%pM\n", hw->mac.addr); hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); - - /* clear VMDq pool/queue selection for RAR 0 */ - hw->mac.ops.clear_vmdq(hw, 0, IXGBE_CLEAR_VMDQ_ALL); } + + /* clear VMDq pool/queue selection for RAR 0 */ + hw->mac.ops.clear_vmdq(hw, 0, IXGBE_CLEAR_VMDQ_ALL); + hw->addr_ctrl.overflow_promisc = 0; hw->addr_ctrl.rar_used_count = 1; @@ -2454,6 +2455,17 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) /* Always set this bit to ensure any future transactions are blocked */ IXGBE_WRITE_REG(hw, IXGBE_CTRL, IXGBE_CTRL_GIO_DIS); + /* Poll for bit to read as set */ + for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { + if (IXGBE_READ_REG(hw, IXGBE_CTRL) & IXGBE_CTRL_GIO_DIS) + break; + usleep_range(100, 120); + } + if (i >= IXGBE_PCI_MASTER_DISABLE_TIMEOUT) { + hw_dbg(hw, "GIO disable did not set - requesting resets\n"); + goto gio_disable_fail; + } + /* Exit if master requests are blocked */ if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO) || ixgbe_removed(hw->hw_addr)) @@ -2475,6 +2487,7 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) * again to clear out any effects they may have had on our device. */ hw_dbg(hw, "GIO Master Disable bit didn't clear - requesting resets\n"); +gio_disable_fail: hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; if (hw->mac.type >= ixgbe_mac_X550) @@ -2987,43 +3000,44 @@ s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw) * return the VLVF index where this VLAN id should be placed * **/ -static s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan) +static s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan, bool vlvf_bypass) { - u32 bits = 0; - u32 first_empty_slot = 0; - s32 regindex; + s32 regindex, first_empty_slot; + u32 bits; /* short cut the special case */ if (vlan == 0) return 0; - /* - * Search for the vlan id in the VLVF entries. Save off the first empty - * slot found along the way - */ - for (regindex = 1; regindex < IXGBE_VLVF_ENTRIES; regindex++) { + /* if vlvf_bypass is set we don't want to use an empty slot, we + * will simply bypass the VLVF if there are no entries present in the + * VLVF that contain our VLAN + */ + first_empty_slot = vlvf_bypass ? IXGBE_ERR_NO_SPACE : 0; + + /* add VLAN enable bit for comparison */ + vlan |= IXGBE_VLVF_VIEN; + + /* Search for the vlan id in the VLVF entries. Save off the first empty + * slot found along the way. + * + * pre-decrement loop covering (IXGBE_VLVF_ENTRIES - 1) .. 1 + */ + for (regindex = IXGBE_VLVF_ENTRIES; --regindex;) { bits = IXGBE_READ_REG(hw, IXGBE_VLVF(regindex)); - if (!bits && !(first_empty_slot)) + if (bits == vlan) + return regindex; + if (!first_empty_slot && !bits) first_empty_slot = regindex; - else if ((bits & 0x0FFF) == vlan) - break; } - /* - * If regindex is less than IXGBE_VLVF_ENTRIES, then we found the vlan - * in the VLVF. Else use the first empty VLVF register for this - * vlan id. - */ - if (regindex >= IXGBE_VLVF_ENTRIES) { - if (first_empty_slot) - regindex = first_empty_slot; - else { - hw_dbg(hw, "No space in VLVF.\n"); - regindex = IXGBE_ERR_NO_SPACE; - } - } + /* If we are here then we didn't find the VLAN. Return first empty + * slot we found during our search, else error. + */ + if (!first_empty_slot) + hw_dbg(hw, "No space in VLVF.\n"); - return regindex; + return first_empty_slot ? : IXGBE_ERR_NO_SPACE; } /** @@ -3032,21 +3046,17 @@ static s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan) * @vlan: VLAN id to write to VLAN filter * @vind: VMDq output index that maps queue to VLAN id in VFVFB * @vlan_on: boolean flag to turn on/off VLAN in VFVF + * @vlvf_bypass: boolean flag indicating updating default pool is okay * * Turn on/off specified VLAN in the VLAN filter table. **/ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, - bool vlan_on) + bool vlan_on, bool vlvf_bypass) { - s32 regindex; - u32 bitindex; - u32 vfta; - u32 bits; - u32 vt; - u32 targetbit; - bool vfta_changed = false; + u32 regidx, vfta_delta, vfta, bits; + s32 vlvf_index; - if (vlan > 4095) + if ((vlan > 4095) || (vind > 63)) return IXGBE_ERR_PARAM; /* @@ -3061,22 +3071,16 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, * bits[11-5]: which register * bits[4-0]: which bit in the register */ - regindex = (vlan >> 5) & 0x7F; - bitindex = vlan & 0x1F; - targetbit = (1 << bitindex); - vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex)); - - if (vlan_on) { - if (!(vfta & targetbit)) { - vfta |= targetbit; - vfta_changed = true; - } - } else { - if ((vfta & targetbit)) { - vfta &= ~targetbit; - vfta_changed = true; - } - } + regidx = vlan / 32; + vfta_delta = 1 << (vlan % 32); + vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regidx)); + + /* vfta_delta represents the difference between the current value + * of vfta and the value we want in the register. Since the diff + * is an XOR mask we can just update vfta using an XOR. + */ + vfta_delta &= vlan_on ? ~vfta : vfta; + vfta ^= vfta_delta; /* Part 2 * If VT Mode is set @@ -3086,85 +3090,67 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, * Or !vlan_on * clear the pool bit and possibly the vind */ - vt = IXGBE_READ_REG(hw, IXGBE_VT_CTL); - if (vt & IXGBE_VT_CTL_VT_ENABLE) { - s32 vlvf_index; - - vlvf_index = ixgbe_find_vlvf_slot(hw, vlan); - if (vlvf_index < 0) - return vlvf_index; - - if (vlan_on) { - /* set the pool bit */ - if (vind < 32) { - bits = IXGBE_READ_REG(hw, - IXGBE_VLVFB(vlvf_index*2)); - bits |= (1 << vind); - IXGBE_WRITE_REG(hw, - IXGBE_VLVFB(vlvf_index*2), - bits); - } else { - bits = IXGBE_READ_REG(hw, - IXGBE_VLVFB((vlvf_index*2)+1)); - bits |= (1 << (vind-32)); - IXGBE_WRITE_REG(hw, - IXGBE_VLVFB((vlvf_index*2)+1), - bits); - } - } else { - /* clear the pool bit */ - if (vind < 32) { - bits = IXGBE_READ_REG(hw, - IXGBE_VLVFB(vlvf_index*2)); - bits &= ~(1 << vind); - IXGBE_WRITE_REG(hw, - IXGBE_VLVFB(vlvf_index*2), - bits); - bits |= IXGBE_READ_REG(hw, - IXGBE_VLVFB((vlvf_index*2)+1)); - } else { - bits = IXGBE_READ_REG(hw, - IXGBE_VLVFB((vlvf_index*2)+1)); - bits &= ~(1 << (vind-32)); - IXGBE_WRITE_REG(hw, - IXGBE_VLVFB((vlvf_index*2)+1), - bits); - bits |= IXGBE_READ_REG(hw, - IXGBE_VLVFB(vlvf_index*2)); - } - } + if (!(IXGBE_READ_REG(hw, IXGBE_VT_CTL) & IXGBE_VT_CTL_VT_ENABLE)) + goto vfta_update; + + vlvf_index = ixgbe_find_vlvf_slot(hw, vlan, vlvf_bypass); + if (vlvf_index < 0) { + if (vlvf_bypass) + goto vfta_update; + return vlvf_index; + } - /* - * If there are still bits set in the VLVFB registers - * for the VLAN ID indicated we need to see if the - * caller is requesting that we clear the VFTA entry bit. - * If the caller has requested that we clear the VFTA - * entry bit but there are still pools/VFs using this VLAN - * ID entry then ignore the request. We're not worried - * about the case where we're turning the VFTA VLAN ID - * entry bit on, only when requested to turn it off as - * there may be multiple pools and/or VFs using the - * VLAN ID entry. In that case we cannot clear the - * VFTA bit until all pools/VFs using that VLAN ID have also - * been cleared. This will be indicated by "bits" being - * zero. + bits = IXGBE_READ_REG(hw, IXGBE_VLVFB(vlvf_index * 2 + vind / 32)); + + /* set the pool bit */ + bits |= 1 << (vind % 32); + if (vlan_on) + goto vlvf_update; + + /* clear the pool bit */ + bits ^= 1 << (vind % 32); + + if (!bits && + !IXGBE_READ_REG(hw, IXGBE_VLVFB(vlvf_index * 2 + 1 - vind / 32))) { + /* Clear VFTA first, then disable VLVF. Otherwise + * we run the risk of stray packets leaking into + * the PF via the default pool */ - if (bits) { - IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), - (IXGBE_VLVF_VIEN | vlan)); - if (!vlan_on) { - /* someone wants to clear the vfta entry - * but some pools/VFs are still using it. - * Ignore it. */ - vfta_changed = false; - } - } else { - IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), 0); - } + if (vfta_delta) + IXGBE_WRITE_REG(hw, IXGBE_VFTA(regidx), vfta); + + /* disable VLVF and clear remaining bit from pool */ + IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), 0); + IXGBE_WRITE_REG(hw, IXGBE_VLVFB(vlvf_index * 2 + vind / 32), 0); + + return 0; } - if (vfta_changed) - IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), vfta); + /* If there are still bits set in the VLVFB registers + * for the VLAN ID indicated we need to see if the + * caller is requesting that we clear the VFTA entry bit. + * If the caller has requested that we clear the VFTA + * entry bit but there are still pools/VFs using this VLAN + * ID entry then ignore the request. We're not worried + * about the case where we're turning the VFTA VLAN ID + * entry bit on, only when requested to turn it off as + * there may be multiple pools and/or VFs using the + * VLAN ID entry. In that case we cannot clear the + * VFTA bit until all pools/VFs using that VLAN ID have also + * been cleared. This will be indicated by "bits" being + * zero. + */ + vfta_delta = 0; + +vlvf_update: + /* record pool change and enable VLAN ID if not already enabled */ + IXGBE_WRITE_REG(hw, IXGBE_VLVFB(vlvf_index * 2 + vind / 32), bits); + IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), IXGBE_VLVF_VIEN | vlan); + +vfta_update: + /* Update VFTA now that we are ready for traffic */ + if (vfta_delta) + IXGBE_WRITE_REG(hw, IXGBE_VFTA(regidx), vfta); return 0; } @@ -3184,8 +3170,8 @@ s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw) for (offset = 0; offset < IXGBE_VLVF_ENTRIES; offset++) { IXGBE_WRITE_REG(hw, IXGBE_VLVF(offset), 0); - IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset*2), 0); - IXGBE_WRITE_REG(hw, IXGBE_VLVFB((offset*2)+1), 0); + IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset * 2), 0); + IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset * 2 + 1), 0); } return 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h index a0044e4a8b90..2b9563137fd8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h @@ -92,7 +92,7 @@ s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq); s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq); s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw); s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, - u32 vind, bool vlan_on); + u32 vind, bool vlan_on, bool vlvf_bypass); s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw); s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index d681273bd39d..1ed4c9add00d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -166,8 +166,6 @@ static int ixgbe_get_settings(struct net_device *netdev, /* set the supported link speeds */ if (supported_link & IXGBE_LINK_SPEED_10GB_FULL) ecmd->supported |= SUPPORTED_10000baseT_Full; - if (supported_link & IXGBE_LINK_SPEED_2_5GB_FULL) - ecmd->supported |= SUPPORTED_2500baseX_Full; if (supported_link & IXGBE_LINK_SPEED_1GB_FULL) ecmd->supported |= SUPPORTED_1000baseT_Full; if (supported_link & IXGBE_LINK_SPEED_100_FULL) @@ -179,8 +177,6 @@ static int ixgbe_get_settings(struct net_device *netdev, ecmd->advertising |= ADVERTISED_100baseT_Full; if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) ecmd->advertising |= ADVERTISED_10000baseT_Full; - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_2_5GB_FULL) - ecmd->advertising |= ADVERTISED_2500baseX_Full; if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) ecmd->advertising |= ADVERTISED_1000baseT_Full; } else { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index 631c603fc966..5f988703e1b7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -620,8 +620,7 @@ static void ixgbe_fcoe_dma_pool_free(struct ixgbe_fcoe *fcoe, unsigned int cpu) struct ixgbe_fcoe_ddp_pool *ddp_pool; ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu); - if (ddp_pool->pool) - dma_pool_destroy(ddp_pool->pool); + dma_pool_destroy(ddp_pool->pool); ddp_pool->pool = NULL; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 4089d776d01a..66c64a376719 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -65,9 +65,6 @@ #include "ixgbe_common.h" #include "ixgbe_dcb_82599.h" #include "ixgbe_sriov.h" -#ifdef CONFIG_IXGBE_VXLAN -#include <net/vxlan.h> -#endif char ixgbe_driver_name[] = "ixgbe"; static const char ixgbe_driver_string[] = @@ -175,6 +172,8 @@ MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +static struct workqueue_struct *ixgbe_wq; + static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev); static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter, @@ -316,7 +315,7 @@ static void ixgbe_service_event_schedule(struct ixgbe_adapter *adapter) if (!test_bit(__IXGBE_DOWN, &adapter->state) && !test_bit(__IXGBE_REMOVING, &adapter->state) && !test_and_set_bit(__IXGBE_SERVICE_SCHED, &adapter->state)) - schedule_work(&adapter->service_task); + queue_work(ixgbe_wq, &adapter->service_task); } static void ixgbe_remove_adapter(struct ixgbe_hw *hw) @@ -1635,6 +1634,7 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, struct sk_buff *skb) { struct net_device *dev = rx_ring->netdev; + u32 flags = rx_ring->q_vector->adapter->flags; ixgbe_update_rsc_stats(rx_ring, skb); @@ -1642,8 +1642,8 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, ixgbe_rx_checksum(rx_ring, rx_desc, skb); - if (unlikely(ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS))) - ixgbe_ptp_rx_hwtstamp(rx_ring->q_vector->adapter, skb); + if (unlikely(flags & IXGBE_FLAG_RX_HWTSTAMP_ENABLED)) + ixgbe_ptp_rx_hwtstamp(rx_ring, rx_desc, skb); if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { @@ -2741,7 +2741,7 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data) ixgbe_check_fan_failure(adapter, eicr); if (unlikely(eicr & IXGBE_EICR_TIMESYNC)) - ixgbe_ptp_check_pps_event(adapter, eicr); + ixgbe_ptp_check_pps_event(adapter); /* re-enable the original interrupt state, no lsc, no queues */ if (!test_bit(__IXGBE_DOWN, &adapter->state)) @@ -2757,7 +2757,7 @@ static irqreturn_t ixgbe_msix_clean_rings(int irq, void *data) /* EIAM disabled interrupts (on this vector) for us */ if (q_vector->rx.ring || q_vector->tx.ring) - napi_schedule(&q_vector->napi); + napi_schedule_irqoff(&q_vector->napi); return IRQ_HANDLED; } @@ -2786,7 +2786,8 @@ int ixgbe_poll(struct napi_struct *napi, int budget) ixgbe_for_each_ring(ring, q_vector->tx) clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring); - if (!ixgbe_qv_lock_napi(q_vector)) + /* Exit if we are called by netpoll or busy polling is active */ + if ((budget <= 0) || !ixgbe_qv_lock_napi(q_vector)) return budget; /* attempt to distribute budget to each queue fairly, but don't allow @@ -2947,10 +2948,10 @@ static irqreturn_t ixgbe_intr(int irq, void *data) ixgbe_check_fan_failure(adapter, eicr); if (unlikely(eicr & IXGBE_EICR_TIMESYNC)) - ixgbe_ptp_check_pps_event(adapter, eicr); + ixgbe_ptp_check_pps_event(adapter); /* would disable interrupts here but EIAM disabled it */ - napi_schedule(&q_vector->napi); + napi_schedule_irqoff(&q_vector->napi); /* * re-enable link(maybe) and non-queue interrupts, no flush. @@ -3315,8 +3316,7 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, } /** - * Return a number of entries in the RSS indirection table - * + * ixgbe_rss_indir_tbl_entries - Return RSS indirection table entries * @adapter: device handle * * - 82598/82599/X540: 128 @@ -3334,8 +3334,7 @@ u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter) } /** - * Write the RETA table to HW - * + * ixgbe_store_reta - Write the RETA table to HW * @adapter: device handle * * Write the RSS redirection table stored in adapter.rss_indir_tbl[] to HW. @@ -3374,8 +3373,7 @@ void ixgbe_store_reta(struct ixgbe_adapter *adapter) } /** - * Write the RETA table to HW (for x550 devices in SRIOV mode) - * + * ixgbe_store_vfreta - Write the RETA table to HW (x550 devices in SRIOV mode) * @adapter: device handle * * Write the RSS redirection table stored in adapter.rss_indir_tbl[] to HW. @@ -3704,6 +3702,9 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter) /* Map PF MAC address in RAR Entry 0 to first pool following VFs */ hw->mac.ops.set_vmdq(hw, 0, VMDQ_P(0)); + /* clear VLAN promisc flag so VFTA will be updated if necessary */ + adapter->flags2 &= ~IXGBE_FLAG2_VLAN_PROMISC; + /* * Set up VF register offsets for selected VT Mode, * i.e. 32 or 64 VFs for SR-IOV @@ -3901,12 +3902,56 @@ static int ixgbe_vlan_rx_add_vid(struct net_device *netdev, struct ixgbe_hw *hw = &adapter->hw; /* add VID to filter table */ - hw->mac.ops.set_vfta(&adapter->hw, vid, VMDQ_P(0), true); + hw->mac.ops.set_vfta(&adapter->hw, vid, VMDQ_P(0), true, true); set_bit(vid, adapter->active_vlans); return 0; } +static int ixgbe_find_vlvf_entry(struct ixgbe_hw *hw, u32 vlan) +{ + u32 vlvf; + int idx; + + /* short cut the special case */ + if (vlan == 0) + return 0; + + /* Search for the vlan id in the VLVF entries */ + for (idx = IXGBE_VLVF_ENTRIES; --idx;) { + vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(idx)); + if ((vlvf & VLAN_VID_MASK) == vlan) + break; + } + + return idx; +} + +void ixgbe_update_pf_promisc_vlvf(struct ixgbe_adapter *adapter, u32 vid) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 bits, word; + int idx; + + idx = ixgbe_find_vlvf_entry(hw, vid); + if (!idx) + return; + + /* See if any other pools are set for this VLAN filter + * entry other than the PF. + */ + word = idx * 2 + (VMDQ_P(0) / 32); + bits = ~(1 << (VMDQ_P(0)) % 32); + bits &= IXGBE_READ_REG(hw, IXGBE_VLVFB(word)); + + /* Disable the filter so this falls into the default pool. */ + if (!bits && !IXGBE_READ_REG(hw, IXGBE_VLVFB(word ^ 1))) { + if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC)) + IXGBE_WRITE_REG(hw, IXGBE_VLVFB(word), 0); + IXGBE_WRITE_REG(hw, IXGBE_VLVF(idx), 0); + } +} + static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { @@ -3914,7 +3959,11 @@ static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev, struct ixgbe_hw *hw = &adapter->hw; /* remove VID from filter table */ - hw->mac.ops.set_vfta(&adapter->hw, vid, VMDQ_P(0), false); + if (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC) + ixgbe_update_pf_promisc_vlvf(adapter, vid); + else + hw->mac.ops.set_vfta(hw, vid, VMDQ_P(0), false, true); + clear_bit(vid, adapter->active_vlans); return 0; @@ -3992,6 +4041,129 @@ static void ixgbe_vlan_strip_enable(struct ixgbe_adapter *adapter) } } +static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 vlnctrl, i; + + switch (hw->mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + default: + if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) + break; + /* fall through */ + case ixgbe_mac_82598EB: + /* legacy case, we can just disable VLAN filtering */ + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + return; + } + + /* We are already in VLAN promisc, nothing to do */ + if (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC) + return; + + /* Set flag so we don't redo unnecessary work */ + adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC; + + /* Add PF to all active pools */ + for (i = IXGBE_VLVF_ENTRIES; --i;) { + u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32); + u32 vlvfb = IXGBE_READ_REG(hw, reg_offset); + + vlvfb |= 1 << (VMDQ_P(0) % 32); + IXGBE_WRITE_REG(hw, reg_offset, vlvfb); + } + + /* Set all bits in the VLAN filter table array */ + for (i = hw->mac.vft_size; i--;) + IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), ~0U); +} + +#define VFTA_BLOCK_SIZE 8 +static void ixgbe_scrub_vfta(struct ixgbe_adapter *adapter, u32 vfta_offset) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 vfta[VFTA_BLOCK_SIZE] = { 0 }; + u32 vid_start = vfta_offset * 32; + u32 vid_end = vid_start + (VFTA_BLOCK_SIZE * 32); + u32 i, vid, word, bits; + + for (i = IXGBE_VLVF_ENTRIES; --i;) { + u32 vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(i)); + + /* pull VLAN ID from VLVF */ + vid = vlvf & VLAN_VID_MASK; + + /* only concern outselves with a certain range */ + if (vid < vid_start || vid >= vid_end) + continue; + + if (vlvf) { + /* record VLAN ID in VFTA */ + vfta[(vid - vid_start) / 32] |= 1 << (vid % 32); + + /* if PF is part of this then continue */ + if (test_bit(vid, adapter->active_vlans)) + continue; + } + + /* remove PF from the pool */ + word = i * 2 + VMDQ_P(0) / 32; + bits = ~(1 << (VMDQ_P(0) % 32)); + bits &= IXGBE_READ_REG(hw, IXGBE_VLVFB(word)); + IXGBE_WRITE_REG(hw, IXGBE_VLVFB(word), bits); + } + + /* extract values from active_vlans and write back to VFTA */ + for (i = VFTA_BLOCK_SIZE; i--;) { + vid = (vfta_offset + i) * 32; + word = vid / BITS_PER_LONG; + bits = vid % BITS_PER_LONG; + + vfta[i] |= adapter->active_vlans[word] >> bits; + + IXGBE_WRITE_REG(hw, IXGBE_VFTA(vfta_offset + i), vfta[i]); + } +} + +static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 vlnctrl, i; + + switch (hw->mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + default: + if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) + break; + /* fall through */ + case ixgbe_mac_82598EB: + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + vlnctrl &= ~IXGBE_VLNCTRL_CFIEN; + vlnctrl |= IXGBE_VLNCTRL_VFE; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + return; + } + + /* We are not in VLAN promisc, nothing to do */ + if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC)) + return; + + /* Set flag so we don't redo unnecessary work */ + adapter->flags2 &= ~IXGBE_FLAG2_VLAN_PROMISC; + + for (i = 0; i < hw->mac.vft_size; i += VFTA_BLOCK_SIZE) + ixgbe_scrub_vfta(adapter, i); +} + static void ixgbe_restore_vlan(struct ixgbe_adapter *adapter) { u16 vid; @@ -4034,124 +4206,156 @@ static int ixgbe_write_mc_addr_list(struct net_device *netdev) #ifdef CONFIG_PCI_IOV void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) - hw->mac.ops.set_rar(hw, i, adapter->mac_table[i].addr, - adapter->mac_table[i].queue, + + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED; + + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) + hw->mac.ops.set_rar(hw, i, + mac_table->addr, + mac_table->pool, IXGBE_RAH_AV); else hw->mac.ops.clear_rar(hw, i); - - adapter->mac_table[i].state &= ~(IXGBE_MAC_STATE_MODIFIED); } } -#endif +#endif static void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & IXGBE_MAC_STATE_MODIFIED) { - if (adapter->mac_table[i].state & - IXGBE_MAC_STATE_IN_USE) - hw->mac.ops.set_rar(hw, i, - adapter->mac_table[i].addr, - adapter->mac_table[i].queue, - IXGBE_RAH_AV); - else - hw->mac.ops.clear_rar(hw, i); - adapter->mac_table[i].state &= - ~(IXGBE_MAC_STATE_MODIFIED); - } + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + if (!(mac_table->state & IXGBE_MAC_STATE_MODIFIED)) + continue; + + mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED; + + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) + hw->mac.ops.set_rar(hw, i, + mac_table->addr, + mac_table->pool, + IXGBE_RAH_AV); + else + hw->mac.ops.clear_rar(hw, i); } } static void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter) { - int i; + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; + int i; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; - adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; - eth_zero_addr(adapter->mac_table[i].addr); - adapter->mac_table[i].queue = 0; + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + mac_table->state |= IXGBE_MAC_STATE_MODIFIED; + mac_table->state &= ~IXGBE_MAC_STATE_IN_USE; } + ixgbe_sync_mac_table(adapter); } -static int ixgbe_available_rars(struct ixgbe_adapter *adapter) +static int ixgbe_available_rars(struct ixgbe_adapter *adapter, u16 pool) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i, count = 0; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state == 0) - count++; + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + /* do not count default RAR as available */ + if (mac_table->state & IXGBE_MAC_STATE_DEFAULT) + continue; + + /* only count unused and addresses that belong to us */ + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) { + if (mac_table->pool != pool) + continue; + } + + count++; } + return count; } /* this function destroys the first RAR entry */ -static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter, - u8 *addr) +static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; - memcpy(&adapter->mac_table[0].addr, addr, ETH_ALEN); - adapter->mac_table[0].queue = VMDQ_P(0); - adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT | - IXGBE_MAC_STATE_IN_USE); - hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr, - adapter->mac_table[0].queue, + memcpy(&mac_table->addr, hw->mac.addr, ETH_ALEN); + mac_table->pool = VMDQ_P(0); + + mac_table->state = IXGBE_MAC_STATE_DEFAULT | IXGBE_MAC_STATE_IN_USE; + + hw->mac.ops.set_rar(hw, 0, mac_table->addr, mac_table->pool, IXGBE_RAH_AV); } -int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue) +int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, + const u8 *addr, u16 pool) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i; if (is_zero_ether_addr(addr)) return -EINVAL; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) continue; - adapter->mac_table[i].state |= (IXGBE_MAC_STATE_MODIFIED | - IXGBE_MAC_STATE_IN_USE); - ether_addr_copy(adapter->mac_table[i].addr, addr); - adapter->mac_table[i].queue = queue; + + ether_addr_copy(mac_table->addr, addr); + mac_table->pool = pool; + + mac_table->state |= IXGBE_MAC_STATE_MODIFIED | + IXGBE_MAC_STATE_IN_USE; + ixgbe_sync_mac_table(adapter); + return i; } + return -ENOMEM; } -int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue) +int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, + const u8 *addr, u16 pool) { - /* search table for addr, if found, set to 0 and sync */ - int i; + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; + int i; if (is_zero_ether_addr(addr)) return -EINVAL; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (ether_addr_equal(addr, adapter->mac_table[i].addr) && - adapter->mac_table[i].queue == queue) { - adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; - adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; - eth_zero_addr(adapter->mac_table[i].addr); - adapter->mac_table[i].queue = 0; - ixgbe_sync_mac_table(adapter); - return 0; - } + /* search table for addr, if found clear IN_USE flag and sync */ + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + /* we can only delete an entry if it is in use */ + if (!(mac_table->state & IXGBE_MAC_STATE_IN_USE)) + continue; + /* we only care about entries that belong to the given pool */ + if (mac_table->pool != pool) + continue; + /* we only care about a specific MAC address */ + if (!ether_addr_equal(addr, mac_table->addr)) + continue; + + mac_table->state |= IXGBE_MAC_STATE_MODIFIED; + mac_table->state &= ~IXGBE_MAC_STATE_IN_USE; + + ixgbe_sync_mac_table(adapter); + + return 0; } + return -ENOMEM; } /** @@ -4169,7 +4373,7 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn) int count = 0; /* return ENOMEM indicating insufficient memory for addresses */ - if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter)) + if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter, vfn)) return -ENOMEM; if (!netdev_uc_empty(netdev)) { @@ -4183,6 +4387,25 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn) return count; } +static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int ret; + + ret = ixgbe_add_mac_filter(adapter, addr, VMDQ_P(0)); + + return min_t(int, ret, 0); +} + +static int ixgbe_uc_unsync(struct net_device *netdev, const unsigned char *addr) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + ixgbe_del_mac_filter(adapter, addr, VMDQ_P(0)); + + return 0; +} + /** * ixgbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set * @netdev: network interface device structure @@ -4197,12 +4420,10 @@ void ixgbe_set_rx_mode(struct net_device *netdev) struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE; - u32 vlnctrl; int count; /* Check for Promiscuous and All Multicast modes */ fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); - vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); /* set all bits that we expect to always be set */ fctrl &= ~IXGBE_FCTRL_SBP; /* disable store-bad-packets */ @@ -4212,25 +4433,18 @@ void ixgbe_set_rx_mode(struct net_device *netdev) /* clear the bits we are changing the status of */ fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); - vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); if (netdev->flags & IFF_PROMISC) { hw->addr_ctrl.user_set_promisc = true; fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); vmolr |= IXGBE_VMOLR_MPE; - /* Only disable hardware filter vlans in promiscuous mode - * if SR-IOV and VMDQ are disabled - otherwise ensure - * that hardware VLAN filters remain enabled. - */ - if (adapter->flags & (IXGBE_FLAG_VMDQ_ENABLED | - IXGBE_FLAG_SRIOV_ENABLED)) - vlnctrl |= (IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); + ixgbe_vlan_promisc_enable(adapter); } else { if (netdev->flags & IFF_ALLMULTI) { fctrl |= IXGBE_FCTRL_MPE; vmolr |= IXGBE_VMOLR_MPE; } - vlnctrl |= IXGBE_VLNCTRL_VFE; hw->addr_ctrl.user_set_promisc = false; + ixgbe_vlan_promisc_disable(adapter); } /* @@ -4238,8 +4452,7 @@ void ixgbe_set_rx_mode(struct net_device *netdev) * sufficient space to store all the addresses then enable * unicast promiscuous mode */ - count = ixgbe_write_uc_addr_list(netdev, VMDQ_P(0)); - if (count < 0) { + if (__dev_uc_sync(netdev, ixgbe_uc_sync, ixgbe_uc_unsync)) { fctrl |= IXGBE_FCTRL_UPE; vmolr |= IXGBE_VMOLR_ROPE; } @@ -4275,7 +4488,6 @@ void ixgbe_set_rx_mode(struct net_device *netdev) /* NOTE: VLAN filtering is disabled by setting PROMISC */ } - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) @@ -5042,7 +5254,6 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; int err; - u8 old_addr[ETH_ALEN]; if (ixgbe_removed(hw->hw_addr)) return; @@ -5078,10 +5289,13 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) } clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state); - /* do not flush user set addresses */ - memcpy(old_addr, &adapter->mac_table[0].addr, netdev->addr_len); + + /* flush entries out of MAC table */ ixgbe_flush_sw_mac_table(adapter); - ixgbe_mac_set_default_filter(adapter, old_addr); + __dev_uc_unsync(netdev, NULL); + + /* do not flush user set addresses */ + ixgbe_mac_set_default_filter(adapter); /* update SAN MAC vmdq pool selection */ if (hw->mac.san_mac_rar_index) @@ -5331,6 +5545,8 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter) adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) * hw->mac.num_rar_entries, GFP_ATOMIC); + if (!adapter->mac_table) + return -ENOMEM; /* Set MAC specific capability flags and exceptions */ switch (hw->mac.type) { @@ -6616,10 +6832,8 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; - struct pci_dev *vfdev; + unsigned int vf; u32 gpc; - int pos; - unsigned short vf_id; if (!(netif_carrier_ok(adapter->netdev))) return; @@ -6636,26 +6850,17 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) if (!pdev) return; - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); - if (!pos) - return; - - /* get the device ID for the VF */ - pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id); - /* check status reg for all VFs owned by this PF */ - vfdev = pci_get_device(pdev->vendor, vf_id, NULL); - while (vfdev) { - if (vfdev->is_virtfn && (vfdev->physfn == pdev)) { - u16 status_reg; - - pci_read_config_word(vfdev, PCI_STATUS, &status_reg); - if (status_reg & PCI_STATUS_REC_MASTER_ABORT) - /* issue VFLR */ - ixgbe_issue_vf_flr(adapter, vfdev); - } + for (vf = 0; vf < adapter->num_vfs; ++vf) { + struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev; + u16 status_reg; - vfdev = pci_get_device(pdev->vendor, vf_id, vfdev); + if (!vfdev) + continue; + pci_read_config_word(vfdev, PCI_STATUS, &status_reg); + if (status_reg != IXGBE_FAILED_READ_CFG_WORD && + status_reg & PCI_STATUS_REC_MASTER_ABORT) + ixgbe_issue_vf_flr(adapter, vfdev); } } @@ -7024,6 +7229,7 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, struct tcphdr *tcphdr; u8 *raw; } transport_hdr; + __be16 frag_off; if (skb->encapsulation) { network_hdr.raw = skb_inner_network_header(skb); @@ -7047,13 +7253,17 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, case 6: vlan_macip_lens |= transport_hdr.raw - network_hdr.raw; l4_hdr = network_hdr.ipv6->nexthdr; + if (likely((transport_hdr.raw - network_hdr.raw) == + sizeof(struct ipv6hdr))) + break; + ipv6_skip_exthdr(skb, network_hdr.raw - skb->data + + sizeof(struct ipv6hdr), + &l4_hdr, &frag_off); + if (unlikely(frag_off)) + l4_hdr = NEXTHDR_FRAGMENT; break; default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but version=%d\n", - network_hdr.ipv4->version); - } + break; } switch (l4_hdr) { @@ -7074,16 +7284,18 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, - "partial checksum but l4 proto=%x!\n", - l4_hdr); + "partial checksum, version=%d, l4 proto=%x\n", + network_hdr.ipv4->version, l4_hdr); } - break; + skb_checksum_help(skb); + goto no_csum; } /* update TX checksum flag */ first->tx_flags |= IXGBE_TX_FLAGS_CSUM; } +no_csum: /* vlan_macip_lens: MACLEN, VLAN tag */ vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; @@ -7659,17 +7871,16 @@ static int ixgbe_set_mac(struct net_device *netdev, void *p) struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; struct sockaddr *addr = p; - int ret; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - ixgbe_del_mac_filter(adapter, hw->mac.addr, VMDQ_P(0)); memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); - ret = ixgbe_add_mac_filter(adapter, hw->mac.addr, VMDQ_P(0)); - return ret > 0 ? 0 : ret; + ixgbe_mac_set_default_filter(adapter); + + return 0; } static int @@ -8152,7 +8363,10 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { /* guarantee we can provide a unique filter for the unicast address */ if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { - if (IXGBE_MAX_PF_MACVLANS <= netdev_uc_count(dev)) + struct ixgbe_adapter *adapter = netdev_priv(dev); + u16 pool = VMDQ_P(0); + + if (netdev_uc_count(dev) >= ixgbe_available_rars(adapter, pool)) return -ENOMEM; } @@ -8870,7 +9084,7 @@ skip_sriov: goto err_sw_init; } - ixgbe_mac_set_default_filter(adapter, hw->mac.perm_addr); + ixgbe_mac_set_default_filter(adapter); setup_timer(&adapter->service_timer, &ixgbe_service_timer, (unsigned long) adapter); @@ -9325,6 +9539,12 @@ static int __init ixgbe_init_module(void) pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version); pr_info("%s\n", ixgbe_copyright); + ixgbe_wq = create_singlethread_workqueue(ixgbe_driver_name); + if (!ixgbe_wq) { + pr_err("%s: Failed to create workqueue\n", ixgbe_driver_name); + return -ENOMEM; + } + ixgbe_dbg_init(); ret = pci_register_driver(&ixgbe_driver); @@ -9356,6 +9576,10 @@ static void __exit ixgbe_exit_module(void) pci_unregister_driver(&ixgbe_driver); ixgbe_dbg_exit(); + if (ixgbe_wq) { + destroy_workqueue(ixgbe_wq); + ixgbe_wq = NULL; + } } #ifdef CONFIG_IXGBE_DCA diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index fb8673d63806..db0731e05401 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -2393,6 +2393,9 @@ s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return 0; + if (!on && ixgbe_mng_present(hw)) + return 0; + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index e5ba04025e2b..ef1504d41890 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2013 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -27,6 +27,7 @@ *******************************************************************************/ #include "ixgbe.h" #include <linux/ptp_classify.h> +#include <linux/clocksource.h> /* * The 82599 and the X540 do not have true 64bit nanosecond scale @@ -93,7 +94,6 @@ #define IXGBE_INCVAL_SHIFT_82599 7 #define IXGBE_INCPER_SHIFT_82599 24 -#define IXGBE_MAX_TIMEADJ_VALUE 0x7FFFFFFFFFFFFFFFULL #define IXGBE_OVERFLOW_PERIOD (HZ * 30) #define IXGBE_PTP_TX_TIMEOUT (HZ * 15) @@ -104,8 +104,68 @@ */ #define IXGBE_PTP_PPS_HALF_SECOND 500000000ULL +/* In contrast, the X550 controller has two registers, SYSTIMEH and SYSTIMEL + * which contain measurements of seconds and nanoseconds respectively. This + * matches the standard linux representation of time in the kernel. In addition, + * the X550 also has a SYSTIMER register which represents residue, or + * subnanosecond overflow adjustments. To control clock adjustment, the TIMINCA + * register is used, but it is unlike the X540 and 82599 devices. TIMINCA + * represents units of 2^-32 nanoseconds, and uses 31 bits for this, with the + * high bit representing whether the adjustent is positive or negative. Every + * clock cycle, the X550 will add 12.5 ns + TIMINCA which can result in a range + * of 12 to 13 nanoseconds adjustment. Unlike the 82599 and X540 devices, the + * X550's clock for purposes of SYSTIME generation is constant and not dependent + * on the link speed. + * + * SYSTIMEH SYSTIMEL SYSTIMER + * +--------------+ +--------------+ +-------------+ + * X550 | 32 | | 32 | | 32 | + * *--------------+ +--------------+ +-------------+ + * \____seconds___/ \_nanoseconds_/ \__2^-32 ns__/ + * + * This results in a full 96 bits to represent the clock, with 32 bits for + * seconds, 32 bits for nanoseconds (largest value is 0d999999999 or just under + * 1 second) and an additional 32 bits to measure sub nanosecond adjustments for + * underflow of adjustments. + * + * The 32 bits of seconds for the X550 overflows every + * 2^32 / ( 365.25 * 24 * 60 * 60 ) = ~136 years. + * + * In order to adjust the clock frequency for the X550, the TIMINCA register is + * provided. This register represents a + or minus nearly 0.5 ns adjustment to + * the base frequency. It is measured in 2^-32 ns units, with the high bit being + * the sign bit. This register enables software to calculate frequency + * adjustments and apply them directly to the clock rate. + * + * The math for converting ppb into TIMINCA values is fairly straightforward. + * TIMINCA value = ( Base_Frequency * ppb ) / 1000000000ULL + * + * This assumes that ppb is never high enough to create a value bigger than + * TIMINCA's 31 bits can store. This is ensured by the stack. Calculating this + * value is also simple. + * Max ppb = ( Max Adjustment / Base Frequency ) / 1000000000ULL + * + * For the X550, the Max adjustment is +/- 0.5 ns, and the base frequency is + * 12.5 nanoseconds. This means that the Max ppb is 39999999 + * Note: We subtract one in order to ensure no overflow, because the TIMINCA + * register can only hold slightly under 0.5 nanoseconds. + * + * Because TIMINCA is measured in 2^-32 ns units, we have to convert 12.5 ns + * into 2^-32 units, which is + * + * 12.5 * 2^32 = C80000000 + * + * Some revisions of hardware have a faster base frequency than the registers + * were defined for. To fix this, we use a timecounter structure with the + * proper mult and shift to convert the cycles into nanoseconds of time. + */ +#define IXGBE_X550_BASE_PERIOD 0xC80000000ULL +#define INCVALUE_MASK 0x7FFFFFFF +#define ISGN 0x80000000 +#define MAX_TIMADJ 0x7FFFFFFF + /** - * ixgbe_ptp_setup_sdp + * ixgbe_ptp_setup_sdp_x540 * @hw: the hardware private structure * * this function enables or disables the clock out feature on SDP0 for @@ -116,83 +176,116 @@ * aligns the start of the PPS signal to that value. The shift is * necessary because it can change based on the link speed. */ -static void ixgbe_ptp_setup_sdp(struct ixgbe_adapter *adapter) +static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - int shift = adapter->cc.shift; + int shift = adapter->hw_cc.shift; u32 esdp, tsauxc, clktiml, clktimh, trgttiml, trgttimh, rem; u64 ns = 0, clock_edge = 0; - if ((adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED) && - (hw->mac.type == ixgbe_mac_X540)) { + /* disable the pin first */ + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); + IXGBE_WRITE_FLUSH(hw); - /* disable the pin first */ - IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); - IXGBE_WRITE_FLUSH(hw); + if (!(adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED)) + return; - esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); - /* - * enable the SDP0 pin as output, and connected to the - * native function for Timesync (ClockOut) - */ - esdp |= (IXGBE_ESDP_SDP0_DIR | - IXGBE_ESDP_SDP0_NATIVE); + /* enable the SDP0 pin as output, and connected to the + * native function for Timesync (ClockOut) + */ + esdp |= IXGBE_ESDP_SDP0_DIR | + IXGBE_ESDP_SDP0_NATIVE; - /* - * enable the Clock Out feature on SDP0, and allow - * interrupts to occur when the pin changes - */ - tsauxc = (IXGBE_TSAUXC_EN_CLK | - IXGBE_TSAUXC_SYNCLK | - IXGBE_TSAUXC_SDP0_INT); + /* enable the Clock Out feature on SDP0, and allow + * interrupts to occur when the pin changes + */ + tsauxc = IXGBE_TSAUXC_EN_CLK | + IXGBE_TSAUXC_SYNCLK | + IXGBE_TSAUXC_SDP0_INT; - /* clock period (or pulse length) */ - clktiml = (u32)(IXGBE_PTP_PPS_HALF_SECOND << shift); - clktimh = (u32)((IXGBE_PTP_PPS_HALF_SECOND << shift) >> 32); + /* clock period (or pulse length) */ + clktiml = (u32)(IXGBE_PTP_PPS_HALF_SECOND << shift); + clktimh = (u32)((IXGBE_PTP_PPS_HALF_SECOND << shift) >> 32); - /* - * Account for the cyclecounter wrap-around value by - * using the converted ns value of the current time to - * check for when the next aligned second would occur. - */ - clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML); - clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32; - ns = timecounter_cyc2time(&adapter->tc, clock_edge); + /* Account for the cyclecounter wrap-around value by + * using the converted ns value of the current time to + * check for when the next aligned second would occur. + */ + clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML); + clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32; + ns = timecounter_cyc2time(&adapter->hw_tc, clock_edge); - div_u64_rem(ns, IXGBE_PTP_PPS_HALF_SECOND, &rem); - clock_edge += ((IXGBE_PTP_PPS_HALF_SECOND - (u64)rem) << shift); + div_u64_rem(ns, IXGBE_PTP_PPS_HALF_SECOND, &rem); + clock_edge += ((IXGBE_PTP_PPS_HALF_SECOND - (u64)rem) << shift); - /* specify the initial clock start time */ - trgttiml = (u32)clock_edge; - trgttimh = (u32)(clock_edge >> 32); + /* specify the initial clock start time */ + trgttiml = (u32)clock_edge; + trgttimh = (u32)(clock_edge >> 32); - IXGBE_WRITE_REG(hw, IXGBE_CLKTIML, clktiml); - IXGBE_WRITE_REG(hw, IXGBE_CLKTIMH, clktimh); - IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml); - IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh); + IXGBE_WRITE_REG(hw, IXGBE_CLKTIML, clktiml); + IXGBE_WRITE_REG(hw, IXGBE_CLKTIMH, clktimh); + IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml); + IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh); - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); - IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc); - } else { - IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); - } + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc); IXGBE_WRITE_FLUSH(hw); } /** - * ixgbe_ptp_read - read raw cycle counter (to be used by time counter) + * ixgbe_ptp_read_X550 - read cycle counter value + * @hw_cc: cyclecounter structure + * + * This function reads SYSTIME registers. It is called by the cyclecounter + * structure to convert from internal representation into nanoseconds. We need + * this for X550 since some skews do not have expected clock frequency and + * result of SYSTIME is 32bits of "billions of cycles" and 32 bits of + * "cycles", rather than seconds and nanoseconds. + */ +static cycle_t ixgbe_ptp_read_X550(const struct cyclecounter *hw_cc) +{ + struct ixgbe_adapter *adapter = + container_of(hw_cc, struct ixgbe_adapter, hw_cc); + struct ixgbe_hw *hw = &adapter->hw; + struct timespec64 ts; + + /* storage is 32 bits of 'billions of cycles' and 32 bits of 'cycles'. + * Some revisions of hardware run at a higher frequency and so the + * cycles are not guaranteed to be nanoseconds. The timespec64 created + * here is used for its math/conversions but does not necessarily + * represent nominal time. + * + * It should be noted that this cyclecounter will overflow at a + * non-bitmask field since we have to convert our billions of cycles + * into an actual cycles count. This results in some possible weird + * situations at high cycle counter stamps. However given that 32 bits + * of "seconds" is ~138 years this isn't a problem. Even at the + * increased frequency of some revisions, this is still ~103 years. + * Since the SYSTIME values start at 0 and we never write them, it is + * highly unlikely for the cyclecounter to overflow in practice. + */ + IXGBE_READ_REG(hw, IXGBE_SYSTIMR); + ts.tv_nsec = IXGBE_READ_REG(hw, IXGBE_SYSTIML); + ts.tv_sec = IXGBE_READ_REG(hw, IXGBE_SYSTIMH); + + return (u64)timespec64_to_ns(&ts); +} + +/** + * ixgbe_ptp_read_82599 - read raw cycle counter (to be used by time counter) * @cc: the cyclecounter structure * * this function reads the cyclecounter registers and is called by the * cyclecounter structure used to construct a ns counter from the * arbitrary fixed point registers */ -static cycle_t ixgbe_ptp_read(const struct cyclecounter *cc) +static cycle_t ixgbe_ptp_read_82599(const struct cyclecounter *cc) { struct ixgbe_adapter *adapter = - container_of(cc, struct ixgbe_adapter, cc); + container_of(cc, struct ixgbe_adapter, hw_cc); struct ixgbe_hw *hw = &adapter->hw; u64 stamp = 0; @@ -203,20 +296,79 @@ static cycle_t ixgbe_ptp_read(const struct cyclecounter *cc) } /** - * ixgbe_ptp_adjfreq + * ixgbe_ptp_convert_to_hwtstamp - convert register value to hw timestamp + * @adapter: private adapter structure + * @hwtstamp: stack timestamp structure + * @systim: unsigned 64bit system time value + * + * We need to convert the adapter's RX/TXSTMP registers into a hwtstamp value + * which can be used by the stack's ptp functions. + * + * The lock is used to protect consistency of the cyclecounter and the SYSTIME + * registers. However, it does not need to protect against the Rx or Tx + * timestamp registers, as there can't be a new timestamp until the old one is + * unlatched by reading. + * + * In addition to the timestamp in hardware, some controllers need a software + * overflow cyclecounter, and this function takes this into account as well. + **/ +static void ixgbe_ptp_convert_to_hwtstamp(struct ixgbe_adapter *adapter, + struct skb_shared_hwtstamps *hwtstamp, + u64 timestamp) +{ + unsigned long flags; + struct timespec64 systime; + u64 ns; + + memset(hwtstamp, 0, sizeof(*hwtstamp)); + + switch (adapter->hw.mac.type) { + /* X550 and later hardware supposedly represent time using a seconds + * and nanoseconds counter, instead of raw 64bits nanoseconds. We need + * to convert the timestamp into cycles before it can be fed to the + * cyclecounter. We need an actual cyclecounter because some revisions + * of hardware run at a higher frequency and thus the counter does + * not represent seconds/nanoseconds. Instead it can be thought of as + * cycles and billions of cycles. + */ + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + /* Upper 32 bits represent billions of cycles, lower 32 bits + * represent cycles. However, we use timespec64_to_ns for the + * correct math even though the units haven't been corrected + * yet. + */ + systime.tv_sec = timestamp >> 32; + systime.tv_nsec = timestamp & 0xFFFFFFFF; + + timestamp = timespec64_to_ns(&systime); + break; + default: + break; + } + + spin_lock_irqsave(&adapter->tmreg_lock, flags); + ns = timecounter_cyc2time(&adapter->hw_tc, timestamp); + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + hwtstamp->hwtstamp = ns_to_ktime(ns); +} + +/** + * ixgbe_ptp_adjfreq_82599 * @ptp: the ptp clock structure * @ppb: parts per billion adjustment from base * * adjust the frequency of the ptp cycle counter by the * indicated ppb from the base frequency. */ -static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +static int ixgbe_ptp_adjfreq_82599(struct ptp_clock_info *ptp, s32 ppb) { struct ixgbe_adapter *adapter = container_of(ptp, struct ixgbe_adapter, ptp_caps); struct ixgbe_hw *hw = &adapter->hw; - u64 freq; - u32 diff, incval; + u64 freq, incval; + u32 diff; int neg_adj = 0; if (ppb < 0) { @@ -235,12 +387,16 @@ static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) switch (hw->mac.type) { case ixgbe_mac_X540: - IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, incval); + if (incval > 0xFFFFFFFFULL) + e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); + IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, (u32)incval); break; case ixgbe_mac_82599EB: + if (incval > 0x00FFFFFFULL) + e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, (1 << IXGBE_INCPER_SHIFT_82599) | - incval); + ((u32)incval & 0x00FFFFFFUL)); break; default: break; @@ -250,6 +406,43 @@ static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) } /** + * ixgbe_ptp_adjfreq_X550 + * @ptp: the ptp clock structure + * @ppb: parts per billion adjustment from base + * + * adjust the frequency of the SYSTIME registers by the indicated ppb from base + * frequency + */ +static int ixgbe_ptp_adjfreq_X550(struct ptp_clock_info *ptp, s32 ppb) +{ + struct ixgbe_adapter *adapter = + container_of(ptp, struct ixgbe_adapter, ptp_caps); + struct ixgbe_hw *hw = &adapter->hw; + int neg_adj = 0; + u64 rate = IXGBE_X550_BASE_PERIOD; + u32 inca; + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + rate *= ppb; + rate = div_u64(rate, 1000000000ULL); + + /* warn if rate is too large */ + if (rate >= INCVALUE_MASK) + e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); + + inca = rate & INCVALUE_MASK; + if (neg_adj) + inca |= ISGN; + + IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, inca); + + return 0; +} + +/** * ixgbe_ptp_adjtime * @ptp: the ptp clock structure * @delta: offset to adjust the cycle counter by @@ -263,10 +456,11 @@ static int ixgbe_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) unsigned long flags; spin_lock_irqsave(&adapter->tmreg_lock, flags); - timecounter_adjtime(&adapter->tc, delta); + timecounter_adjtime(&adapter->hw_tc, delta); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - ixgbe_ptp_setup_sdp(adapter); + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); return 0; } @@ -283,11 +477,11 @@ static int ixgbe_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { struct ixgbe_adapter *adapter = container_of(ptp, struct ixgbe_adapter, ptp_caps); - u64 ns; unsigned long flags; + u64 ns; spin_lock_irqsave(&adapter->tmreg_lock, flags); - ns = timecounter_read(&adapter->tc); + ns = timecounter_read(&adapter->hw_tc); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); *ts = ns_to_timespec64(ns); @@ -308,17 +502,16 @@ static int ixgbe_ptp_settime(struct ptp_clock_info *ptp, { struct ixgbe_adapter *adapter = container_of(ptp, struct ixgbe_adapter, ptp_caps); - u64 ns; unsigned long flags; - - ns = timespec64_to_ns(ts); + u64 ns = timespec64_to_ns(ts); /* reset the timecounter */ spin_lock_irqsave(&adapter->tmreg_lock, flags); - timecounter_init(&adapter->tc, &adapter->cc, ns); + timecounter_init(&adapter->hw_tc, &adapter->hw_cc, ns); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - ixgbe_ptp_setup_sdp(adapter); + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); return 0; } @@ -343,33 +536,26 @@ static int ixgbe_ptp_feature_enable(struct ptp_clock_info *ptp, * event when the clock SDP triggers. Clear mask when PPS is * disabled */ - if (rq->type == PTP_CLK_REQ_PPS) { - switch (adapter->hw.mac.type) { - case ixgbe_mac_X540: - if (on) - adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED; - else - adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED; - - ixgbe_ptp_setup_sdp(adapter); - return 0; - default: - break; - } - } + if (rq->type != PTP_CLK_REQ_PPS || !adapter->ptp_setup_sdp) + return -ENOTSUPP; + + if (on) + adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED; + else + adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED; - return -ENOTSUPP; + adapter->ptp_setup_sdp(adapter); + return 0; } /** * ixgbe_ptp_check_pps_event * @adapter: the private adapter structure - * @eicr: the interrupt cause register value * * This function is called by the interrupt routine when checking for * interrupts. It will check and handle a pps event. */ -void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr) +void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ptp_clock_event event; @@ -425,7 +611,9 @@ void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL); + struct ixgbe_ring *rx_ring; unsigned long rx_event; + int n; /* if we don't have a valid timestamp in the registers, just update the * timeout counter and exit @@ -437,19 +625,43 @@ void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter) /* determine the most recent watchdog or rx_timestamp event */ rx_event = adapter->last_rx_ptp_check; - if (time_after(adapter->last_rx_timestamp, rx_event)) - rx_event = adapter->last_rx_timestamp; + for (n = 0; n < adapter->num_rx_queues; n++) { + rx_ring = adapter->rx_ring[n]; + if (time_after(rx_ring->last_rx_timestamp, rx_event)) + rx_event = rx_ring->last_rx_timestamp; + } /* only need to read the high RXSTMP register to clear the lock */ - if (time_is_before_jiffies(rx_event + 5*HZ)) { + if (time_is_before_jiffies(rx_event + 5 * HZ)) { IXGBE_READ_REG(hw, IXGBE_RXSTMPH); adapter->last_rx_ptp_check = jiffies; + adapter->rx_hwtstamp_cleared++; e_warn(drv, "clearing RX Timestamp hang\n"); } } /** + * ixgbe_ptp_clear_tx_timestamp - utility function to clear Tx timestamp state + * @adapter: the private adapter structure + * + * This function should be called whenever the state related to a Tx timestamp + * needs to be cleared. This helps ensure that all related bits are reset for + * the next Tx timestamp event. + */ +static void ixgbe_ptp_clear_tx_timestamp(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + IXGBE_READ_REG(hw, IXGBE_TXSTMPH); + if (adapter->ptp_tx_skb) { + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + } + clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); +} + +/** * ixgbe_ptp_tx_hwtstamp - utility function which checks for TX time stamp * @adapter: the private adapter struct * @@ -461,23 +673,15 @@ static void ixgbe_ptp_tx_hwtstamp(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct skb_shared_hwtstamps shhwtstamps; - u64 regval = 0, ns; - unsigned long flags; + u64 regval = 0; regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPL); regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPH) << 32; - spin_lock_irqsave(&adapter->tmreg_lock, flags); - ns = timecounter_cyc2time(&adapter->tc, regval); - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - - memset(&shhwtstamps, 0, sizeof(shhwtstamps)); - shhwtstamps.hwtstamp = ns_to_ktime(ns); + ixgbe_ptp_convert_to_hwtstamp(adapter, &shhwtstamps, regval); skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); + ixgbe_ptp_clear_tx_timestamp(adapter); } /** @@ -497,38 +701,85 @@ static void ixgbe_ptp_tx_hwtstamp_work(struct work_struct *work) IXGBE_PTP_TX_TIMEOUT); u32 tsynctxctl; - if (timeout) { - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); - e_warn(drv, "clearing Tx Timestamp hang\n"); + /* we have to have a valid skb to poll for a timestamp */ + if (!adapter->ptp_tx_skb) { + ixgbe_ptp_clear_tx_timestamp(adapter); return; } + /* stop polling once we have a valid timestamp */ tsynctxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL); - if (tsynctxctl & IXGBE_TSYNCTXCTL_VALID) + if (tsynctxctl & IXGBE_TSYNCTXCTL_VALID) { ixgbe_ptp_tx_hwtstamp(adapter); - else + return; + } + + if (timeout) { + ixgbe_ptp_clear_tx_timestamp(adapter); + adapter->tx_hwtstamp_timeouts++; + e_warn(drv, "clearing Tx Timestamp hang\n"); + } else { /* reschedule to keep checking if it's not available yet */ schedule_work(&adapter->ptp_tx_work); + } } /** - * ixgbe_ptp_rx_hwtstamp - utility function which checks for RX time stamp - * @adapter: pointer to adapter struct + * ixgbe_ptp_rx_pktstamp - utility function to get RX time stamp from buffer + * @q_vector: structure containing interrupt and ring information + * @skb: the packet + * + * This function will be called by the Rx routine of the timestamp for this + * packet is stored in the buffer. The value is stored in little endian format + * starting at the end of the packet data. + */ +void ixgbe_ptp_rx_pktstamp(struct ixgbe_q_vector *q_vector, + struct sk_buff *skb) +{ + __le64 regval; + + /* copy the bits out of the skb, and then trim the skb length */ + skb_copy_bits(skb, skb->len - IXGBE_TS_HDR_LEN, ®val, + IXGBE_TS_HDR_LEN); + __pskb_trim(skb, skb->len - IXGBE_TS_HDR_LEN); + + /* The timestamp is recorded in little endian format, and is stored at + * the end of the packet. + * + * DWORD: N N + 1 N + 2 + * Field: End of Packet SYSTIMH SYSTIML + */ + ixgbe_ptp_convert_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb), + le64_to_cpu(regval)); +} + +/** + * ixgbe_ptp_rx_rgtstamp - utility function which checks for RX time stamp + * @q_vector: structure containing interrupt and ring information * @skb: particular skb to send timestamp with * * if the timestamp is valid, we convert it into the timecounter ns * value, then store that result into the shhwtstamps structure which * is passed up the network stack */ -void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb) +void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *q_vector, + struct sk_buff *skb) { - struct ixgbe_hw *hw = &adapter->hw; - struct skb_shared_hwtstamps *shhwtstamps; - u64 regval = 0, ns; + struct ixgbe_adapter *adapter; + struct ixgbe_hw *hw; + u64 regval = 0; u32 tsyncrxctl; - unsigned long flags; + + /* we cannot process timestamps on a ring without a q_vector */ + if (!q_vector || !q_vector->adapter) + return; + + adapter = q_vector->adapter; + hw = &adapter->hw; + + /* Read the tsyncrxctl register afterwards in order to prevent taking an + * I/O hit on every packet. + */ tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL); if (!(tsyncrxctl & IXGBE_TSYNCRXCTL_VALID)) @@ -537,17 +788,7 @@ void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb) regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPL); regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPH) << 32; - spin_lock_irqsave(&adapter->tmreg_lock, flags); - ns = timecounter_cyc2time(&adapter->tc, regval); - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - - shhwtstamps = skb_hwtstamps(skb); - shhwtstamps->hwtstamp = ns_to_ktime(ns); - - /* Update the last_rx_timestamp timer in order to enable watchdog check - * for error case of latched timestamp on a dropped packet. - */ - adapter->last_rx_timestamp = jiffies; + ixgbe_ptp_convert_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); } int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr) @@ -610,14 +851,20 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, case HWTSTAMP_FILTER_NONE: tsync_rx_ctl = 0; tsync_rx_mtrl = 0; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1; tsync_rx_mtrl |= IXGBE_RXMTRL_V1_SYNC_MSG; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1; tsync_rx_mtrl |= IXGBE_RXMTRL_V1_DELAY_REQ_MSG; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: @@ -631,9 +878,21 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_EVENT_V2; is_l2 = true; config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: case HWTSTAMP_FILTER_ALL: + /* The X550 controller is capable of timestamping all packets, + * which allows it to accept any filter. + */ + if (hw->mac.type >= ixgbe_mac_X550) { + tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; + adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; + break; + } + /* fall through */ default: /* * register RXMTRL must be set in order to do V1 packets, @@ -641,16 +900,46 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, * Delay_Req messages and hardware does not support * timestamping all packets => return error */ + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); config->rx_filter = HWTSTAMP_FILTER_NONE; return -ERANGE; } if (hw->mac.type == ixgbe_mac_82598EB) { + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); if (tsync_rx_ctl | tsync_tx_ctl) return -ERANGE; return 0; } + /* Per-packet timestamping only works if the filter is set to all + * packets. Since this is desired, always timestamp all packets as long + * as any Rx filter was configured. + */ + switch (hw->mac.type) { + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + /* enable timestamping all packets only if at least some + * packets were requested. Otherwise, play nice and disable + * timestamping + */ + if (config->rx_filter == HWTSTAMP_FILTER_NONE) + break; + + tsync_rx_ctl = IXGBE_TSYNCRXCTL_ENABLED | + IXGBE_TSYNCRXCTL_TYPE_ALL | + IXGBE_TSYNCRXCTL_TSIP_UT_EN; + config->rx_filter = HWTSTAMP_FILTER_ALL; + adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; + adapter->flags &= ~IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER; + is_l2 = true; + break; + default: + break; + } + /* define ethertype filter for timestamping L2 packets */ if (is_l2) IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_1588), @@ -678,8 +967,8 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, IXGBE_WRITE_FLUSH(hw); /* clear TX/RX time stamp registers, just to be sure */ - regval = IXGBE_READ_REG(hw, IXGBE_TXSTMPH); - regval = IXGBE_READ_REG(hw, IXGBE_RXSTMPH); + ixgbe_ptp_clear_tx_timestamp(adapter); + IXGBE_READ_REG(hw, IXGBE_RXSTMPH); return 0; } @@ -712,23 +1001,9 @@ int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr) -EFAULT : 0; } -/** - * ixgbe_ptp_start_cyclecounter - create the cycle counter from hw - * @adapter: pointer to the adapter structure - * - * This function should be called to set the proper values for the TIMINCA - * register and tell the cyclecounter structure what the tick rate of SYSTIME - * is. It does not directly modify SYSTIME registers or the timecounter - * structure. It should be called whenever a new TIMINCA value is necessary, - * such as during initialization or when the link speed changes. - */ -void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) +static void ixgbe_ptp_link_speed_adjust(struct ixgbe_adapter *adapter, + u32 *shift, u32 *incval) { - struct ixgbe_hw *hw = &adapter->hw; - u32 incval = 0; - u32 shift = 0; - unsigned long flags; - /** * Scale the NIC cycle counter by a large factor so that * relatively small corrections to the frequency can be added @@ -745,36 +1020,98 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) */ switch (adapter->link_speed) { case IXGBE_LINK_SPEED_100_FULL: - incval = IXGBE_INCVAL_100; - shift = IXGBE_INCVAL_SHIFT_100; + *shift = IXGBE_INCVAL_SHIFT_100; + *incval = IXGBE_INCVAL_100; break; case IXGBE_LINK_SPEED_1GB_FULL: - incval = IXGBE_INCVAL_1GB; - shift = IXGBE_INCVAL_SHIFT_1GB; + *shift = IXGBE_INCVAL_SHIFT_1GB; + *incval = IXGBE_INCVAL_1GB; break; case IXGBE_LINK_SPEED_10GB_FULL: default: - incval = IXGBE_INCVAL_10GB; - shift = IXGBE_INCVAL_SHIFT_10GB; + *shift = IXGBE_INCVAL_SHIFT_10GB; + *incval = IXGBE_INCVAL_10GB; break; } +} - /** - * Modify the calculated values to fit within the correct - * number of bits specified by the hardware. The 82599 doesn't - * have the same space as the X540, so bitshift the calculated - * values to fit. +/** + * ixgbe_ptp_start_cyclecounter - create the cycle counter from hw + * @adapter: pointer to the adapter structure + * + * This function should be called to set the proper values for the TIMINCA + * register and tell the cyclecounter structure what the tick rate of SYSTIME + * is. It does not directly modify SYSTIME registers or the timecounter + * structure. It should be called whenever a new TIMINCA value is necessary, + * such as during initialization or when the link speed changes. + */ +void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct cyclecounter cc; + unsigned long flags; + u32 incval = 0; + u32 tsauxc = 0; + u32 fuse0 = 0; + + /* For some of the boards below this mask is technically incorrect. + * The timestamp mask overflows at approximately 61bits. However the + * particular hardware does not overflow on an even bitmask value. + * Instead, it overflows due to conversion of upper 32bits billions of + * cycles. Timecounters are not really intended for this purpose so + * they do not properly function if the overflow point isn't 2^N-1. + * However, the actual SYSTIME values in question take ~138 years to + * overflow. In practice this means they won't actually overflow. A + * proper fix to this problem would require modification of the + * timecounter delta calculations. */ + cc.mask = CLOCKSOURCE_MASK(64); + cc.mult = 1; + cc.shift = 0; + switch (hw->mac.type) { + case ixgbe_mac_X550EM_x: + /* SYSTIME assumes X550EM_x board frequency is 300Mhz, and is + * designed to represent seconds and nanoseconds when this is + * the case. However, some revisions of hardware have a 400Mhz + * clock and we have to compensate for this frequency + * variation using corrected mult and shift values. + */ + fuse0 = IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0)); + if (!(fuse0 & IXGBE_FUSES0_300MHZ)) { + cc.mult = 3; + cc.shift = 2; + } + /* fallthrough */ + case ixgbe_mac_X550: + cc.read = ixgbe_ptp_read_X550; + + /* enable SYSTIME counter */ + IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0); + IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0); + IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0); + tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC); + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, + tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME); + IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS); + IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC); + + IXGBE_WRITE_FLUSH(hw); + break; case ixgbe_mac_X540: + cc.read = ixgbe_ptp_read_82599; + + ixgbe_ptp_link_speed_adjust(adapter, &cc.shift, &incval); IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, incval); break; case ixgbe_mac_82599EB: + cc.read = ixgbe_ptp_read_82599; + + ixgbe_ptp_link_speed_adjust(adapter, &cc.shift, &incval); incval >>= IXGBE_INCVAL_SHIFT_82599; - shift -= IXGBE_INCVAL_SHIFT_82599; + cc.shift -= IXGBE_INCVAL_SHIFT_82599; IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, - (1 << IXGBE_INCPER_SHIFT_82599) | - incval); + (1 << IXGBE_INCPER_SHIFT_82599) | incval); break; default: /* other devices aren't supported */ @@ -787,13 +1124,7 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) /* need lock to prevent incorrect read while modifying cyclecounter */ spin_lock_irqsave(&adapter->tmreg_lock, flags); - - memset(&adapter->cc, 0, sizeof(adapter->cc)); - adapter->cc.read = ixgbe_ptp_read; - adapter->cc.mask = CYCLECOUNTER_MASK(64); - adapter->cc.shift = shift; - adapter->cc.mult = 1; - + memcpy(&adapter->hw_cc, &cc, sizeof(adapter->hw_cc)); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); } @@ -814,29 +1145,27 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; unsigned long flags; - /* set SYSTIME registers to 0 just in case */ - IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0x00000000); - IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0x00000000); - IXGBE_WRITE_FLUSH(hw); - /* reset the hardware timestamping mode */ ixgbe_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); + /* 82598 does not support PTP */ + if (hw->mac.type == ixgbe_mac_82598EB) + return; + ixgbe_ptp_start_cyclecounter(adapter); spin_lock_irqsave(&adapter->tmreg_lock, flags); - - /* reset the ns time counter */ - timecounter_init(&adapter->tc, &adapter->cc, + timecounter_init(&adapter->hw_tc, &adapter->hw_cc, ktime_to_ns(ktime_get_real())); - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - /* - * Now that the shift has been calculated and the systime + adapter->last_overflow_check = jiffies; + + /* Now that the shift has been calculated and the systime * registers reset, (re-)enable the Clock out feature */ - ixgbe_ptp_setup_sdp(adapter); + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); } /** @@ -845,11 +1174,11 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter) * * This function performs setup of the user entry point function table and * initializes the PTP clock device, which is used to access the clock-like - * features of the PTP core. It will be called by ixgbe_ptp_init, only if - * there isn't already a clock device (such as after a suspend/resume cycle, - * where the clock device wasn't destroyed). + * features of the PTP core. It will be called by ixgbe_ptp_init, and may + * reuse a previously initialized clock (such as during a suspend/resume + * cycle). */ -static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) +static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; long err; @@ -869,11 +1198,12 @@ static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_caps.n_ext_ts = 0; adapter->ptp_caps.n_per_out = 0; adapter->ptp_caps.pps = 1; - adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq; + adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599; adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime; adapter->ptp_caps.settime64 = ixgbe_ptp_settime; adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; + adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_x540; break; case ixgbe_mac_82599EB: snprintf(adapter->ptp_caps.name, @@ -885,14 +1215,31 @@ static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_caps.n_ext_ts = 0; adapter->ptp_caps.n_per_out = 0; adapter->ptp_caps.pps = 0; - adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq; + adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599; + adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; + adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime; + adapter->ptp_caps.settime64 = ixgbe_ptp_settime; + adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; + break; + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + snprintf(adapter->ptp_caps.name, 16, "%s", netdev->name); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 30000000; + adapter->ptp_caps.n_alarm = 0; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.n_per_out = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_X550; adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime; adapter->ptp_caps.settime64 = ixgbe_ptp_settime; adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; + adapter->ptp_setup_sdp = NULL; break; default: adapter->ptp_clock = NULL; + adapter->ptp_setup_sdp = NULL; return -EOPNOTSUPP; } @@ -961,18 +1308,13 @@ void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter) if (!test_and_clear_bit(__IXGBE_PTP_RUNNING, &adapter->state)) return; - /* since this might be called in suspend, we don't clear the state, - * but simply reset the auxiliary PPS signal control register - */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_TSAUXC, 0x0); + adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED; + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); /* ensure that we cancel any pending PTP Tx work item in progress */ cancel_work_sync(&adapter->ptp_tx_work); - if (adapter->ptp_tx_skb) { - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); - } + ixgbe_ptp_clear_tx_timestamp(adapter); } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index fcd8b27a0ccb..eeff3d075bf8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2014 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -130,6 +130,38 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter) return -ENOMEM; } +/** + * ixgbe_get_vfs - Find and take references to all vf devices + * @adapter: Pointer to adapter struct + */ +static void ixgbe_get_vfs(struct ixgbe_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + u16 vendor = pdev->vendor; + struct pci_dev *vfdev; + int vf = 0; + u16 vf_id; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + if (!pos) + return; + pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id); + + vfdev = pci_get_device(vendor, vf_id, NULL); + for (; vfdev; vfdev = pci_get_device(vendor, vf_id, vfdev)) { + if (!vfdev->is_virtfn) + continue; + if (vfdev->physfn != pdev) + continue; + if (vf >= adapter->num_vfs) + continue; + pci_dev_get(vfdev); + adapter->vfinfo[vf].vfdev = vfdev; + ++vf; + } +} + /* Note this function is called when the user wants to enable SR-IOV * VFs using the now deprecated module parameter */ @@ -170,8 +202,10 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter) } } - if (!__ixgbe_enable_sriov(adapter)) + if (!__ixgbe_enable_sriov(adapter)) { + ixgbe_get_vfs(adapter); return; + } /* If we have gotten to this point then there is no memory available * to manage the VF devices - print message and bail. @@ -184,6 +218,7 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter) #endif /* #ifdef CONFIG_PCI_IOV */ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) { + unsigned int num_vfs = adapter->num_vfs, vf; struct ixgbe_hw *hw = &adapter->hw; u32 gpie; u32 vmdctl; @@ -192,6 +227,16 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) /* set num VFs to 0 to prevent access to vfinfo */ adapter->num_vfs = 0; + /* put the reference to all of the vf devices */ + for (vf = 0; vf < num_vfs; ++vf) { + struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev; + + if (!vfdev) + continue; + adapter->vfinfo[vf].vfdev = NULL; + pci_dev_put(vfdev); + } + /* free VF control structures */ kfree(adapter->vfinfo); adapter->vfinfo = NULL; @@ -289,6 +334,7 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs) e_dev_warn("Failed to enable PCI sriov: %d\n", err); return err; } + ixgbe_get_vfs(adapter); ixgbe_sriov_reinit(adapter); return num_vfs; @@ -406,11 +452,34 @@ void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter) static int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, u32 vf) { - /* VLAN 0 is a special case, don't allow it to be removed */ - if (!vid && !add) - return 0; + struct ixgbe_hw *hw = &adapter->hw; + int err; + + /* If VLAN overlaps with one the PF is currently monitoring make + * sure that we are able to allocate a VLVF entry. This may be + * redundant but it guarantees PF will maintain visibility to + * the VLAN. + */ + if (add && test_bit(vid, adapter->active_vlans)) { + err = hw->mac.ops.set_vfta(hw, vid, VMDQ_P(0), true, false); + if (err) + return err; + } + + err = hw->mac.ops.set_vfta(hw, vid, vf, !!add, false); - return adapter->hw.mac.ops.set_vfta(&adapter->hw, vid, vf, (bool)add); + if (add && !err) + return err; + + /* If we failed to add the VF VLAN or we are removing the VF VLAN + * we may need to drop the PF pool bit in order to allow us to free + * up the VLVF resources. + */ + if (test_bit(vid, adapter->active_vlans) || + (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC)) + ixgbe_update_pf_promisc_vlvf(adapter, vid); + + return err; } static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) @@ -516,13 +585,75 @@ static void ixgbe_clear_vmvir(struct ixgbe_adapter *adapter, u32 vf) IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf), 0); } + +static void ixgbe_clear_vf_vlans(struct ixgbe_adapter *adapter, u32 vf) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 i; + + /* post increment loop, covers VLVF_ENTRIES - 1 to 0 */ + for (i = IXGBE_VLVF_ENTRIES; i--;) { + u32 word = IXGBE_VLVFB(i * 2 + vf / 32); + u32 bits[2], vlvfb, vid, vfta, vlvf; + u32 mask = 1 << (vf / 32); + + vlvfb = IXGBE_READ_REG(hw, word); + + /* if our bit isn't set we can skip it */ + if (!(vlvfb & mask)) + continue; + + /* clear our bit from vlvfb */ + vlvfb ^= mask; + + /* create 64b mask to chedk to see if we should clear VLVF */ + bits[word % 2] = vlvfb; + bits[(word % 2) ^ 1] = IXGBE_READ_REG(hw, word ^ 1); + + /* if promisc is enabled, PF will be present, leave VFTA */ + if (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC) { + bits[VMDQ_P(0) / 32] &= ~(1 << (VMDQ_P(0) % 32)); + + if (bits[0] || bits[1]) + goto update_vlvfb; + goto update_vlvf; + } + + /* if other pools are present, just remove ourselves */ + if (bits[0] || bits[1]) + goto update_vlvfb; + + /* if we cannot determine VLAN just remove ourselves */ + vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(i)); + if (!vlvf) + goto update_vlvfb; + + vid = vlvf & VLAN_VID_MASK; + mask = 1 << (vid % 32); + + /* clear bit from VFTA */ + vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(vid / 32)); + if (vfta & mask) + IXGBE_WRITE_REG(hw, IXGBE_VFTA(vid / 32), vfta ^ mask); +update_vlvf: + /* clear POOL selection enable */ + IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), 0); +update_vlvfb: + /* clear pool bits */ + IXGBE_WRITE_REG(hw, IXGBE_VLVFB(word), vlvfb); + } +} + static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) { struct ixgbe_hw *hw = &adapter->hw; struct vf_data_storage *vfinfo = &adapter->vfinfo[vf]; u8 num_tcs = netdev_get_num_tc(adapter->netdev); - /* add PF assigned VLAN or VLAN 0 */ + /* remove VLAN filters beloning to this VF */ + ixgbe_clear_vf_vlans(adapter, vf); + + /* add back PF assigned VLAN or VLAN 0 */ ixgbe_set_vf_vlan(adapter, true, vfinfo->pf_vlan, vf); /* reset offloads to defaults */ @@ -768,40 +899,14 @@ static int ixgbe_set_vf_mac_addr(struct ixgbe_adapter *adapter, return ixgbe_set_vf_mac(adapter, vf, new_mac) < 0; } -static int ixgbe_find_vlvf_entry(struct ixgbe_hw *hw, u32 vlan) -{ - u32 vlvf; - s32 regindex; - - /* short cut the special case */ - if (vlan == 0) - return 0; - - /* Search for the vlan id in the VLVF entries */ - for (regindex = 1; regindex < IXGBE_VLVF_ENTRIES; regindex++) { - vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(regindex)); - if ((vlvf & VLAN_VID_MASK) == vlan) - break; - } - - /* Return a negative value if not found */ - if (regindex >= IXGBE_VLVF_ENTRIES) - regindex = -1; - - return regindex; -} - static int ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) { + u32 add = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT; + u32 vid = (msgbuf[1] & IXGBE_VLVF_VLANID_MASK); + u8 tcs = netdev_get_num_tc(adapter->netdev); struct ixgbe_hw *hw = &adapter->hw; - int add = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT; - int vid = (msgbuf[1] & IXGBE_VLVF_VLANID_MASK); int err; - s32 reg_ndx; - u32 vlvf; - u32 bits; - u8 tcs = netdev_get_num_tc(adapter->netdev); if (adapter->vfinfo[vf].pf_vlan || tcs) { e_warn(drv, @@ -811,54 +916,23 @@ static int ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter, return -1; } - if (add) - adapter->vfinfo[vf].vlan_count++; - else if (adapter->vfinfo[vf].vlan_count) - adapter->vfinfo[vf].vlan_count--; - - /* in case of promiscuous mode any VLAN filter set for a VF must - * also have the PF pool added to it. - */ - if (add && adapter->netdev->flags & IFF_PROMISC) - err = ixgbe_set_vf_vlan(adapter, add, vid, VMDQ_P(0)); + /* VLAN 0 is a special case, don't allow it to be removed */ + if (!vid && !add) + return 0; err = ixgbe_set_vf_vlan(adapter, add, vid, vf); - if (!err && adapter->vfinfo[vf].spoofchk_enabled) - hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf); + if (err) + return err; - /* Go through all the checks to see if the VLAN filter should - * be wiped completely. - */ - if (!add && adapter->netdev->flags & IFF_PROMISC) { - reg_ndx = ixgbe_find_vlvf_entry(hw, vid); - if (reg_ndx < 0) - return err; - vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(reg_ndx)); - /* See if any other pools are set for this VLAN filter - * entry other than the PF. - */ - if (VMDQ_P(0) < 32) { - bits = IXGBE_READ_REG(hw, IXGBE_VLVFB(reg_ndx * 2)); - bits &= ~(1 << VMDQ_P(0)); - bits |= IXGBE_READ_REG(hw, - IXGBE_VLVFB(reg_ndx * 2) + 1); - } else { - bits = IXGBE_READ_REG(hw, - IXGBE_VLVFB(reg_ndx * 2) + 1); - bits &= ~(1 << (VMDQ_P(0) - 32)); - bits |= IXGBE_READ_REG(hw, IXGBE_VLVFB(reg_ndx * 2)); - } + if (adapter->vfinfo[vf].spoofchk_enabled) + hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf); - /* If the filter was removed then ensure PF pool bit - * is cleared if the PF only added itself to the pool - * because the PF is in promiscuous mode. - */ - if ((vlvf & VLAN_VID_MASK) == vid && - !test_bit(vid, adapter->active_vlans) && !bits) - ixgbe_set_vf_vlan(adapter, add, vid, VMDQ_P(0)); - } + if (add) + adapter->vfinfo[vf].vlan_count++; + else if (adapter->vfinfo[vf].vlan_count) + adapter->vfinfo[vf].vlan_count--; - return err; + return 0; } static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter, @@ -1239,6 +1313,9 @@ static int ixgbe_enable_port_vlan(struct ixgbe_adapter *adapter, int vf, if (err) goto out; + /* Revoke tagless access via VLAN 0 */ + ixgbe_set_vf_vlan(adapter, false, 0, vf); + ixgbe_set_vmvir(adapter, vlan, qos, vf); ixgbe_set_vmolr(hw, vf, false); if (adapter->vfinfo[vf].spoofchk_enabled) @@ -1272,6 +1349,8 @@ static int ixgbe_disable_port_vlan(struct ixgbe_adapter *adapter, int vf) err = ixgbe_set_vf_vlan(adapter, false, adapter->vfinfo[vf].pf_vlan, vf); + /* Restore tagless access via VLAN 0 */ + ixgbe_set_vf_vlan(adapter, true, 0, vf); ixgbe_clear_vmvir(adapter, vf); ixgbe_set_vmolr(hw, vf, true); hw->mac.ops.set_vlan_anti_spoofing(hw, false, vf); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 995f03107eac..06add27c8b8c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1020,6 +1020,7 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_TXSTMPH 0x08C08 /* Tx timestamp value High - RO */ #define IXGBE_SYSTIML 0x08C0C /* System time register Low - RO */ #define IXGBE_SYSTIMH 0x08C10 /* System time register High - RO */ +#define IXGBE_SYSTIMR 0x08C58 /* System time register Residue - RO */ #define IXGBE_TIMINCA 0x08C14 /* Increment attributes register - RW */ #define IXGBE_TIMADJL 0x08C18 /* Time Adjustment Offset register Low - RW */ #define IXGBE_TIMADJH 0x08C1C /* Time Adjustment Offset register High - RW */ @@ -1036,6 +1037,7 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_AUXSTMPH0 0x08C40 /* Auxiliary Time Stamp 0 register High - RO */ #define IXGBE_AUXSTMPL1 0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */ #define IXGBE_AUXSTMPH1 0x08C48 /* Auxiliary Time Stamp 1 register High - RO */ +#define IXGBE_TSIM 0x08C68 /* TimeSync Interrupt Mask Register - RW */ /* Diagnostic Registers */ #define IXGBE_RDSTATCTL 0x02C20 @@ -1345,7 +1347,10 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK 0xFF01 /* int chip-wide mask */ #define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_FLAG 0xFC01 /* int chip-wide mask */ #define IXGBE_MDIO_GLOBAL_ALARM_1 0xCC00 /* Global alarm 1 */ +#define IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT 0x0010 /* device fault */ #define IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL 0x4000 /* high temp failure */ +#define IXGBE_MDIO_GLOBAL_FAULT_MSG 0xC850 /* global fault msg */ +#define IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP 0x8007 /* high temp failure */ #define IXGBE_MDIO_GLOBAL_INT_MASK 0xD400 /* Global int mask */ /* autoneg vendor alarm int enable */ #define IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN 0x1000 @@ -1353,6 +1358,7 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN 0x1 /* vendor alarm int enable */ #define IXGBE_MDIO_GLOBAL_STD_ALM2_INT 0x200 /* vendor alarm2 int mask */ #define IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN 0x4000 /* int high temp enable */ +#define IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN 0x0010 /*int dev fault enable */ #define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR 0xC30A /* PHY_XS SDA/SCL Addr Reg */ #define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA 0xC30B /* PHY_XS SDA/SCL Data Reg */ @@ -2209,6 +2215,7 @@ enum { #define IXGBE_TSAUXC_EN_CLK 0x00000004 #define IXGBE_TSAUXC_SYNCLK 0x00000008 #define IXGBE_TSAUXC_SDP0_INT 0x00000040 +#define IXGBE_TSAUXC_DISABLE_SYSTIME 0x80000000 #define IXGBE_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ #define IXGBE_TSYNCTXCTL_ENABLED 0x00000010 /* Tx timestamping enabled */ @@ -2218,8 +2225,12 @@ enum { #define IXGBE_TSYNCRXCTL_TYPE_L2_V2 0x00 #define IXGBE_TSYNCRXCTL_TYPE_L4_V1 0x02 #define IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 +#define IXGBE_TSYNCRXCTL_TYPE_ALL 0x08 #define IXGBE_TSYNCRXCTL_TYPE_EVENT_V2 0x0A #define IXGBE_TSYNCRXCTL_ENABLED 0x00000010 /* Rx Timestamping enabled */ +#define IXGBE_TSYNCRXCTL_TSIP_UT_EN 0x00800000 /* Rx Timestamp in Packet */ + +#define IXGBE_TSIM_TXTS 0x00000002 #define IXGBE_RXMTRL_V1_CTRLT_MASK 0x000000FF #define IXGBE_RXMTRL_V1_SYNC_MSG 0x00 @@ -2332,6 +2343,7 @@ enum { #define IXGBE_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ #define IXGBE_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */ #define IXGBE_RXD_STAT_LLINT 0x800 /* Pkt caused Low Latency Interrupt */ +#define IXGBE_RXD_STAT_TSIP 0x08000 /* Time Stamp in packet buffer */ #define IXGBE_RXD_STAT_TS 0x10000 /* Time Stamp */ #define IXGBE_RXD_STAT_SECP 0x20000 /* Security Processing */ #define IXGBE_RXD_STAT_LB 0x40000 /* Loopback Status */ @@ -3288,7 +3300,7 @@ struct ixgbe_mac_operations { s32 (*enable_mc)(struct ixgbe_hw *); s32 (*disable_mc)(struct ixgbe_hw *); s32 (*clear_vfta)(struct ixgbe_hw *); - s32 (*set_vfta)(struct ixgbe_hw *, u32, u32, bool); + s32 (*set_vfta)(struct ixgbe_hw *, u32, u32, bool, bool); s32 (*init_uta_tables)(struct ixgbe_hw *); void (*set_mac_anti_spoofing)(struct ixgbe_hw *, bool, int); void (*set_vlan_anti_spoofing)(struct ixgbe_hw *, bool, int); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index c1d4584f6469..2358c1b7d586 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -57,8 +57,7 @@ s32 ixgbe_get_invariants_X540(struct ixgbe_hw *hw) struct ixgbe_phy_info *phy = &hw->phy; /* set_phy_power was set by default to NULL */ - if (!ixgbe_mng_present(hw)) - phy->ops.set_phy_power = ixgbe_set_copper_phy_power; + phy->ops.set_phy_power = ixgbe_set_copper_phy_power; mac->mcft_size = IXGBE_X540_MC_TBL_SIZE; mac->vft_size = IXGBE_X540_VFT_TBL_SIZE; @@ -110,13 +109,14 @@ mac_reset_top: ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear indicating reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST_MASK)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST_MASK) { @@ -154,12 +154,16 @@ mac_reset_top: /* Add the SAN MAC address to the RAR only if it's a valid address */ if (is_valid_ether_addr(hw->mac.san_addr)) { - hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1, - hw->mac.san_addr, 0, IXGBE_RAH_AV); - /* Save the SAN MAC RAR index */ hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1; + hw->mac.ops.set_rar(hw, hw->mac.san_mac_rar_index, + hw->mac.san_addr, 0, IXGBE_RAH_AV); + + /* clear VMDq pool/queue selection for this RAR */ + hw->mac.ops.clear_vmdq(hw, hw->mac.san_mac_rar_index, + IXGBE_CLEAR_VMDQ_ALL); + /* Reserve the last RAR for the SAN MAC address */ hw->mac.num_rar_entries--; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index ebe0ac950b14..f4ef0d1a5dbe 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -26,6 +26,8 @@ #include "ixgbe_common.h" #include "ixgbe_phy.h" +static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *, ixgbe_link_speed); + static s32 ixgbe_get_invariants_X550_x(struct ixgbe_hw *hw) { struct ixgbe_mac_info *mac = &hw->mac; @@ -85,79 +87,6 @@ static s32 ixgbe_write_cs4227(struct ixgbe_hw *hw, u16 reg, u16 value) } /** - * ixgbe_check_cs4227_reg - Perform diag on a CS4227 register - * @hw: pointer to hardware structure - * @reg: the register to check - * - * Performs a diagnostic on a register in the CS4227 chip. Returns an error - * if it is not operating correctly. - * This function assumes that the caller has acquired the proper semaphore. - */ -static s32 ixgbe_check_cs4227_reg(struct ixgbe_hw *hw, u16 reg) -{ - s32 status; - u32 retry; - u16 reg_val; - - reg_val = (IXGBE_CS4227_EDC_MODE_DIAG << 1) | 1; - status = ixgbe_write_cs4227(hw, reg, reg_val); - if (status) - return status; - for (retry = 0; retry < IXGBE_CS4227_RETRIES; retry++) { - msleep(IXGBE_CS4227_CHECK_DELAY); - reg_val = 0xFFFF; - ixgbe_read_cs4227(hw, reg, ®_val); - if (!reg_val) - break; - } - if (reg_val) { - hw_err(hw, "CS4227 reg 0x%04X failed diagnostic\n", reg); - return status; - } - - return 0; -} - -/** - * ixgbe_get_cs4227_status - Return CS4227 status - * @hw: pointer to hardware structure - * - * Performs a diagnostic on the CS4227 chip. Returns an error if it is - * not operating correctly. - * This function assumes that the caller has acquired the proper semaphore. - */ -static s32 ixgbe_get_cs4227_status(struct ixgbe_hw *hw) -{ - s32 status; - u16 value = 0; - - /* Exit if the diagnostic has already been performed. */ - status = ixgbe_read_cs4227(hw, IXGBE_CS4227_SCRATCH, &value); - if (status) - return status; - if (value == IXGBE_CS4227_RESET_COMPLETE) - return 0; - - /* Check port 0. */ - status = ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_LINE_SPARE24_LSB); - if (status) - return status; - - status = ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_HOST_SPARE24_LSB); - if (status) - return status; - - /* Check port 1. */ - status = ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_LINE_SPARE24_LSB + - (1 << 12)); - if (status) - return status; - - return ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_HOST_SPARE24_LSB + - (1 << 12)); -} - -/** * ixgbe_read_pe - Read register from port expander * @hw: pointer to hardware structure * @reg: register number to read @@ -326,13 +255,6 @@ static void ixgbe_check_cs4227(struct ixgbe_hw *hw) return; } - /* Is the CS4227 working correctly? */ - status = ixgbe_get_cs4227_status(hw); - if (status) { - hw_err(hw, "CS4227 status failed: %d", status); - goto out; - } - /* Record completion for next time. */ status = ixgbe_write_cs4227(hw, IXGBE_CS4227_SCRATCH, IXGBE_CS4227_RESET_COMPLETE); @@ -1257,31 +1179,71 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, if (status) return status; - /* Configure CS4227 LINE side to 10G SR. */ - slice = IXGBE_CS4227_LINE_SPARE22_MSB + (hw->bus.lan_id << 12); - value = IXGBE_CS4227_SPEED_10G; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, slice, - value); - - /* Configure CS4227 for HOST connection rate then type. */ - slice = IXGBE_CS4227_HOST_SPARE22_MSB + (hw->bus.lan_id << 12); - value = speed & IXGBE_LINK_SPEED_10GB_FULL ? - IXGBE_CS4227_SPEED_10G : IXGBE_CS4227_SPEED_1G; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, slice, - value); + if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { + /* Configure CS4227 LINE side to 10G SR. */ + slice = IXGBE_CS4227_LINE_SPARE22_MSB + (hw->bus.lan_id << 12); + value = IXGBE_CS4227_SPEED_10G; + status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, + slice, value); + if (status) + goto i2c_err; - slice = IXGBE_CS4227_HOST_SPARE24_LSB + (hw->bus.lan_id << 12); - if (setup_linear) - value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; - else + slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12); value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, slice, - value); + status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, + slice, value); + if (status) + goto i2c_err; + + /* Configure CS4227 for HOST connection rate then type. */ + slice = IXGBE_CS4227_HOST_SPARE22_MSB + (hw->bus.lan_id << 12); + value = speed & IXGBE_LINK_SPEED_10GB_FULL ? + IXGBE_CS4227_SPEED_10G : IXGBE_CS4227_SPEED_1G; + status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, + slice, value); + if (status) + goto i2c_err; - /* If internal link mode is XFI, then setup XFI internal link. */ - if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) + slice = IXGBE_CS4227_HOST_SPARE24_LSB + (hw->bus.lan_id << 12); + if (setup_linear) + value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; + else + value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; + status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, + slice, value); + if (status) + goto i2c_err; + + /* Setup XFI internal link. */ status = ixgbe_setup_ixfi_x550em(hw, &speed); + if (status) { + hw_dbg(hw, "setup_ixfi failed with %d\n", status); + return status; + } + } else { + /* Configure internal PHY for KR/KX. */ + status = ixgbe_setup_kr_speed_x550em(hw, speed); + if (status) { + hw_dbg(hw, "setup_kr_speed failed with %d\n", status); + return status; + } + + /* Configure CS4227 LINE side to proper mode. */ + slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12); + if (setup_linear) + value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; + else + value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; + status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, + slice, value); + if (status) + goto i2c_err; + } + return 0; + +i2c_err: + hw_dbg(hw, "combined i2c access failed with %d\n", status); return status; } @@ -1482,7 +1444,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) IXGBE_MDIO_GLOBAL_ALARM_1_INT))) return status; - /* High temperature failure alarm triggered */ + /* Global alarm triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_ALARM_1, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®); @@ -1496,6 +1458,21 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) ixgbe_set_copper_phy_power(hw, false); return IXGBE_ERR_OVERTEMP; } + if (reg & IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT) { + /* device fault alarm triggered */ + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_FAULT_MSG, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + ®); + if (status) + return status; + + /* if device fault was due to high temp alarm handle and exit */ + if (reg == IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP) { + /* power down the PHY in case the PHY FW didn't */ + ixgbe_set_copper_phy_power(hw, false); + return IXGBE_ERR_OVERTEMP; + } + } /* Vendor alarm 2 triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG, @@ -1549,14 +1526,15 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) if (status) return status; - /* Enables high temperature failure alarm */ + /* Enable high temperature failure and global fault alarms */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®); if (status) return status; - reg |= IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN; + reg |= (IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN | + IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN); status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, @@ -1765,6 +1743,12 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return IXGBE_ERR_CONFIG; + if (hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) { + speed = IXGBE_LINK_SPEED_10GB_FULL | + IXGBE_LINK_SPEED_1GB_FULL; + return ixgbe_setup_kr_speed_x550em(hw, speed); + } + /* If link is not up, then there is no setup necessary so return */ status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up); if (status) @@ -1969,7 +1953,6 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw) static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) { struct ixgbe_phy_info *phy = &hw->phy; - ixgbe_link_speed speed; s32 ret_val; hw->mac.ops.set_lan_id(hw); @@ -1982,13 +1965,6 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) * to determine internal PHY mode. */ phy->nw_mng_if_sel = IXGBE_READ_REG(hw, IXGBE_NW_MNG_IF_SEL); - - /* If internal PHY mode is KR, then initialize KR link */ - if (phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) { - speed = IXGBE_LINK_SPEED_10GB_FULL | - IXGBE_LINK_SPEED_1GB_FULL; - ret_val = ixgbe_setup_kr_speed_x550em(hw, speed); - } } /* Identify the PHY or SFP module */ @@ -2020,14 +1996,8 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) /* If internal link mode is XFI, then setup iXFI internal link, * else setup KR now. */ - if (!(phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { - phy->ops.setup_internal_link = - ixgbe_setup_internal_phy_t_x550em; - } else { - speed = IXGBE_LINK_SPEED_10GB_FULL | - IXGBE_LINK_SPEED_1GB_FULL; - ret_val = ixgbe_setup_kr_speed_x550em(hw, speed); - } + phy->ops.setup_internal_link = + ixgbe_setup_internal_phy_t_x550em; /* setup SW LPLU only for first revision */ if (!(IXGBE_FUSES0_REV1 & IXGBE_READ_REG(hw, @@ -2176,13 +2146,14 @@ mac_reset_top: ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear meaning reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST_MASK)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST_MASK) { diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index d3e5f5b37999..c48aef613b0a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -774,7 +774,7 @@ static int ixgbevf_set_coalesce(struct net_device *netdev, adapter->tx_itr_setting = ec->tx_coalesce_usecs; if (adapter->tx_itr_setting == 1) - tx_itr_param = IXGBE_10K_ITR; + tx_itr_param = IXGBE_12K_ITR; else tx_itr_param = adapter->tx_itr_setting; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index ec3147279621..68ec7daa04fd 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -326,8 +326,7 @@ static inline bool ixgbevf_qv_disable(struct ixgbevf_q_vector *q_vector) #define IXGBE_MIN_RSC_ITR 24 #define IXGBE_100K_ITR 40 #define IXGBE_20K_ITR 200 -#define IXGBE_10K_ITR 400 -#define IXGBE_8K_ITR 500 +#define IXGBE_12K_ITR 336 /* Helper macros to switch between ints/sec and what the register uses. * And yes, it's the same math going both ways. The lowest value diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 2955186cd4f6..f098952d4fb4 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -59,7 +59,7 @@ static const char ixgbevf_driver_string[] = #define DRV_VERSION "2.12.1-k" const char ixgbevf_driver_version[] = DRV_VERSION; static char ixgbevf_copyright[] = - "Copyright (c) 2009 - 2012 Intel Corporation."; + "Copyright (c) 2009 - 2015 Intel Corporation."; static const struct ixgbevf_info *ixgbevf_info_tbl[] = { [board_82599_vf] = &ixgbevf_82599_vf_info, @@ -96,12 +96,14 @@ static int debug = -1; module_param(debug, int, 0); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); +static struct workqueue_struct *ixgbevf_wq; + static void ixgbevf_service_event_schedule(struct ixgbevf_adapter *adapter) { if (!test_bit(__IXGBEVF_DOWN, &adapter->state) && !test_bit(__IXGBEVF_REMOVING, &adapter->state) && !test_and_set_bit(__IXGBEVF_SERVICE_SCHED, &adapter->state)) - schedule_work(&adapter->service_task); + queue_work(ixgbevf_wq, &adapter->service_task); } static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter) @@ -1138,7 +1140,7 @@ static void ixgbevf_configure_msix(struct ixgbevf_adapter *adapter) if (q_vector->tx.ring && !q_vector->rx.ring) { /* Tx only vector */ if (adapter->tx_itr_setting == 1) - q_vector->itr = IXGBE_10K_ITR; + q_vector->itr = IXGBE_12K_ITR; else q_vector->itr = adapter->tx_itr_setting; } else { @@ -1196,7 +1198,7 @@ static void ixgbevf_update_itr(struct ixgbevf_q_vector *q_vector, /* simple throttle rate management * 0-20MB/s lowest (100000 ints/s) * 20-100MB/s low (20000 ints/s) - * 100-1249MB/s bulk (8000 ints/s) + * 100-1249MB/s bulk (12000 ints/s) */ /* what was last interrupt timeslice? */ timepassed_us = q_vector->itr >> 2; @@ -1247,7 +1249,7 @@ static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector) break; case bulk_latency: default: - new_itr = IXGBE_8K_ITR; + new_itr = IXGBE_12K_ITR; break; } @@ -1288,7 +1290,7 @@ static irqreturn_t ixgbevf_msix_clean_rings(int irq, void *data) /* EIAM disabled interrupts (on this vector) for us */ if (q_vector->rx.ring || q_vector->tx.ring) - napi_schedule(&q_vector->napi); + napi_schedule_irqoff(&q_vector->napi); return IRQ_HANDLED; } @@ -1332,7 +1334,6 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter) int txr_remaining = adapter->num_tx_queues; int i, j; int rqpv, tqpv; - int err = 0; q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; @@ -1345,7 +1346,7 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter) for (; txr_idx < txr_remaining; v_start++, txr_idx++) map_vector_to_txq(adapter, v_start, txr_idx); - goto out; + return 0; } /* If we don't have enough vectors for a 1-to-1 @@ -1370,8 +1371,7 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter) } } -out: - return err; + return 0; } /** @@ -1469,9 +1469,7 @@ static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter) **/ static int ixgbevf_request_irq(struct ixgbevf_adapter *adapter) { - int err = 0; - - err = ixgbevf_request_msix_irqs(adapter); + int err = ixgbevf_request_msix_irqs(adapter); if (err) hw_dbg(&adapter->hw, "request_irq failed, Error %d\n", err); @@ -1830,7 +1828,7 @@ static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; - int err = -EOPNOTSUPP; + int err; spin_lock_bh(&adapter->mbx_lock); @@ -2046,7 +2044,7 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) ixgbe_mbox_api_11, ixgbe_mbox_api_10, ixgbe_mbox_api_unknown }; - int err = 0, idx = 0; + int err, idx = 0; spin_lock_bh(&adapter->mbx_lock); @@ -2260,10 +2258,8 @@ void ixgbevf_reset(struct ixgbevf_adapter *adapter) } if (is_valid_ether_addr(adapter->hw.mac.addr)) { - memcpy(netdev->dev_addr, adapter->hw.mac.addr, - netdev->addr_len); - memcpy(netdev->perm_addr, adapter->hw.mac.addr, - netdev->addr_len); + ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); + ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } adapter->last_reset = jiffies; @@ -2421,7 +2417,7 @@ err_allocation: static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; - int err = 0; + int err; int vector, v_budget; /* It's easy to be greedy for MSI-X vectors, but it really @@ -2439,26 +2435,21 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) */ adapter->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry), GFP_KERNEL); - if (!adapter->msix_entries) { - err = -ENOMEM; - goto out; - } + if (!adapter->msix_entries) + return -ENOMEM; for (vector = 0; vector < v_budget; vector++) adapter->msix_entries[vector].entry = vector; err = ixgbevf_acquire_msix_vectors(adapter, v_budget); if (err) - goto out; + return err; err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); if (err) - goto out; - - err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); + return err; -out: - return err; + return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); } /** @@ -2659,13 +2650,14 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter) else if (is_zero_ether_addr(adapter->hw.mac.addr)) dev_info(&pdev->dev, "MAC address not assigned by administrator.\n"); - memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); + ether_addr_copy(netdev->dev_addr, hw->mac.addr); } if (!is_valid_ether_addr(netdev->dev_addr)) { dev_info(&pdev->dev, "Assigning random MAC address\n"); eth_hw_addr_random(netdev); - memcpy(hw->mac.addr, netdev->dev_addr, netdev->addr_len); + ether_addr_copy(hw->mac.addr, netdev->dev_addr); + ether_addr_copy(hw->mac.perm_addr, netdev->dev_addr); } /* Enable dynamic interrupt throttling rates */ @@ -3352,6 +3344,7 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, if (skb->ip_summed == CHECKSUM_PARTIAL) { u8 l4_hdr = 0; + __be16 frag_off; switch (first->protocol) { case htons(ETH_P_IP): @@ -3362,13 +3355,16 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, case htons(ETH_P_IPV6): vlan_macip_lens |= skb_network_header_len(skb); l4_hdr = ipv6_hdr(skb)->nexthdr; + if (likely(skb_network_header_len(skb) == + sizeof(struct ipv6hdr))) + break; + ipv6_skip_exthdr(skb, skb_network_offset(skb) + + sizeof(struct ipv6hdr), + &l4_hdr, &frag_off); + if (unlikely(frag_off)) + l4_hdr = NEXTHDR_FRAGMENT; break; default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but proto=%x!\n", - first->protocol); - } break; } @@ -3390,16 +3386,18 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, - "partial checksum but l4 proto=%x!\n", - l4_hdr); + "partial checksum, l3 proto=%x, l4 proto=%x\n", + first->protocol, l4_hdr); } - break; + skb_checksum_help(skb); + goto no_csum; } /* update TX checksum flag */ first->tx_flags |= IXGBE_TX_FLAGS_CSUM; } +no_csum: /* vlan_macip_lens: MACLEN, VLAN tag */ vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; @@ -3695,8 +3693,8 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); - memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); + ether_addr_copy(netdev->dev_addr, addr->sa_data); + ether_addr_copy(hw->mac.addr, addr->sa_data); spin_lock_bh(&adapter->mbx_lock); @@ -4245,15 +4243,17 @@ static struct pci_driver ixgbevf_driver = { **/ static int __init ixgbevf_init_module(void) { - int ret; - pr_info("%s - version %s\n", ixgbevf_driver_string, ixgbevf_driver_version); pr_info("%s\n", ixgbevf_copyright); + ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name); + if (!ixgbevf_wq) { + pr_err("%s: Failed to create workqueue\n", ixgbevf_driver_name); + return -ENOMEM; + } - ret = pci_register_driver(&ixgbevf_driver); - return ret; + return pci_register_driver(&ixgbevf_driver); } module_init(ixgbevf_init_module); @@ -4267,6 +4267,10 @@ module_init(ixgbevf_init_module); static void __exit ixgbevf_exit_module(void) { pci_unregister_driver(&ixgbevf_driver); + if (ixgbevf_wq) { + destroy_workqueue(ixgbevf_wq); + ixgbevf_wq = NULL; + } } #ifdef DEBUG diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 427f3605cbfc..61a98f4c5746 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -117,7 +117,9 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw) msgbuf[0] != (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_NACK)) return IXGBE_ERR_INVALID_MAC_ADDR; - ether_addr_copy(hw->mac.perm_addr, addr); + if (msgbuf[0] == (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_ACK)) + ether_addr_copy(hw->mac.perm_addr, addr); + hw->mac.mc_filter_type = msgbuf[IXGBE_VF_MC_TYPE_WORD]; return 0; diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 4182290fdbcf..4eba2ed53052 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -3257,25 +3257,20 @@ static struct platform_driver mv643xx_eth_driver = { }, }; +static struct platform_driver * const drivers[] = { + &mv643xx_eth_shared_driver, + &mv643xx_eth_driver, +}; + static int __init mv643xx_eth_init_module(void) { - int rc; - - rc = platform_driver_register(&mv643xx_eth_shared_driver); - if (!rc) { - rc = platform_driver_register(&mv643xx_eth_driver); - if (rc) - platform_driver_unregister(&mv643xx_eth_shared_driver); - } - - return rc; + return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); } module_init(mv643xx_eth_init_module); static void __exit mv643xx_eth_cleanup_module(void) { - platform_driver_unregister(&mv643xx_eth_driver); - platform_driver_unregister(&mv643xx_eth_shared_driver); + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } module_exit(mv643xx_eth_cleanup_module); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index e84c7f2634d3..15b1f6bbd92d 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -36,7 +36,7 @@ /* Registers */ #define MVNETA_RXQ_CONFIG_REG(q) (0x1400 + ((q) << 2)) -#define MVNETA_RXQ_HW_BUF_ALLOC BIT(1) +#define MVNETA_RXQ_HW_BUF_ALLOC BIT(0) #define MVNETA_RXQ_PKT_OFFSET_ALL_MASK (0xf << 8) #define MVNETA_RXQ_PKT_OFFSET_MASK(offs) ((offs) << 8) #define MVNETA_RXQ_THRESHOLD_REG(q) (0x14c0 + ((q) << 2)) @@ -62,6 +62,7 @@ #define MVNETA_WIN_SIZE(w) (0x2204 + ((w) << 3)) #define MVNETA_WIN_REMAP(w) (0x2280 + ((w) << 2)) #define MVNETA_BASE_ADDR_ENABLE 0x2290 +#define MVNETA_ACCESS_PROTECT_ENABLE 0x2294 #define MVNETA_PORT_CONFIG 0x2400 #define MVNETA_UNI_PROMISC_MODE BIT(0) #define MVNETA_DEF_RXQ(q) ((q) << 1) @@ -109,9 +110,17 @@ #define MVNETA_CPU_MAP(cpu) (0x2540 + ((cpu) << 2)) #define MVNETA_CPU_RXQ_ACCESS_ALL_MASK 0x000000ff #define MVNETA_CPU_TXQ_ACCESS_ALL_MASK 0x0000ff00 +#define MVNETA_CPU_RXQ_ACCESS(rxq) BIT(rxq) +#define MVNETA_CPU_TXQ_ACCESS(txq) BIT(txq + 8) #define MVNETA_RXQ_TIME_COAL_REG(q) (0x2580 + ((q) << 2)) -/* Exception Interrupt Port/Queue Cause register */ +/* Exception Interrupt Port/Queue Cause register + * + * Their behavior depend of the mapping done using the PCPX2Q + * registers. For a given CPU if the bit associated to a queue is not + * set, then for the register a read from this CPU will always return + * 0 and a write won't do anything + */ #define MVNETA_INTR_NEW_CAUSE 0x25a0 #define MVNETA_INTR_NEW_MASK 0x25a4 @@ -159,7 +168,7 @@ #define MVNETA_INTR_ENABLE 0x25b8 #define MVNETA_TXQ_INTR_ENABLE_ALL_MASK 0x0000ff00 -#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0xff000000 // note: neta says it's 0x000000FF +#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0x000000ff #define MVNETA_RXQ_CMD 0x2680 #define MVNETA_RXQ_DISABLE_SHIFT 8 @@ -242,6 +251,7 @@ #define MVNETA_VLAN_TAG_LEN 4 #define MVNETA_CPU_D_CACHE_LINE_SIZE 32 +#define MVNETA_TX_CSUM_DEF_SIZE 1600 #define MVNETA_TX_CSUM_MAX_SIZE 9800 #define MVNETA_ACC_MODE_EXT 1 @@ -252,6 +262,11 @@ #define MVNETA_TX_MTU_MAX 0x3ffff +/* The RSS lookup table actually has 256 entries but we do not use + * them yet + */ +#define MVNETA_RSS_LU_TABLE_SIZE 1 + /* TSO header size */ #define TSO_HEADER_SIZE 128 @@ -354,6 +369,7 @@ struct mvneta_port { struct mvneta_tx_queue *txqs; struct net_device *dev; struct notifier_block cpu_notifier; + int rxq_def; /* Core clock */ struct clk *clk; @@ -369,9 +385,11 @@ struct mvneta_port { unsigned int duplex; unsigned int speed; unsigned int tx_csum_limit; - int use_inband_status:1; + unsigned int use_inband_status:1; u64 ethtool_stats[ARRAY_SIZE(mvneta_statistics)]; + + u32 indir[MVNETA_RSS_LU_TABLE_SIZE]; }; /* The mvneta_tx_desc and mvneta_rx_desc structures describe the @@ -497,6 +515,9 @@ struct mvneta_tx_queue { /* DMA address of TSO headers */ dma_addr_t tso_hdrs_phys; + + /* Affinity mask for CPUs*/ + cpumask_t affinity_mask; }; struct mvneta_rx_queue { @@ -817,7 +838,13 @@ static void mvneta_port_up(struct mvneta_port *pp) mvreg_write(pp, MVNETA_TXQ_CMD, q_map); /* Enable all initialized RXQs. */ - mvreg_write(pp, MVNETA_RXQ_CMD, BIT(rxq_def)); + for (queue = 0; queue < rxq_number; queue++) { + struct mvneta_rx_queue *rxq = &pp->rxqs[queue]; + + if (rxq->descs != NULL) + q_map |= (1 << queue); + } + mvreg_write(pp, MVNETA_RXQ_CMD, q_map); } /* Stop the Ethernet port activity */ @@ -971,6 +998,44 @@ static void mvneta_set_other_mcast_table(struct mvneta_port *pp, int queue) mvreg_write(pp, MVNETA_DA_FILT_OTH_MCAST + offset, val); } +static void mvneta_set_autoneg(struct mvneta_port *pp, int enable) +{ + u32 val; + + if (enable) { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~(MVNETA_GMAC_FORCE_LINK_PASS | + MVNETA_GMAC_FORCE_LINK_DOWN | + MVNETA_GMAC_AN_FLOW_CTRL_EN); + val |= MVNETA_GMAC_INBAND_AN_ENABLE | + MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_DUPLEX_EN; + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); + + val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); + val |= MVNETA_GMAC_1MS_CLOCK_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); + + val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); + val |= MVNETA_GMAC2_INBAND_AN_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); + } else { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE | + MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_DUPLEX_EN); + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); + + val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); + val &= ~MVNETA_GMAC_1MS_CLOCK_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); + + val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); + val &= ~MVNETA_GMAC2_INBAND_AN_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); + } +} + /* This method sets defaults to the NETA port: * Clears interrupt Cause and Mask registers. * Clears all MAC tables. @@ -985,6 +1050,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp) int cpu; int queue; u32 val; + int max_cpu = num_present_cpus(); /* Clear all Cause registers */ mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0); @@ -1000,13 +1066,33 @@ static void mvneta_defaults_set(struct mvneta_port *pp) /* Enable MBUS Retry bit16 */ mvreg_write(pp, MVNETA_MBUS_RETRY, 0x20); - /* Set CPU queue access map - all CPUs have access to all RX - * queues and to all TX queues + /* Set CPU queue access map. CPUs are assigned to the RX and + * TX queues modulo their number. If there is only one TX + * queue then it is assigned to the CPU associated to the + * default RX queue. */ - for_each_present_cpu(cpu) - mvreg_write(pp, MVNETA_CPU_MAP(cpu), - (MVNETA_CPU_RXQ_ACCESS_ALL_MASK | - MVNETA_CPU_TXQ_ACCESS_ALL_MASK)); + for_each_present_cpu(cpu) { + int rxq_map = 0, txq_map = 0; + int rxq, txq; + + for (rxq = 0; rxq < rxq_number; rxq++) + if ((rxq % max_cpu) == cpu) + rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq); + + for (txq = 0; txq < txq_number; txq++) + if ((txq % max_cpu) == cpu) + txq_map |= MVNETA_CPU_TXQ_ACCESS(txq); + + /* With only one TX queue we configure a special case + * which will allow to get all the irq on a single + * CPU + */ + if (txq_number == 1) + txq_map = (cpu == pp->rxq_def) ? + MVNETA_CPU_TXQ_ACCESS(1) : 0; + + mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map); + } /* Reset RX and TX DMAs */ mvreg_write(pp, MVNETA_PORT_RX_RESET, MVNETA_PORT_RX_DMA_RESET); @@ -1027,7 +1113,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp) mvreg_write(pp, MVNETA_ACC_MODE, val); /* Update val of portCfg register accordingly with all RxQueue types */ - val = MVNETA_PORT_CONFIG_DEFL_VALUE(rxq_def); + val = MVNETA_PORT_CONFIG_DEFL_VALUE(pp->rxq_def); mvreg_write(pp, MVNETA_PORT_CONFIG, val); val = 0; @@ -1056,26 +1142,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp) val &= ~MVNETA_PHY_POLLING_ENABLE; mvreg_write(pp, MVNETA_UNIT_CONTROL, val); - if (pp->use_inband_status) { - val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); - val &= ~(MVNETA_GMAC_FORCE_LINK_PASS | - MVNETA_GMAC_FORCE_LINK_DOWN | - MVNETA_GMAC_AN_FLOW_CTRL_EN); - val |= MVNETA_GMAC_INBAND_AN_ENABLE | - MVNETA_GMAC_AN_SPEED_EN | - MVNETA_GMAC_AN_DUPLEX_EN; - mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); - val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); - val |= MVNETA_GMAC_1MS_CLOCK_ENABLE; - mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); - } else { - val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); - val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE | - MVNETA_GMAC_AN_SPEED_EN | - MVNETA_GMAC_AN_DUPLEX_EN); - mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); - } - + mvneta_set_autoneg(pp, pp->use_inband_status); mvneta_set_ucast_table(pp, -1); mvneta_set_special_mcast_table(pp, -1); mvneta_set_other_mcast_table(pp, -1); @@ -1579,12 +1646,16 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, } skb = build_skb(data, pp->frag_size > PAGE_SIZE ? 0 : pp->frag_size); - if (!skb) - goto err_drop_frame; + /* After refill old buffer has to be unmapped regardless + * the skb is successfully built or not. + */ dma_unmap_single(dev->dev.parent, phys_addr, MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE); + if (!skb) + goto err_drop_frame; + rcvd_pkts++; rcvd_bytes += rx_bytes; @@ -2076,19 +2147,19 @@ static void mvneta_set_rx_mode(struct net_device *dev) if (dev->flags & IFF_PROMISC) { /* Accept all: Multicast + Unicast */ mvneta_rx_unicast_promisc_set(pp, 1); - mvneta_set_ucast_table(pp, rxq_def); - mvneta_set_special_mcast_table(pp, rxq_def); - mvneta_set_other_mcast_table(pp, rxq_def); + mvneta_set_ucast_table(pp, pp->rxq_def); + mvneta_set_special_mcast_table(pp, pp->rxq_def); + mvneta_set_other_mcast_table(pp, pp->rxq_def); } else { /* Accept single Unicast */ mvneta_rx_unicast_promisc_set(pp, 0); mvneta_set_ucast_table(pp, -1); - mvneta_mac_addr_set(pp, dev->dev_addr, rxq_def); + mvneta_mac_addr_set(pp, dev->dev_addr, pp->rxq_def); if (dev->flags & IFF_ALLMULTI) { /* Accept all multicast */ - mvneta_set_special_mcast_table(pp, rxq_def); - mvneta_set_other_mcast_table(pp, rxq_def); + mvneta_set_special_mcast_table(pp, pp->rxq_def); + mvneta_set_other_mcast_table(pp, pp->rxq_def); } else { /* Accept only initialized multicast */ mvneta_set_special_mcast_table(pp, -1); @@ -2097,7 +2168,7 @@ static void mvneta_set_rx_mode(struct net_device *dev) if (!netdev_mc_empty(dev)) { netdev_for_each_mc_addr(ha, dev) { mvneta_mcast_addr_set(pp, ha->addr, - rxq_def); + pp->rxq_def); } } } @@ -2148,6 +2219,7 @@ static int mvneta_poll(struct napi_struct *napi, int budget) { int rx_done = 0; u32 cause_rx_tx; + int rx_queue; struct mvneta_port *pp = netdev_priv(napi->dev); struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports); @@ -2179,8 +2251,15 @@ static int mvneta_poll(struct napi_struct *napi, int budget) /* For the case where the last mvneta_poll did not process all * RX packets */ + rx_queue = fls(((cause_rx_tx >> 8) & 0xff)); + cause_rx_tx |= port->cause_rx_tx; - rx_done = mvneta_rx(pp, budget, &pp->rxqs[rxq_def]); + + if (rx_queue) { + rx_queue = rx_queue - 1; + rx_done = mvneta_rx(pp, budget, &pp->rxqs[rx_queue]); + } + budget -= rx_done; if (budget > 0) { @@ -2297,6 +2376,8 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp, static int mvneta_txq_init(struct mvneta_port *pp, struct mvneta_tx_queue *txq) { + int cpu; + txq->size = pp->tx_ring_size; /* A queue must always have room for at least one skb. @@ -2349,6 +2430,14 @@ static int mvneta_txq_init(struct mvneta_port *pp, } mvneta_tx_done_pkts_coal_set(pp, txq, txq->done_pkts_coal); + /* Setup XPS mapping */ + if (txq_number > 1) + cpu = txq->id % num_present_cpus(); + else + cpu = pp->rxq_def % num_present_cpus(); + cpumask_set_cpu(cpu, &txq->affinity_mask); + netif_set_xps_queue(pp->dev, &txq->affinity_mask, txq->id); + return 0; } @@ -2393,19 +2482,27 @@ static void mvneta_cleanup_txqs(struct mvneta_port *pp) /* Cleanup all Rx queues */ static void mvneta_cleanup_rxqs(struct mvneta_port *pp) { - mvneta_rxq_deinit(pp, &pp->rxqs[rxq_def]); + int queue; + + for (queue = 0; queue < txq_number; queue++) + mvneta_rxq_deinit(pp, &pp->rxqs[queue]); } /* Init all Rx queues */ static int mvneta_setup_rxqs(struct mvneta_port *pp) { - int err = mvneta_rxq_init(pp, &pp->rxqs[rxq_def]); - if (err) { - netdev_err(pp->dev, "%s: can't create rxq=%d\n", - __func__, rxq_def); - mvneta_cleanup_rxqs(pp); - return err; + int queue; + + for (queue = 0; queue < rxq_number; queue++) { + int err = mvneta_rxq_init(pp, &pp->rxqs[queue]); + + if (err) { + netdev_err(pp->dev, "%s: can't create rxq=%d\n", + __func__, queue); + mvneta_cleanup_rxqs(pp); + return err; + } } return 0; @@ -2429,6 +2526,31 @@ static int mvneta_setup_txqs(struct mvneta_port *pp) return 0; } +static void mvneta_percpu_unmask_interrupt(void *arg) +{ + struct mvneta_port *pp = arg; + + /* All the queue are unmasked, but actually only the ones + * maped to this CPU will be unmasked + */ + mvreg_write(pp, MVNETA_INTR_NEW_MASK, + MVNETA_RX_INTR_MASK_ALL | + MVNETA_TX_INTR_MASK_ALL | + MVNETA_MISCINTR_INTR_MASK); +} + +static void mvneta_percpu_mask_interrupt(void *arg) +{ + struct mvneta_port *pp = arg; + + /* All the queue are masked, but actually only the ones + * maped to this CPU will be masked + */ + mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0); + mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0); + mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0); +} + static void mvneta_start_dev(struct mvneta_port *pp) { unsigned int cpu; @@ -2446,11 +2568,10 @@ static void mvneta_start_dev(struct mvneta_port *pp) napi_enable(&port->napi); } - /* Unmask interrupts */ - mvreg_write(pp, MVNETA_INTR_NEW_MASK, - MVNETA_RX_INTR_MASK(rxq_number) | - MVNETA_TX_INTR_MASK(txq_number) | - MVNETA_MISCINTR_INTR_MASK); + /* Unmask interrupts. It has to be done from each CPU */ + for_each_online_cpu(cpu) + smp_call_function_single(cpu, mvneta_percpu_unmask_interrupt, + pp, true); mvreg_write(pp, MVNETA_INTR_MISC_MASK, MVNETA_CAUSE_PHY_STATUS_CHANGE | MVNETA_CAUSE_LINK_CHANGE | @@ -2609,7 +2730,7 @@ static int mvneta_set_mac_addr(struct net_device *dev, void *addr) mvneta_mac_addr_set(pp, dev->dev_addr, -1); /* Set new addr in hw */ - mvneta_mac_addr_set(pp, sockaddr->sa_data, rxq_def); + mvneta_mac_addr_set(pp, sockaddr->sa_data, pp->rxq_def); eth_commit_mac_addr_change(dev, addr); return 0; @@ -2726,22 +2847,45 @@ static void mvneta_percpu_disable(void *arg) static void mvneta_percpu_elect(struct mvneta_port *pp) { - int online_cpu_idx, cpu, i = 0; + int online_cpu_idx, max_cpu, cpu, i = 0; - online_cpu_idx = rxq_def % num_online_cpus(); + online_cpu_idx = pp->rxq_def % num_online_cpus(); + max_cpu = num_present_cpus(); for_each_online_cpu(cpu) { + int rxq_map = 0, txq_map = 0; + int rxq; + + for (rxq = 0; rxq < rxq_number; rxq++) + if ((rxq % max_cpu) == cpu) + rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq); + if (i == online_cpu_idx) - /* Enable per-CPU interrupt on the one CPU we - * just elected + /* Map the default receive queue queue to the + * elected CPU */ - smp_call_function_single(cpu, mvneta_percpu_enable, - pp, true); + rxq_map |= MVNETA_CPU_RXQ_ACCESS(pp->rxq_def); + + /* We update the TX queue map only if we have one + * queue. In this case we associate the TX queue to + * the CPU bound to the default RX queue + */ + if (txq_number == 1) + txq_map = (i == online_cpu_idx) ? + MVNETA_CPU_TXQ_ACCESS(1) : 0; else - /* Disable per-CPU interrupt on all the other CPU */ - smp_call_function_single(cpu, mvneta_percpu_disable, - pp, true); + txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) & + MVNETA_CPU_TXQ_ACCESS_ALL_MASK; + + mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map); + + /* Update the interrupt mask on each CPU according the + * new mapping + */ + smp_call_function_single(cpu, mvneta_percpu_unmask_interrupt, + pp, true); i++; + } }; @@ -2776,12 +2920,22 @@ static int mvneta_percpu_notifier(struct notifier_block *nfb, mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0); napi_enable(&port->napi); + + /* Enable per-CPU interrupts on the CPU that is + * brought up. + */ + smp_call_function_single(cpu, mvneta_percpu_enable, + pp, true); + /* Enable per-CPU interrupt on the one CPU we care * about. */ mvneta_percpu_elect(pp); - /* Unmask all ethernet port interrupts */ + /* Unmask all ethernet port interrupts, as this + * notifier is called for each CPU then the CPU to + * Queue mapping is applied + */ mvreg_write(pp, MVNETA_INTR_NEW_MASK, MVNETA_RX_INTR_MASK(rxq_number) | MVNETA_TX_INTR_MASK(txq_number) | @@ -2832,7 +2986,7 @@ static int mvneta_percpu_notifier(struct notifier_block *nfb, static int mvneta_open(struct net_device *dev) { struct mvneta_port *pp = netdev_priv(dev); - int ret; + int ret, cpu; pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu); pp->frag_size = SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(pp->pkt_size)) + @@ -2862,8 +3016,13 @@ static int mvneta_open(struct net_device *dev) */ mvneta_percpu_disable(pp); - /* Elect a CPU to handle our RX queue interrupt */ - mvneta_percpu_elect(pp); + /* Enable per-CPU interrupt on all the CPU to handle our RX + * queue interrupts + */ + for_each_online_cpu(cpu) + smp_call_function_single(cpu, mvneta_percpu_enable, + pp, true); + /* Register a CPU notifier to handle the case where our CPU * might be taken offline. @@ -2937,10 +3096,43 @@ int mvneta_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct mvneta_port *pp = netdev_priv(dev); + struct phy_device *phydev = pp->phy_dev; - if (!pp->phy_dev) + if (!phydev) return -ENODEV; + if ((cmd->autoneg == AUTONEG_ENABLE) != pp->use_inband_status) { + u32 val; + + mvneta_set_autoneg(pp, cmd->autoneg == AUTONEG_ENABLE); + + if (cmd->autoneg == AUTONEG_DISABLE) { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED | + MVNETA_GMAC_CONFIG_GMII_SPEED | + MVNETA_GMAC_CONFIG_FULL_DUPLEX); + + if (phydev->duplex) + val |= MVNETA_GMAC_CONFIG_FULL_DUPLEX; + + if (phydev->speed == SPEED_1000) + val |= MVNETA_GMAC_CONFIG_GMII_SPEED; + else if (phydev->speed == SPEED_100) + val |= MVNETA_GMAC_CONFIG_MII_SPEED; + + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); + } + + pp->use_inband_status = (cmd->autoneg == AUTONEG_ENABLE); + netdev_info(pp->dev, "autoneg status set to %i\n", + pp->use_inband_status); + + if (netif_running(dev)) { + mvneta_port_down(pp); + mvneta_port_up(pp); + } + } + return phy_ethtool_sset(pp->phy_dev, cmd); } @@ -3092,6 +3284,106 @@ static int mvneta_ethtool_get_sset_count(struct net_device *dev, int sset) return -EOPNOTSUPP; } +static u32 mvneta_ethtool_get_rxfh_indir_size(struct net_device *dev) +{ + return MVNETA_RSS_LU_TABLE_SIZE; +} + +static int mvneta_ethtool_get_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *info, + u32 *rules __always_unused) +{ + switch (info->cmd) { + case ETHTOOL_GRXRINGS: + info->data = rxq_number; + return 0; + case ETHTOOL_GRXFH: + return -EOPNOTSUPP; + default: + return -EOPNOTSUPP; + } +} + +static int mvneta_config_rss(struct mvneta_port *pp) +{ + int cpu; + u32 val; + + netif_tx_stop_all_queues(pp->dev); + + for_each_online_cpu(cpu) + smp_call_function_single(cpu, mvneta_percpu_mask_interrupt, + pp, true); + + /* We have to synchronise on the napi of each CPU */ + for_each_online_cpu(cpu) { + struct mvneta_pcpu_port *pcpu_port = + per_cpu_ptr(pp->ports, cpu); + + napi_synchronize(&pcpu_port->napi); + napi_disable(&pcpu_port->napi); + } + + pp->rxq_def = pp->indir[0]; + + /* Update unicast mapping */ + mvneta_set_rx_mode(pp->dev); + + /* Update val of portCfg register accordingly with all RxQueue types */ + val = MVNETA_PORT_CONFIG_DEFL_VALUE(pp->rxq_def); + mvreg_write(pp, MVNETA_PORT_CONFIG, val); + + /* Update the elected CPU matching the new rxq_def */ + mvneta_percpu_elect(pp); + + /* We have to synchronise on the napi of each CPU */ + for_each_online_cpu(cpu) { + struct mvneta_pcpu_port *pcpu_port = + per_cpu_ptr(pp->ports, cpu); + + napi_enable(&pcpu_port->napi); + } + + netif_tx_start_all_queues(pp->dev); + + return 0; +} + +static int mvneta_ethtool_set_rxfh(struct net_device *dev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct mvneta_port *pp = netdev_priv(dev); + /* We require at least one supported parameter to be changed + * and no change in any of the unsupported parameters + */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + + if (!indir) + return 0; + + memcpy(pp->indir, indir, MVNETA_RSS_LU_TABLE_SIZE); + + return mvneta_config_rss(pp); +} + +static int mvneta_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct mvneta_port *pp = netdev_priv(dev); + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + if (!indir) + return 0; + + memcpy(indir, pp->indir, MVNETA_RSS_LU_TABLE_SIZE); + + return 0; +} + static const struct net_device_ops mvneta_netdev_ops = { .ndo_open = mvneta_open, .ndo_stop = mvneta_stop, @@ -3116,6 +3408,10 @@ const struct ethtool_ops mvneta_eth_tool_ops = { .get_strings = mvneta_ethtool_get_strings, .get_ethtool_stats = mvneta_ethtool_get_stats, .get_sset_count = mvneta_ethtool_get_sset_count, + .get_rxfh_indir_size = mvneta_ethtool_get_rxfh_indir_size, + .get_rxnfc = mvneta_ethtool_get_rxnfc, + .get_rxfh = mvneta_ethtool_get_rxfh, + .set_rxfh = mvneta_ethtool_set_rxfh, }; /* Initialize hw */ @@ -3191,6 +3487,7 @@ static void mvneta_conf_mbus_windows(struct mvneta_port *pp, } mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable); + mvreg_write(pp, MVNETA_ACCESS_PROTECT_ENABLE, win_protect); } /* Power up the port */ @@ -3223,9 +3520,6 @@ static int mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) return -EINVAL; } - if (pp->use_inband_status) - ctrl |= MVNETA_GMAC2_INBAND_AN_ENABLE; - /* Cancel Port Reset */ ctrl &= ~MVNETA_GMAC2_PORT_RESET; mvreg_write(pp, MVNETA_GMAC_CTRL_2, ctrl); @@ -3250,6 +3544,7 @@ static int mvneta_probe(struct platform_device *pdev) char hw_mac_addr[ETH_ALEN]; const char *mac_from; const char *managed; + int tx_csum_limit; int phy_mode; int err; int cpu; @@ -3306,6 +3601,10 @@ static int mvneta_probe(struct platform_device *pdev) strcmp(managed, "in-band-status") == 0); pp->cpu_notifier.notifier_call = mvneta_percpu_notifier; + pp->rxq_def = rxq_def; + + pp->indir[0] = rxq_def; + pp->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pp->clk)) { err = PTR_ERR(pp->clk); @@ -3350,8 +3649,21 @@ static int mvneta_probe(struct platform_device *pdev) } } - if (of_device_is_compatible(dn, "marvell,armada-370-neta")) - pp->tx_csum_limit = 1600; + if (!of_property_read_u32(dn, "tx-csum-limit", &tx_csum_limit)) { + if (tx_csum_limit < 0 || + tx_csum_limit > MVNETA_TX_CSUM_MAX_SIZE) { + tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE; + dev_info(&pdev->dev, + "Wrong TX csum limit in DT, set to %dB\n", + MVNETA_TX_CSUM_DEF_SIZE); + } + } else if (of_device_is_compatible(dn, "marvell,armada-370-neta")) { + tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE; + } else { + tx_csum_limit = MVNETA_TX_CSUM_MAX_SIZE; + } + + pp->tx_csum_limit = tx_csum_limit; pp->tx_ring_size = MVNETA_MAX_TXD; pp->rx_ring_size = MVNETA_MAX_RXD; diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 603d1c3d3b2e..4696053165f8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -151,6 +151,17 @@ void mlx4_gen_slave_eqe(struct work_struct *work) eqe = next_slave_event_eqe(slave_eq)) { slave = eqe->slave_id; + if (eqe->type == MLX4_EVENT_TYPE_PORT_CHANGE && + eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN && + mlx4_is_bonded(dev)) { + struct mlx4_port_cap port_cap; + + if (!mlx4_QUERY_PORT(dev, 1, &port_cap) && port_cap.link_state) + goto consume; + + if (!mlx4_QUERY_PORT(dev, 2, &port_cap) && port_cap.link_state) + goto consume; + } /* All active slaves need to receive the event */ if (slave == ALL_SLAVES) { for (i = 0; i <= dev->persist->num_vfs; i++) { @@ -174,6 +185,7 @@ void mlx4_gen_slave_eqe(struct work_struct *work) mlx4_warn(dev, "Failed to generate event for slave %d\n", slave); } +consume: ++slave_eq->cons; } } @@ -594,7 +606,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) break; for (i = 0; i < dev->persist->num_vfs + 1; i++) { - if (!test_bit(i, slaves_port.slaves)) + int reported_port = mlx4_is_bonded(dev) ? 1 : mlx4_phys_to_slave_port(dev, i, port); + + if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev)) continue; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { if (i == mlx4_master_func_num(dev)) @@ -606,7 +620,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) eqe->event.port_change.port = cpu_to_be32( (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF) - | (mlx4_phys_to_slave_port(dev, i, port) << 28)); + | (reported_port << 28)); mlx4_slave_event(dev, i, eqe); } } else { /* IB port */ @@ -636,7 +650,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) for (i = 0; i < dev->persist->num_vfs + 1; i++) { - if (!test_bit(i, slaves_port.slaves)) + int reported_port = mlx4_is_bonded(dev) ? 1 : mlx4_phys_to_slave_port(dev, i, port); + + if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev)) continue; if (i == mlx4_master_func_num(dev)) continue; @@ -645,7 +661,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) eqe->event.port_change.port = cpu_to_be32( (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF) - | (mlx4_phys_to_slave_port(dev, i, port) << 28)); + | (reported_port << 28)); mlx4_slave_event(dev, i, eqe); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 90db94e83fde..2c2baab9d880 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1104,6 +1104,7 @@ int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_c goto out; MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET); + port_cap->link_state = (field & 0x80) >> 7; port_cap->supported_port_types = field & 3; port_cap->suggested_type = (field >> 3) & 1; port_cap->default_sense = (field >> 4) & 1; @@ -1310,6 +1311,15 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, port_type |= MLX4_PORT_LINK_UP_MASK; else if (IFLA_VF_LINK_STATE_DISABLE == admin_link_state) port_type &= ~MLX4_PORT_LINK_UP_MASK; + else if (IFLA_VF_LINK_STATE_AUTO == admin_link_state && mlx4_is_bonded(dev)) { + int other_port = (port == 1) ? 2 : 1; + struct mlx4_port_cap port_cap; + + err = mlx4_QUERY_PORT(dev, other_port, &port_cap); + if (err) + goto out; + port_type |= (port_cap.link_state << 7); + } MLX4_PUT(outbox->buf, port_type, QUERY_PORT_SUPPORTED_TYPE_OFFSET); @@ -1325,7 +1335,7 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, short_field, QUERY_PORT_CUR_MAX_PKEY_OFFSET); } - +out: return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 08de5555c2f4..7ea258af636a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -44,6 +44,7 @@ struct mlx4_mod_stat_cfg { }; struct mlx4_port_cap { + u8 link_state; u8 supported_port_types; u8 suggested_type; u8 default_sense; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 31c491e02e69..f1b6d219e445 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1221,6 +1221,76 @@ err_set_port: return err ? err : count; } +/* bond for multi-function device */ +#define MAX_MF_BOND_ALLOWED_SLAVES 63 +static int mlx4_mf_bond(struct mlx4_dev *dev) +{ + int err = 0; + struct mlx4_slaves_pport slaves_port1; + struct mlx4_slaves_pport slaves_port2; + DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX); + + slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1); + slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2); + bitmap_and(slaves_port_1_2, + slaves_port1.slaves, slaves_port2.slaves, + dev->persist->num_vfs + 1); + + /* only single port vfs are allowed */ + if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) { + mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n"); + return -EINVAL; + } + + /* limit on maximum allowed VFs */ + if ((bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) + + bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1)) > + MAX_MF_BOND_ALLOWED_SLAVES) + return -EINVAL; + + if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) { + mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n"); + return -EINVAL; + } + + err = mlx4_bond_mac_table(dev); + if (err) + return err; + err = mlx4_bond_vlan_table(dev); + if (err) + goto err1; + err = mlx4_bond_fs_rules(dev); + if (err) + goto err2; + + return 0; +err2: + (void)mlx4_unbond_vlan_table(dev); +err1: + (void)mlx4_unbond_mac_table(dev); + return err; +} + +static int mlx4_mf_unbond(struct mlx4_dev *dev) +{ + int ret, ret1; + + ret = mlx4_unbond_fs_rules(dev); + if (ret) + mlx4_warn(dev, "multifunction unbond for flow rules failedi (%d)\n", ret); + ret1 = mlx4_unbond_mac_table(dev); + if (ret1) { + mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1); + ret = ret1; + } + ret1 = mlx4_unbond_vlan_table(dev); + if (ret1) { + mlx4_warn(dev, "multifunction unbond for VLAN table failed (%d)\n", ret1); + ret = ret1; + } + return ret; +} + int mlx4_bond(struct mlx4_dev *dev) { int ret = 0; @@ -1228,16 +1298,23 @@ int mlx4_bond(struct mlx4_dev *dev) mutex_lock(&priv->bond_mutex); - if (!mlx4_is_bonded(dev)) + if (!mlx4_is_bonded(dev)) { ret = mlx4_do_bond(dev, true); - else - ret = 0; + if (ret) + mlx4_err(dev, "Failed to bond device: %d\n", ret); + if (!ret && mlx4_is_master(dev)) { + ret = mlx4_mf_bond(dev); + if (ret) { + mlx4_err(dev, "bond for multifunction failed\n"); + mlx4_do_bond(dev, false); + } + } + } mutex_unlock(&priv->bond_mutex); - if (ret) - mlx4_err(dev, "Failed to bond device: %d\n", ret); - else + if (!ret) mlx4_dbg(dev, "Device is bonded\n"); + return ret; } EXPORT_SYMBOL_GPL(mlx4_bond); @@ -1249,14 +1326,24 @@ int mlx4_unbond(struct mlx4_dev *dev) mutex_lock(&priv->bond_mutex); - if (mlx4_is_bonded(dev)) + if (mlx4_is_bonded(dev)) { + int ret2 = 0; + ret = mlx4_do_bond(dev, false); + if (ret) + mlx4_err(dev, "Failed to unbond device: %d\n", ret); + if (mlx4_is_master(dev)) + ret2 = mlx4_mf_unbond(dev); + if (ret2) { + mlx4_warn(dev, "Failed to unbond device for multifunction (%d)\n", ret2); + ret = ret2; + } + } mutex_unlock(&priv->bond_mutex); - if (ret) - mlx4_err(dev, "Failed to unbond device: %d\n", ret); - else + if (!ret) mlx4_dbg(dev, "Device is unbonded\n"); + return ret; } EXPORT_SYMBOL_GPL(mlx4_unbond); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index e1cf9036af22..2404c22ad2b2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -736,6 +736,7 @@ struct mlx4_catas_err { struct mlx4_mac_table { __be64 entries[MLX4_MAX_MAC_NUM]; int refs[MLX4_MAX_MAC_NUM]; + bool is_dup[MLX4_MAX_MAC_NUM]; struct mutex mutex; int total; int max; @@ -758,6 +759,7 @@ struct mlx4_roce_gid_table { struct mlx4_vlan_table { __be32 entries[MLX4_MAX_VLAN_NUM]; int refs[MLX4_MAX_VLAN_NUM]; + int is_dup[MLX4_MAX_VLAN_NUM]; struct mutex mutex; int total; int max; @@ -1225,6 +1227,10 @@ void mlx4_init_roce_gid_table(struct mlx4_dev *dev, struct mlx4_roce_gid_table *table); void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); +int mlx4_bond_vlan_table(struct mlx4_dev *dev); +int mlx4_unbond_vlan_table(struct mlx4_dev *dev); +int mlx4_bond_mac_table(struct mlx4_dev *dev); +int mlx4_unbond_mac_table(struct mlx4_dev *dev); int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz); /* resource tracker functions*/ @@ -1385,6 +1391,8 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); int mlx4_config_mad_demux(struct mlx4_dev *dev); int mlx4_do_bond(struct mlx4_dev *dev, bool enable); +int mlx4_bond_fs_rules(struct mlx4_dev *dev); +int mlx4_unbond_fs_rules(struct mlx4_dev *dev); enum mlx4_zone_flags { MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO = 1UL << 0, diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index c2b21313dba7..f2550425c251 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -61,6 +61,7 @@ void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table) for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { table->entries[i] = 0; table->refs[i] = 0; + table->is_dup[i] = false; } table->max = 1 << dev->caps.log_num_macs; table->total = 0; @@ -74,6 +75,7 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table) for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { table->entries[i] = 0; table->refs[i] = 0; + table->is_dup[i] = false; } table->max = (1 << dev->caps.log_num_vlans) - MLX4_VLAN_REGULAR; table->total = 0; @@ -159,21 +161,94 @@ int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx) } EXPORT_SYMBOL_GPL(mlx4_find_cached_mac); +static bool mlx4_need_mf_bond(struct mlx4_dev *dev) +{ + int i, num_eth_ports = 0; + + if (!mlx4_is_mfunc(dev)) + return false; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) + ++num_eth_ports; + + return (num_eth_ports == 2) ? true : false; +} + int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) { struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; struct mlx4_mac_table *table = &info->mac_table; int i, err = 0; int free = -1; + int free_for_dup = -1; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table; + bool need_mf_bond = mlx4_need_mf_bond(dev); + bool can_mf_bond = true; + + mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d %s duplicate\n", + (unsigned long long)mac, port, + dup ? "with" : "without"); + + if (need_mf_bond) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } + + if (need_mf_bond) { + int index_at_port = -1; + int index_at_dup_port = -1; - mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d\n", - (unsigned long long) mac, port); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i])))) + index_at_port = i; + if (((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(dup_table->entries[i])))) + index_at_dup_port = i; + } + + /* check that same mac is not in the tables at different indices */ + if ((index_at_port != index_at_dup_port) && + (index_at_port >= 0) && + (index_at_dup_port >= 0)) + can_mf_bond = false; + + /* If the mac is already in the primary table, the slot must be + * available in the duplicate table as well. + */ + if (index_at_port >= 0 && index_at_dup_port < 0 && + dup_table->refs[index_at_port]) { + can_mf_bond = false; + } + /* If the mac is already in the duplicate table, check that the + * corresponding index is not occupied in the primary table, or + * the primary table already contains the mac at the same index. + * Otherwise, you cannot bond (primary contains a different mac + * at that index). + */ + if (index_at_dup_port >= 0) { + if (!table->refs[index_at_dup_port] || + ((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(table->entries[index_at_dup_port])))) + free_for_dup = index_at_dup_port; + else + can_mf_bond = false; + } + } - mutex_lock(&table->mutex); for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { if (!table->refs[i]) { if (free < 0) free = i; + if (free_for_dup < 0 && need_mf_bond && can_mf_bond) { + if (!dup_table->refs[i]) + free_for_dup = i; + } continue; } @@ -182,10 +257,30 @@ int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) /* MAC already registered, increment ref count */ err = i; ++table->refs[i]; + if (dup) { + u64 dup_mac = MLX4_MAC_MASK & be64_to_cpu(dup_table->entries[i]); + + if (dup_mac != mac || !dup_table->is_dup[i]) { + mlx4_warn(dev, "register mac: expect duplicate mac 0x%llx on port %d index %d\n", + mac, dup_port, i); + } + } goto out; } } + if (need_mf_bond && (free_for_dup < 0)) { + if (dup) { + mlx4_warn(dev, "Fail to allocate duplicate MAC table entry\n"); + mlx4_warn(dev, "High Availability for virtual functions may not work as expected\n"); + dup = false; + } + can_mf_bond = false; + } + + if (need_mf_bond && can_mf_bond) + free = free_for_dup; + mlx4_dbg(dev, "Free MAC index is %d\n", free); if (table->total == table->max) { @@ -205,10 +300,35 @@ int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) goto out; } table->refs[free] = 1; - err = free; + table->is_dup[free] = false; ++table->total; + if (dup) { + dup_table->refs[free] = 0; + dup_table->is_dup[free] = true; + dup_table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID); + + err = mlx4_set_port_mac_table(dev, dup_port, dup_table->entries); + if (unlikely(err)) { + mlx4_warn(dev, "Failed adding duplicate mac: 0x%llx\n", mac); + dup_table->is_dup[free] = false; + dup_table->entries[free] = 0; + goto out; + } + ++dup_table->total; + } + err = free; out: - mutex_unlock(&table->mutex); + if (need_mf_bond) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } return err; } EXPORT_SYMBOL_GPL(__mlx4_register_mac); @@ -255,6 +375,9 @@ void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) struct mlx4_port_info *info; struct mlx4_mac_table *table; int index; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table; if (port < 1 || port > dev->caps.num_ports) { mlx4_warn(dev, "invalid port number (%d), aborting...\n", port); @@ -262,22 +385,59 @@ void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) } info = &mlx4_priv(dev)->port[port]; table = &info->mac_table; - mutex_lock(&table->mutex); + + if (dup) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } + index = find_index(dev, table, mac); if (validate_index(dev, table, index)) goto out; - if (--table->refs[index]) { + + if (--table->refs[index] || table->is_dup[index]) { mlx4_dbg(dev, "Have more references for index %d, no need to modify mac table\n", index); + if (!table->refs[index]) + dup_table->is_dup[index] = false; goto out; } table->entries[index] = 0; - mlx4_set_port_mac_table(dev, port, table->entries); + if (mlx4_set_port_mac_table(dev, port, table->entries)) + mlx4_warn(dev, "Fail to set mac in port %d during unregister\n", port); --table->total; + + if (dup) { + dup_table->is_dup[index] = false; + if (dup_table->refs[index]) + goto out; + dup_table->entries[index] = 0; + if (mlx4_set_port_mac_table(dev, dup_port, dup_table->entries)) + mlx4_warn(dev, "Fail to set mac in duplicate port %d during unregister\n", dup_port); + + --table->total; + } out: - mutex_unlock(&table->mutex); + if (dup) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } } EXPORT_SYMBOL_GPL(__mlx4_unregister_mac); @@ -311,9 +471,22 @@ int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac) struct mlx4_mac_table *table = &info->mac_table; int index = qpn - info->base_qpn; int err = 0; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table; /* CX1 doesn't support multi-functions */ - mutex_lock(&table->mutex); + if (dup) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } err = validate_index(dev, table, index); if (err) @@ -326,9 +499,30 @@ int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac) mlx4_err(dev, "Failed adding MAC: 0x%llx\n", (unsigned long long) new_mac); table->entries[index] = 0; + } else { + if (dup) { + dup_table->entries[index] = cpu_to_be64(new_mac | MLX4_MAC_VALID); + + err = mlx4_set_port_mac_table(dev, dup_port, dup_table->entries); + if (unlikely(err)) { + mlx4_err(dev, "Failed adding duplicate MAC: 0x%llx\n", + (unsigned long long)new_mac); + dup_table->entries[index] = 0; + } + } } out: - mutex_unlock(&table->mutex); + if (dup) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } return err; } EXPORT_SYMBOL_GPL(__mlx4_replace_mac); @@ -380,8 +574,28 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; int i, err = 0; int free = -1; - - mutex_lock(&table->mutex); + int free_for_dup = -1; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_vlan_table *dup_table = &mlx4_priv(dev)->port[dup_port].vlan_table; + bool need_mf_bond = mlx4_need_mf_bond(dev); + bool can_mf_bond = true; + + mlx4_dbg(dev, "Registering VLAN: %d for port %d %s duplicate\n", + vlan, port, + dup ? "with" : "without"); + + if (need_mf_bond) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } if (table->total == table->max) { /* No free vlan entries */ @@ -389,22 +603,85 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, goto out; } + if (need_mf_bond) { + int index_at_port = -1; + int index_at_dup_port = -1; + + for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) { + if ((vlan == (MLX4_VLAN_MASK & be32_to_cpu(table->entries[i])))) + index_at_port = i; + if ((vlan == (MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[i])))) + index_at_dup_port = i; + } + /* check that same vlan is not in the tables at different indices */ + if ((index_at_port != index_at_dup_port) && + (index_at_port >= 0) && + (index_at_dup_port >= 0)) + can_mf_bond = false; + + /* If the vlan is already in the primary table, the slot must be + * available in the duplicate table as well. + */ + if (index_at_port >= 0 && index_at_dup_port < 0 && + dup_table->refs[index_at_port]) { + can_mf_bond = false; + } + /* If the vlan is already in the duplicate table, check that the + * corresponding index is not occupied in the primary table, or + * the primary table already contains the vlan at the same index. + * Otherwise, you cannot bond (primary contains a different vlan + * at that index). + */ + if (index_at_dup_port >= 0) { + if (!table->refs[index_at_dup_port] || + (vlan == (MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[index_at_dup_port])))) + free_for_dup = index_at_dup_port; + else + can_mf_bond = false; + } + } + for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) { - if (free < 0 && (table->refs[i] == 0)) { - free = i; - continue; + if (!table->refs[i]) { + if (free < 0) + free = i; + if (free_for_dup < 0 && need_mf_bond && can_mf_bond) { + if (!dup_table->refs[i]) + free_for_dup = i; + } } - if (table->refs[i] && + if ((table->refs[i] || table->is_dup[i]) && (vlan == (MLX4_VLAN_MASK & be32_to_cpu(table->entries[i])))) { /* Vlan already registered, increase references count */ + mlx4_dbg(dev, "vlan %u is already registered.\n", vlan); *index = i; ++table->refs[i]; + if (dup) { + u16 dup_vlan = MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[i]); + + if (dup_vlan != vlan || !dup_table->is_dup[i]) { + mlx4_warn(dev, "register vlan: expected duplicate vlan %u on port %d index %d\n", + vlan, dup_port, i); + } + } goto out; } } + if (need_mf_bond && (free_for_dup < 0)) { + if (dup) { + mlx4_warn(dev, "Fail to allocate duplicate VLAN table entry\n"); + mlx4_warn(dev, "High Availability for virtual functions may not work as expected\n"); + dup = false; + } + can_mf_bond = false; + } + + if (need_mf_bond && can_mf_bond) + free = free_for_dup; + if (free < 0) { err = -ENOMEM; goto out; @@ -412,6 +689,7 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, /* Register new VLAN */ table->refs[free] = 1; + table->is_dup[free] = false; table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID); err = mlx4_set_port_vlan_table(dev, port, table->entries); @@ -421,11 +699,35 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, table->entries[free] = 0; goto out; } + ++table->total; + if (dup) { + dup_table->refs[free] = 0; + dup_table->is_dup[free] = true; + dup_table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID); + + err = mlx4_set_port_vlan_table(dev, dup_port, dup_table->entries); + if (unlikely(err)) { + mlx4_warn(dev, "Failed adding duplicate vlan: %u\n", vlan); + dup_table->is_dup[free] = false; + dup_table->entries[free] = 0; + goto out; + } + ++dup_table->total; + } *index = free; - ++table->total; out: - mutex_unlock(&table->mutex); + if (need_mf_bond) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } return err; } @@ -455,8 +757,22 @@ void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) { struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; int index; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_vlan_table *dup_table = &mlx4_priv(dev)->port[dup_port].vlan_table; + + if (dup) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } - mutex_lock(&table->mutex); if (mlx4_find_cached_vlan(dev, port, vlan, &index)) { mlx4_warn(dev, "vlan 0x%x is not in the vlan table\n", vlan); goto out; @@ -467,16 +783,38 @@ void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) goto out; } - if (--table->refs[index]) { + if (--table->refs[index] || table->is_dup[index]) { mlx4_dbg(dev, "Have %d more references for index %d, no need to modify vlan table\n", table->refs[index], index); + if (!table->refs[index]) + dup_table->is_dup[index] = false; goto out; } table->entries[index] = 0; - mlx4_set_port_vlan_table(dev, port, table->entries); + if (mlx4_set_port_vlan_table(dev, port, table->entries)) + mlx4_warn(dev, "Fail to set vlan in port %d during unregister\n", port); --table->total; + if (dup) { + dup_table->is_dup[index] = false; + if (dup_table->refs[index]) + goto out; + dup_table->entries[index] = 0; + if (mlx4_set_port_vlan_table(dev, dup_port, dup_table->entries)) + mlx4_warn(dev, "Fail to set vlan in duplicate port %d during unregister\n", dup_port); + --dup_table->total; + } out: - mutex_unlock(&table->mutex); + if (dup) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } } void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) @@ -495,6 +833,220 @@ void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) } EXPORT_SYMBOL_GPL(mlx4_unregister_vlan); +int mlx4_bond_mac_table(struct mlx4_dev *dev) +{ + struct mlx4_mac_table *t1 = &mlx4_priv(dev)->port[1].mac_table; + struct mlx4_mac_table *t2 = &mlx4_priv(dev)->port[2].mac_table; + int ret = 0; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if ((t1->entries[i] != t2->entries[i]) && + t1->entries[i] && t2->entries[i]) { + mlx4_warn(dev, "can't duplicate entry %d in mac table\n", i); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (t1->entries[i] && !t2->entries[i]) { + t2->entries[i] = t1->entries[i]; + t2->is_dup[i] = true; + update2 = true; + } else if (!t1->entries[i] && t2->entries[i]) { + t1->entries[i] = t2->entries[i]; + t1->is_dup[i] = true; + update1 = true; + } else if (t1->entries[i] && t2->entries[i]) { + t1->is_dup[i] = true; + t2->is_dup[i] = true; + } + } + + if (update1) { + ret = mlx4_set_port_mac_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to set MAC table for port 1 (%d)\n", ret); + } + if (!ret && update2) { + ret = mlx4_set_port_mac_table(dev, 2, t2->entries); + if (ret) + mlx4_warn(dev, "failed to set MAC table for port 2 (%d)\n", ret); + } + + if (ret) + mlx4_warn(dev, "failed to create mirror MAC tables\n"); +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + +int mlx4_unbond_mac_table(struct mlx4_dev *dev) +{ + struct mlx4_mac_table *t1 = &mlx4_priv(dev)->port[1].mac_table; + struct mlx4_mac_table *t2 = &mlx4_priv(dev)->port[2].mac_table; + int ret = 0; + int ret1; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (t1->entries[i] != t2->entries[i]) { + mlx4_warn(dev, "mac table is in an unexpected state when trying to unbond\n"); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (!t1->entries[i]) + continue; + t1->is_dup[i] = false; + if (!t1->refs[i]) { + t1->entries[i] = 0; + update1 = true; + } + t2->is_dup[i] = false; + if (!t2->refs[i]) { + t2->entries[i] = 0; + update2 = true; + } + } + + if (update1) { + ret = mlx4_set_port_mac_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to unmirror MAC tables for port 1(%d)\n", ret); + } + if (update2) { + ret1 = mlx4_set_port_mac_table(dev, 2, t2->entries); + if (ret1) { + mlx4_warn(dev, "failed to unmirror MAC tables for port 2(%d)\n", ret1); + ret = ret1; + } + } +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + +int mlx4_bond_vlan_table(struct mlx4_dev *dev) +{ + struct mlx4_vlan_table *t1 = &mlx4_priv(dev)->port[1].vlan_table; + struct mlx4_vlan_table *t2 = &mlx4_priv(dev)->port[2].vlan_table; + int ret = 0; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if ((t1->entries[i] != t2->entries[i]) && + t1->entries[i] && t2->entries[i]) { + mlx4_warn(dev, "can't duplicate entry %d in vlan table\n", i); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if (t1->entries[i] && !t2->entries[i]) { + t2->entries[i] = t1->entries[i]; + t2->is_dup[i] = true; + update2 = true; + } else if (!t1->entries[i] && t2->entries[i]) { + t1->entries[i] = t2->entries[i]; + t1->is_dup[i] = true; + update1 = true; + } else if (t1->entries[i] && t2->entries[i]) { + t1->is_dup[i] = true; + t2->is_dup[i] = true; + } + } + + if (update1) { + ret = mlx4_set_port_vlan_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to set VLAN table for port 1 (%d)\n", ret); + } + if (!ret && update2) { + ret = mlx4_set_port_vlan_table(dev, 2, t2->entries); + if (ret) + mlx4_warn(dev, "failed to set VLAN table for port 2 (%d)\n", ret); + } + + if (ret) + mlx4_warn(dev, "failed to create mirror VLAN tables\n"); +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + +int mlx4_unbond_vlan_table(struct mlx4_dev *dev) +{ + struct mlx4_vlan_table *t1 = &mlx4_priv(dev)->port[1].vlan_table; + struct mlx4_vlan_table *t2 = &mlx4_priv(dev)->port[2].vlan_table; + int ret = 0; + int ret1; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if (t1->entries[i] != t2->entries[i]) { + mlx4_warn(dev, "vlan table is in an unexpected state when trying to unbond\n"); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if (!t1->entries[i]) + continue; + t1->is_dup[i] = false; + if (!t1->refs[i]) { + t1->entries[i] = 0; + update1 = true; + } + t2->is_dup[i] = false; + if (!t2->refs[i]) { + t2->entries[i] = 0; + update2 = true; + } + } + + if (update1) { + ret = mlx4_set_port_vlan_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to unmirror VLAN tables for port 1(%d)\n", ret); + } + if (update2) { + ret1 = mlx4_set_port_vlan_table(dev, 2, t2->entries); + if (ret1) { + mlx4_warn(dev, "failed to unmirror VLAN tables for port 2(%d)\n", ret1); + ret = ret1; + } + } +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps) { struct mlx4_cmd_mailbox *inmailbox, *outmailbox; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 6fec3e993d02..da7f578a3fe1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -222,6 +222,13 @@ enum res_fs_rule_states { struct res_fs_rule { struct res_common com; int qpn; + /* VF DMFS mbox with port flipped */ + void *mirr_mbox; + /* > 0 --> apply mirror when getting into HA mode */ + /* = 0 --> un-apply mirror when getting out of HA mode */ + u32 mirr_mbox_size; + struct list_head mirr_list; + u64 mirr_rule_id; }; static void *res_tracker_lookup(struct rb_root *root, u64 res_id) @@ -4284,6 +4291,22 @@ err_mac: return err; } +static u32 qp_attach_mbox_size(void *mbox) +{ + u32 size = sizeof(struct mlx4_net_trans_rule_hw_ctrl); + struct _rule_hw *rule_header; + + rule_header = (struct _rule_hw *)(mbox + size); + + while (rule_header->size) { + size += rule_header->size * sizeof(u32); + rule_header += 1; + } + return size; +} + +static int mlx4_do_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule); + int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -4300,6 +4323,8 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_net_trans_rule_hw_ctrl *ctrl; struct _rule_hw *rule_header; int header_id; + struct res_fs_rule *rrule; + u32 mbox_size; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) @@ -4328,7 +4353,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, case MLX4_NET_TRANS_RULE_ID_ETH: if (validate_eth_header_mac(slave, rule_header, rlist)) { err = -EINVAL; - goto err_put; + goto err_put_qp; } break; case MLX4_NET_TRANS_RULE_ID_IB: @@ -4339,7 +4364,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, pr_warn("Can't attach FS rule without L2 headers, adding L2 header\n"); if (add_eth_header(dev, slave, inbox, rlist, header_id)) { err = -EINVAL; - goto err_put; + goto err_put_qp; } vhcr->in_modifier += sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2; @@ -4347,7 +4372,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, default: pr_err("Corrupted mailbox\n"); err = -EINVAL; - goto err_put; + goto err_put_qp; } execute: @@ -4356,23 +4381,69 @@ execute: MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) - goto err_put; + goto err_put_qp; + err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, qpn); if (err) { mlx4_err(dev, "Fail to add flow steering resources\n"); - /* detach rule*/ + goto err_detach; + } + + err = get_res(dev, slave, vhcr->out_param, RES_FS_RULE, &rrule); + if (err) + goto err_detach; + + mbox_size = qp_attach_mbox_size(inbox->buf); + rrule->mirr_mbox = kmalloc(mbox_size, GFP_KERNEL); + if (!rrule->mirr_mbox) { + err = -ENOMEM; + goto err_put_rule; + } + rrule->mirr_mbox_size = mbox_size; + rrule->mirr_rule_id = 0; + memcpy(rrule->mirr_mbox, inbox->buf, mbox_size); + + /* set different port */ + ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)rrule->mirr_mbox; + if (ctrl->port == 1) + ctrl->port = 2; + else + ctrl->port = 1; + + if (mlx4_is_bonded(dev)) + mlx4_do_mirror_rule(dev, rrule); + + atomic_inc(&rqp->ref_count); + +err_put_rule: + put_res(dev, slave, vhcr->out_param, RES_FS_RULE); +err_detach: + /* detach rule on error */ + if (err) mlx4_cmd(dev, vhcr->out_param, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); - goto err_put; - } - atomic_inc(&rqp->ref_count); -err_put: +err_put_qp: put_res(dev, slave, qpn, RES_QP); return err; } +static int mlx4_undo_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule) +{ + int err; + + err = rem_res_range(dev, fs_rule->com.owner, fs_rule->com.res_id, 1, RES_FS_RULE, 0); + if (err) { + mlx4_err(dev, "Fail to remove flow steering resources\n"); + return err; + } + + mlx4_cmd(dev, fs_rule->com.res_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + return 0; +} + int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -4382,6 +4453,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, int err; struct res_qp *rqp; struct res_fs_rule *rrule; + u64 mirr_reg_id; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) @@ -4390,12 +4462,30 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, err = get_res(dev, slave, vhcr->in_param, RES_FS_RULE, &rrule); if (err) return err; + + if (!rrule->mirr_mbox) { + mlx4_err(dev, "Mirror rules cannot be removed explicitly\n"); + put_res(dev, slave, vhcr->in_param, RES_FS_RULE); + return -EINVAL; + } + mirr_reg_id = rrule->mirr_rule_id; + kfree(rrule->mirr_mbox); + /* Release the rule form busy state before removal */ put_res(dev, slave, vhcr->in_param, RES_FS_RULE); err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp); if (err) return err; + if (mirr_reg_id && mlx4_is_bonded(dev)) { + err = get_res(dev, slave, mirr_reg_id, RES_FS_RULE, &rrule); + if (err) { + mlx4_err(dev, "Fail to get resource of mirror rule\n"); + } else { + put_res(dev, slave, mirr_reg_id, RES_FS_RULE); + mlx4_undo_mirror_rule(dev, rrule); + } + } err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0); if (err) { mlx4_err(dev, "Fail to remove flow steering resources\n"); @@ -4833,6 +4923,91 @@ static void rem_slave_mtts(struct mlx4_dev *dev, int slave) spin_unlock_irq(mlx4_tlock(dev)); } +static int mlx4_do_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + struct res_fs_rule *mirr_rule; + u64 reg_id; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + if (!fs_rule->mirr_mbox) { + mlx4_err(dev, "rule mirroring mailbox is null\n"); + return -EINVAL; + } + memcpy(mailbox->buf, fs_rule->mirr_mbox, fs_rule->mirr_mbox_size); + err = mlx4_cmd_imm(dev, mailbox->dma, ®_id, fs_rule->mirr_mbox_size >> 2, 0, + MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + mlx4_free_cmd_mailbox(dev, mailbox); + + if (err) + goto err; + + err = add_res_range(dev, fs_rule->com.owner, reg_id, 1, RES_FS_RULE, fs_rule->qpn); + if (err) + goto err_detach; + + err = get_res(dev, fs_rule->com.owner, reg_id, RES_FS_RULE, &mirr_rule); + if (err) + goto err_rem; + + fs_rule->mirr_rule_id = reg_id; + mirr_rule->mirr_rule_id = 0; + mirr_rule->mirr_mbox_size = 0; + mirr_rule->mirr_mbox = NULL; + put_res(dev, fs_rule->com.owner, reg_id, RES_FS_RULE); + + return 0; +err_rem: + rem_res_range(dev, fs_rule->com.owner, reg_id, 1, RES_FS_RULE, 0); +err_detach: + mlx4_cmd(dev, reg_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); +err: + return err; +} + +static int mlx4_mirror_fs_rules(struct mlx4_dev *dev, bool bond) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = + &priv->mfunc.master.res_tracker; + struct rb_root *root = &tracker->res_tree[RES_FS_RULE]; + struct rb_node *p; + struct res_fs_rule *fs_rule; + int err = 0; + LIST_HEAD(mirr_list); + + for (p = rb_first(root); p; p = rb_next(p)) { + fs_rule = rb_entry(p, struct res_fs_rule, com.node); + if ((bond && fs_rule->mirr_mbox_size) || + (!bond && !fs_rule->mirr_mbox_size)) + list_add_tail(&fs_rule->mirr_list, &mirr_list); + } + + list_for_each_entry(fs_rule, &mirr_list, mirr_list) { + if (bond) + err += mlx4_do_mirror_rule(dev, fs_rule); + else + err += mlx4_undo_mirror_rule(dev, fs_rule); + } + return err; +} + +int mlx4_bond_fs_rules(struct mlx4_dev *dev) +{ + return mlx4_mirror_fs_rules(dev, true); +} + +int mlx4_unbond_fs_rules(struct mlx4_dev *dev) +{ + return mlx4_mirror_fs_rules(dev, false); +} + static void rem_slave_fs_rule(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 26a68b8af2c5..fe11e967095f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o \ - en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ + mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ + en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ en_txrx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 69f1c1a412b4..f689ce580b44 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -64,6 +64,8 @@ #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ #define MLX5E_SQ_BF_BUDGET 16 +#define MLX5E_NUM_MAIN_GROUPS 9 + static const char vport_strings[][ETH_GSTRING_LEN] = { /* vport statistics */ "rx_packets", @@ -442,7 +444,7 @@ enum mlx5e_rqt_ix { struct mlx5e_eth_addr_info { u8 addr[ETH_ALEN + 2]; u32 tt_vec; - u32 ft_ix[MLX5E_NUM_TT]; /* flow table index per traffic type */ + struct mlx5_flow_rule *ft_rule[MLX5E_NUM_TT]; }; #define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE) @@ -465,15 +467,23 @@ enum { }; struct mlx5e_vlan_db { - u32 active_vlans_ft_ix[VLAN_N_VID]; - u32 untagged_rule_ft_ix; - u32 any_vlan_rule_ft_ix; + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + struct mlx5_flow_rule *active_vlans_rule[VLAN_N_VID]; + struct mlx5_flow_rule *untagged_rule; + struct mlx5_flow_rule *any_vlan_rule; bool filter_disabled; }; struct mlx5e_flow_table { - void *vlan; - void *main; + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; +}; + +struct mlx5e_flow_tables { + struct mlx5_flow_namespace *ns; + struct mlx5e_flow_table vlan; + struct mlx5e_flow_table main; }; struct mlx5e_priv { @@ -496,7 +506,7 @@ struct mlx5e_priv { u32 rqtn[MLX5E_NUM_RQT]; u32 tirn[MLX5E_NUM_TT]; - struct mlx5e_flow_table ft; + struct mlx5e_flow_tables fts; struct mlx5e_eth_addr_db eth_addr; struct mlx5e_vlan_db vlan; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c deleted file mode 100644 index 22d603f78273..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c +++ /dev/null @@ -1,907 +0,0 @@ -/* - * Copyright (c) 2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/list.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <linux/tcp.h> -#include <linux/mlx5/flow_table.h> -#include "en.h" - -enum { - MLX5E_FULLMATCH = 0, - MLX5E_ALLMULTI = 1, - MLX5E_PROMISC = 2, -}; - -enum { - MLX5E_UC = 0, - MLX5E_MC_IPV4 = 1, - MLX5E_MC_IPV6 = 2, - MLX5E_MC_OTHER = 3, -}; - -enum { - MLX5E_ACTION_NONE = 0, - MLX5E_ACTION_ADD = 1, - MLX5E_ACTION_DEL = 2, -}; - -struct mlx5e_eth_addr_hash_node { - struct hlist_node hlist; - u8 action; - struct mlx5e_eth_addr_info ai; -}; - -static inline int mlx5e_hash_eth_addr(u8 *addr) -{ - return addr[5]; -} - -static void mlx5e_add_eth_addr_to_hash(struct hlist_head *hash, u8 *addr) -{ - struct mlx5e_eth_addr_hash_node *hn; - int ix = mlx5e_hash_eth_addr(addr); - int found = 0; - - hlist_for_each_entry(hn, &hash[ix], hlist) - if (ether_addr_equal_64bits(hn->ai.addr, addr)) { - found = 1; - break; - } - - if (found) { - hn->action = MLX5E_ACTION_NONE; - return; - } - - hn = kzalloc(sizeof(*hn), GFP_ATOMIC); - if (!hn) - return; - - ether_addr_copy(hn->ai.addr, addr); - hn->action = MLX5E_ACTION_ADD; - - hlist_add_head(&hn->hlist, &hash[ix]); -} - -static void mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn) -{ - hlist_del(&hn->hlist); - kfree(hn); -} - -static void mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai) -{ - void *ft = priv->ft.main; - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) - mlx5_del_flow_table_entry(ft, - ai->ft_ix[MLX5E_TT_IPV6_IPSEC_ESP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) - mlx5_del_flow_table_entry(ft, - ai->ft_ix[MLX5E_TT_IPV4_IPSEC_ESP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) - mlx5_del_flow_table_entry(ft, - ai->ft_ix[MLX5E_TT_IPV6_IPSEC_AH]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) - mlx5_del_flow_table_entry(ft, - ai->ft_ix[MLX5E_TT_IPV4_IPSEC_AH]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_TCP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_TCP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_TCP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_TCP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_UDP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_UDP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_UDP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_UDP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4]); - - if (ai->tt_vec & BIT(MLX5E_TT_ANY)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_ANY]); -} - -static int mlx5e_get_eth_addr_type(u8 *addr) -{ - if (is_unicast_ether_addr(addr)) - return MLX5E_UC; - - if ((addr[0] == 0x01) && - (addr[1] == 0x00) && - (addr[2] == 0x5e) && - !(addr[3] & 0x80)) - return MLX5E_MC_IPV4; - - if ((addr[0] == 0x33) && - (addr[1] == 0x33)) - return MLX5E_MC_IPV6; - - return MLX5E_MC_OTHER; -} - -static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type) -{ - int eth_addr_type; - u32 ret; - - switch (type) { - case MLX5E_FULLMATCH: - eth_addr_type = mlx5e_get_eth_addr_type(ai->addr); - switch (eth_addr_type) { - case MLX5E_UC: - ret = - BIT(MLX5E_TT_IPV4_TCP) | - BIT(MLX5E_TT_IPV6_TCP) | - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV4_IPSEC_AH) | - BIT(MLX5E_TT_IPV6_IPSEC_AH) | - BIT(MLX5E_TT_IPV4_IPSEC_ESP) | - BIT(MLX5E_TT_IPV6_IPSEC_ESP) | - BIT(MLX5E_TT_IPV4) | - BIT(MLX5E_TT_IPV6) | - BIT(MLX5E_TT_ANY) | - 0; - break; - - case MLX5E_MC_IPV4: - ret = - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV4) | - 0; - break; - - case MLX5E_MC_IPV6: - ret = - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV6) | - 0; - break; - - case MLX5E_MC_OTHER: - ret = - BIT(MLX5E_TT_ANY) | - 0; - break; - } - - break; - - case MLX5E_ALLMULTI: - ret = - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV4) | - BIT(MLX5E_TT_IPV6) | - BIT(MLX5E_TT_ANY) | - 0; - break; - - default: /* MLX5E_PROMISC */ - ret = - BIT(MLX5E_TT_IPV4_TCP) | - BIT(MLX5E_TT_IPV6_TCP) | - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV4_IPSEC_AH) | - BIT(MLX5E_TT_IPV6_IPSEC_AH) | - BIT(MLX5E_TT_IPV4_IPSEC_ESP) | - BIT(MLX5E_TT_IPV6_IPSEC_ESP) | - BIT(MLX5E_TT_IPV4) | - BIT(MLX5E_TT_IPV6) | - BIT(MLX5E_TT_ANY) | - 0; - break; - } - - return ret; -} - -static int __mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai, int type, - void *flow_context, void *match_criteria) -{ - u8 match_criteria_enable = 0; - void *match_value; - void *dest; - u8 *dmac; - u8 *match_criteria_dmac; - void *ft = priv->ft.main; - u32 *tirn = priv->tirn; - u32 *ft_ix; - u32 tt_vec; - int err; - - match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); - dmac = MLX5_ADDR_OF(fte_match_param, match_value, - outer_headers.dmac_47_16); - match_criteria_dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, - outer_headers.dmac_47_16); - dest = MLX5_ADDR_OF(flow_context, flow_context, destination); - - MLX5_SET(flow_context, flow_context, action, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); - MLX5_SET(flow_context, flow_context, destination_list_size, 1); - MLX5_SET(dest_format_struct, dest, destination_type, - MLX5_FLOW_CONTEXT_DEST_TYPE_TIR); - - switch (type) { - case MLX5E_FULLMATCH: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - memset(match_criteria_dmac, 0xff, ETH_ALEN); - ether_addr_copy(dmac, ai->addr); - break; - - case MLX5E_ALLMULTI: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - match_criteria_dmac[0] = 0x01; - dmac[0] = 0x01; - break; - - case MLX5E_PROMISC: - break; - } - - tt_vec = mlx5e_get_tt_vec(ai, type); - - ft_ix = &ai->ft_ix[MLX5E_TT_ANY]; - if (tt_vec & BIT(MLX5E_TT_ANY)) { - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_ANY]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_ANY); - } - - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.ethertype); - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV4]; - if (tt_vec & BIT(MLX5E_TT_IPV4)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV4); - } - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV6]; - if (tt_vec & BIT(MLX5E_TT_IPV6)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV6); - } - - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.ip_protocol); - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, - IPPROTO_UDP); - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_UDP]; - if (tt_vec & BIT(MLX5E_TT_IPV4_UDP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4_UDP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV4_UDP); - } - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_UDP]; - if (tt_vec & BIT(MLX5E_TT_IPV6_UDP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6_UDP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV6_UDP); - } - - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, - IPPROTO_TCP); - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_TCP]; - if (tt_vec & BIT(MLX5E_TT_IPV4_TCP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4_TCP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV4_TCP); - } - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_TCP]; - if (tt_vec & BIT(MLX5E_TT_IPV6_TCP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6_TCP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV6_TCP); - } - - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, - IPPROTO_AH); - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_IPSEC_AH]; - if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4_IPSEC_AH]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_AH); - } - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_IPSEC_AH]; - if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6_IPSEC_AH]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_AH); - } - - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, - IPPROTO_ESP); - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_IPSEC_ESP]; - if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4_IPSEC_ESP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_ESP); - } - - ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_IPSEC_ESP]; - if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETH_P_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6_IPSEC_ESP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, - ft_ix); - if (err) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_ESP); - } - - return 0; - -err_del_ai: - mlx5e_del_eth_addr_from_flow_table(priv, ai); - - return err; -} - -static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai, int type) -{ - u32 *flow_context; - u32 *match_criteria; - int err; - - flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + - MLX5_ST_SZ_BYTES(dest_format_struct)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!flow_context || !match_criteria) { - netdev_err(priv->netdev, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto add_eth_addr_rule_out; - } - - err = __mlx5e_add_eth_addr_rule(priv, ai, type, flow_context, - match_criteria); - if (err) - netdev_err(priv->netdev, "%s: failed\n", __func__); - -add_eth_addr_rule_out: - kvfree(match_criteria); - kvfree(flow_context); - return err; -} - -enum mlx5e_vlan_rule_type { - MLX5E_VLAN_RULE_TYPE_UNTAGGED, - MLX5E_VLAN_RULE_TYPE_ANY_VID, - MLX5E_VLAN_RULE_TYPE_MATCH_VID, -}; - -static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, - enum mlx5e_vlan_rule_type rule_type, u16 vid) -{ - u8 match_criteria_enable = 0; - u32 *flow_context; - void *match_value; - void *dest; - u32 *match_criteria; - u32 *ft_ix; - int err; - - flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + - MLX5_ST_SZ_BYTES(dest_format_struct)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!flow_context || !match_criteria) { - netdev_err(priv->netdev, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto add_vlan_rule_out; - } - match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); - dest = MLX5_ADDR_OF(flow_context, flow_context, destination); - - MLX5_SET(flow_context, flow_context, action, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); - MLX5_SET(flow_context, flow_context, destination_list_size, 1); - MLX5_SET(dest_format_struct, dest, destination_type, - MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE); - MLX5_SET(dest_format_struct, dest, destination_id, - mlx5_get_flow_table_id(priv->ft.main)); - - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.vlan_tag); - - switch (rule_type) { - case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - ft_ix = &priv->vlan.untagged_rule_ft_ix; - break; - case MLX5E_VLAN_RULE_TYPE_ANY_VID: - ft_ix = &priv->vlan.any_vlan_rule_ft_ix; - MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, - 1); - break; - default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ - ft_ix = &priv->vlan.active_vlans_ft_ix[vid]; - MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, - 1); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.first_vid); - MLX5_SET(fte_match_param, match_value, outer_headers.first_vid, - vid); - break; - } - - err = mlx5_add_flow_table_entry(priv->ft.vlan, match_criteria_enable, - match_criteria, flow_context, ft_ix); - if (err) - netdev_err(priv->netdev, "%s: failed\n", __func__); - -add_vlan_rule_out: - kvfree(match_criteria); - kvfree(flow_context); - return err; -} - -static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, - enum mlx5e_vlan_rule_type rule_type, u16 vid) -{ - switch (rule_type) { - case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - mlx5_del_flow_table_entry(priv->ft.vlan, - priv->vlan.untagged_rule_ft_ix); - break; - case MLX5E_VLAN_RULE_TYPE_ANY_VID: - mlx5_del_flow_table_entry(priv->ft.vlan, - priv->vlan.any_vlan_rule_ft_ix); - break; - case MLX5E_VLAN_RULE_TYPE_MATCH_VID: - mlx5_del_flow_table_entry(priv->ft.vlan, - priv->vlan.active_vlans_ft_ix[vid]); - break; - } -} - -void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) -{ - if (!priv->vlan.filter_disabled) - return; - - priv->vlan.filter_disabled = false; - if (priv->netdev->flags & IFF_PROMISC) - return; - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); -} - -void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) -{ - if (priv->vlan.filter_disabled) - return; - - priv->vlan.filter_disabled = true; - if (priv->netdev->flags & IFF_PROMISC) - return; - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); -} - -int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, - u16 vid) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - - return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); -} - -int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, - u16 vid) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); - - return 0; -} - -#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ - for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \ - hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) - -static void mlx5e_execute_action(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_hash_node *hn) -{ - switch (hn->action) { - case MLX5E_ACTION_ADD: - mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH); - hn->action = MLX5E_ACTION_NONE; - break; - - case MLX5E_ACTION_DEL: - mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai); - mlx5e_del_eth_addr_from_hash(hn); - break; - } -} - -static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) -{ - struct net_device *netdev = priv->netdev; - struct netdev_hw_addr *ha; - - netif_addr_lock_bh(netdev); - - mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, - priv->netdev->dev_addr); - - netdev_for_each_uc_addr(ha, netdev) - mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, ha->addr); - - netdev_for_each_mc_addr(ha, netdev) - mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_mc, ha->addr); - - netif_addr_unlock_bh(netdev); -} - -static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv) -{ - struct mlx5e_eth_addr_hash_node *hn; - struct hlist_node *tmp; - int i; - - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) - mlx5e_execute_action(priv, hn); - - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) - mlx5e_execute_action(priv, hn); -} - -static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv) -{ - struct mlx5e_eth_addr_hash_node *hn; - struct hlist_node *tmp; - int i; - - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) - hn->action = MLX5E_ACTION_DEL; - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) - hn->action = MLX5E_ACTION_DEL; - - if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state)) - mlx5e_sync_netdev_addr(priv); - - mlx5e_apply_netdev_addr(priv); -} - -void mlx5e_set_rx_mode_work(struct work_struct *work) -{ - struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, - set_rx_mode_work); - - struct mlx5e_eth_addr_db *ea = &priv->eth_addr; - struct net_device *ndev = priv->netdev; - - bool rx_mode_enable = !test_bit(MLX5E_STATE_DESTROYING, &priv->state); - bool promisc_enabled = rx_mode_enable && (ndev->flags & IFF_PROMISC); - bool allmulti_enabled = rx_mode_enable && (ndev->flags & IFF_ALLMULTI); - bool broadcast_enabled = rx_mode_enable; - - bool enable_promisc = !ea->promisc_enabled && promisc_enabled; - bool disable_promisc = ea->promisc_enabled && !promisc_enabled; - bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled; - bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled; - bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled; - bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; - - if (enable_promisc) { - mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC); - if (!priv->vlan.filter_disabled) - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, - 0); - } - if (enable_allmulti) - mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); - if (enable_broadcast) - mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); - - mlx5e_handle_netdev_addr(priv); - - if (disable_broadcast) - mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast); - if (disable_allmulti) - mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti); - if (disable_promisc) { - if (!priv->vlan.filter_disabled) - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, - 0); - mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc); - } - - ea->promisc_enabled = promisc_enabled; - ea->allmulti_enabled = allmulti_enabled; - ea->broadcast_enabled = broadcast_enabled; -} - -void mlx5e_init_eth_addr(struct mlx5e_priv *priv) -{ - ether_addr_copy(priv->eth_addr.broadcast.addr, priv->netdev->broadcast); -} - -static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv) -{ - struct mlx5_flow_table_group *g; - u8 *dmac; - - g = kcalloc(9, sizeof(*g), GFP_KERNEL); - if (!g) - return -ENOMEM; - - g[0].log_sz = 3; - g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.ip_protocol); - - g[1].log_sz = 1; - g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, - outer_headers.ethertype); - - g[2].log_sz = 0; - - g[3].log_sz = 14; - g[3].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[3].match_criteria, - outer_headers.dmac_47_16); - memset(dmac, 0xff, ETH_ALEN); - MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, - outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, - outer_headers.ip_protocol); - - g[4].log_sz = 13; - g[4].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[4].match_criteria, - outer_headers.dmac_47_16); - memset(dmac, 0xff, ETH_ALEN); - MLX5_SET_TO_ONES(fte_match_param, g[4].match_criteria, - outer_headers.ethertype); - - g[5].log_sz = 11; - g[5].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[5].match_criteria, - outer_headers.dmac_47_16); - memset(dmac, 0xff, ETH_ALEN); - - g[6].log_sz = 2; - g[6].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[6].match_criteria, - outer_headers.dmac_47_16); - dmac[0] = 0x01; - MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, - outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, - outer_headers.ip_protocol); - - g[7].log_sz = 1; - g[7].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[7].match_criteria, - outer_headers.dmac_47_16); - dmac[0] = 0x01; - MLX5_SET_TO_ONES(fte_match_param, g[7].match_criteria, - outer_headers.ethertype); - - g[8].log_sz = 0; - g[8].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[8].match_criteria, - outer_headers.dmac_47_16); - dmac[0] = 0x01; - priv->ft.main = mlx5_create_flow_table(priv->mdev, 1, - MLX5_FLOW_TABLE_TYPE_NIC_RCV, - 9, g); - kfree(g); - - return priv->ft.main ? 0 : -ENOMEM; -} - -static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv) -{ - mlx5_destroy_flow_table(priv->ft.main); -} - -static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) -{ - struct mlx5_flow_table_group *g; - - g = kcalloc(2, sizeof(*g), GFP_KERNEL); - if (!g) - return -ENOMEM; - - g[0].log_sz = 12; - g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.vlan_tag); - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.first_vid); - - /* untagged + any vlan id */ - g[1].log_sz = 1; - g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, - outer_headers.vlan_tag); - - priv->ft.vlan = mlx5_create_flow_table(priv->mdev, 0, - MLX5_FLOW_TABLE_TYPE_NIC_RCV, - 2, g); - - kfree(g); - return priv->ft.vlan ? 0 : -ENOMEM; -} - -static void mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv) -{ - mlx5_destroy_flow_table(priv->ft.vlan); -} - -int mlx5e_create_flow_tables(struct mlx5e_priv *priv) -{ - int err; - - err = mlx5e_create_main_flow_table(priv); - if (err) - return err; - - err = mlx5e_create_vlan_flow_table(priv); - if (err) - goto err_destroy_main_flow_table; - - err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - if (err) - goto err_destroy_vlan_flow_table; - - return 0; - -err_destroy_vlan_flow_table: - mlx5e_destroy_vlan_flow_table(priv); - -err_destroy_main_flow_table: - mlx5e_destroy_main_flow_table(priv); - - return err; -} - -void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv) -{ - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - mlx5e_destroy_vlan_flow_table(priv); - mlx5e_destroy_main_flow_table(priv); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c new file mode 100644 index 000000000000..80d81abc4820 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -0,0 +1,1224 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/list.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/mlx5/fs.h> +#include "en.h" + +#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) + +enum { + MLX5E_FULLMATCH = 0, + MLX5E_ALLMULTI = 1, + MLX5E_PROMISC = 2, +}; + +enum { + MLX5E_UC = 0, + MLX5E_MC_IPV4 = 1, + MLX5E_MC_IPV6 = 2, + MLX5E_MC_OTHER = 3, +}; + +enum { + MLX5E_ACTION_NONE = 0, + MLX5E_ACTION_ADD = 1, + MLX5E_ACTION_DEL = 2, +}; + +struct mlx5e_eth_addr_hash_node { + struct hlist_node hlist; + u8 action; + struct mlx5e_eth_addr_info ai; +}; + +static inline int mlx5e_hash_eth_addr(u8 *addr) +{ + return addr[5]; +} + +static void mlx5e_add_eth_addr_to_hash(struct hlist_head *hash, u8 *addr) +{ + struct mlx5e_eth_addr_hash_node *hn; + int ix = mlx5e_hash_eth_addr(addr); + int found = 0; + + hlist_for_each_entry(hn, &hash[ix], hlist) + if (ether_addr_equal_64bits(hn->ai.addr, addr)) { + found = 1; + break; + } + + if (found) { + hn->action = MLX5E_ACTION_NONE; + return; + } + + hn = kzalloc(sizeof(*hn), GFP_ATOMIC); + if (!hn) + return; + + ether_addr_copy(hn->ai.addr, addr); + hn->action = MLX5E_ACTION_ADD; + + hlist_add_head(&hn->hlist, &hash[ix]); +} + +static void mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn) +{ + hlist_del(&hn->hlist); + kfree(hn); +} + +static void mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai) +{ + if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV6_TCP)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_TCP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV4_TCP)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_TCP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV6_UDP)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_UDP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV4_UDP)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_UDP]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV6)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6]); + + if (ai->tt_vec & BIT(MLX5E_TT_IPV4)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4]); + + if (ai->tt_vec & BIT(MLX5E_TT_ANY)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_ANY]); +} + +static int mlx5e_get_eth_addr_type(u8 *addr) +{ + if (is_unicast_ether_addr(addr)) + return MLX5E_UC; + + if ((addr[0] == 0x01) && + (addr[1] == 0x00) && + (addr[2] == 0x5e) && + !(addr[3] & 0x80)) + return MLX5E_MC_IPV4; + + if ((addr[0] == 0x33) && + (addr[1] == 0x33)) + return MLX5E_MC_IPV6; + + return MLX5E_MC_OTHER; +} + +static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type) +{ + int eth_addr_type; + u32 ret; + + switch (type) { + case MLX5E_FULLMATCH: + eth_addr_type = mlx5e_get_eth_addr_type(ai->addr); + switch (eth_addr_type) { + case MLX5E_UC: + ret = + BIT(MLX5E_TT_IPV4_TCP) | + BIT(MLX5E_TT_IPV6_TCP) | + BIT(MLX5E_TT_IPV4_UDP) | + BIT(MLX5E_TT_IPV6_UDP) | + BIT(MLX5E_TT_IPV4_IPSEC_AH) | + BIT(MLX5E_TT_IPV6_IPSEC_AH) | + BIT(MLX5E_TT_IPV4_IPSEC_ESP) | + BIT(MLX5E_TT_IPV6_IPSEC_ESP) | + BIT(MLX5E_TT_IPV4) | + BIT(MLX5E_TT_IPV6) | + BIT(MLX5E_TT_ANY) | + 0; + break; + + case MLX5E_MC_IPV4: + ret = + BIT(MLX5E_TT_IPV4_UDP) | + BIT(MLX5E_TT_IPV4) | + 0; + break; + + case MLX5E_MC_IPV6: + ret = + BIT(MLX5E_TT_IPV6_UDP) | + BIT(MLX5E_TT_IPV6) | + 0; + break; + + case MLX5E_MC_OTHER: + ret = + BIT(MLX5E_TT_ANY) | + 0; + break; + } + + break; + + case MLX5E_ALLMULTI: + ret = + BIT(MLX5E_TT_IPV4_UDP) | + BIT(MLX5E_TT_IPV6_UDP) | + BIT(MLX5E_TT_IPV4) | + BIT(MLX5E_TT_IPV6) | + BIT(MLX5E_TT_ANY) | + 0; + break; + + default: /* MLX5E_PROMISC */ + ret = + BIT(MLX5E_TT_IPV4_TCP) | + BIT(MLX5E_TT_IPV6_TCP) | + BIT(MLX5E_TT_IPV4_UDP) | + BIT(MLX5E_TT_IPV6_UDP) | + BIT(MLX5E_TT_IPV4_IPSEC_AH) | + BIT(MLX5E_TT_IPV6_IPSEC_AH) | + BIT(MLX5E_TT_IPV4_IPSEC_ESP) | + BIT(MLX5E_TT_IPV6_IPSEC_ESP) | + BIT(MLX5E_TT_IPV4) | + BIT(MLX5E_TT_IPV6) | + BIT(MLX5E_TT_ANY) | + 0; + break; + } + + return ret; +} + +static int __mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai, + int type, u32 *mc, u32 *mv) +{ + struct mlx5_flow_destination dest; + u8 match_criteria_enable = 0; + struct mlx5_flow_rule **rule_p; + struct mlx5_flow_table *ft = priv->fts.main.t; + u8 *mc_dmac = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers.dmac_47_16); + u8 *mv_dmac = MLX5_ADDR_OF(fte_match_param, mv, + outer_headers.dmac_47_16); + u32 *tirn = priv->tirn; + u32 tt_vec; + int err = 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + + switch (type) { + case MLX5E_FULLMATCH: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + eth_broadcast_addr(mc_dmac); + ether_addr_copy(mv_dmac, ai->addr); + break; + + case MLX5E_ALLMULTI: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + mc_dmac[0] = 0x01; + mv_dmac[0] = 0x01; + break; + + case MLX5E_PROMISC: + break; + } + + tt_vec = mlx5e_get_tt_vec(ai, type); + + if (tt_vec & BIT(MLX5E_TT_ANY)) { + rule_p = &ai->ft_rule[MLX5E_TT_ANY]; + dest.tir_num = tirn[MLX5E_TT_ANY]; + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_ANY); + } + + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + + if (tt_vec & BIT(MLX5E_TT_IPV4)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV4]; + dest.tir_num = tirn[MLX5E_TT_IPV4]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IP); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV4); + } + + if (tt_vec & BIT(MLX5E_TT_IPV6)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV6]; + dest.tir_num = tirn[MLX5E_TT_IPV6]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IPV6); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV6); + } + + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_UDP); + + if (tt_vec & BIT(MLX5E_TT_IPV4_UDP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_UDP]; + dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IP); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV4_UDP); + } + + if (tt_vec & BIT(MLX5E_TT_IPV6_UDP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_UDP]; + dest.tir_num = tirn[MLX5E_TT_IPV6_UDP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IPV6); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV6_UDP); + } + + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_TCP); + + if (tt_vec & BIT(MLX5E_TT_IPV4_TCP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_TCP]; + dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IP); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV4_TCP); + } + + if (tt_vec & BIT(MLX5E_TT_IPV6_TCP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_TCP]; + dest.tir_num = tirn[MLX5E_TT_IPV6_TCP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IPV6); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + + ai->tt_vec |= BIT(MLX5E_TT_IPV6_TCP); + } + + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_AH); + + if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]; + dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_AH]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IP); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_AH); + } + + if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]; + dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_AH]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IPV6); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_AH); + } + + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_ESP); + + if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]; + dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_ESP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IP); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_ESP); + } + + if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]; + dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_ESP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETH_P_IPV6); + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_ESP); + } + + return 0; + +err_del_ai: + err = PTR_ERR(*rule_p); + *rule_p = NULL; + mlx5e_del_eth_addr_from_flow_table(priv, ai); + + return err; +} + +static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai, int type) +{ + u32 *match_criteria; + u32 *match_value; + int err = 0; + + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_eth_addr_rule_out; + } + + err = __mlx5e_add_eth_addr_rule(priv, ai, type, match_criteria, + match_value); + +add_eth_addr_rule_out: + kvfree(match_criteria); + kvfree(match_value); + + return err; +} + +static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) +{ + struct net_device *ndev = priv->netdev; + int max_list_size; + int list_size; + u16 *vlans; + int vlan; + int err; + int i; + + list_size = 0; + for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) + list_size++; + + max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); + + if (list_size > max_list_size) { + netdev_warn(ndev, + "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n", + list_size, max_list_size); + list_size = max_list_size; + } + + vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL); + if (!vlans) + return -ENOMEM; + + i = 0; + for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) { + if (i >= list_size) + break; + vlans[i++] = vlan; + } + + err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size); + if (err) + netdev_err(ndev, "Failed to modify vport vlans list err(%d)\n", + err); + + kfree(vlans); + return err; +} + +enum mlx5e_vlan_rule_type { + MLX5E_VLAN_RULE_TYPE_UNTAGGED, + MLX5E_VLAN_RULE_TYPE_ANY_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_VID, +}; + +static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, + u16 vid, u32 *mc, u32 *mv) +{ + struct mlx5_flow_table *ft = priv->fts.vlan.t; + struct mlx5_flow_destination dest; + u8 match_criteria_enable = 0; + struct mlx5_flow_rule **rule_p; + int err = 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fts.main.t; + + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); + + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + rule_p = &priv->vlan.untagged_rule; + break; + case MLX5E_VLAN_RULE_TYPE_ANY_VID: + rule_p = &priv->vlan.any_vlan_rule; + MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); + break; + default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ + rule_p = &priv->vlan.active_vlans_rule[vid]; + MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); + MLX5_SET(fte_match_param, mv, outer_headers.first_vid, vid); + break; + } + + *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + &dest); + + if (IS_ERR(*rule_p)) { + err = PTR_ERR(*rule_p); + *rule_p = NULL; + netdev_err(priv->netdev, "%s: add rule failed\n", __func__); + } + + return err; +} + +static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + u32 *match_criteria; + u32 *match_value; + int err = 0; + + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_vlan_rule_out; + } + + if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_VID) + mlx5e_vport_context_update_vlans(priv); + + err = __mlx5e_add_vlan_rule(priv, rule_type, vid, match_criteria, + match_value); + +add_vlan_rule_out: + kvfree(match_criteria); + kvfree(match_value); + + return err; +} + +static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + if (priv->vlan.untagged_rule) { + mlx5_del_flow_rule(priv->vlan.untagged_rule); + priv->vlan.untagged_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_ANY_VID: + if (priv->vlan.any_vlan_rule) { + mlx5_del_flow_rule(priv->vlan.any_vlan_rule); + priv->vlan.any_vlan_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_VID: + mlx5e_vport_context_update_vlans(priv); + if (priv->vlan.active_vlans_rule[vid]) { + mlx5_del_flow_rule(priv->vlan.active_vlans_rule[vid]); + priv->vlan.active_vlans_rule[vid] = NULL; + } + mlx5e_vport_context_update_vlans(priv); + break; + } +} + +void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) +{ + if (!priv->vlan.filter_disabled) + return; + + priv->vlan.filter_disabled = false; + if (priv->netdev->flags & IFF_PROMISC) + return; + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); +} + +void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) +{ + if (priv->vlan.filter_disabled) + return; + + priv->vlan.filter_disabled = true; + if (priv->netdev->flags & IFF_PROMISC) + return; + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); +} + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + set_bit(vid, priv->vlan.active_vlans); + + return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); +} + +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + clear_bit(vid, priv->vlan.active_vlans); + + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); + + return 0; +} + +#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) + +static void mlx5e_execute_action(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_hash_node *hn) +{ + switch (hn->action) { + case MLX5E_ACTION_ADD: + mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH); + hn->action = MLX5E_ACTION_NONE; + break; + + case MLX5E_ACTION_DEL: + mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai); + mlx5e_del_eth_addr_from_hash(hn); + break; + } +} + +static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(netdev); + + mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, + priv->netdev->dev_addr); + + netdev_for_each_uc_addr(ha, netdev) + mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, ha->addr); + + netdev_for_each_mc_addr(ha, netdev) + mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_mc, ha->addr); + + netif_addr_unlock_bh(netdev); +} + +static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type, + u8 addr_array[][ETH_ALEN], int size) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct net_device *ndev = priv->netdev; + struct mlx5e_eth_addr_hash_node *hn; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int i = 0; + int hi; + + addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + + if (is_uc) /* Make sure our own address is pushed first */ + ether_addr_copy(addr_array[i++], ndev->dev_addr); + else if (priv->eth_addr.broadcast_enabled) + ether_addr_copy(addr_array[i++], ndev->broadcast); + + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) { + if (ether_addr_equal(ndev->dev_addr, hn->ai.addr)) + continue; + if (i >= size) + break; + ether_addr_copy(addr_array[i++], hn->ai.addr); + } +} + +static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, + int list_type) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct mlx5e_eth_addr_hash_node *hn; + u8 (*addr_array)[ETH_ALEN] = NULL; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int max_size; + int size; + int err; + int hi; + + size = is_uc ? 0 : (priv->eth_addr.broadcast_enabled ? 1 : 0); + max_size = is_uc ? + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list); + + addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) + size++; + + if (size > max_size) { + netdev_warn(priv->netdev, + "netdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n", + is_uc ? "UC" : "MC", size, max_size); + size = max_size; + } + + if (size) { + addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!addr_array) { + err = -ENOMEM; + goto out; + } + mlx5e_fill_addr_array(priv, list_type, addr_array, size); + } + + err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size); +out: + if (err) + netdev_err(priv->netdev, + "Failed to modify vport %s list err(%d)\n", + is_uc ? "UC" : "MC", err); + kfree(addr_array); +} + +static void mlx5e_vport_context_update(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + + mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC); + mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC); + mlx5_modify_nic_vport_promisc(priv->mdev, 0, + ea->allmulti_enabled, + ea->promisc_enabled); +} + +static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) + mlx5e_execute_action(priv, hn); + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) + mlx5e_execute_action(priv, hn); +} + +static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) + hn->action = MLX5E_ACTION_DEL; + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) + hn->action = MLX5E_ACTION_DEL; + + if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state)) + mlx5e_sync_netdev_addr(priv); + + mlx5e_apply_netdev_addr(priv); +} + +void mlx5e_set_rx_mode_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + set_rx_mode_work); + + struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + struct net_device *ndev = priv->netdev; + + bool rx_mode_enable = !test_bit(MLX5E_STATE_DESTROYING, &priv->state); + bool promisc_enabled = rx_mode_enable && (ndev->flags & IFF_PROMISC); + bool allmulti_enabled = rx_mode_enable && (ndev->flags & IFF_ALLMULTI); + bool broadcast_enabled = rx_mode_enable; + + bool enable_promisc = !ea->promisc_enabled && promisc_enabled; + bool disable_promisc = ea->promisc_enabled && !promisc_enabled; + bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled; + bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled; + bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled; + bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; + + if (enable_promisc) { + mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC); + if (!priv->vlan.filter_disabled) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + } + if (enable_allmulti) + mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); + if (enable_broadcast) + mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); + + mlx5e_handle_netdev_addr(priv); + + if (disable_broadcast) + mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast); + if (disable_allmulti) + mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti); + if (disable_promisc) { + if (!priv->vlan.filter_disabled) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc); + } + + ea->promisc_enabled = promisc_enabled; + ea->allmulti_enabled = allmulti_enabled; + ea->broadcast_enabled = broadcast_enabled; + + mlx5e_vport_context_update(priv); +} + +static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft) +{ + int i; + + for (i = ft->num_groups - 1; i >= 0; i--) { + if (!IS_ERR_OR_NULL(ft->g[i])) + mlx5_destroy_flow_group(ft->g[i]); + ft->g[i] = NULL; + } + ft->num_groups = 0; +} + +void mlx5e_init_eth_addr(struct mlx5e_priv *priv) +{ + ether_addr_copy(priv->eth_addr.broadcast.addr, priv->netdev->broadcast); +} + +#define MLX5E_MAIN_GROUP0_SIZE BIT(3) +#define MLX5E_MAIN_GROUP1_SIZE BIT(1) +#define MLX5E_MAIN_GROUP2_SIZE BIT(0) +#define MLX5E_MAIN_GROUP3_SIZE BIT(14) +#define MLX5E_MAIN_GROUP4_SIZE BIT(13) +#define MLX5E_MAIN_GROUP5_SIZE BIT(11) +#define MLX5E_MAIN_GROUP6_SIZE BIT(2) +#define MLX5E_MAIN_GROUP7_SIZE BIT(1) +#define MLX5E_MAIN_GROUP8_SIZE BIT(0) +#define MLX5E_MAIN_TABLE_SIZE (MLX5E_MAIN_GROUP0_SIZE +\ + MLX5E_MAIN_GROUP1_SIZE +\ + MLX5E_MAIN_GROUP2_SIZE +\ + MLX5E_MAIN_GROUP3_SIZE +\ + MLX5E_MAIN_GROUP4_SIZE +\ + MLX5E_MAIN_GROUP5_SIZE +\ + MLX5E_MAIN_GROUP6_SIZE +\ + MLX5E_MAIN_GROUP7_SIZE +\ + MLX5E_MAIN_GROUP8_SIZE) + +static int __mlx5e_create_main_groups(struct mlx5e_flow_table *ft, u32 *in, + int inlen) +{ + u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + u8 *dmac = MLX5_ADDR_OF(create_flow_group_in, in, + match_criteria.outer_headers.dmac_47_16); + int err; + int ix = 0; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP0_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + eth_broadcast_addr(dmac); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + eth_broadcast_addr(dmac); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP4_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + eth_broadcast_addr(dmac); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP5_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + dmac[0] = 0x01; + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP6_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + dmac[0] = 0x01; + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP7_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + dmac[0] = 0x01; + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP8_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + return 0; + +err_destroy_groups: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + mlx5e_destroy_groups(ft); + + return err; +} + +static int mlx5e_create_main_groups(struct mlx5e_flow_table *ft) +{ + u32 *in; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int err; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + err = __mlx5e_create_main_groups(ft, in, inlen); + + kvfree(in); + return err; +} + +static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5e_flow_table *ft = &priv->fts.main; + int err; + + ft->num_groups = 0; + ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_MAIN_TABLE_SIZE); + + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + ft->g = kcalloc(MLX5E_NUM_MAIN_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) { + err = -ENOMEM; + goto err_destroy_main_flow_table; + } + + err = mlx5e_create_main_groups(ft); + if (err) + goto err_free_g; + return 0; + +err_free_g: + kfree(ft->g); + +err_destroy_main_flow_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; + + return err; +} + +static void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) +{ + mlx5e_destroy_groups(ft); + kfree(ft->g); + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; +} + +static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv) +{ + mlx5e_destroy_flow_table(&priv->fts.main); +} + +#define MLX5E_NUM_VLAN_GROUPS 2 +#define MLX5E_VLAN_GROUP0_SIZE BIT(12) +#define MLX5E_VLAN_GROUP1_SIZE BIT(1) +#define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ + MLX5E_VLAN_GROUP1_SIZE) + +static int __mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft, u32 *in, + int inlen) +{ + int err; + int ix = 0; + u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP0_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + return 0; + +err_destroy_groups: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + mlx5e_destroy_groups(ft); + + return err; +} + +static int mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft) +{ + u32 *in; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int err; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + err = __mlx5e_create_vlan_groups(ft, in, inlen); + + kvfree(in); + return err; +} + +static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5e_flow_table *ft = &priv->fts.vlan; + int err; + + ft->num_groups = 0; + ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_VLAN_TABLE_SIZE); + + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) { + err = -ENOMEM; + goto err_destroy_vlan_flow_table; + } + + err = mlx5e_create_vlan_groups(ft); + if (err) + goto err_free_g; + + return 0; + +err_free_g: + kfree(ft->g); + +err_destroy_vlan_flow_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; + + return err; +} + +static void mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv) +{ + mlx5e_destroy_flow_table(&priv->fts.vlan); +} + +int mlx5e_create_flow_tables(struct mlx5e_priv *priv) +{ + int err; + + priv->fts.ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + + if (!priv->fts.ns) + return -EINVAL; + + err = mlx5e_create_vlan_flow_table(priv); + if (err) + return err; + + err = mlx5e_create_main_flow_table(priv); + if (err) + goto err_destroy_vlan_flow_table; + + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + if (err) + goto err_destroy_main_flow_table; + + return 0; + +err_destroy_main_flow_table: + mlx5e_destroy_main_flow_table(priv); +err_destroy_vlan_flow_table: + mlx5e_destroy_vlan_flow_table(priv); + + return err; +} + +void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv) +{ + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + mlx5e_destroy_main_flow_table(priv); + mlx5e_destroy_vlan_flow_table(priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f6a8cc787603..d4601a564699 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -30,8 +30,9 @@ * SOFTWARE. */ -#include <linux/mlx5/flow_table.h> +#include <linux/mlx5/fs.h> #include "en.h" +#include "eswitch.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; @@ -63,7 +64,7 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv) u8 port_state; port_state = mlx5_query_vport_state(mdev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT); + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); if (port_state == VPORT_STATE_UP) netif_carrier_on(priv->netdev); @@ -1931,6 +1932,79 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) return err; } +static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac); +} + +static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1, + vlan, qos); +} + +static int mlx5_vport_link2ifla(u8 esw_link) +{ + switch (esw_link) { + case MLX5_ESW_VPORT_ADMIN_STATE_DOWN: + return IFLA_VF_LINK_STATE_DISABLE; + case MLX5_ESW_VPORT_ADMIN_STATE_UP: + return IFLA_VF_LINK_STATE_ENABLE; + } + return IFLA_VF_LINK_STATE_AUTO; +} + +static int mlx5_ifla_link2vport(u8 ifla_link) +{ + switch (ifla_link) { + case IFLA_VF_LINK_STATE_DISABLE: + return MLX5_ESW_VPORT_ADMIN_STATE_DOWN; + case IFLA_VF_LINK_STATE_ENABLE: + return MLX5_ESW_VPORT_ADMIN_STATE_UP; + } + return MLX5_ESW_VPORT_ADMIN_STATE_AUTO; +} + +static int mlx5e_set_vf_link_state(struct net_device *dev, int vf, + int link_state) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1, + mlx5_ifla_link2vport(link_state)); +} + +static int mlx5e_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi); + if (err) + return err; + ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate); + return 0; +} + +static int mlx5e_get_vf_stats(struct net_device *dev, + int vf, struct ifla_vf_stats *vf_stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, + vf_stats); +} + static struct net_device_ops mlx5e_netdev_ops = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -1941,7 +2015,7 @@ static struct net_device_ops mlx5e_netdev_ops = { .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, .ndo_set_features = mlx5e_set_features, - .ndo_change_mtu = mlx5e_change_mtu, + .ndo_change_mtu = mlx5e_change_mtu }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -2028,7 +2102,12 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5_query_nic_vport_mac_address(priv->mdev, netdev->dev_addr); + mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr); + if (is_zero_ether_addr(netdev->dev_addr) && + !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) { + eth_hw_addr_random(netdev); + mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr); + } } static void mlx5e_build_netdev(struct net_device *netdev) @@ -2041,6 +2120,14 @@ static void mlx5e_build_netdev(struct net_device *netdev) if (priv->params.num_tc > 1) mlx5e_netdev_ops.ndo_select_queue = mlx5e_select_queue; + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + mlx5e_netdev_ops.ndo_set_vf_mac = mlx5e_set_vf_mac; + mlx5e_netdev_ops.ndo_set_vf_vlan = mlx5e_set_vf_vlan; + mlx5e_netdev_ops.ndo_get_vf_config = mlx5e_get_vf_config; + mlx5e_netdev_ops.ndo_set_vf_link_state = mlx5e_set_vf_link_state; + mlx5e_netdev_ops.ndo_get_vf_stats = mlx5e_get_vf_stats; + } + netdev->netdev_ops = &mlx5e_netdev_ops; netdev->watchdog_timeo = 15 * HZ; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 713ead583347..23c244a7e5d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -35,6 +35,9 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif enum { MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), @@ -287,6 +290,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) break; #endif +#ifdef CONFIG_MLX5_CORE_EN + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); + break; +#endif default: mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn); @@ -459,6 +467,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, pg)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT); + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && + MLX5_CAP_GEN(dev, vport_group_manager) && + mlx5_core_is_pf(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, "mlx5_cmd_eq", &dev->priv.uuari.uars[0]); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c new file mode 100644 index 000000000000..bc3d9f8a75c1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -0,0 +1,1097 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include "mlx5_core.h" +#include "eswitch.h" + +#define UPLINK_VPORT 0xFFFF + +#define MLX5_DEBUG_ESWITCH_MASK BIT(3) + +#define esw_info(dev, format, ...) \ + pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_warn(dev, format, ...) \ + pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_debug(dev, format, ...) \ + mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + +enum { + MLX5_ACTION_NONE = 0, + MLX5_ACTION_ADD = 1, + MLX5_ACTION_DEL = 2, +}; + +/* E-Switch UC L2 table hash node */ +struct esw_uc_addr { + struct l2addr_node node; + u32 table_index; + u32 vport; +}; + +/* E-Switch MC FDB table hash node */ +struct esw_mc_addr { /* SRIOV only */ + struct l2addr_node node; + struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */ + u32 refcnt; +}; + +/* Vport UC/MC hash node */ +struct vport_addr { + struct l2addr_node node; + u8 action; + u32 vport; + struct mlx5_flow_rule *flow_rule; /* SRIOV only */ +}; + +enum { + UC_ADDR_CHANGE = BIT(0), + MC_ADDR_CHANGE = BIT(1), +}; + +/* Vport context events */ +#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ + MC_ADDR_CHANGE) + +static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, + u32 events_mask) +{ + int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]; + int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int err; + + memset(out, 0, sizeof(out)); + memset(in, 0, sizeof(in)); + + MLX5_SET(modify_nic_vport_context_in, in, + opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1); + + if (events_mask & UC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_uc_address_change, 1); + if (events_mask & MC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_mc_address_change, 1); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + goto ex; + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto ex; + return 0; +ex: + return err; +} + +/* E-Switch vport context HW commands */ +static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + + MLX5_SET(query_esw_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_esw_vport_context_in, in, other_vport, 1); + + return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); +} + +static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, + u16 *vlan, u8 *qos) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)]; + int err; + bool cvlan_strip; + bool cvlan_insert; + + memset(out, 0, sizeof(out)); + + *vlan = 0; + *qos = 0; + + if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || + !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) + return -ENOTSUPP; + + err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out)); + if (err) + goto out; + + cvlan_strip = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.vport_cvlan_strip); + + cvlan_insert = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.vport_cvlan_insert); + + if (cvlan_strip || cvlan_insert) { + *vlan = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.cvlan_id); + *qos = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.cvlan_pcp); + } + + esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n", + vport, *vlan, *qos); +out: + return err; +} + +static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, + void *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)]; + + memset(out, 0, sizeof(out)); + + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + + MLX5_SET(modify_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); + + return mlx5_cmd_exec_check_status(dev, in, inlen, + out, sizeof(out)); +} + +static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, + u16 vlan, u8 qos, bool set) +{ + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || + !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) + return -ENOTSUPP; + + esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n", + vport, vlan, qos, set); + + if (set) { + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_strip, 1); + /* insert only if no vlan in packet */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, 1); + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_pcp, qos); + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_id, vlan); + } + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_strip, 1); + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_insert, 1); + + return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in)); +} + +/* HW L2 Table (MPFS) management */ +static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index, + u8 *mac, u8 vlan_valid, u16 vlan) +{ + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)]; + u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)]; + u8 *in_mac_addr; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(set_l2_table_entry_in, in, opcode, + MLX5_CMD_OP_SET_L2_TABLE_ENTRY); + MLX5_SET(set_l2_table_entry_in, in, table_index, index); + MLX5_SET(set_l2_table_entry_in, in, vlan_valid, vlan_valid); + MLX5_SET(set_l2_table_entry_in, in, vlan, vlan); + + in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); + ether_addr_copy(&in_mac_addr[2], mac); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); +} + +static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index) +{ + u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)]; + u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(delete_l2_table_entry_in, in, opcode, + MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + MLX5_SET(delete_l2_table_entry_in, in, table_index, index); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); +} + +static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix) +{ + int err = 0; + + *ix = find_first_zero_bit(l2_table->bitmap, l2_table->size); + if (*ix >= l2_table->size) + err = -ENOSPC; + else + __set_bit(*ix, l2_table->bitmap); + + return err; +} + +static void free_l2_table_index(struct mlx5_l2_table *l2_table, u32 ix) +{ + __clear_bit(ix, l2_table->bitmap); +} + +static int set_l2_table_entry(struct mlx5_core_dev *dev, u8 *mac, + u8 vlan_valid, u16 vlan, + u32 *index) +{ + struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; + int err; + + err = alloc_l2_table_index(l2_table, index); + if (err) + return err; + + err = set_l2_table_entry_cmd(dev, *index, mac, vlan_valid, vlan); + if (err) + free_l2_table_index(l2_table, *index); + + return err; +} + +static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) +{ + struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; + + del_l2_table_entry_cmd(dev, index); + free_l2_table_index(l2_table, index); +} + +/* E-Switch FDB */ +static struct mlx5_flow_rule * +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +{ + int match_header = MLX5_MATCH_OUTER_HEADERS; + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule = NULL; + u32 *match_v; + u32 *match_c; + u8 *dmac_v; + u8 *dmac_c; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + pr_warn("FDB: Failed to alloc match parameters\n"); + goto out; + } + dmac_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers.dmac_47_16); + dmac_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers.dmac_47_16); + + ether_addr_copy(dmac_v, mac); + /* Match criteria mask */ + memset(dmac_c, 0xff, 6); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = vport; + + esw_debug(esw->dev, + "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", + dmac_v, dmac_c, vport); + flow_rule = + mlx5_add_flow_rule(esw->fdb_table.fdb, + match_header, + match_c, + match_v, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR_OR_NULL(flow_rule)) { + pr_warn( + "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", + dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); + flow_rule = NULL; + } +out: + kfree(match_v); + kfree(match_c); + return flow_rule; +} + +static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *g; + void *match_criteria; + int table_size; + u32 *flow_group_in; + u8 *dmac; + int err = 0; + + esw_debug(dev, "Create FDB log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -ENOMEM; + } + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + memset(flow_group_in, 0, inlen); + + table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + fdb = mlx5_create_flow_table(root_ns, 0, table_size); + if (IS_ERR_OR_NULL(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create FDB Table err %d\n", err); + goto out; + } + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); + eth_broadcast_addr(dmac); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR_OR_NULL(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create flow group err(%d)\n", err); + goto out; + } + + esw->fdb_table.addr_grp = g; + esw->fdb_table.fdb = fdb; +out: + kfree(flow_group_in); + if (err && !IS_ERR_OR_NULL(fdb)) + mlx5_destroy_flow_table(fdb); + return err; +} + +static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.fdb) + return; + + esw_debug(esw->dev, "Destroy FDB Table\n"); + mlx5_destroy_flow_group(esw->fdb_table.addr_grp); + mlx5_destroy_flow_table(esw->fdb_table.fdb); + esw->fdb_table.fdb = NULL; + esw->fdb_table.addr_grp = NULL; +} + +/* E-Switch vport UC/MC lists management */ +typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, + struct vport_addr *vaddr); + +static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + int err; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (esw_uc) { + esw_warn(esw->dev, + "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n", + mac, vport, esw_uc->vport); + return -EEXIST; + } + + esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL); + if (!esw_uc) + return -ENOMEM; + esw_uc->vport = vport; + + err = set_l2_table_entry(esw->dev, mac, 0, 0, &esw_uc->table_index); + if (err) + goto abort; + + if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + return err; +abort: + l2addr_hash_del(esw_uc); + return err; +} + +static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (!esw_uc || esw_uc->vport != vport) { + esw_debug(esw->dev, + "MAC(%pM) doesn't belong to vport (%d)\n", + mac, vport); + return -EINVAL; + } + esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + + del_l2_table_entry(esw->dev, esw_uc->table_index); + + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule); + vaddr->flow_rule = NULL; + + l2addr_hash_del(esw_uc); + return 0; +} + +static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (esw_mc) + goto add; + + esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL); + if (!esw_mc) + return -ENOMEM; + + esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ + esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); +add: + esw_mc->refcnt++; + /* Forward MC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + esw_debug(esw->dev, + "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, + esw_mc->refcnt, esw_mc->uplink_rule); + return 0; +} + +static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to find eswitch MC addr for MAC(%pM) vport(%d)", + mac, vport); + return -EINVAL; + } + esw_debug(esw->dev, + "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, esw_mc->refcnt, + esw_mc->uplink_rule); + + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule); + vaddr->flow_rule = NULL; + + if (--esw_mc->refcnt) + return 0; + + if (esw_mc->uplink_rule) + mlx5_del_flow_rule(esw_mc->uplink_rule); + + l2addr_hash_del(esw_mc); + return 0; +} + +/* Apply vport UC/MC list to HW l2 table and FDB table */ +static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + vport_addr_action vport_addr_add; + vport_addr_action vport_addr_del; + struct vport_addr *addr; + struct l2addr_node *node; + struct hlist_head *hash; + struct hlist_node *tmp; + int hi; + + vport_addr_add = is_uc ? esw_add_uc_addr : + esw_add_mc_addr; + vport_addr_del = is_uc ? esw_del_uc_addr : + esw_del_mc_addr; + + hash = is_uc ? vport->uc_list : vport->mc_list; + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + switch (addr->action) { + case MLX5_ACTION_ADD: + vport_addr_add(esw, addr); + addr->action = MLX5_ACTION_NONE; + break; + case MLX5_ACTION_DEL: + vport_addr_del(esw, addr); + l2addr_hash_del(addr); + break; + } + } +} + +/* Sync vport UC/MC list from vport context */ +static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + u8 (*mac_list)[ETH_ALEN]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_head *hash; + struct hlist_node *tmp; + int size; + int err; + int hi; + int i; + + size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) : + MLX5_MAX_MC_PER_VPORT(esw->dev); + + mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!mac_list) + return; + + hash = is_uc ? vport->uc_list : vport->mc_list; + + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + + err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type, + mac_list, &size); + if (err) + return; + esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n", + vport_num, is_uc ? "UC" : "MC", size); + + for (i = 0; i < size; i++) { + if (is_uc && !is_valid_ether_addr(mac_list[i])) + continue; + + if (!is_uc && !is_multicast_ether_addr(mac_list[i])) + continue; + + addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr); + if (addr) { + addr->action = MLX5_ACTION_NONE; + continue; + } + + addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr, + GFP_KERNEL); + if (!addr) { + esw_warn(esw->dev, + "Failed to add MAC(%pM) to vport[%d] DB\n", + mac_list[i], vport_num); + continue; + } + addr->vport = vport_num; + addr->action = MLX5_ACTION_ADD; + } + kfree(mac_list); +} + +static void esw_vport_change_handler(struct work_struct *work) +{ + struct mlx5_vport *vport = + container_of(work, struct mlx5_vport, vport_change_handler); + struct mlx5_core_dev *dev = vport->dev; + struct mlx5_eswitch *esw = dev->priv.eswitch; + u8 mac[ETH_ALEN]; + + mlx5_query_nic_vport_mac_address(dev, vport->vport, mac); + esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", + vport->vport, mac); + + if (vport->enabled_events & UC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + } + + if (vport->enabled_events & MC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + } + + esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport); + if (vport->enabled) + arm_vport_context_events_cmd(dev, vport->vport, + vport->enabled_events); +} + +static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, + int enable_events) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + unsigned long flags; + + WARN_ON(vport->enabled); + + esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_AUTO); + + /* Sync with current vport context */ + vport->enabled_events = enable_events; + esw_vport_change_handler(&vport->vport_change_handler); + + spin_lock_irqsave(&vport->lock, flags); + vport->enabled = true; + spin_unlock_irqrestore(&vport->lock, flags); + + arm_vport_context_events_cmd(esw->dev, vport_num, enable_events); + + esw->enabled_vports++; + esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); +} + +static void esw_cleanup_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_node *tmp; + int hi; + + for_each_l2hash_node(node, tmp, vport->uc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_UC); + + for_each_l2hash_node(node, tmp, vport->mc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_MC); +} + +static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + unsigned long flags; + + if (!vport->enabled) + return; + + esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num); + /* Mark this vport as disabled to discard new events */ + spin_lock_irqsave(&vport->lock, flags); + vport->enabled = false; + vport->enabled_events = 0; + spin_unlock_irqrestore(&vport->lock, flags); + + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_DOWN); + /* Wait for current already scheduled events to complete */ + flush_workqueue(esw->work_queue); + /* Disable events from this vport */ + arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + /* We don't assume VFs will cleanup after themselves */ + esw_cleanup_vport(esw, vport_num); + esw->enabled_vports--; +} + +/* Public E-Switch API */ +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) +{ + int err; + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); + return -ENOTSUPP; + } + + esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs); + + esw_disable_vport(esw, 0); + + err = esw_create_fdb_table(esw, nvfs + 1); + if (err) + goto abort; + + for (i = 0; i <= nvfs; i++) + esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS); + + esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", + esw->enabled_vports); + return 0; + +abort: + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + return err; +} + +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) +{ + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_info(esw->dev, "disable SRIOV: active vports(%d)\n", + esw->enabled_vports); + + for (i = 0; i < esw->total_vports; i++) + esw_disable_vport(esw, i); + + esw_destroy_fdb_table(esw); + + /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); +} + +int mlx5_eswitch_init(struct mlx5_core_dev *dev) +{ + int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); + int total_vports = 1 + pci_sriov_get_totalvfs(dev->pdev); + struct mlx5_eswitch *esw; + int vport_num; + int err; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + esw_info(dev, + "Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n", + total_vports, l2_table_size, + MLX5_MAX_UC_PER_VPORT(dev), + MLX5_MAX_MC_PER_VPORT(dev)); + + esw = kzalloc(sizeof(*esw), GFP_KERNEL); + if (!esw) + return -ENOMEM; + + esw->dev = dev; + + esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size), + sizeof(uintptr_t), GFP_KERNEL); + if (!esw->l2_table.bitmap) { + err = -ENOMEM; + goto abort; + } + esw->l2_table.size = l2_table_size; + + esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); + if (!esw->work_queue) { + err = -ENOMEM; + goto abort; + } + + esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport), + GFP_KERNEL); + if (!esw->vports) { + err = -ENOMEM; + goto abort; + } + + for (vport_num = 0; vport_num < total_vports; vport_num++) { + struct mlx5_vport *vport = &esw->vports[vport_num]; + + vport->vport = vport_num; + vport->dev = dev; + INIT_WORK(&vport->vport_change_handler, + esw_vport_change_handler); + spin_lock_init(&vport->lock); + } + + esw->total_vports = total_vports; + esw->enabled_vports = 0; + + dev->priv.eswitch = esw; + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + /* VF Vports will be enabled when SRIOV is enabled */ + return 0; +abort: + if (esw->work_queue) + destroy_workqueue(esw->work_queue); + kfree(esw->l2_table.bitmap); + kfree(esw->vports); + kfree(esw); + return err; +} + +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_info(esw->dev, "cleanup\n"); + esw_disable_vport(esw, 0); + + esw->dev->priv.eswitch = NULL; + destroy_workqueue(esw->work_queue); + kfree(esw->l2_table.bitmap); + kfree(esw->vports); + kfree(esw); +} + +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) +{ + struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change; + u16 vport_num = be16_to_cpu(vc_eqe->vport_num); + struct mlx5_vport *vport; + + if (!esw) { + pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n", + vport_num); + return; + } + + vport = &esw->vports[vport_num]; + spin_lock(&vport->lock); + if (vport->enabled) + queue_work(esw->work_queue, &vport->vport_change_handler); + spin_unlock(&vport->lock); +} + +/* Vport Administration */ +#define ESW_ALLOWED(esw) \ + (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) +#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) + +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + int vport, u8 mac[ETH_ALEN]) +{ + int err = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); + if (err) { + mlx5_core_warn(esw->dev, + "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n", + vport, err); + return err; + } + + return err; +} + +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + int vport, int link_state) +{ + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + return mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport, link_state); +} + +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + int vport, struct ifla_vf_info *ivi) +{ + u16 vlan; + u8 qos; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + memset(ivi, 0, sizeof(*ivi)); + ivi->vf = vport - 1; + + mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac); + ivi->linkstate = mlx5_query_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport); + query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos); + ivi->vlan = vlan; + ivi->qos = qos; + ivi->spoofchk = 0; + + return 0; +} + +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos) +{ + int set = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7)) + return -EINVAL; + + if (vlan || qos) + set = 1; + + return modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); +} + +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + int vport, + struct ifla_vf_stats *vf_stats) +{ + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + int err = 0; + u32 *out; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + MLX5_SET(query_vport_counter_in, in, op_mod, 0); + MLX5_SET(query_vport_counter_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_counter_in, in, other_vport, 1); + + memset(out, 0, outlen); + err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen); + if (err) + goto free_out; + + #define MLX5_GET_CTR(p, x) \ + MLX5_GET64(query_vport_counter_out, p, x) + + memset(vf_stats, 0, sizeof(*vf_stats)); + vf_stats->rx_packets = + MLX5_GET_CTR(out, received_eth_unicast.packets) + + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_eth_broadcast.packets); + + vf_stats->rx_bytes = + MLX5_GET_CTR(out, received_eth_unicast.octets) + + MLX5_GET_CTR(out, received_eth_multicast.octets) + + MLX5_GET_CTR(out, received_eth_broadcast.octets); + + vf_stats->tx_packets = + MLX5_GET_CTR(out, transmitted_eth_unicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); + + vf_stats->tx_bytes = + MLX5_GET_CTR(out, transmitted_eth_unicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + + vf_stats->multicast = + MLX5_GET_CTR(out, received_eth_multicast.packets); + + vf_stats->broadcast = + MLX5_GET_CTR(out, received_eth_broadcast.packets); + +free_out: + kvfree(out); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h new file mode 100644 index 000000000000..3416a428f70f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_ESWITCH_H__ +#define __MLX5_ESWITCH_H__ + +#include <linux/if_ether.h> +#include <linux/if_link.h> +#include <linux/mlx5/device.h> + +#define MLX5_MAX_UC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list)) + +#define MLX5_MAX_MC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) + +#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) +#define MLX5_L2_ADDR_HASH(addr) (addr[5]) + +/* L2 -mac address based- hash helpers */ +struct l2addr_node { + struct hlist_node hlist; + u8 addr[ETH_ALEN]; +}; + +#define for_each_l2hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) + +#define l2addr_hash_find(hash, mac, type) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + bool found = false; \ + type *ptr = NULL; \ + \ + hlist_for_each_entry(ptr, &hash[ix], node.hlist) \ + if (ether_addr_equal(ptr->node.addr, mac)) {\ + found = true; \ + break; \ + } \ + if (!found) \ + ptr = NULL; \ + ptr; \ +}) + +#define l2addr_hash_add(hash, mac, type, gfp) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + type *ptr = NULL; \ + \ + ptr = kzalloc(sizeof(type), gfp); \ + if (ptr) { \ + ether_addr_copy(ptr->node.addr, mac); \ + hlist_add_head(&ptr->node.hlist, &hash[ix]);\ + } \ + ptr; \ +}) + +#define l2addr_hash_del(ptr) ({ \ + hlist_del(&ptr->node.hlist); \ + kfree(ptr); \ +}) + +struct mlx5_vport { + struct mlx5_core_dev *dev; + int vport; + struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct work_struct vport_change_handler; + + /* This spinlock protects access to vport data, between + * "esw_vport_disable" and ongoing interrupt "mlx5_eswitch_vport_event" + * once vport marked as disabled new interrupts are discarded. + */ + spinlock_t lock; /* vport events sync */ + bool enabled; + u16 enabled_events; +}; + +struct mlx5_l2_table { + struct hlist_head l2_hash[MLX5_L2_ADDR_HASH_SIZE]; + u32 size; + unsigned long *bitmap; +}; + +struct mlx5_eswitch_fdb { + void *fdb; + struct mlx5_flow_group *addr_grp; +}; + +struct mlx5_eswitch { + struct mlx5_core_dev *dev; + struct mlx5_l2_table l2_table; + struct mlx5_eswitch_fdb fdb_table; + struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; + struct workqueue_struct *work_queue; + struct mlx5_vport *vports; + int total_vports; + int enabled_vports; +}; + +/* E-Switch API */ +int mlx5_eswitch_init(struct mlx5_core_dev *dev); +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs); +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + int vport, u8 mac[ETH_ALEN]); +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + int vport, int link_state); +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos); +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + int vport, struct ifla_vf_info *ivi); +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + int vport, + struct ifla_vf_stats *vf_stats); + +#endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c deleted file mode 100644 index ca90b9bc3b95..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c +++ /dev/null @@ -1,422 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/export.h> -#include <linux/mlx5/driver.h> -#include <linux/mlx5/flow_table.h> -#include "mlx5_core.h" - -struct mlx5_ftg { - struct mlx5_flow_table_group g; - u32 id; - u32 start_ix; -}; - -struct mlx5_flow_table { - struct mlx5_core_dev *dev; - u8 level; - u8 type; - u32 id; - struct mutex mutex; /* sync bitmap alloc */ - u16 num_groups; - struct mlx5_ftg *group; - unsigned long *bitmap; - u32 size; -}; - -static int mlx5_set_flow_entry_cmd(struct mlx5_flow_table *ft, u32 group_ix, - u32 flow_index, void *flow_context) -{ - u32 out[MLX5_ST_SZ_DW(set_fte_out)]; - u32 *in; - void *in_flow_context; - int fcdls = - MLX5_GET(flow_context, flow_context, destination_list_size) * - MLX5_ST_SZ_BYTES(dest_format_struct); - int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fcdls; - int err; - - in = mlx5_vzalloc(inlen); - if (!in) { - mlx5_core_warn(ft->dev, "failed to allocate inbox\n"); - return -ENOMEM; - } - - MLX5_SET(set_fte_in, in, table_type, ft->type); - MLX5_SET(set_fte_in, in, table_id, ft->id); - MLX5_SET(set_fte_in, in, flow_index, flow_index); - MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); - - in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); - memcpy(in_flow_context, flow_context, - MLX5_ST_SZ_BYTES(flow_context) + fcdls); - - MLX5_SET(flow_context, in_flow_context, group_id, - ft->group[group_ix].id); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(ft->dev, in, inlen, out, - sizeof(out)); - kvfree(in); - - return err; -} - -static void mlx5_del_flow_entry_cmd(struct mlx5_flow_table *ft, u32 flow_index) -{ - u32 in[MLX5_ST_SZ_DW(delete_fte_in)]; - u32 out[MLX5_ST_SZ_DW(delete_fte_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - -#define MLX5_SET_DFTEI(p, x, v) MLX5_SET(delete_fte_in, p, x, v) - MLX5_SET_DFTEI(in, table_type, ft->type); - MLX5_SET_DFTEI(in, table_id, ft->id); - MLX5_SET_DFTEI(in, flow_index, flow_index); - MLX5_SET_DFTEI(in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); - - mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); -} - -static void mlx5_destroy_flow_group_cmd(struct mlx5_flow_table *ft, int i) -{ - u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - -#define MLX5_SET_DFGI(p, x, v) MLX5_SET(destroy_flow_group_in, p, x, v) - MLX5_SET_DFGI(in, table_type, ft->type); - MLX5_SET_DFGI(in, table_id, ft->id); - MLX5_SET_DFGI(in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); - MLX5_SET_DFGI(in, group_id, ft->group[i].id); - mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); -} - -static int mlx5_create_flow_group_cmd(struct mlx5_flow_table *ft, int i) -{ - u32 out[MLX5_ST_SZ_DW(create_flow_group_out)]; - u32 *in; - void *in_match_criteria; - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_flow_table_group *g = &ft->group[i].g; - u32 start_ix = ft->group[i].start_ix; - u32 end_ix = start_ix + (1 << g->log_sz) - 1; - int err; - - in = mlx5_vzalloc(inlen); - if (!in) { - mlx5_core_warn(ft->dev, "failed to allocate inbox\n"); - return -ENOMEM; - } - in_match_criteria = MLX5_ADDR_OF(create_flow_group_in, in, - match_criteria); - - memset(out, 0, sizeof(out)); - -#define MLX5_SET_CFGI(p, x, v) MLX5_SET(create_flow_group_in, p, x, v) - MLX5_SET_CFGI(in, table_type, ft->type); - MLX5_SET_CFGI(in, table_id, ft->id); - MLX5_SET_CFGI(in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP); - MLX5_SET_CFGI(in, start_flow_index, start_ix); - MLX5_SET_CFGI(in, end_flow_index, end_ix); - MLX5_SET_CFGI(in, match_criteria_enable, g->match_criteria_enable); - - memcpy(in_match_criteria, g->match_criteria, - MLX5_ST_SZ_BYTES(fte_match_param)); - - err = mlx5_cmd_exec_check_status(ft->dev, in, inlen, out, - sizeof(out)); - if (!err) - ft->group[i].id = MLX5_GET(create_flow_group_out, out, - group_id); - - kvfree(in); - - return err; -} - -static void mlx5_destroy_flow_table_groups(struct mlx5_flow_table *ft) -{ - int i; - - for (i = 0; i < ft->num_groups; i++) - mlx5_destroy_flow_group_cmd(ft, i); -} - -static int mlx5_create_flow_table_groups(struct mlx5_flow_table *ft) -{ - int err; - int i; - - for (i = 0; i < ft->num_groups; i++) { - err = mlx5_create_flow_group_cmd(ft, i); - if (err) - goto err_destroy_flow_table_groups; - } - - return 0; - -err_destroy_flow_table_groups: - for (i--; i >= 0; i--) - mlx5_destroy_flow_group_cmd(ft, i); - - return err; -} - -static int mlx5_create_flow_table_cmd(struct mlx5_flow_table *ft) -{ - u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]; - u32 out[MLX5_ST_SZ_DW(create_flow_table_out)]; - int err; - - memset(in, 0, sizeof(in)); - - MLX5_SET(create_flow_table_in, in, table_type, ft->type); - MLX5_SET(create_flow_table_in, in, level, ft->level); - MLX5_SET(create_flow_table_in, in, log_size, order_base_2(ft->size)); - - MLX5_SET(create_flow_table_in, in, opcode, - MLX5_CMD_OP_CREATE_FLOW_TABLE); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, - sizeof(out)); - if (err) - return err; - - ft->id = MLX5_GET(create_flow_table_out, out, table_id); - - return 0; -} - -static void mlx5_destroy_flow_table_cmd(struct mlx5_flow_table *ft) -{ - u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - -#define MLX5_SET_DFTI(p, x, v) MLX5_SET(destroy_flow_table_in, p, x, v) - MLX5_SET_DFTI(in, table_type, ft->type); - MLX5_SET_DFTI(in, table_id, ft->id); - MLX5_SET_DFTI(in, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE); - - mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); -} - -static int mlx5_find_group(struct mlx5_flow_table *ft, u8 match_criteria_enable, - u32 *match_criteria, int *group_ix) -{ - void *mc_outer = MLX5_ADDR_OF(fte_match_param, match_criteria, - outer_headers); - void *mc_misc = MLX5_ADDR_OF(fte_match_param, match_criteria, - misc_parameters); - void *mc_inner = MLX5_ADDR_OF(fte_match_param, match_criteria, - inner_headers); - int mc_outer_sz = MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4); - int mc_misc_sz = MLX5_ST_SZ_BYTES(fte_match_set_misc); - int mc_inner_sz = MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4); - int i; - - for (i = 0; i < ft->num_groups; i++) { - struct mlx5_flow_table_group *g = &ft->group[i].g; - void *gmc_outer = MLX5_ADDR_OF(fte_match_param, - g->match_criteria, - outer_headers); - void *gmc_misc = MLX5_ADDR_OF(fte_match_param, - g->match_criteria, - misc_parameters); - void *gmc_inner = MLX5_ADDR_OF(fte_match_param, - g->match_criteria, - inner_headers); - - if (g->match_criteria_enable != match_criteria_enable) - continue; - - if (match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) - if (memcmp(mc_outer, gmc_outer, mc_outer_sz)) - continue; - - if (match_criteria_enable & MLX5_MATCH_MISC_PARAMETERS) - if (memcmp(mc_misc, gmc_misc, mc_misc_sz)) - continue; - - if (match_criteria_enable & MLX5_MATCH_INNER_HEADERS) - if (memcmp(mc_inner, gmc_inner, mc_inner_sz)) - continue; - - *group_ix = i; - return 0; - } - - return -EINVAL; -} - -static int alloc_flow_index(struct mlx5_flow_table *ft, int group_ix, u32 *ix) -{ - struct mlx5_ftg *g = &ft->group[group_ix]; - int err = 0; - - mutex_lock(&ft->mutex); - - *ix = find_next_zero_bit(ft->bitmap, ft->size, g->start_ix); - if (*ix >= (g->start_ix + (1 << g->g.log_sz))) - err = -ENOSPC; - else - __set_bit(*ix, ft->bitmap); - - mutex_unlock(&ft->mutex); - - return err; -} - -static void mlx5_free_flow_index(struct mlx5_flow_table *ft, u32 ix) -{ - __clear_bit(ix, ft->bitmap); -} - -int mlx5_add_flow_table_entry(void *flow_table, u8 match_criteria_enable, - void *match_criteria, void *flow_context, - u32 *flow_index) -{ - struct mlx5_flow_table *ft = flow_table; - int group_ix; - int err; - - err = mlx5_find_group(ft, match_criteria_enable, match_criteria, - &group_ix); - if (err) { - mlx5_core_warn(ft->dev, "mlx5_find_group failed\n"); - return err; - } - - err = alloc_flow_index(ft, group_ix, flow_index); - if (err) { - mlx5_core_warn(ft->dev, "alloc_flow_index failed\n"); - return err; - } - - return mlx5_set_flow_entry_cmd(ft, group_ix, *flow_index, flow_context); -} -EXPORT_SYMBOL(mlx5_add_flow_table_entry); - -void mlx5_del_flow_table_entry(void *flow_table, u32 flow_index) -{ - struct mlx5_flow_table *ft = flow_table; - - mlx5_del_flow_entry_cmd(ft, flow_index); - mlx5_free_flow_index(ft, flow_index); -} -EXPORT_SYMBOL(mlx5_del_flow_table_entry); - -void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type, - u16 num_groups, - struct mlx5_flow_table_group *group) -{ - struct mlx5_flow_table *ft; - u32 start_ix = 0; - u32 ft_size = 0; - void *gr; - void *bm; - int err; - int i; - - for (i = 0; i < num_groups; i++) - ft_size += (1 << group[i].log_sz); - - ft = kzalloc(sizeof(*ft), GFP_KERNEL); - gr = kcalloc(num_groups, sizeof(struct mlx5_ftg), GFP_KERNEL); - bm = kcalloc(BITS_TO_LONGS(ft_size), sizeof(uintptr_t), GFP_KERNEL); - if (!ft || !gr || !bm) - goto err_free_ft; - - ft->group = gr; - ft->bitmap = bm; - ft->num_groups = num_groups; - ft->level = level; - ft->type = table_type; - ft->size = ft_size; - ft->dev = dev; - mutex_init(&ft->mutex); - - for (i = 0; i < ft->num_groups; i++) { - memcpy(&ft->group[i].g, &group[i], sizeof(*group)); - ft->group[i].start_ix = start_ix; - start_ix += 1 << group[i].log_sz; - } - - err = mlx5_create_flow_table_cmd(ft); - if (err) - goto err_free_ft; - - err = mlx5_create_flow_table_groups(ft); - if (err) - goto err_destroy_flow_table_cmd; - - return ft; - -err_destroy_flow_table_cmd: - mlx5_destroy_flow_table_cmd(ft); - -err_free_ft: - mlx5_core_warn(dev, "failed to alloc flow table\n"); - kfree(bm); - kfree(gr); - kfree(ft); - - return NULL; -} -EXPORT_SYMBOL(mlx5_create_flow_table); - -void mlx5_destroy_flow_table(void *flow_table) -{ - struct mlx5_flow_table *ft = flow_table; - - mlx5_destroy_flow_table_groups(ft); - mlx5_destroy_flow_table_cmd(ft); - kfree(ft->bitmap); - kfree(ft->group); - kfree(ft); -} -EXPORT_SYMBOL(mlx5_destroy_flow_table); - -u32 mlx5_get_flow_table_id(void *flow_table) -{ - struct mlx5_flow_table *ft = flow_table; - - return ft->id; -} -EXPORT_SYMBOL(mlx5_get_flow_table_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c new file mode 100644 index 000000000000..5096f4f336bd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> +#include <linux/mlx5/mlx5_ifc.h> + +#include "fs_core.h" +#include "fs_cmd.h" +#include "mlx5_core.h" + +int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, + enum fs_flow_table_type type, unsigned int level, + unsigned int log_size, unsigned int *table_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)]; + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(create_flow_table_in, in, opcode, + MLX5_CMD_OP_CREATE_FLOW_TABLE); + + MLX5_SET(create_flow_table_in, in, table_type, type); + MLX5_SET(create_flow_table_in, in, level, level); + MLX5_SET(create_flow_table_in, in, log_size, log_size); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); + + if (!err) + *table_id = MLX5_GET(create_flow_table_out, out, + table_id); + return err; +} + +int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(destroy_flow_table_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_TABLE); + MLX5_SET(destroy_flow_table_in, in, table_type, ft->type); + MLX5_SET(destroy_flow_table_in, in, table_id, ft->id); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, + unsigned int *group_id) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)]; + int err; + + memset(out, 0, sizeof(out)); + + MLX5_SET(create_flow_group_in, in, opcode, + MLX5_CMD_OP_CREATE_FLOW_GROUP); + MLX5_SET(create_flow_group_in, in, table_type, ft->type); + MLX5_SET(create_flow_group_in, in, table_id, ft->id); + + err = mlx5_cmd_exec_check_status(dev, in, + inlen, out, + sizeof(out)); + if (!err) + *group_id = MLX5_GET(create_flow_group_out, out, + group_id); + + return err; +} + +int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id) +{ + u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)]; + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(destroy_flow_group_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_GROUP); + MLX5_SET(destroy_flow_group_in, in, table_type, ft->type); + MLX5_SET(destroy_flow_group_in, in, table_id, ft->id); + MLX5_SET(destroy_flow_group_in, in, group_id, group_id); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte) +{ + unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + + fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct); + u32 out[MLX5_ST_SZ_DW(set_fte_out)]; + struct mlx5_flow_rule *dst; + void *in_flow_context; + void *in_match_value; + void *in_dests; + u32 *in; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(dev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, op_mod, opmod); + MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask); + MLX5_SET(set_fte_in, in, table_type, ft->type); + MLX5_SET(set_fte_in, in, table_id, ft->id); + MLX5_SET(set_fte_in, in, flow_index, fte->index); + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag); + MLX5_SET(flow_context, in_flow_context, action, fte->action); + MLX5_SET(flow_context, in_flow_context, destination_list_size, + fte->dests_size); + in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, + match_value); + memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param)); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + list_for_each_entry(dst, &fte->node.children, node.list) { + unsigned int id; + + MLX5_SET(dest_format_struct, in_dests, destination_type, + dst->dest_attr.type); + if (dst->dest_attr.type == + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + id = dst->dest_attr.ft->id; + else + id = dst->dest_attr.tir_num; + MLX5_SET(dest_format_struct, in_dests, destination_id, id); + in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + } + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, + sizeof(out)); + kvfree(in); + + return err; +} + +int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte) +{ + return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte); +} + +int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte) +{ + int opmod; + int modify_mask; + int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive. + flow_modify_en); + if (!atomic_mod_cap) + return -ENOTSUPP; + opmod = 1; + modify_mask = 1 << + MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST; + + return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte); +} + +int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int index) +{ + u32 out[MLX5_ST_SZ_DW(delete_fte_out)]; + u32 in[MLX5_ST_SZ_DW(delete_fte_in)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + MLX5_SET(delete_fte_in, in, table_type, ft->type); + MLX5_SET(delete_fte_in, in, table_id, ft->id); + MLX5_SET(delete_fte_in, in, flow_index, index); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h new file mode 100644 index 000000000000..f39304ede186 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _MLX5_FS_CMD_ +#define _MLX5_FS_CMD_ + +int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, + enum fs_flow_table_type type, unsigned int level, + unsigned int log_size, unsigned int *table_id); + +int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft); + +int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + u32 *in, unsigned int *group_id); + +int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int group_id); + +int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte); + +int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte); + +int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + unsigned int index); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c new file mode 100644 index 000000000000..f7d62fe595f6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -0,0 +1,1047 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mutex.h> +#include <linux/mlx5/driver.h> + +#include "mlx5_core.h" +#include "fs_core.h" +#include "fs_cmd.h" + +#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ + sizeof(struct init_tree_node)) + +#define INIT_PRIO(min_level_val, max_ft_val,\ + start_level_val, ...) {.type = FS_TYPE_PRIO,\ + .min_ft_level = min_level_val,\ + .start_level = start_level_val,\ + .max_ft = max_ft_val,\ + .children = (struct init_tree_node[]) {__VA_ARGS__},\ + .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ +} + +#define ADD_PRIO(min_level_val, max_ft_val, start_level_val, ...)\ + INIT_PRIO(min_level_val, max_ft_val, start_level_val,\ + __VA_ARGS__)\ + +#define ADD_FT_PRIO(max_ft_val, start_level_val, ...)\ + INIT_PRIO(0, max_ft_val, start_level_val,\ + __VA_ARGS__)\ + +#define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\ + .children = (struct init_tree_node[]) {__VA_ARGS__},\ + .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ +} + +#define KERNEL_START_LEVEL 0 +#define KERNEL_P0_START_LEVEL KERNEL_START_LEVEL +#define KERNEL_MAX_FT 2 +#define KENREL_MIN_LEVEL 2 +static struct init_tree_node { + enum fs_node_type type; + struct init_tree_node *children; + int ar_size; + int min_ft_level; + int prio; + int max_ft; + int start_level; +} root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 1, + .children = (struct init_tree_node[]) { + ADD_PRIO(KENREL_MIN_LEVEL, KERNEL_MAX_FT, + KERNEL_START_LEVEL, + ADD_NS(ADD_FT_PRIO(KERNEL_MAX_FT, + KERNEL_P0_START_LEVEL))), + } +}; + +static void del_rule(struct fs_node *node); +static void del_flow_table(struct fs_node *node); +static void del_flow_group(struct fs_node *node); +static void del_fte(struct fs_node *node); + +static void tree_init_node(struct fs_node *node, + unsigned int refcount, + void (*remove_func)(struct fs_node *)) +{ + atomic_set(&node->refcount, refcount); + INIT_LIST_HEAD(&node->list); + INIT_LIST_HEAD(&node->children); + mutex_init(&node->lock); + node->remove_func = remove_func; +} + +static void tree_add_node(struct fs_node *node, struct fs_node *parent) +{ + if (parent) + atomic_inc(&parent->refcount); + node->parent = parent; + + /* Parent is the root */ + if (!parent) + node->root = node; + else + node->root = parent->root; +} + +static void tree_get_node(struct fs_node *node) +{ + atomic_inc(&node->refcount); +} + +static void nested_lock_ref_node(struct fs_node *node) +{ + if (node) { + mutex_lock_nested(&node->lock, SINGLE_DEPTH_NESTING); + atomic_inc(&node->refcount); + } +} + +static void lock_ref_node(struct fs_node *node) +{ + if (node) { + mutex_lock(&node->lock); + atomic_inc(&node->refcount); + } +} + +static void unlock_ref_node(struct fs_node *node) +{ + if (node) { + atomic_dec(&node->refcount); + mutex_unlock(&node->lock); + } +} + +static void tree_put_node(struct fs_node *node) +{ + struct fs_node *parent_node = node->parent; + + lock_ref_node(parent_node); + if (atomic_dec_and_test(&node->refcount)) { + if (parent_node) + list_del_init(&node->list); + if (node->remove_func) + node->remove_func(node); + kfree(node); + node = NULL; + } + unlock_ref_node(parent_node); + if (!node && parent_node) + tree_put_node(parent_node); +} + +static int tree_remove_node(struct fs_node *node) +{ + if (atomic_read(&node->refcount) > 1) + return -EPERM; + tree_put_node(node); + return 0; +} + +static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns, + unsigned int prio) +{ + struct fs_prio *iter_prio; + + fs_for_each_prio(iter_prio, ns) { + if (iter_prio->prio == prio) + return iter_prio; + } + + return NULL; +} + +static unsigned int find_next_free_level(struct fs_prio *prio) +{ + if (!list_empty(&prio->node.children)) { + struct mlx5_flow_table *ft; + + ft = list_last_entry(&prio->node.children, + struct mlx5_flow_table, + node.list); + return ft->level + 1; + } + return prio->start_level; +} + +static bool masked_memcmp(void *mask, void *val1, void *val2, size_t size) +{ + unsigned int i; + + for (i = 0; i < size; i++, mask++, val1++, val2++) + if ((*((u8 *)val1) & (*(u8 *)mask)) != + ((*(u8 *)val2) & (*(u8 *)mask))) + return false; + + return true; +} + +static bool compare_match_value(struct mlx5_flow_group_mask *mask, + void *fte_param1, void *fte_param2) +{ + if (mask->match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) { + void *fte_match1 = MLX5_ADDR_OF(fte_match_param, + fte_param1, outer_headers); + void *fte_match2 = MLX5_ADDR_OF(fte_match_param, + fte_param2, outer_headers); + void *fte_mask = MLX5_ADDR_OF(fte_match_param, + mask->match_criteria, outer_headers); + + if (!masked_memcmp(fte_mask, fte_match1, fte_match2, + MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4))) + return false; + } + + if (mask->match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) { + void *fte_match1 = MLX5_ADDR_OF(fte_match_param, + fte_param1, misc_parameters); + void *fte_match2 = MLX5_ADDR_OF(fte_match_param, + fte_param2, misc_parameters); + void *fte_mask = MLX5_ADDR_OF(fte_match_param, + mask->match_criteria, misc_parameters); + + if (!masked_memcmp(fte_mask, fte_match1, fte_match2, + MLX5_ST_SZ_BYTES(fte_match_set_misc))) + return false; + } + + if (mask->match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) { + void *fte_match1 = MLX5_ADDR_OF(fte_match_param, + fte_param1, inner_headers); + void *fte_match2 = MLX5_ADDR_OF(fte_match_param, + fte_param2, inner_headers); + void *fte_mask = MLX5_ADDR_OF(fte_match_param, + mask->match_criteria, inner_headers); + + if (!masked_memcmp(fte_mask, fte_match1, fte_match2, + MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4))) + return false; + } + return true; +} + +static bool compare_match_criteria(u8 match_criteria_enable1, + u8 match_criteria_enable2, + void *mask1, void *mask2) +{ + return match_criteria_enable1 == match_criteria_enable2 && + !memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param)); +} + +static struct mlx5_flow_root_namespace *find_root(struct fs_node *node) +{ + struct fs_node *root; + struct mlx5_flow_namespace *ns; + + root = node->root; + + if (WARN_ON(root->type != FS_TYPE_NAMESPACE)) { + pr_warn("mlx5: flow steering node is not in tree or garbaged\n"); + return NULL; + } + + ns = container_of(root, struct mlx5_flow_namespace, node); + return container_of(ns, struct mlx5_flow_root_namespace, ns); +} + +static inline struct mlx5_core_dev *get_dev(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root = find_root(node); + + if (root) + return root->dev; + return NULL; +} + +static void del_flow_table(struct fs_node *node) +{ + struct mlx5_flow_table *ft; + struct mlx5_core_dev *dev; + struct fs_prio *prio; + int err; + + fs_get_obj(ft, node); + dev = get_dev(&ft->node); + + err = mlx5_cmd_destroy_flow_table(dev, ft); + if (err) + pr_warn("flow steering can't destroy ft\n"); + fs_get_obj(prio, ft->node.parent); + prio->num_ft--; +} + +static void del_rule(struct fs_node *node) +{ + struct mlx5_flow_rule *rule; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct fs_fte *fte; + u32 *match_value; + struct mlx5_core_dev *dev = get_dev(node); + int match_len = MLX5_ST_SZ_BYTES(fte_match_param); + int err; + + match_value = mlx5_vzalloc(match_len); + if (!match_value) { + pr_warn("failed to allocate inbox\n"); + return; + } + + fs_get_obj(rule, node); + fs_get_obj(fte, rule->node.parent); + fs_get_obj(fg, fte->node.parent); + memcpy(match_value, fte->val, sizeof(fte->val)); + fs_get_obj(ft, fg->node.parent); + list_del(&rule->node.list); + fte->dests_size--; + if (fte->dests_size) { + err = mlx5_cmd_update_fte(dev, ft, + fg->id, fte); + if (err) + pr_warn("%s can't del rule fg id=%d fte_index=%d\n", + __func__, fg->id, fte->index); + } + kvfree(match_value); +} + +static void del_fte(struct fs_node *node) +{ + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_core_dev *dev; + struct fs_fte *fte; + int err; + + fs_get_obj(fte, node); + fs_get_obj(fg, fte->node.parent); + fs_get_obj(ft, fg->node.parent); + + dev = get_dev(&ft->node); + err = mlx5_cmd_delete_fte(dev, ft, + fte->index); + if (err) + pr_warn("flow steering can't delete fte in index %d of flow group id %d\n", + fte->index, fg->id); + + fte->status = 0; + fg->num_ftes--; +} + +static void del_flow_group(struct fs_node *node) +{ + struct mlx5_flow_group *fg; + struct mlx5_flow_table *ft; + struct mlx5_core_dev *dev; + + fs_get_obj(fg, node); + fs_get_obj(ft, fg->node.parent); + dev = get_dev(&ft->node); + + if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) + pr_warn("flow steering can't destroy fg %d of ft %d\n", + fg->id, ft->id); +} + +static struct fs_fte *alloc_fte(u8 action, + u32 flow_tag, + u32 *match_value, + unsigned int index) +{ + struct fs_fte *fte; + + fte = kzalloc(sizeof(*fte), GFP_KERNEL); + if (!fte) + return ERR_PTR(-ENOMEM); + + memcpy(fte->val, match_value, sizeof(fte->val)); + fte->node.type = FS_TYPE_FLOW_ENTRY; + fte->flow_tag = flow_tag; + fte->index = index; + fte->action = action; + + return fte; +} + +static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) +{ + struct mlx5_flow_group *fg; + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + create_fg_in, match_criteria); + u8 match_criteria_enable = MLX5_GET(create_flow_group_in, + create_fg_in, + match_criteria_enable); + fg = kzalloc(sizeof(*fg), GFP_KERNEL); + if (!fg) + return ERR_PTR(-ENOMEM); + + fg->mask.match_criteria_enable = match_criteria_enable; + memcpy(&fg->mask.match_criteria, match_criteria, + sizeof(fg->mask.match_criteria)); + fg->node.type = FS_TYPE_FLOW_GROUP; + fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in, + start_flow_index); + fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in, + end_flow_index) - fg->start_index + 1; + return fg; +} + +static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte, + enum fs_flow_table_type table_type) +{ + struct mlx5_flow_table *ft; + + ft = kzalloc(sizeof(*ft), GFP_KERNEL); + if (!ft) + return NULL; + + ft->level = level; + ft->node.type = FS_TYPE_FLOW_TABLE; + ft->type = table_type; + ft->max_fte = max_fte; + + return ft; +} + +struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int max_fte) +{ + struct mlx5_flow_table *ft; + int err; + int log_table_sz; + struct mlx5_flow_root_namespace *root = + find_root(&ns->node); + struct fs_prio *fs_prio = NULL; + + if (!root) { + pr_err("mlx5: flow steering failed to find root of namespace\n"); + return ERR_PTR(-ENODEV); + } + + fs_prio = find_prio(ns, prio); + if (!fs_prio) + return ERR_PTR(-EINVAL); + + lock_ref_node(&fs_prio->node); + if (fs_prio->num_ft == fs_prio->max_ft) { + err = -ENOSPC; + goto unlock_prio; + } + + ft = alloc_flow_table(find_next_free_level(fs_prio), + roundup_pow_of_two(max_fte), + root->table_type); + if (!ft) { + err = -ENOMEM; + goto unlock_prio; + } + + tree_init_node(&ft->node, 1, del_flow_table); + log_table_sz = ilog2(ft->max_fte); + err = mlx5_cmd_create_flow_table(root->dev, ft->type, ft->level, + log_table_sz, &ft->id); + if (err) + goto free_ft; + + tree_add_node(&ft->node, &fs_prio->node); + list_add_tail(&ft->node.list, &fs_prio->node.children); + fs_prio->num_ft++; + unlock_ref_node(&fs_prio->node); + + return ft; + +free_ft: + kfree(ft); +unlock_prio: + unlock_ref_node(&fs_prio->node); + return ERR_PTR(err); +} + +struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, + u32 *fg_in) +{ + struct mlx5_flow_group *fg; + struct mlx5_core_dev *dev = get_dev(&ft->node); + int err; + + if (!dev) + return ERR_PTR(-ENODEV); + + fg = alloc_flow_group(fg_in); + if (IS_ERR(fg)) + return fg; + + lock_ref_node(&ft->node); + err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); + if (err) { + kfree(fg); + unlock_ref_node(&ft->node); + return ERR_PTR(err); + } + /* Add node to tree */ + tree_init_node(&fg->node, 1, del_flow_group); + tree_add_node(&fg->node, &ft->node); + /* Add node to group list */ + list_add(&fg->node.list, ft->node.children.prev); + unlock_ref_node(&ft->node); + + return fg; +} + +static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return NULL; + + rule->node.type = FS_TYPE_FLOW_DEST; + memcpy(&rule->dest_attr, dest, sizeof(*dest)); + + return rule; +} + +/* fte should not be deleted while calling this function */ +static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte, + struct mlx5_flow_group *fg, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_table *ft; + struct mlx5_flow_rule *rule; + int err; + + rule = alloc_rule(dest); + if (!rule) + return ERR_PTR(-ENOMEM); + + fs_get_obj(ft, fg->node.parent); + /* Add dest to dests list- added as first element after the head */ + tree_init_node(&rule->node, 1, del_rule); + list_add_tail(&rule->node.list, &fte->node.children); + fte->dests_size++; + if (fte->dests_size == 1) + err = mlx5_cmd_create_fte(get_dev(&ft->node), + ft, fg->id, fte); + else + err = mlx5_cmd_update_fte(get_dev(&ft->node), + ft, fg->id, fte); + if (err) + goto free_rule; + + fte->status |= FS_FTE_STATUS_EXISTING; + + return rule; + +free_rule: + list_del(&rule->node.list); + kfree(rule); + fte->dests_size--; + return ERR_PTR(err); +} + +/* Assumed fg is locked */ +static unsigned int get_free_fte_index(struct mlx5_flow_group *fg, + struct list_head **prev) +{ + struct fs_fte *fte; + unsigned int start = fg->start_index; + + if (prev) + *prev = &fg->node.children; + + /* assumed list is sorted by index */ + fs_for_each_fte(fte, fg) { + if (fte->index != start) + return start; + start++; + if (prev) + *prev = &fte->node.list; + } + + return start; +} + +/* prev is output, prev->next = new_fte */ +static struct fs_fte *create_fte(struct mlx5_flow_group *fg, + u32 *match_value, + u8 action, + u32 flow_tag, + struct list_head **prev) +{ + struct fs_fte *fte; + int index; + + index = get_free_fte_index(fg, prev); + fte = alloc_fte(action, flow_tag, match_value, index); + if (IS_ERR(fte)) + return fte; + + return fte; +} + +/* Assuming parent fg(flow table) is locked */ +static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, + u32 *match_value, + u8 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct fs_fte *fte; + struct mlx5_flow_rule *rule; + struct mlx5_flow_table *ft; + struct list_head *prev; + + lock_ref_node(&fg->node); + fs_for_each_fte(fte, fg) { + nested_lock_ref_node(&fte->node); + if (compare_match_value(&fg->mask, match_value, &fte->val) && + action == fte->action && flow_tag == fte->flow_tag) { + rule = add_rule_fte(fte, fg, dest); + unlock_ref_node(&fte->node); + if (IS_ERR(rule)) + goto unlock_fg; + else + goto add_rule; + } + unlock_ref_node(&fte->node); + } + fs_get_obj(ft, fg->node.parent); + if (fg->num_ftes >= fg->max_ftes) { + rule = ERR_PTR(-ENOSPC); + goto unlock_fg; + } + + fte = create_fte(fg, match_value, action, flow_tag, &prev); + if (IS_ERR(fte)) { + rule = (void *)fte; + goto unlock_fg; + } + tree_init_node(&fte->node, 0, del_fte); + rule = add_rule_fte(fte, fg, dest); + if (IS_ERR(rule)) { + kfree(fte); + goto unlock_fg; + } + + fg->num_ftes++; + + tree_add_node(&fte->node, &fg->node); + list_add(&fte->node.list, prev); +add_rule: + tree_add_node(&rule->node, &fte->node); +unlock_fg: + unlock_ref_node(&fg->node); + return rule; +} + +struct mlx5_flow_rule * +mlx5_add_flow_rule(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_group *g; + struct mlx5_flow_rule *rule = ERR_PTR(-EINVAL); + + tree_get_node(&ft->node); + lock_ref_node(&ft->node); + fs_for_each_fg(g, ft) + if (compare_match_criteria(g->mask.match_criteria_enable, + match_criteria_enable, + g->mask.match_criteria, + match_criteria)) { + unlock_ref_node(&ft->node); + rule = add_rule_fg(g, match_value, + action, flow_tag, dest); + goto put; + } + unlock_ref_node(&ft->node); +put: + tree_put_node(&ft->node); + return rule; +} + +void mlx5_del_flow_rule(struct mlx5_flow_rule *rule) +{ + tree_remove_node(&rule->node); +} + +int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) +{ + if (tree_remove_node(&ft->node)) + mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n", + ft->id); + + return 0; +} + +void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) +{ + if (tree_remove_node(&fg->node)) + mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n", + fg->id); +} + +struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type) +{ + struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; + int prio; + static struct fs_prio *fs_prio; + struct mlx5_flow_namespace *ns; + + if (!root_ns) + return NULL; + + switch (type) { + case MLX5_FLOW_NAMESPACE_KERNEL: + prio = 0; + break; + case MLX5_FLOW_NAMESPACE_FDB: + if (dev->priv.fdb_root_ns) + return &dev->priv.fdb_root_ns->ns; + else + return NULL; + default: + return NULL; + } + + fs_prio = find_prio(&root_ns->ns, prio); + if (!fs_prio) + return NULL; + + ns = list_first_entry(&fs_prio->node.children, + typeof(*ns), + node.list); + + return ns; +} + +static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned prio, int max_ft, + int start_level) +{ + struct fs_prio *fs_prio; + + fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL); + if (!fs_prio) + return ERR_PTR(-ENOMEM); + + fs_prio->node.type = FS_TYPE_PRIO; + tree_init_node(&fs_prio->node, 1, NULL); + tree_add_node(&fs_prio->node, &ns->node); + fs_prio->max_ft = max_ft; + fs_prio->prio = prio; + fs_prio->start_level = start_level; + list_add_tail(&fs_prio->node.list, &ns->node.children); + + return fs_prio; +} + +static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace + *ns) +{ + ns->node.type = FS_TYPE_NAMESPACE; + + return ns; +} + +static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) +{ + struct mlx5_flow_namespace *ns; + + ns = kzalloc(sizeof(*ns), GFP_KERNEL); + if (!ns) + return ERR_PTR(-ENOMEM); + + fs_init_namespace(ns); + tree_init_node(&ns->node, 1, NULL); + tree_add_node(&ns->node, &prio->node); + list_add_tail(&ns->node.list, &prio->node.children); + + return ns; +} + +static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *init_node, + struct fs_node *fs_parent_node, + struct init_tree_node *init_parent_node, + int index) +{ + struct mlx5_flow_namespace *fs_ns; + struct fs_prio *fs_prio; + struct fs_node *base; + int i; + int err; + + if (init_node->type == FS_TYPE_PRIO) { + if (init_node->min_ft_level > max_ft_level) + return -ENOTSUPP; + + fs_get_obj(fs_ns, fs_parent_node); + fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft, + init_node->start_level); + if (IS_ERR(fs_prio)) + return PTR_ERR(fs_prio); + base = &fs_prio->node; + } else if (init_node->type == FS_TYPE_NAMESPACE) { + fs_get_obj(fs_prio, fs_parent_node); + fs_ns = fs_create_namespace(fs_prio); + if (IS_ERR(fs_ns)) + return PTR_ERR(fs_ns); + base = &fs_ns->node; + } else { + return -EINVAL; + } + for (i = 0; i < init_node->ar_size; i++) { + err = init_root_tree_recursive(max_ft_level, + &init_node->children[i], base, + init_node, i); + if (err) + return err; + } + + return 0; +} + +static int init_root_tree(int max_ft_level, struct init_tree_node *init_node, + struct fs_node *fs_parent_node) +{ + int i; + struct mlx5_flow_namespace *fs_ns; + int err; + + fs_get_obj(fs_ns, fs_parent_node); + for (i = 0; i < init_node->ar_size; i++) { + err = init_root_tree_recursive(max_ft_level, + &init_node->children[i], + &fs_ns->node, + init_node, i); + if (err) + return err; + } + return 0; +} + +static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev, + enum fs_flow_table_type + table_type) +{ + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_namespace *ns; + + /* Create the root namespace */ + root_ns = mlx5_vzalloc(sizeof(*root_ns)); + if (!root_ns) + return NULL; + + root_ns->dev = dev; + root_ns->table_type = table_type; + + ns = &root_ns->ns; + fs_init_namespace(ns); + tree_init_node(&ns->node, 1, NULL); + tree_add_node(&ns->node, NULL); + + return root_ns; +} + +static int init_root_ns(struct mlx5_core_dev *dev) +{ + int max_ft_level = MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive. + max_ft_level); + + dev->priv.root_ns = create_root_ns(dev, FS_FT_NIC_RX); + if (IS_ERR_OR_NULL(dev->priv.root_ns)) + goto cleanup; + + if (init_root_tree(max_ft_level, &root_fs, &dev->priv.root_ns->ns.node)) + goto cleanup; + + return 0; + +cleanup: + mlx5_cleanup_fs(dev); + return -ENOMEM; +} + +static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev, + struct mlx5_flow_root_namespace *root_ns) +{ + struct fs_node *prio; + + if (!root_ns) + return; + + if (!list_empty(&root_ns->ns.node.children)) { + prio = list_first_entry(&root_ns->ns.node.children, + struct fs_node, + list); + if (tree_remove_node(prio)) + mlx5_core_warn(dev, + "Flow steering priority wasn't destroyed, refcount > 1\n"); + } + if (tree_remove_node(&root_ns->ns.node)) + mlx5_core_warn(dev, + "Flow steering namespace wasn't destroyed, refcount > 1\n"); + root_ns = NULL; +} + +static void cleanup_root_ns(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; + struct fs_prio *iter_prio; + + if (!MLX5_CAP_GEN(dev, nic_flow_table)) + return; + + if (!root_ns) + return; + + /* stage 1 */ + fs_for_each_prio(iter_prio, &root_ns->ns) { + struct fs_node *node; + struct mlx5_flow_namespace *iter_ns; + + fs_for_each_ns_or_ft(node, iter_prio) { + if (node->type == FS_TYPE_FLOW_TABLE) + continue; + fs_get_obj(iter_ns, node); + while (!list_empty(&iter_ns->node.children)) { + struct fs_prio *obj_iter_prio2; + struct fs_node *iter_prio2 = + list_first_entry(&iter_ns->node.children, + struct fs_node, + list); + + fs_get_obj(obj_iter_prio2, iter_prio2); + if (tree_remove_node(iter_prio2)) { + mlx5_core_warn(dev, + "Priority %d wasn't destroyed, refcount > 1\n", + obj_iter_prio2->prio); + return; + } + } + } + } + + /* stage 2 */ + fs_for_each_prio(iter_prio, &root_ns->ns) { + while (!list_empty(&iter_prio->node.children)) { + struct fs_node *iter_ns = + list_first_entry(&iter_prio->node.children, + struct fs_node, + list); + if (tree_remove_node(iter_ns)) { + mlx5_core_warn(dev, + "Namespace wasn't destroyed, refcount > 1\n"); + return; + } + } + } + + /* stage 3 */ + while (!list_empty(&root_ns->ns.node.children)) { + struct fs_prio *obj_prio_node; + struct fs_node *prio_node = + list_first_entry(&root_ns->ns.node.children, + struct fs_node, + list); + + fs_get_obj(obj_prio_node, prio_node); + if (tree_remove_node(prio_node)) { + mlx5_core_warn(dev, + "Priority %d wasn't destroyed, refcount > 1\n", + obj_prio_node->prio); + return; + } + } + + if (tree_remove_node(&root_ns->ns.node)) { + mlx5_core_warn(dev, + "root namespace wasn't destroyed, refcount > 1\n"); + return; + } + + dev->priv.root_ns = NULL; +} + +void mlx5_cleanup_fs(struct mlx5_core_dev *dev) +{ + cleanup_root_ns(dev); + cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); +} + +static int init_fdb_root_ns(struct mlx5_core_dev *dev) +{ + struct fs_prio *prio; + + dev->priv.fdb_root_ns = create_root_ns(dev, FS_FT_FDB); + if (!dev->priv.fdb_root_ns) + return -ENOMEM; + + /* Create single prio */ + prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1, 0); + if (IS_ERR(prio)) { + cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); + return PTR_ERR(prio); + } else { + return 0; + } +} + +int mlx5_init_fs(struct mlx5_core_dev *dev) +{ + int err = 0; + + if (MLX5_CAP_GEN(dev, nic_flow_table)) { + err = init_root_ns(dev); + if (err) + return err; + } + if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { + err = init_fdb_root_ns(dev); + if (err) + cleanup_root_ns(dev); + } + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h new file mode 100644 index 000000000000..4ebb97fd5544 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _MLX5_FS_CORE_ +#define _MLX5_FS_CORE_ + +#include <linux/mlx5/fs.h> + +enum fs_node_type { + FS_TYPE_NAMESPACE, + FS_TYPE_PRIO, + FS_TYPE_FLOW_TABLE, + FS_TYPE_FLOW_GROUP, + FS_TYPE_FLOW_ENTRY, + FS_TYPE_FLOW_DEST +}; + +enum fs_flow_table_type { + FS_FT_NIC_RX = 0x0, + FS_FT_FDB = 0X4, +}; + +enum fs_fte_status { + FS_FTE_STATUS_EXISTING = 1UL << 0, +}; + +struct fs_node { + struct list_head list; + struct list_head children; + enum fs_node_type type; + struct fs_node *parent; + struct fs_node *root; + /* lock the node for writing and traversing */ + struct mutex lock; + atomic_t refcount; + void (*remove_func)(struct fs_node *); +}; + +struct mlx5_flow_rule { + struct fs_node node; + struct mlx5_flow_destination dest_attr; +}; + +/* Type of children is mlx5_flow_group */ +struct mlx5_flow_table { + struct fs_node node; + u32 id; + unsigned int max_fte; + unsigned int level; + enum fs_flow_table_type type; +}; + +/* Type of children is mlx5_flow_rule */ +struct fs_fte { + struct fs_node node; + u32 val[MLX5_ST_SZ_DW(fte_match_param)]; + u32 dests_size; + u32 flow_tag; + u32 index; + u32 action; + enum fs_fte_status status; +}; + +/* Type of children is mlx5_flow_table/namespace */ +struct fs_prio { + struct fs_node node; + unsigned int max_ft; + unsigned int start_level; + unsigned int prio; + unsigned int num_ft; +}; + +/* Type of children is fs_prio */ +struct mlx5_flow_namespace { + /* parent == NULL => root ns */ + struct fs_node node; +}; + +struct mlx5_flow_group_mask { + u8 match_criteria_enable; + u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; +}; + +/* Type of children is fs_fte */ +struct mlx5_flow_group { + struct fs_node node; + struct mlx5_flow_group_mask mask; + u32 start_index; + u32 max_ftes; + u32 num_ftes; + u32 id; +}; + +struct mlx5_flow_root_namespace { + struct mlx5_flow_namespace ns; + enum fs_flow_table_type table_type; + struct mlx5_core_dev *dev; +}; + +int mlx5_init_fs(struct mlx5_core_dev *dev); +void mlx5_cleanup_fs(struct mlx5_core_dev *dev); + +#define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); } + +#define fs_list_for_each_entry(pos, root) \ + list_for_each_entry(pos, root, node.list) + +#define fs_for_each_ns_or_ft_reverse(pos, prio) \ + list_for_each_entry_reverse(pos, &(prio)->node.children, list) + +#define fs_for_each_ns_or_ft(pos, prio) \ + list_for_each_entry(pos, (&(prio)->node.children), list) + +#define fs_for_each_prio(pos, ns) \ + fs_list_for_each_entry(pos, &(ns)->node.children) + +#define fs_for_each_fg(pos, ft) \ + fs_list_for_each_entry(pos, &(ft)->node.children) + +#define fs_for_each_fte(pos, fg) \ + fs_list_for_each_entry(pos, &(fg)->node.children) + +#define fs_for_each_dst(pos, fte) \ + fs_list_for_each_entry(pos, &(fte)->node.children) + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 9335e5ae18cc..aa1ab4702385 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -160,6 +160,30 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) if (err) return err; } + + if (MLX5_CAP_GEN(dev, vport_group_manager) && + MLX5_CAP_GEN(dev, eswitch_flow_table)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 4ac8d4cc4973..789882b7b711 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -49,6 +49,10 @@ #include <linux/delay.h> #include <linux/mlx5/mlx5_ifc.h> #include "mlx5_core.h" +#include "fs_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); @@ -454,6 +458,9 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) struct mlx5_reg_host_endianess he_out; int err; + if (!mlx5_core_is_pf(dev)) + return 0; + memset(&he_in, 0, sizeof(he_in)); he_in.he = MLX5_SET_HOST_ENDIANNESS; err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in), @@ -462,42 +469,39 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) return err; } -static int mlx5_core_enable_hca(struct mlx5_core_dev *dev) +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) { + u32 out[MLX5_ST_SZ_DW(enable_hca_out)]; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)]; int err; - struct mlx5_enable_hca_mbox_in in; - struct mlx5_enable_hca_mbox_out out; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ENABLE_HCA); + memset(in, 0, sizeof(in)); + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, func_id); + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return 0; + return mlx5_cmd_status_to_err_v2(out); } -static int mlx5_core_disable_hca(struct mlx5_core_dev *dev) +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) { + u32 out[MLX5_ST_SZ_DW(disable_hca_out)]; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)]; int err; - struct mlx5_disable_hca_mbox_in in; - struct mlx5_disable_hca_mbox_out out; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DISABLE_HCA); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + memset(in, 0, sizeof(in)); + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, func_id); + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return 0; + return mlx5_cmd_status_to_err_v2(out); } static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) @@ -942,7 +946,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_pagealloc_init(dev); - err = mlx5_core_enable_hca(dev); + err = mlx5_core_enable_hca(dev, 0); if (err) { dev_err(&pdev->dev, "enable hca failed\n"); goto err_pagealloc_cleanup; @@ -1052,6 +1056,25 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_srq_table(dev); mlx5_init_mr_table(dev); + err = mlx5_init_fs(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init flow steering\n"); + goto err_fs; + } +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_eswitch_init(dev); + if (err) { + dev_err(&pdev->dev, "eswitch init failed %d\n", err); + goto err_reg_dev; + } +#endif + + err = mlx5_sriov_init(dev); + if (err) { + dev_err(&pdev->dev, "sriov init failed %d\n", err); + goto err_sriov; + } + err = mlx5_register_device(dev); if (err) { dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); @@ -1068,7 +1091,16 @@ out: return 0; +err_sriov: + if (mlx5_sriov_cleanup(dev)) + dev_err(&dev->pdev->dev, "sriov cleanup failed\n"); + +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif err_reg_dev: + mlx5_cleanup_fs(dev); +err_fs: mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); @@ -1106,7 +1138,7 @@ reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); err_disable_hca: - mlx5_core_disable_hca(dev); + mlx5_core_disable_hca(dev, 0); err_pagealloc_cleanup: mlx5_pagealloc_cleanup(dev); @@ -1123,6 +1155,13 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { int err = 0; + err = mlx5_sriov_cleanup(dev); + if (err) { + dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n", + __func__); + return err; + } + mutex_lock(&dev->intf_state_mutex); if (dev->interface_state == MLX5_INTERFACE_STATE_DOWN) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", @@ -1130,6 +1169,11 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out; } mlx5_unregister_device(dev); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif + + mlx5_cleanup_fs(dev); mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); @@ -1149,7 +1193,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) } mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); - mlx5_core_disable_hca(dev); + mlx5_core_disable_hca(dev, 0); mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); @@ -1195,6 +1239,7 @@ static int init_one(struct pci_dev *pdev, return -ENOMEM; } priv = &dev->priv; + priv->pci_dev_data = id->driver_data; pci_set_drvdata(pdev, dev); @@ -1365,12 +1410,12 @@ static const struct pci_error_handlers mlx5_err_handler = { }; static const struct pci_device_id mlx5_core_pci_table[] = { - { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ - { PCI_VDEVICE(MELLANOX, 0x1012) }, /* Connect-IB VF */ - { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */ - { PCI_VDEVICE(MELLANOX, 0x1014) }, /* ConnectX-4 VF */ - { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ - { PCI_VDEVICE(MELLANOX, 0x1016) }, /* ConnectX-4LX VF */ + { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ + { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */ + { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */ + { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ + { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ + { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ { 0, } }; @@ -1381,7 +1426,8 @@ static struct pci_driver mlx5_core_driver = { .id_table = mlx5_core_pci_table, .probe = init_one, .remove = remove_one, - .err_handler = &mlx5_err_handler + .err_handler = &mlx5_err_handler, + .sriov_configure = mlx5_core_sriov_configure, }; static int __init init(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index cee5b7a839bc..ea6a137fd76c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -36,6 +36,7 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/if_link.h> #define DRIVER_NAME "mlx5_core" #define DRIVER_VERSION "3.0-1" @@ -64,6 +65,9 @@ do { \ (__dev)->priv.name, __func__, __LINE__, current->pid, \ ##__VA_ARGS__) +#define mlx5_core_info(__dev, format, ...) \ + dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__) + enum { MLX5_CMD_DATA, /* print command payload only */ MLX5_CMD_TIME, /* print command execution time */ @@ -90,6 +94,10 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); +int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); void mlx5e_init(void); void mlx5e_cleanup(void); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 4d3377b12657..9eeee0545f1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -33,6 +33,7 @@ #include <linux/highmem.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/delay.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" @@ -95,6 +96,7 @@ struct mlx5_manage_pages_outbox { enum { MAX_RECLAIM_TIME_MSECS = 5000, + MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60, }; enum { @@ -352,6 +354,10 @@ retry: goto out_4k; } + dev->priv.fw_pages += npages; + if (func_id) + dev->priv.vfs_pages += npages; + mlx5_core_dbg(dev, "err %d\n", err); kvfree(in); @@ -405,6 +411,12 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, } num_claimed = be32_to_cpu(out->num_entries); + if (num_claimed > npages) { + mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n", + num_claimed, npages); + err = -EINVAL; + goto out_free; + } if (nclaimed) *nclaimed = num_claimed; @@ -412,6 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, addr = be64_to_cpu(out->pas[i]); free_4k(dev, addr); } + dev->priv.fw_pages -= num_claimed; + if (func_id) + dev->priv.vfs_pages -= num_claimed; out_free: kvfree(out); @@ -548,3 +563,26 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) { destroy_workqueue(dev->priv.pg_wq); } + +int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) +{ + unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + int prev_vfs_pages = dev->priv.vfs_pages; + + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, + dev->priv.name); + while (dev->priv.vfs_pages) { + if (time_after(jiffies, end)) { + mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages); + return -ETIMEDOUT; + } + if (dev->priv.vfs_pages < prev_vfs_pages) { + end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + prev_vfs_pages = dev->priv.vfs_pages; + } + msleep(50); + } + + mlx5_core_dbg(dev, "All pages received from %s\n", dev->priv.name); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c new file mode 100644 index 000000000000..7b24386794f9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/pci.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif + +static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + int vf; + + for (vf = 1; vf <= num_vfs; vf++) { + err = mlx5_core_enable_hca(dev, vf); + if (err) { + mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1); + } else { + sriov->vfs_ctx[vf - 1].enabled = 1; + mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1); + } + } +} + +static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int vf; + + for (vf = 1; vf <= num_vfs; vf++) { + if (sriov->vfs_ctx[vf - 1].enabled) { + if (mlx5_core_disable_hca(dev, vf)) + mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1); + else + sriov->vfs_ctx[vf - 1].enabled = 0; + } + } +} + +static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err; + + if (pci_num_vf(pdev)) + pci_disable_sriov(pdev); + + enable_vfs(dev, num_vfs); + + err = pci_enable_sriov(pdev, num_vfs); + if (err) { + dev_warn(&pdev->dev, "enable sriov failed %d\n", err); + goto ex; + } + + return 0; + +ex: + disable_vfs(dev, num_vfs); + return err; +} + +static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_ATOMIC); + if (!sriov->vfs_ctx) + return -ENOMEM; + + sriov->enabled_vfs = num_vfs; + err = mlx5_core_create_vfs(pdev, num_vfs); + if (err) { + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = NULL; + return err; + } + + return 0; +} + +static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + sriov->num_vfs = num_vfs; +} + +static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov; + + sriov = &dev->priv.sriov; + disable_vfs(dev, sriov->num_vfs); + + if (mlx5_wait_for_vf_pages(dev)) + mlx5_core_warn(dev, "timeout claiming VFs pages\n"); + + sriov->num_vfs = 0; +} + +int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + + mlx5_core_dbg(dev, "requsted num_vfs %d\n", num_vfs); + if (!mlx5_core_is_pf(dev)) + return -EPERM; + + mlx5_core_cleanup_vfs(dev); + + if (!num_vfs) { +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_disable_sriov(dev->priv.eswitch); +#endif + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = NULL; + if (!pci_vfs_assigned(pdev)) + pci_disable_sriov(pdev); + else + pr_info("unloading PF driver while leaving orphan VFs\n"); + return 0; + } + + err = mlx5_core_sriov_enable(pdev, num_vfs); + if (err) { + dev_warn(&pdev->dev, "mlx5_core_sriov_enable failed %d\n", err); + return err; + } + + mlx5_core_init_vfs(dev, num_vfs); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs); +#endif + + return num_vfs; +} + +static int sync_required(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int cur_vfs = pci_num_vf(pdev); + + if (cur_vfs != sriov->num_vfs) { + pr_info("current VFs %d, registered %d - sync needed\n", cur_vfs, sriov->num_vfs); + return 1; + } + + return 0; +} + +int mlx5_sriov_init(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct pci_dev *pdev = dev->pdev; + int cur_vfs; + + if (!mlx5_core_is_pf(dev)) + return 0; + + if (!sync_required(dev->pdev)) + return 0; + + cur_vfs = pci_num_vf(pdev); + sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); + if (!sriov->vfs_ctx) + return -ENOMEM; + + sriov->enabled_vfs = cur_vfs; + + mlx5_core_init_vfs(dev, cur_vfs); +#ifdef CONFIG_MLX5_CORE_EN + if (cur_vfs) + mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs); +#endif + + enable_vfs(dev, cur_vfs); + + return 0; +} + +int mlx5_sriov_cleanup(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + if (!mlx5_core_is_pf(dev)) + return 0; + + err = mlx5_core_sriov_configure(pdev, 0); + if (err) + return err; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index b94177ebcf3a..076197efea9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -36,54 +36,399 @@ #include <linux/mlx5/vport.h> #include "mlx5_core.h" -u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod) +static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; - u32 out[MLX5_ST_SZ_DW(query_vport_state_out)]; int err; + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; memset(in, 0, sizeof(in)); MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); MLX5_SET(query_vport_state_in, in, op_mod, opmod); + MLX5_SET(query_vport_state_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_state_in, in, other_vport, 1); - err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, - sizeof(out)); + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); if (err) mlx5_core_warn(mdev, "MLX5_CMD_OP_QUERY_VPORT_STATE failed\n"); + return err; +} + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0}; + + _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out)); + return MLX5_GET(query_vport_state_out, out, state); } -EXPORT_SYMBOL(mlx5_query_vport_state); +EXPORT_SYMBOL_GPL(mlx5_query_vport_state); + +u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0}; + + _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out)); + + return MLX5_GET(query_vport_state_out, out, admin_state); +} +EXPORT_SYMBOL(mlx5_query_vport_admin_state); -void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u8 state) +{ + u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)]; + u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(modify_vport_state_in, in, opcode, + MLX5_CMD_OP_MODIFY_VPORT_STATE); + MLX5_SET(modify_vport_state_in, in, op_mod, opmod); + MLX5_SET(modify_vport_state_in, in, vport_number, vport); + + if (vport) + MLX5_SET(modify_vport_state_in, in, other_vport, 1); + + MLX5_SET(modify_vport_state_in, in, admin_state, state); + + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, + sizeof(out)); + if (err) + mlx5_core_warn(mdev, "MLX5_CMD_OP_MODIFY_VPORT_STATE failed\n"); + + return err; +} +EXPORT_SYMBOL(mlx5_modify_vport_admin_state); + +static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); +} + +static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out)); +} + +int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr) { - u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); u8 *out_addr; + int err; out = mlx5_vzalloc(outlen); if (!out) - return; + return -ENOMEM; out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, nic_vport_context.permanent_address); + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (err) + goto out; + + ether_addr_copy(addr, &out_addr[2]); + +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address); + +int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + void *nic_vport_ctx; + u8 *perm_mac; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(mdev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.permanent_address, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + perm_mac = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx, + permanent_address); + + ether_addr_copy(&perm_mac[2], addr); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address); + +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u32 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int max_list_size; + int req_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *list_size; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; MLX5_SET(query_nic_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); - memset(out, 0, outlen); - mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); - ether_addr_copy(addr, &out_addr[2]); + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; - kvfree(out); + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *list_size = req_list_size; + for (i = 0; i < req_list_size; i++) { + u8 *mac_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(addr_list[i], mac_addr); + } +out: + kfree(out); + return err; } -EXPORT_SYMBOL(mlx5_query_nic_vport_mac_address); +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list); + +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, list_type); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(curr_mac, addr_list[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); + +int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, + u32 vport, + u16 vlans[], + int *size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int req_list_size; + int max_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *size; + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max list size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, + MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *size = req_list_size; + for (i = 0; i < req_list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + vlans[i] = MLX5_GET(vlan_layout, vlan_addr, vlan); + } +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_vlans); + +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, @@ -343,3 +688,65 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, return err; } EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); + +int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, + u32 vport, + int *promisc_uc, + int *promisc_mc, + int *promisc_all) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (err) + goto out; + + *promisc_uc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_uc); + *promisc_mc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_mc); + *promisc_all = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_all); + +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc); + +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_err(mdev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_uc, promisc_uc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_mc, promisc_mc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_all, promisc_all); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index e36e12219c9b..ec8caf8fedc6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -10,6 +10,14 @@ config MLXSW_CORE To compile this driver as a module, choose M here: the module will be called mlxsw_core. +config MLXSW_CORE_HWMON + bool "HWMON support for Mellanox Technologies Switch ASICs" + depends on MLXSW_CORE && HWMON + depends on !(MLXSW_CORE=y && HWMON=m) + default y + ---help--- + Say Y here if you want to expose HWMON interface on mlxsw devices. + config MLXSW_PCI tristate "PCI bus implementation for Mellanox Technologies Switch ASICs" depends on PCI && HAS_DMA && HAS_IOMEM && MLXSW_CORE diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index af015818fd19..584cac444852 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_MLXSW_CORE) += mlxsw_core.o mlxsw_core-objs := core.o +mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o obj-$(CONFIG_MLXSW_PCI) += mlxsw_pci.o mlxsw_pci-objs := pci.o obj-$(CONFIG_MLXSW_SWITCHX2) += mlxsw_switchx2.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 97f0d93caf99..af8a48b3b3ad 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -105,6 +105,10 @@ struct mlxsw_core { struct debugfs_blob_wrapper vsd_blob; struct debugfs_blob_wrapper psid_blob; } dbg; + struct { + u8 *mapping; /* lag_id+port_index to local_port mapping */ + } lag; + struct mlxsw_hwmon *hwmon; unsigned long driver_priv[0]; /* driver_priv has to be always the last item */ }; @@ -814,6 +818,17 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_alloc_stats; } + if (mlxsw_driver->profile->used_max_lag && + mlxsw_driver->profile->used_max_port_per_lag) { + alloc_size = sizeof(u8) * mlxsw_driver->profile->max_lag * + mlxsw_driver->profile->max_port_per_lag; + mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL); + if (!mlxsw_core->lag.mapping) { + err = -ENOMEM; + goto err_alloc_lag_mapping; + } + } + err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile); if (err) goto err_bus_init; @@ -822,6 +837,10 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_emad_init; + err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon); + if (err) + goto err_hwmon_init; + err = mlxsw_driver->init(mlxsw_core->driver_priv, mlxsw_core, mlxsw_bus_info); if (err) @@ -836,10 +855,14 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, err_debugfs_init: mlxsw_core->driver->fini(mlxsw_core->driver_priv); err_driver_init: + mlxsw_hwmon_fini(mlxsw_core->hwmon); +err_hwmon_init: mlxsw_emad_fini(mlxsw_core); err_emad_init: mlxsw_bus->fini(bus_priv); err_bus_init: + kfree(mlxsw_core->lag.mapping); +err_alloc_lag_mapping: free_percpu(mlxsw_core->pcpu_stats); err_alloc_stats: kfree(mlxsw_core); @@ -855,8 +878,10 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core) mlxsw_core_debugfs_fini(mlxsw_core); mlxsw_core->driver->fini(mlxsw_core->driver_priv); + mlxsw_hwmon_fini(mlxsw_core->hwmon); mlxsw_emad_fini(mlxsw_core); mlxsw_core->bus->fini(mlxsw_core->bus_priv); + kfree(mlxsw_core->lag.mapping); free_percpu(mlxsw_core->pcpu_stats); kfree(mlxsw_core); mlxsw_core_driver_put(device_kind); @@ -1188,11 +1213,25 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, struct mlxsw_rx_listener_item *rxl_item; const struct mlxsw_rx_listener *rxl; struct mlxsw_core_pcpu_stats *pcpu_stats; - u8 local_port = rx_info->sys_port; + u8 local_port; bool found = false; - dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: sys_port = %d, trap_id = 0x%x\n", - __func__, rx_info->sys_port, rx_info->trap_id); + if (rx_info->is_lag) { + dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: lag_id = %d, lag_port_index = 0x%x\n", + __func__, rx_info->u.lag_id, + rx_info->trap_id); + /* Upper layer does not care if the skb came from LAG or not, + * so just get the local_port for the lag port and push it up. + */ + local_port = mlxsw_core_lag_mapping_get(mlxsw_core, + rx_info->u.lag_id, + rx_info->lag_port_index); + } else { + local_port = rx_info->u.sys_port; + } + + dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: local_port = %d, trap_id = 0x%x\n", + __func__, local_port, rx_info->trap_id); if ((rx_info->trap_id >= MLXSW_TRAP_ID_MAX) || (local_port >= MLXSW_PORT_MAX_PORTS)) @@ -1236,6 +1275,48 @@ drop: } EXPORT_SYMBOL(mlxsw_core_skb_receive); +static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index) +{ + return mlxsw_core->driver->profile->max_port_per_lag * lag_id + + port_index; +} + +void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index, u8 local_port) +{ + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, port_index); + + mlxsw_core->lag.mapping[index] = local_port; +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_set); + +u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index) +{ + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, port_index); + + return mlxsw_core->lag.mapping[index]; +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_get); + +void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 local_port) +{ + int i; + + for (i = 0; i < mlxsw_core->driver->profile->max_port_per_lag; i++) { + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, i); + + if (mlxsw_core->lag.mapping[index] == local_port) + mlxsw_core->lag.mapping[index] = 0; + } +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_clear); + int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod, u32 in_mod, bool out_mbox_direct, char *in_mbox, size_t in_mbox_size, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 807827350a89..4833fb33ce07 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -112,13 +112,25 @@ int mlxsw_reg_write(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload); struct mlxsw_rx_info { - u16 sys_port; + bool is_lag; + union { + u16 sys_port; + u16 lag_id; + } u; + u8 lag_port_index; int trap_id; }; void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, struct mlxsw_rx_info *rx_info); +void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index, u8 local_port); +u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index); +void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 local_port); + #define MLXSW_CONFIG_PROFILE_SWID_COUNT 8 struct mlxsw_swid_config { @@ -209,4 +221,28 @@ struct mlxsw_bus_info { u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN]; }; +struct mlxsw_hwmon; + +#ifdef CONFIG_MLXSW_CORE_HWMON + +int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_hwmon **p_hwmon); +void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon); + +#else + +static inline int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_hwmon **p_hwmon) +{ + return 0; +} + +static inline void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon) +{ +} + +#endif + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c new file mode 100644 index 000000000000..b86db967eab9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -0,0 +1,342 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/device.h> +#include <linux/sysfs.h> +#include <linux/hwmon.h> +#include <linux/err.h> + +#include "core.h" + +#define MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT 127 +#define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT * 4 + \ + MLXSW_MFCR_TACHOS_MAX + MLXSW_MFCR_PWMS_MAX) + +struct mlxsw_hwmon_attr { + struct device_attribute dev_attr; + struct mlxsw_hwmon *hwmon; + unsigned int type_index; + char name[16]; +}; + +struct mlxsw_hwmon { + struct mlxsw_core *core; + const struct mlxsw_bus_info *bus_info; + struct device *hwmon_dev; + struct attribute_group group; + const struct attribute_group *groups[2]; + struct attribute *attrs[MLXSW_HWMON_ATTR_COUNT + 1]; + struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT]; + unsigned int attrs_count; +}; + +static ssize_t mlxsw_hwmon_temp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + unsigned int temp; + int err; + + mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, + false, false); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); + return err; + } + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + return sprintf(buf, "%u\n", temp); +} + +static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + unsigned int temp_max; + int err; + + mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, + false, false); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); + return err; + } + mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL); + return sprintf(buf, "%u\n", temp_max); +} + +static ssize_t mlxsw_hwmon_fan_rpm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mfsm_pl[MLXSW_REG_MFSM_LEN]; + int err; + + mlxsw_reg_mfsm_pack(mfsm_pl, mlwsw_hwmon_attr->type_index); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsm), mfsm_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n"); + return err; + } + return sprintf(buf, "%u\n", mlxsw_reg_mfsm_rpm_get(mfsm_pl)); +} + +static ssize_t mlxsw_hwmon_pwm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mfsc_pl[MLXSW_REG_MFSC_LEN]; + int err; + + mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, 0); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query PWM\n"); + return err; + } + return sprintf(buf, "%u\n", + mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl)); +} + +static ssize_t mlxsw_hwmon_pwm_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mfsc_pl[MLXSW_REG_MFSC_LEN]; + unsigned long val; + int err; + + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val > 255) + return -EINVAL; + + mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, val); + err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to write PWM\n"); + return err; + } + return len; +} + +enum mlxsw_hwmon_attr_type { + MLXSW_HWMON_ATTR_TYPE_TEMP, + MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, + MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + MLXSW_HWMON_ATTR_TYPE_PWM, +}; + +static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, + enum mlxsw_hwmon_attr_type attr_type, + unsigned int type_index, unsigned int num) { + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr; + unsigned int attr_index; + + attr_index = mlxsw_hwmon->attrs_count; + mlxsw_hwmon_attr = &mlxsw_hwmon->hwmon_attrs[attr_index]; + + switch (attr_type) { + case MLXSW_HWMON_ATTR_TYPE_TEMP: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_input", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MAX: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_max_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_highest", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_FAN_RPM: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_fan_rpm_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "fan%u_input", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_PWM: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_pwm_show; + mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_pwm_store; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IWUSR | S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "pwm%u", num + 1); + break; + default: + WARN_ON(1); + } + + mlxsw_hwmon_attr->type_index = type_index; + mlxsw_hwmon_attr->hwmon = mlxsw_hwmon; + mlxsw_hwmon_attr->dev_attr.attr.name = mlxsw_hwmon_attr->name; + sysfs_attr_init(&mlxsw_hwmon_attr->dev_attr.attr); + + mlxsw_hwmon->attrs[attr_index] = &mlxsw_hwmon_attr->dev_attr.attr; + mlxsw_hwmon->attrs_count++; +} + +static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) +{ + char mtcap_pl[MLXSW_REG_MTCAP_LEN]; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + u8 sensor_count; + int i; + int err; + + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtcap), mtcap_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get number of temp sensors\n"); + return err; + } + sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl); + for (i = 0; i < sensor_count; i++) { + mlxsw_reg_mtmp_pack(mtmp_pl, i, true, true); + err = mlxsw_reg_write(mlxsw_hwmon->core, + MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to setup temp sensor number %d\n", + i); + return err; + } + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP, i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, i, i); + } + return 0; +} + +static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon) +{ + char mfcr_pl[MLXSW_REG_MFCR_LEN]; + enum mlxsw_reg_mfcr_pwm_frequency freq; + unsigned int type_index; + unsigned int num; + u16 tacho_active; + u8 pwm_active; + int err; + + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfcr), mfcr_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get to probe PWMs and Tachometers\n"); + return err; + } + mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active); + num = 0; + for (type_index = 0; type_index < MLXSW_MFCR_TACHOS_MAX; type_index++) { + if (tacho_active & BIT(type_index)) + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + type_index, num++); + } + num = 0; + for (type_index = 0; type_index < MLXSW_MFCR_PWMS_MAX; type_index++) { + if (pwm_active & BIT(type_index)) + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_PWM, + type_index, num++); + } + return 0; +} + +int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_hwmon **p_hwmon) +{ + struct mlxsw_hwmon *mlxsw_hwmon; + struct device *hwmon_dev; + int err; + + mlxsw_hwmon = kzalloc(sizeof(*mlxsw_hwmon), GFP_KERNEL); + if (!mlxsw_hwmon) + return -ENOMEM; + mlxsw_hwmon->core = mlxsw_core; + mlxsw_hwmon->bus_info = mlxsw_bus_info; + + err = mlxsw_hwmon_temp_init(mlxsw_hwmon); + if (err) + goto err_temp_init; + + err = mlxsw_hwmon_fans_init(mlxsw_hwmon); + if (err) + goto err_fans_init; + + mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group; + mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs; + + hwmon_dev = devm_hwmon_device_register_with_groups(mlxsw_bus_info->dev, + "mlxsw", + mlxsw_hwmon, + mlxsw_hwmon->groups); + if (IS_ERR(hwmon_dev)) { + err = PTR_ERR(hwmon_dev); + goto err_hwmon_register; + } + + mlxsw_hwmon->hwmon_dev = hwmon_dev; + *p_hwmon = mlxsw_hwmon; + return 0; + +err_hwmon_register: +err_fans_init: +err_temp_init: + kfree(mlxsw_hwmon); + return err; +} + +void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon) +{ + kfree(mlxsw_hwmon); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index de69e719dc9d..d2102e572b1d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -686,11 +686,15 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, if (q->consumer_counter++ != consumer_counter_limit) dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n"); - /* We do not support lag now */ - if (mlxsw_pci_cqe_lag_get(cqe)) - goto drop; + if (mlxsw_pci_cqe_lag_get(cqe)) { + rx_info.is_lag = true; + rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe); + rx_info.lag_port_index = mlxsw_pci_cqe_lag_port_index_get(cqe); + } else { + rx_info.is_lag = false; + rx_info.u.sys_port = mlxsw_pci_cqe_system_port_get(cqe); + } - rx_info.sys_port = mlxsw_pci_cqe_system_port_get(cqe); rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe); byte_count = mlxsw_pci_cqe_byte_count_get(cqe); @@ -699,7 +703,6 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, skb_put(skb, byte_count); mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); -put_new_skb: memset(wqe, 0, q->elem_size); err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); if (err) @@ -708,10 +711,6 @@ put_new_skb: q->producer_counter++; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); return; - -drop: - dev_kfree_skb_any(skb); - goto put_new_skb; } static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h index 142f33d978c5..912106054ff2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h @@ -129,13 +129,15 @@ MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false); */ MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1); -/* pci_cqe_system_port +/* pci_cqe_system_port/lag_id * When lag=0: System port on which the packet was received * When lag=1: * bits [15:4] LAG ID on which the packet was received * bits [3:0] sub_port on which the packet was received */ MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16); +MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12); +MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4); /* pci_cqe_wqe_counter * WQE count of the WQEs completed on the associated dqn diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 236fb5d2ad69..af631df4603a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -286,6 +286,7 @@ MLXSW_ITEM32_INDEXED(reg, sfd, rec_swid, MLXSW_REG_SFD_BASE_LEN, 24, 8, enum mlxsw_reg_sfd_rec_type { MLXSW_REG_SFD_REC_TYPE_UNICAST = 0x0, + MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG = 0x1, }; /* reg_sfd_rec_type @@ -376,24 +377,34 @@ MLXSW_ITEM32_INDEXED(reg, sfd, uc_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, MLXSW_ITEM32_INDEXED(reg, sfd, uc_system_port, MLXSW_REG_SFD_BASE_LEN, 0, 16, MLXSW_REG_SFD_REC_LEN, 0x0C, false); -static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index, - enum mlxsw_reg_sfd_rec_policy policy, - const char *mac, u16 vid, - enum mlxsw_reg_sfd_rec_action action, - u8 local_port) +static inline void mlxsw_reg_sfd_rec_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_type rec_type, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, + enum mlxsw_reg_sfd_rec_action action) { u8 num_rec = mlxsw_reg_sfd_num_rec_get(payload); if (rec_index >= num_rec) mlxsw_reg_sfd_num_rec_set(payload, rec_index + 1); mlxsw_reg_sfd_rec_swid_set(payload, rec_index, 0); - mlxsw_reg_sfd_rec_type_set(payload, rec_index, - MLXSW_REG_SFD_REC_TYPE_UNICAST); + mlxsw_reg_sfd_rec_type_set(payload, rec_index, rec_type); mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy); mlxsw_reg_sfd_rec_mac_memcpy_to(payload, rec_index, mac); + mlxsw_reg_sfd_rec_action_set(payload, rec_index, action); +} + +static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 vid, + enum mlxsw_reg_sfd_rec_action action, + u8 local_port) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_UNICAST, + policy, mac, action); mlxsw_reg_sfd_uc_sub_port_set(payload, rec_index, 0); mlxsw_reg_sfd_uc_fid_vid_set(payload, rec_index, vid); - mlxsw_reg_sfd_rec_action_set(payload, rec_index, action); mlxsw_reg_sfd_uc_system_port_set(payload, rec_index, local_port); } @@ -406,6 +417,58 @@ static inline void mlxsw_reg_sfd_uc_unpack(char *payload, int rec_index, *p_local_port = mlxsw_reg_sfd_uc_system_port_get(payload, rec_index); } +/* reg_sfd_uc_lag_sub_port + * LAG sub port. + * Must be 0 if multichannel VEPA is not enabled. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_lag_fid_vid + * Filtering ID or VLAN ID + * For SwitchX and SwitchX-2: + * - Dynamic entries (policy 2,3) use FID + * - Static entries (policy 0) use VID + * - When independent learning is configured, VID=FID + * For Spectrum: use FID for both Dynamic and Static entries. + * VID should not be used. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_lag_lag_id + * LAG Identifier - pointer into the LAG descriptor table. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_id, MLXSW_REG_SFD_BASE_LEN, 0, 10, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +static inline void +mlxsw_reg_sfd_uc_lag_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 vid, + enum mlxsw_reg_sfd_rec_action action, + u16 lag_id) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG, + policy, mac, action); + mlxsw_reg_sfd_uc_lag_sub_port_set(payload, rec_index, 0); + mlxsw_reg_sfd_uc_lag_fid_vid_set(payload, rec_index, vid); + mlxsw_reg_sfd_uc_lag_lag_id_set(payload, rec_index, lag_id); +} + +static inline void mlxsw_reg_sfd_uc_lag_unpack(char *payload, int rec_index, + char *mac, u16 *p_vid, + u16 *p_lag_id) +{ + mlxsw_reg_sfd_rec_mac_memcpy_from(payload, rec_index, mac); + *p_vid = mlxsw_reg_sfd_uc_lag_fid_vid_get(payload, rec_index); + *p_lag_id = mlxsw_reg_sfd_uc_lag_lag_id_get(payload, rec_index); +} + /* SFN - Switch FDB Notification Register * ------------------------------------------- * The switch provides notifications on newly learned FDB entries and @@ -456,8 +519,12 @@ MLXSW_ITEM32_INDEXED(reg, sfn, rec_swid, MLXSW_REG_SFN_BASE_LEN, 24, 8, enum mlxsw_reg_sfn_rec_type { /* MAC addresses learned on a regular port. */ MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC = 0x5, - /* Aged-out MAC address on a regular port */ + /* MAC addresses learned on a LAG port. */ + MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG = 0x6, + /* Aged-out MAC address on a regular port. */ MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC = 0x7, + /* Aged-out MAC address on a LAG port. */ + MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG = 0x8, }; /* reg_sfn_rec_type @@ -505,6 +572,22 @@ static inline void mlxsw_reg_sfn_mac_unpack(char *payload, int rec_index, *p_local_port = mlxsw_reg_sfn_mac_system_port_get(payload, rec_index); } +/* reg_sfn_mac_lag_lag_id + * LAG ID (pointer into the LAG descriptor table). + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, mac_lag_lag_id, MLXSW_REG_SFN_BASE_LEN, 0, 10, + MLXSW_REG_SFN_REC_LEN, 0x0C, false); + +static inline void mlxsw_reg_sfn_mac_lag_unpack(char *payload, int rec_index, + char *mac, u16 *p_vid, + u16 *p_lag_id) +{ + mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac); + *p_vid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index); + *p_lag_id = mlxsw_reg_sfn_mac_lag_lag_id_get(payload, rec_index); +} + /* SPMS - Switch Port MSTP/RSTP State Register * ------------------------------------------- * Configures the spanning tree state of a physical port. @@ -865,6 +948,293 @@ static inline void mlxsw_reg_sftr_pack(char *payload, mlxsw_reg_sftr_port_mask_set(payload, port, 1); } +/* SLDR - Switch LAG Descriptor Register + * ----------------------------------------- + * The switch LAG descriptor register is populated by LAG descriptors. + * Each LAG descriptor is indexed by lag_id. The LAG ID runs from 0 to + * max_lag-1. + */ +#define MLXSW_REG_SLDR_ID 0x2014 +#define MLXSW_REG_SLDR_LEN 0x0C /* counting in only one port in list */ + +static const struct mlxsw_reg_info mlxsw_reg_sldr = { + .id = MLXSW_REG_SLDR_ID, + .len = MLXSW_REG_SLDR_LEN, +}; + +enum mlxsw_reg_sldr_op { + /* Indicates a creation of a new LAG-ID, lag_id must be valid */ + MLXSW_REG_SLDR_OP_LAG_CREATE, + MLXSW_REG_SLDR_OP_LAG_DESTROY, + /* Ports that appear in the list have the Distributor enabled */ + MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST, + /* Removes ports from the disributor list */ + MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST, +}; + +/* reg_sldr_op + * Operation. + * Access: RW + */ +MLXSW_ITEM32(reg, sldr, op, 0x00, 29, 3); + +/* reg_sldr_lag_id + * LAG identifier. The lag_id is the index into the LAG descriptor table. + * Access: Index + */ +MLXSW_ITEM32(reg, sldr, lag_id, 0x00, 0, 10); + +static inline void mlxsw_reg_sldr_lag_create_pack(char *payload, u8 lag_id) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_CREATE); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); +} + +static inline void mlxsw_reg_sldr_lag_destroy_pack(char *payload, u8 lag_id) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_DESTROY); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); +} + +/* reg_sldr_num_ports + * The number of member ports of the LAG. + * Reserved for Create / Destroy operations + * For Add / Remove operations - indicates the number of ports in the list. + * Access: RW + */ +MLXSW_ITEM32(reg, sldr, num_ports, 0x04, 24, 8); + +/* reg_sldr_system_port + * System port. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sldr, system_port, 0x08, 0, 16, 4, 0, false); + +static inline void mlxsw_reg_sldr_lag_add_port_pack(char *payload, u8 lag_id, + u8 local_port) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); + mlxsw_reg_sldr_num_ports_set(payload, 1); + mlxsw_reg_sldr_system_port_set(payload, 0, local_port); +} + +static inline void mlxsw_reg_sldr_lag_remove_port_pack(char *payload, u8 lag_id, + u8 local_port) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); + mlxsw_reg_sldr_num_ports_set(payload, 1); + mlxsw_reg_sldr_system_port_set(payload, 0, local_port); +} + +/* SLCR - Switch LAG Configuration 2 Register + * ------------------------------------------- + * The Switch LAG Configuration register is used for configuring the + * LAG properties of the switch. + */ +#define MLXSW_REG_SLCR_ID 0x2015 +#define MLXSW_REG_SLCR_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_slcr = { + .id = MLXSW_REG_SLCR_ID, + .len = MLXSW_REG_SLCR_LEN, +}; + +enum mlxsw_reg_slcr_pp { + /* Global Configuration (for all ports) */ + MLXSW_REG_SLCR_PP_GLOBAL, + /* Per port configuration, based on local_port field */ + MLXSW_REG_SLCR_PP_PER_PORT, +}; + +/* reg_slcr_pp + * Per Port Configuration + * Note: Reading at Global mode results in reading port 1 configuration. + * Access: Index + */ +MLXSW_ITEM32(reg, slcr, pp, 0x00, 24, 1); + +/* reg_slcr_local_port + * Local port number + * Supported from CPU port + * Not supported from router port + * Reserved when pp = Global Configuration + * Access: Index + */ +MLXSW_ITEM32(reg, slcr, local_port, 0x00, 16, 8); + +enum mlxsw_reg_slcr_type { + MLXSW_REG_SLCR_TYPE_CRC, /* default */ + MLXSW_REG_SLCR_TYPE_XOR, + MLXSW_REG_SLCR_TYPE_RANDOM, +}; + +/* reg_slcr_type + * Hash type + * Access: RW + */ +MLXSW_ITEM32(reg, slcr, type, 0x00, 0, 4); + +/* Ingress port */ +#define MLXSW_REG_SLCR_LAG_HASH_IN_PORT BIT(0) +/* SMAC - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_SMAC_IP BIT(1) +/* SMAC - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP BIT(2) +#define MLXSW_REG_SLCR_LAG_HASH_SMAC \ + (MLXSW_REG_SLCR_LAG_HASH_SMAC_IP | \ + MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP) +/* DMAC - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_DMAC_IP BIT(3) +/* DMAC - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP BIT(4) +#define MLXSW_REG_SLCR_LAG_HASH_DMAC \ + (MLXSW_REG_SLCR_LAG_HASH_DMAC_IP | \ + MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP) +/* Ethertype - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP BIT(5) +/* Ethertype - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP BIT(6) +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE \ + (MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP | \ + MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP) +/* VLAN ID - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_VLANID_IP BIT(7) +/* VLAN ID - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP BIT(8) +#define MLXSW_REG_SLCR_LAG_HASH_VLANID \ + (MLXSW_REG_SLCR_LAG_HASH_VLANID_IP | \ + MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP) +/* Source IP address (can be IPv4 or IPv6) */ +#define MLXSW_REG_SLCR_LAG_HASH_SIP BIT(9) +/* Destination IP address (can be IPv4 or IPv6) */ +#define MLXSW_REG_SLCR_LAG_HASH_DIP BIT(10) +/* TCP/UDP source port */ +#define MLXSW_REG_SLCR_LAG_HASH_SPORT BIT(11) +/* TCP/UDP destination port*/ +#define MLXSW_REG_SLCR_LAG_HASH_DPORT BIT(12) +/* IPv4 Protocol/IPv6 Next Header */ +#define MLXSW_REG_SLCR_LAG_HASH_IPPROTO BIT(13) +/* IPv6 Flow label */ +#define MLXSW_REG_SLCR_LAG_HASH_FLOWLABEL BIT(14) +/* SID - FCoE source ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_SID BIT(15) +/* DID - FCoE destination ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_DID BIT(16) +/* OXID - FCoE originator exchange ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_OXID BIT(17) +/* Destination QP number - for RoCE packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ROCE_DQP BIT(19) + +/* reg_slcr_lag_hash + * LAG hashing configuration. This is a bitmask, in which each set + * bit includes the corresponding item in the LAG hash calculation. + * The default lag_hash contains SMAC, DMAC, VLANID and + * Ethertype (for all packet types). + * Access: RW + */ +MLXSW_ITEM32(reg, slcr, lag_hash, 0x04, 0, 20); + +static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash) +{ + MLXSW_REG_ZERO(slcr, payload); + mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL); + mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_XOR); + mlxsw_reg_slcr_lag_hash_set(payload, lag_hash); +} + +/* SLCOR - Switch LAG Collector Register + * ------------------------------------- + * The Switch LAG Collector register controls the Local Port membership + * in a LAG and enablement of the collector. + */ +#define MLXSW_REG_SLCOR_ID 0x2016 +#define MLXSW_REG_SLCOR_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_slcor = { + .id = MLXSW_REG_SLCOR_ID, + .len = MLXSW_REG_SLCOR_LEN, +}; + +enum mlxsw_reg_slcor_col { + /* Port is added with collector disabled */ + MLXSW_REG_SLCOR_COL_LAG_ADD_PORT, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_DISABLED, + MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT, +}; + +/* reg_slcor_col + * Collector configuration + * Access: RW + */ +MLXSW_ITEM32(reg, slcor, col, 0x00, 30, 2); + +/* reg_slcor_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32(reg, slcor, local_port, 0x00, 16, 8); + +/* reg_slcor_lag_id + * LAG Identifier. Index into the LAG descriptor table. + * Access: Index + */ +MLXSW_ITEM32(reg, slcor, lag_id, 0x00, 0, 10); + +/* reg_slcor_port_index + * Port index in the LAG list. Only valid on Add Port to LAG col. + * Valid range is from 0 to cap_max_lag_members-1 + * Access: RW + */ +MLXSW_ITEM32(reg, slcor, port_index, 0x04, 0, 10); + +static inline void mlxsw_reg_slcor_pack(char *payload, + u8 local_port, u16 lag_id, + enum mlxsw_reg_slcor_col col) +{ + MLXSW_REG_ZERO(slcor, payload); + mlxsw_reg_slcor_col_set(payload, col); + mlxsw_reg_slcor_local_port_set(payload, local_port); + mlxsw_reg_slcor_lag_id_set(payload, lag_id); +} + +static inline void mlxsw_reg_slcor_port_add_pack(char *payload, + u8 local_port, u16 lag_id, + u8 port_index) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_ADD_PORT); + mlxsw_reg_slcor_port_index_set(payload, port_index); +} + +static inline void mlxsw_reg_slcor_port_remove_pack(char *payload, + u8 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT); +} + +static inline void mlxsw_reg_slcor_col_enable_pack(char *payload, + u8 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED); +} + +static inline void mlxsw_reg_slcor_col_disable_pack(char *payload, + u8 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED); +} + /* SPMLR - Switch Port MAC Learning Register * ----------------------------------------- * Controls the Switch MAC learning policy per port. @@ -2087,6 +2457,284 @@ static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id) mlxsw_reg_hpkt_ctrl_set(payload, MLXSW_REG_HPKT_CTRL_PACKET_DEFAULT); } +/* MFCR - Management Fan Control Register + * -------------------------------------- + * This register controls the settings of the Fan Speed PWM mechanism. + */ +#define MLXSW_REG_MFCR_ID 0x9001 +#define MLXSW_REG_MFCR_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mfcr = { + .id = MLXSW_REG_MFCR_ID, + .len = MLXSW_REG_MFCR_LEN, +}; + +enum mlxsw_reg_mfcr_pwm_frequency { + MLXSW_REG_MFCR_PWM_FEQ_11HZ = 0x00, + MLXSW_REG_MFCR_PWM_FEQ_14_7HZ = 0x01, + MLXSW_REG_MFCR_PWM_FEQ_22_1HZ = 0x02, + MLXSW_REG_MFCR_PWM_FEQ_1_4KHZ = 0x40, + MLXSW_REG_MFCR_PWM_FEQ_5KHZ = 0x41, + MLXSW_REG_MFCR_PWM_FEQ_20KHZ = 0x42, + MLXSW_REG_MFCR_PWM_FEQ_22_5KHZ = 0x43, + MLXSW_REG_MFCR_PWM_FEQ_25KHZ = 0x44, +}; + +/* reg_mfcr_pwm_frequency + * Controls the frequency of the PWM signal. + * Access: RW + */ +MLXSW_ITEM32(reg, mfcr, pwm_frequency, 0x00, 0, 6); + +#define MLXSW_MFCR_TACHOS_MAX 10 + +/* reg_mfcr_tacho_active + * Indicates which of the tachometer is active (bit per tachometer). + * Access: RO + */ +MLXSW_ITEM32(reg, mfcr, tacho_active, 0x04, 16, MLXSW_MFCR_TACHOS_MAX); + +#define MLXSW_MFCR_PWMS_MAX 5 + +/* reg_mfcr_pwm_active + * Indicates which of the PWM control is active (bit per PWM). + * Access: RO + */ +MLXSW_ITEM32(reg, mfcr, pwm_active, 0x04, 0, MLXSW_MFCR_PWMS_MAX); + +static inline void +mlxsw_reg_mfcr_pack(char *payload, + enum mlxsw_reg_mfcr_pwm_frequency pwm_frequency) +{ + MLXSW_REG_ZERO(mfcr, payload); + mlxsw_reg_mfcr_pwm_frequency_set(payload, pwm_frequency); +} + +static inline void +mlxsw_reg_mfcr_unpack(char *payload, + enum mlxsw_reg_mfcr_pwm_frequency *p_pwm_frequency, + u16 *p_tacho_active, u8 *p_pwm_active) +{ + *p_pwm_frequency = mlxsw_reg_mfcr_pwm_frequency_get(payload); + *p_tacho_active = mlxsw_reg_mfcr_tacho_active_get(payload); + *p_pwm_active = mlxsw_reg_mfcr_pwm_active_get(payload); +} + +/* MFSC - Management Fan Speed Control Register + * -------------------------------------------- + * This register controls the settings of the Fan Speed PWM mechanism. + */ +#define MLXSW_REG_MFSC_ID 0x9002 +#define MLXSW_REG_MFSC_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mfsc = { + .id = MLXSW_REG_MFSC_ID, + .len = MLXSW_REG_MFSC_LEN, +}; + +/* reg_mfsc_pwm + * Fan pwm to control / monitor. + * Access: Index + */ +MLXSW_ITEM32(reg, mfsc, pwm, 0x00, 24, 3); + +/* reg_mfsc_pwm_duty_cycle + * Controls the duty cycle of the PWM. Value range from 0..255 to + * represent duty cycle of 0%...100%. + * Access: RW + */ +MLXSW_ITEM32(reg, mfsc, pwm_duty_cycle, 0x04, 0, 8); + +static inline void mlxsw_reg_mfsc_pack(char *payload, u8 pwm, + u8 pwm_duty_cycle) +{ + MLXSW_REG_ZERO(mfsc, payload); + mlxsw_reg_mfsc_pwm_set(payload, pwm); + mlxsw_reg_mfsc_pwm_duty_cycle_set(payload, pwm_duty_cycle); +} + +/* MFSM - Management Fan Speed Measurement + * --------------------------------------- + * This register controls the settings of the Tacho measurements and + * enables reading the Tachometer measurements. + */ +#define MLXSW_REG_MFSM_ID 0x9003 +#define MLXSW_REG_MFSM_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mfsm = { + .id = MLXSW_REG_MFSM_ID, + .len = MLXSW_REG_MFSM_LEN, +}; + +/* reg_mfsm_tacho + * Fan tachometer index. + * Access: Index + */ +MLXSW_ITEM32(reg, mfsm, tacho, 0x00, 24, 4); + +/* reg_mfsm_rpm + * Fan speed (round per minute). + * Access: RO + */ +MLXSW_ITEM32(reg, mfsm, rpm, 0x04, 0, 16); + +static inline void mlxsw_reg_mfsm_pack(char *payload, u8 tacho) +{ + MLXSW_REG_ZERO(mfsm, payload); + mlxsw_reg_mfsm_tacho_set(payload, tacho); +} + +/* MTCAP - Management Temperature Capabilities + * ------------------------------------------- + * This register exposes the capabilities of the device and + * system temperature sensing. + */ +#define MLXSW_REG_MTCAP_ID 0x9009 +#define MLXSW_REG_MTCAP_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mtcap = { + .id = MLXSW_REG_MTCAP_ID, + .len = MLXSW_REG_MTCAP_LEN, +}; + +/* reg_mtcap_sensor_count + * Number of sensors supported by the device. + * This includes the QSFP module sensors (if exists in the QSFP module). + * Access: RO + */ +MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7); + +/* MTMP - Management Temperature + * ----------------------------- + * This register controls the settings of the temperature measurements + * and enables reading the temperature measurements. Note that temperature + * is in 0.125 degrees Celsius. + */ +#define MLXSW_REG_MTMP_ID 0x900A +#define MLXSW_REG_MTMP_LEN 0x20 + +static const struct mlxsw_reg_info mlxsw_reg_mtmp = { + .id = MLXSW_REG_MTMP_ID, + .len = MLXSW_REG_MTMP_LEN, +}; + +/* reg_mtmp_sensor_index + * Sensors index to access. + * 64-127 of sensor_index are mapped to the SFP+/QSFP modules sequentially + * (module 0 is mapped to sensor_index 64). + * Access: Index + */ +MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 7); + +/* Convert to milli degrees Celsius */ +#define MLXSW_REG_MTMP_TEMP_TO_MC(val) (val * 125) + +/* reg_mtmp_temperature + * Temperature reading from the sensor. Reading is in 0.125 Celsius + * degrees units. + * Access: RO + */ +MLXSW_ITEM32(reg, mtmp, temperature, 0x04, 0, 16); + +/* reg_mtmp_mte + * Max Temperature Enable - enables measuring the max temperature on a sensor. + * Access: RW + */ +MLXSW_ITEM32(reg, mtmp, mte, 0x08, 31, 1); + +/* reg_mtmp_mtr + * Max Temperature Reset - clears the value of the max temperature register. + * Access: WO + */ +MLXSW_ITEM32(reg, mtmp, mtr, 0x08, 30, 1); + +/* reg_mtmp_max_temperature + * The highest measured temperature from the sensor. + * When the bit mte is cleared, the field max_temperature is reserved. + * Access: RO + */ +MLXSW_ITEM32(reg, mtmp, max_temperature, 0x08, 0, 16); + +#define MLXSW_REG_MTMP_SENSOR_NAME_SIZE 8 + +/* reg_mtmp_sensor_name + * Sensor Name + * Access: RO + */ +MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE); + +static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index, + bool max_temp_enable, + bool max_temp_reset) +{ + MLXSW_REG_ZERO(mtmp, payload); + mlxsw_reg_mtmp_sensor_index_set(payload, sensor_index); + mlxsw_reg_mtmp_mte_set(payload, max_temp_enable); + mlxsw_reg_mtmp_mtr_set(payload, max_temp_reset); +} + +static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, + unsigned int *p_max_temp, + char *sensor_name) +{ + u16 temp; + + if (p_temp) { + temp = mlxsw_reg_mtmp_temperature_get(payload); + *p_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } + if (p_max_temp) { + temp = mlxsw_reg_mtmp_max_temperature_get(payload); + *p_max_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } + if (sensor_name) + mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); +} + +/* MLCR - Management LED Control Register + * -------------------------------------- + * Controls the system LEDs. + */ +#define MLXSW_REG_MLCR_ID 0x902B +#define MLXSW_REG_MLCR_LEN 0x0C + +static const struct mlxsw_reg_info mlxsw_reg_mlcr = { + .id = MLXSW_REG_MLCR_ID, + .len = MLXSW_REG_MLCR_LEN, +}; + +/* reg_mlcr_local_port + * Local port number. + * Access: RW + */ +MLXSW_ITEM32(reg, mlcr, local_port, 0x00, 16, 8); + +#define MLXSW_REG_MLCR_DURATION_MAX 0xFFFF + +/* reg_mlcr_beacon_duration + * Duration of the beacon to be active, in seconds. + * 0x0 - Will turn off the beacon. + * 0xFFFF - Will turn on the beacon until explicitly turned off. + * Access: RW + */ +MLXSW_ITEM32(reg, mlcr, beacon_duration, 0x04, 0, 16); + +/* reg_mlcr_beacon_remain + * Remaining duration of the beacon, in seconds. + * 0xFFFF indicates an infinite amount of time. + * Access: RO + */ +MLXSW_ITEM32(reg, mlcr, beacon_remain, 0x08, 0, 16); + +static inline void mlxsw_reg_mlcr_pack(char *payload, u8 local_port, + bool active) +{ + MLXSW_REG_ZERO(mlcr, payload); + mlxsw_reg_mlcr_local_port_set(payload, local_port); + mlxsw_reg_mlcr_beacon_duration_set(payload, active ? + MLXSW_REG_MLCR_DURATION_MAX : 0); +} + /* SBPR - Shared Buffer Pools Register * ----------------------------------- * The SBPR configures and retrieves the shared buffer pools and configuration. @@ -2375,6 +3023,12 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "SFGC"; case MLXSW_REG_SFTR_ID: return "SFTR"; + case MLXSW_REG_SLDR_ID: + return "SLDR"; + case MLXSW_REG_SLCR_ID: + return "SLCR"; + case MLXSW_REG_SLCOR_ID: + return "SLCOR"; case MLXSW_REG_SPMLR_ID: return "SPMLR"; case MLXSW_REG_SVFA_ID: @@ -2405,6 +3059,18 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "HTGT"; case MLXSW_REG_HPKT_ID: return "HPKT"; + case MLXSW_REG_MFCR_ID: + return "MFCR"; + case MLXSW_REG_MFSC_ID: + return "MFSC"; + case MLXSW_REG_MFSM_ID: + return "MFSM"; + case MLXSW_REG_MTCAP_ID: + return "MTCAP"; + case MLXSW_REG_MTMP_ID: + return "MTMP"; + case MLXSW_REG_MLCR_ID: + return "MLCR"; case MLXSW_REG_SBPR_ID: return "SBPR"; case MLXSW_REG_SBCM_ID: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3be4a2355ead..322ed544348f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -417,6 +417,10 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } +static void mlxsw_sp_set_rx_mode(struct net_device *dev) +{ +} + static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); @@ -725,6 +729,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_open = mlxsw_sp_port_open, .ndo_stop = mlxsw_sp_port_stop, .ndo_start_xmit = mlxsw_sp_port_xmit, + .ndo_set_rx_mode = mlxsw_sp_set_rx_mode, .ndo_set_mac_address = mlxsw_sp_port_set_mac_address, .ndo_change_mtu = mlxsw_sp_port_change_mtu, .ndo_get_stats64 = mlxsw_sp_port_get_stats64, @@ -859,6 +864,29 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev, } } +static int mlxsw_sp_port_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char mlcr_pl[MLXSW_REG_MLCR_LEN]; + bool active; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + active = true; + break; + case ETHTOOL_ID_INACTIVE: + active = false; + break; + default: + return -EOPNOTSUPP; + } + + mlxsw_reg_mlcr_pack(mlcr_pl, mlxsw_sp_port->local_port, active); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mlcr), mlcr_pl); +} + static void mlxsw_sp_port_get_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { @@ -1205,6 +1233,7 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_drvinfo = mlxsw_sp_port_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlxsw_sp_port_get_strings, + .set_phys_id = mlxsw_sp_port_set_phys_id, .get_ethtool_stats = mlxsw_sp_port_get_stats, .get_sset_count = mlxsw_sp_port_get_sset_count, .get_settings = mlxsw_sp_port_get_settings, @@ -1683,6 +1712,22 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) return 0; } +static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) +{ + char slcr_pl[MLXSW_REG_SLCR_LEN]; + + mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC | + MLXSW_REG_SLCR_LAG_HASH_DMAC | + MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE | + MLXSW_REG_SLCR_LAG_HASH_VLANID | + MLXSW_REG_SLCR_LAG_HASH_SIP | + MLXSW_REG_SLCR_LAG_HASH_DIP | + MLXSW_REG_SLCR_LAG_HASH_SPORT | + MLXSW_REG_SLCR_LAG_HASH_DPORT | + MLXSW_REG_SLCR_LAG_HASH_IPPROTO); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); +} + static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { @@ -1728,6 +1773,12 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, goto err_buffers_init; } + err = mlxsw_sp_lag_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize LAG\n"); + goto err_lag_init; + } + err = mlxsw_sp_switchdev_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize switchdev\n"); @@ -1737,6 +1788,7 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, return 0; err_switchdev_init: +err_lag_init: err_buffers_init: err_flood_init: mlxsw_sp_traps_fini(mlxsw_sp); @@ -1764,9 +1816,9 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, .used_max_lag = 1, - .max_lag = 64, + .max_lag = MLXSW_SP_LAG_MAX, .used_max_port_per_lag = 1, - .max_port_per_lag = 16, + .max_port_per_lag = MLXSW_SP_PORT_PER_LAG_MAX, .used_max_mid = 1, .max_mid = 7000, .used_max_pgt = 1, @@ -1865,19 +1917,245 @@ static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp, mlxsw_sp->master_bridge.dev = NULL; } -static int mlxsw_sp_netdevice_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_create_pack(sldr_pl, lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_destroy(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_destroy_pack(sldr_pl, lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_col_port_add(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id, u8 port_index) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_port_add_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id, port_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_port_remove_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_enable(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_col_enable_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_disable(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_col_disable_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp, + struct net_device *lag_dev, + u16 *p_lag_id) +{ + struct mlxsw_sp_upper *lag; + int free_lag_id = -1; + int i; + + for (i = 0; i < MLXSW_SP_LAG_MAX; i++) { + lag = mlxsw_sp_lag_get(mlxsw_sp, i); + if (lag->ref_count) { + if (lag->dev == lag_dev) { + *p_lag_id = i; + return 0; + } + } else if (free_lag_id < 0) { + free_lag_id = i; + } + } + if (free_lag_id < 0) + return -EBUSY; + *p_lag_id = free_lag_id; + return 0; +} + +static bool +mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, + struct net_device *lag_dev, + struct netdev_lag_upper_info *lag_upper_info) +{ + u16 lag_id; + + if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) + return false; + if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) + return false; + return true; +} + +static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, + u16 lag_id, u8 *p_port_index) +{ + int i; + + for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) { + *p_port_index = i; + return 0; + } + } + return -EBUSY; +} + +static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_upper *lag; + u16 lag_id; + u8 port_index; + int err; + + err = mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id); + if (err) + return err; + lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); + if (!lag->ref_count) { + err = mlxsw_sp_lag_create(mlxsw_sp, lag_id); + if (err) + return err; + lag->dev = lag_dev; + } + + err = mlxsw_sp_port_lag_index_get(mlxsw_sp, lag_id, &port_index); + if (err) + return err; + err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index); + if (err) + goto err_col_port_add; + err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, lag_id); + if (err) + goto err_col_port_enable; + + mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index, + mlxsw_sp_port->local_port); + mlxsw_sp_port->lag_id = lag_id; + mlxsw_sp_port->lagged = 1; + lag->ref_count++; + return 0; + +err_col_port_add: + if (!lag->ref_count) + mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); +err_col_port_enable: + mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); + return err; +} + +static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_upper *lag; + u16 lag_id = mlxsw_sp_port->lag_id; + int err; + + if (!mlxsw_sp_port->lagged) + return 0; + lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); + WARN_ON(lag->ref_count == 0); + + err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id); + if (err) + return err; + err = mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); + if (err) + return err; + + if (lag->ref_count == 1) { + err = mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); + if (err) + return err; + } + + mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id, + mlxsw_sp_port->local_port); + mlxsw_sp_port->lagged = 0; + lag->ref_count--; + return 0; +} + +static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_add_port_pack(sldr_pl, lag_id, + mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_dist_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_remove_port_pack(sldr_pl, lag_id, + mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_port_lag_tx_en_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool lag_tx_enabled) +{ + if (lag_tx_enabled) + return mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + else + return mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, + mlxsw_sp_port->lag_id); +} + +static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port, + struct netdev_lag_lower_state_info *info) +{ + return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled); +} + +static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, + unsigned long event, void *ptr) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *info; struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *upper_dev; struct mlxsw_sp *mlxsw_sp; int err; - if (!mlxsw_sp_port_dev_check(dev)) - return NOTIFY_DONE; - mlxsw_sp_port = netdev_priv(dev); mlxsw_sp = mlxsw_sp_port->mlxsw_sp; info = ptr; @@ -1885,16 +2163,22 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; + if (!info->master || !info->linking) + break; /* HW limitation forbids to put ports to multiple bridges. */ - if (info->master && info->linking && - netif_is_bridge_master(upper_dev) && + if (netif_is_bridge_master(upper_dev) && !mlxsw_sp_master_bridge_check(mlxsw_sp, upper_dev)) return NOTIFY_BAD; + if (netif_is_lag_master(upper_dev) && + !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev, + info->upper_info)) + return NOTIFY_BAD; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; - if (info->master && - netif_is_bridge_master(upper_dev)) { + if (!info->master) + break; + if (netif_is_bridge_master(upper_dev)) { if (info->linking) { err = mlxsw_sp_port_bridge_join(mlxsw_sp_port); if (err) @@ -1908,6 +2192,46 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, mlxsw_sp_port->bridged = 0; mlxsw_sp_master_bridge_dec(mlxsw_sp, upper_dev); } + } else if (netif_is_lag_master(upper_dev)) { + if (info->linking) { + err = mlxsw_sp_port_lag_join(mlxsw_sp_port, + upper_dev); + if (err) { + netdev_err(dev, "Failed to join link aggregation\n"); + return NOTIFY_BAD; + } + } else { + err = mlxsw_sp_port_lag_leave(mlxsw_sp_port, + upper_dev); + if (err) { + netdev_err(dev, "Failed to leave link aggregation\n"); + return NOTIFY_BAD; + } + } + } + break; + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netdev_notifier_changelowerstate_info *info; + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + mlxsw_sp_port = netdev_priv(dev); + info = ptr; + + switch (event) { + case NETDEV_CHANGELOWERSTATE: + if (netif_is_lag_port(dev) && mlxsw_sp_port->lagged) { + err = mlxsw_sp_port_lag_changed(mlxsw_sp_port, + info->lower_state_info); + if (err) + netdev_err(dev, "Failed to reflect link aggregation lower state change\n"); } break; } @@ -1915,6 +2239,52 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, return NOTIFY_DONE; } +static int mlxsw_sp_netdevice_port_event(struct net_device *dev, + unsigned long event, void *ptr) +{ + switch (event) { + case NETDEV_PRECHANGEUPPER: + case NETDEV_CHANGEUPPER: + return mlxsw_sp_netdevice_port_upper_event(dev, event, ptr); + case NETDEV_CHANGELOWERSTATE: + return mlxsw_sp_netdevice_port_lower_event(dev, event, ptr); + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, + unsigned long event, void *ptr) +{ + struct net_device *dev; + struct list_head *iter; + int ret; + + netdev_for_each_lower_dev(lag_dev, dev, iter) { + if (mlxsw_sp_port_dev_check(dev)) { + ret = mlxsw_sp_netdevice_port_event(dev, event, ptr); + if (ret == NOTIFY_BAD) + return ret; + } + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (mlxsw_sp_port_dev_check(dev)) + return mlxsw_sp_netdevice_port_event(dev, event, ptr); + + if (netif_is_lag_master(dev)) + return mlxsw_sp_netdevice_lag_event(dev, event, ptr); + + return NOTIFY_DONE; +} + static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = { .notifier_call = mlxsw_sp_netdevice_event, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 4365c8bccc6d..48be5a63b9b5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -46,9 +46,16 @@ #include "core.h" #define MLXSW_SP_VFID_BASE VLAN_N_VID +#define MLXSW_SP_LAG_MAX 64 +#define MLXSW_SP_PORT_PER_LAG_MAX 16 struct mlxsw_sp_port; +struct mlxsw_sp_upper { + struct net_device *dev; + unsigned int ref_count; +}; + struct mlxsw_sp { unsigned long active_vfids[BITS_TO_LONGS(VLAN_N_VID)]; unsigned long active_fids[BITS_TO_LONGS(VLAN_N_VID)]; @@ -63,12 +70,16 @@ struct mlxsw_sp { } fdb_notify; #define MLXSW_SP_DEFAULT_AGEING_TIME 300 u32 ageing_time; - struct { - struct net_device *dev; - unsigned int ref_count; - } master_bridge; + struct mlxsw_sp_upper master_bridge; + struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX]; }; +static inline struct mlxsw_sp_upper * +mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + return &mlxsw_sp->lags[lag_id]; +} + struct mlxsw_sp_port_pcpu_stats { u64 rx_packets; u64 rx_bytes; @@ -87,8 +98,10 @@ struct mlxsw_sp_port { u8 learning:1, learning_sync:1, uc_flood:1, - bridged:1; + bridged:1, + lagged:1; u16 pvid; + u16 lag_id; /* 802.1Q bridge VLANs */ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; /* VLAN interfaces */ @@ -96,6 +109,18 @@ struct mlxsw_sp_port { u16 nr_vfids; }; +static inline struct mlxsw_sp_port * +mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + u8 local_port; + + local_port = mlxsw_core_lag_mapping_get(mlxsw_sp->core, + lag_id, port_index); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + return mlxsw_sp_port && mlxsw_sp_port->lagged ? mlxsw_sp_port : NULL; +} + enum mlxsw_sp_flood_table { MLXSW_SP_FLOOD_TABLE_UC, MLXSW_SP_FLOOD_TABLE_BM, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index f21e23983a1a..406dab2f6b17 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -490,32 +490,56 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, untagged_flag, pvid_flag); } -static int mlxsw_sp_port_fdb_op(struct mlxsw_sp_port *mlxsw_sp_port, - const char *mac, u16 vid, bool adding, - bool dynamic) +static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic) { - enum mlxsw_reg_sfd_rec_policy policy; - enum mlxsw_reg_sfd_op op; + return dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : + MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY; +} + +static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) +{ + return adding ? MLXSW_REG_SFD_OP_WRITE_EDIT : + MLXSW_REG_SFD_OP_WRITE_REMOVE; +} + +static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp_port *mlxsw_sp_port, + const char *mac, u16 vid, bool adding, + bool dynamic) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *sfd_pl; int err; - if (!vid) - vid = mlxsw_sp_port->pvid; - sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); if (!sfd_pl) return -ENOMEM; - policy = dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : - MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY; - op = adding ? MLXSW_REG_SFD_OP_WRITE_EDIT : - MLXSW_REG_SFD_OP_WRITE_REMOVE; - mlxsw_reg_sfd_pack(sfd_pl, op, 0); - mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy, + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP, mlxsw_sp_port->local_port); - err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(sfd), - sfd_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + kfree(sfd_pl); + + return err; +} + +static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, + const char *mac, u16 vid, bool adding, + bool dynamic) +{ + char *sfd_pl; + int err; + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_lag_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), + mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP, + lag_id); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); kfree(sfd_pl); return err; @@ -526,11 +550,21 @@ mlxsw_sp_port_fdb_static_add(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { + u16 vid = fdb->vid; + if (switchdev_trans_ph_prepare(trans)) return 0; - return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid, - true, false); + if (!vid) + vid = mlxsw_sp_port->pvid; + + if (!mlxsw_sp_port->lagged) + return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, + fdb->addr, vid, true, false); + else + return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->lag_id, + fdb->addr, vid, true, false); } static int mlxsw_sp_port_obj_add(struct net_device *dev, @@ -645,8 +679,15 @@ static int mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_fdb *fdb) { - return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid, - false, false); + if (!mlxsw_sp_port->lagged) + return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, + fdb->addr, fdb->vid, + false, false); + else + return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->lag_id, + fdb->addr, fdb->vid, + false, false); } static int mlxsw_sp_port_obj_del(struct net_device *dev, @@ -672,14 +713,30 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, return err; } +static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp, + u16 lag_id) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + int i; + + for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); + if (mlxsw_sp_port) + return mlxsw_sp_port; + } + return NULL; +} + static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_obj_port_fdb *fdb, switchdev_obj_dump_cb_t *cb) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *sfd_pl; char mac[ETH_ALEN]; u16 vid; u8 local_port; + u16 lag_id; u8 num_rec; int stored_err = 0; int i; @@ -692,8 +749,7 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0); do { mlxsw_reg_sfd_num_rec_set(sfd_pl, MLXSW_REG_SFD_REC_MAX_COUNT); - err = mlxsw_reg_query(mlxsw_sp_port->mlxsw_sp->core, - MLXSW_REG(sfd), sfd_pl); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); if (err) goto out; @@ -718,6 +774,20 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, if (err) stored_err = err; } + break; + case MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG: + mlxsw_reg_sfd_uc_lag_unpack(sfd_pl, i, + mac, &vid, &lag_id); + if (mlxsw_sp_port == + mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id)) { + ether_addr_copy(fdb->addr, mac); + fdb->ndm_state = NUD_REACHABLE; + fdb->vid = vid; + err = cb(&fdb->obj); + if (err) + stored_err = err; + } + break; } } } while (num_rec == MLXSW_REG_SFD_REC_MAX_COUNT); @@ -779,6 +849,21 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = { .switchdev_port_obj_dump = mlxsw_sp_port_obj_dump, }; +static void mlxsw_sp_fdb_call_notifiers(bool learning, bool learning_sync, + bool adding, char *mac, u16 vid, + struct net_device *dev) +{ + struct switchdev_notifier_fdb_info info; + unsigned long notifier_type; + + if (learning && learning_sync) { + info.addr = mac; + info.vid = vid; + notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL; + call_switchdev_notifiers(notifier_type, dev, &info.info); + } +} + static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, char *sfn_pl, int rec_index, bool adding) @@ -796,24 +881,49 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, return; } - err = mlxsw_sp_port_fdb_op(mlxsw_sp_port, mac, vid, - adding && mlxsw_sp_port->learning, true); + err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, mac, vid, + adding && mlxsw_sp_port->learning, true); if (err) { if (net_ratelimit()) netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n"); return; } - if (mlxsw_sp_port->learning && mlxsw_sp_port->learning_sync) { - struct switchdev_notifier_fdb_info info; - unsigned long notifier_type; + mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning, + mlxsw_sp_port->learning_sync, + adding, mac, vid, mlxsw_sp_port->dev); +} - info.addr = mac; - info.vid = vid; - notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL; - call_switchdev_notifiers(notifier_type, mlxsw_sp_port->dev, - &info.info); +static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, + char *sfn_pl, int rec_index, + bool adding) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + char mac[ETH_ALEN]; + u16 lag_id; + u16 vid; + int err; + + mlxsw_reg_sfn_mac_lag_unpack(sfn_pl, rec_index, mac, &vid, &lag_id); + mlxsw_sp_port = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id); + if (!mlxsw_sp_port) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Cannot find port representor for LAG\n"); + return; } + + err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, vid, + adding && mlxsw_sp_port->learning, + true); + if (err) { + if (net_ratelimit()) + netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n"); + return; + } + + mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning, + mlxsw_sp_port->learning_sync, + adding, mac, vid, + mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev); } static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, @@ -828,6 +938,14 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fdb_notify_mac_process(mlxsw_sp, sfn_pl, rec_index, false); break; + case MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG: + mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl, + rec_index, true); + break; + case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG: + mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl, + rec_index, false); + break; } } diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig new file mode 100644 index 000000000000..9508ad782c30 --- /dev/null +++ b/drivers/net/ethernet/netronome/Kconfig @@ -0,0 +1,36 @@ +# +# Netronome device configuration +# + +config NET_VENDOR_NETRONOME + bool "Netronome(R) devices" + default y + ---help--- + If you have a Netronome(R) network (Ethernet) card or device, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Netronome(R) cards. If you say Y, you will be + asked for your specific card in the following questions. + +if NET_VENDOR_NETRONOME + +config NFP_NETVF + tristate "Netronome(R) NFP4000/NFP6000 VF NIC driver" + depends on PCI && PCI_MSI + depends on VXLAN || VXLAN=n + ---help--- + This driver supports SR-IOV virtual functions of + the Netronome(R) NFP4000/NFP6000 cards working as + a advanced Ethernet NIC. + +config NFP_NET_DEBUG + bool "Debug support for Netronome(R) NFP3200/NFP6000 NIC drivers" + depends on NFP_NET || NFP_NETVF + ---help--- + Enable extra sanity checks and debugfs support in + Netronome(R) NFP3200/NFP6000 NIC PF and VF drivers. + Note: selecting this option may adversely impact + performance. + +endif diff --git a/drivers/net/ethernet/netronome/Makefile b/drivers/net/ethernet/netronome/Makefile new file mode 100644 index 000000000000..dcb7b383f634 --- /dev/null +++ b/drivers/net/ethernet/netronome/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the Netronome network device drivers +# + +obj-$(CONFIG_NFP_NETVF) += nfp/ diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile new file mode 100644 index 000000000000..68178819ff12 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -0,0 +1,8 @@ +obj-$(CONFIG_NFP_NETVF) += nfp_netvf.o + +nfp_netvf-objs := \ + nfp_net_common.o \ + nfp_net_ethtool.o \ + nfp_netvf_main.o + +nfp_netvf-$(CONFIG_NFP_NET_DEBUG) += nfp_net_debugfs.o diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h new file mode 100644 index 000000000000..ab264e1bccd0 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -0,0 +1,748 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_net.h + * Declarations for Netronome network device driver. + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + */ + +#ifndef _NFP_NET_H_ +#define _NFP_NET_H_ + +#include <linux/interrupt.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <asm-generic/io-64-nonatomic-hi-lo.h> + +#include "nfp_net_ctrl.h" + +#define nn_err(nn, fmt, args...) netdev_err((nn)->netdev, fmt, ## args) +#define nn_warn(nn, fmt, args...) netdev_warn((nn)->netdev, fmt, ## args) +#define nn_info(nn, fmt, args...) netdev_info((nn)->netdev, fmt, ## args) +#define nn_dbg(nn, fmt, args...) netdev_dbg((nn)->netdev, fmt, ## args) +#define nn_warn_ratelimit(nn, fmt, args...) \ + do { \ + if (unlikely(net_ratelimit())) \ + netdev_warn((nn)->netdev, fmt, ## args); \ + } while (0) + +/* Max time to wait for NFP to respond on updates (in ms) */ +#define NFP_NET_POLL_TIMEOUT 5000 + +/* Bar allocation */ +#define NFP_NET_CRTL_BAR 0 +#define NFP_NET_Q0_BAR 2 +#define NFP_NET_Q1_BAR 4 /* OBSOLETE */ + +/* Max bits in DMA address */ +#define NFP_NET_MAX_DMA_BITS 40 + +/* Default size for MTU and freelist buffer sizes */ +#define NFP_NET_DEFAULT_MTU 1500 +#define NFP_NET_DEFAULT_RX_BUFSZ 2048 + +/* Maximum number of bytes prepended to a packet */ +#define NFP_NET_MAX_PREPEND 64 + +/* Interrupt definitions */ +#define NFP_NET_NON_Q_VECTORS 2 +#define NFP_NET_IRQ_LSC_IDX 0 +#define NFP_NET_IRQ_EXN_IDX 1 + +/* Queue/Ring definitions */ +#define NFP_NET_MAX_TX_RINGS 64 /* Max. # of Tx rings per device */ +#define NFP_NET_MAX_RX_RINGS 64 /* Max. # of Rx rings per device */ + +#define NFP_NET_MIN_TX_DESCS 256 /* Min. # of Tx descs per ring */ +#define NFP_NET_MIN_RX_DESCS 256 /* Min. # of Rx descs per ring */ +#define NFP_NET_MAX_TX_DESCS (256 * 1024) /* Max. # of Tx descs per ring */ +#define NFP_NET_MAX_RX_DESCS (256 * 1024) /* Max. # of Rx descs per ring */ + +#define NFP_NET_TX_DESCS_DEFAULT 4096 /* Default # of Tx descs per ring */ +#define NFP_NET_RX_DESCS_DEFAULT 4096 /* Default # of Rx descs per ring */ + +#define NFP_NET_FL_BATCH 16 /* Add freelist in this Batch size */ + +/* Offload definitions */ +#define NFP_NET_N_VXLAN_PORTS (NFP_NET_CFG_VXLAN_SZ / sizeof(__be16)) + +/* Forward declarations */ +struct nfp_net; +struct nfp_net_r_vector; + +/* Convenience macro for writing dma address into RX/TX descriptors */ +#define nfp_desc_set_dma_addr(desc, dma_addr) \ + do { \ + __typeof(desc) __d = (desc); \ + dma_addr_t __addr = (dma_addr); \ + \ + __d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr)); \ + __d->dma_addr_hi = upper_32_bits(__addr) & 0xff; \ + } while (0) + +/* TX descriptor format */ + +#define PCIE_DESC_TX_EOP BIT(7) +#define PCIE_DESC_TX_OFFSET_MASK GENMASK(6, 0) +#define PCIE_DESC_TX_MSS_MASK GENMASK(13, 0) + +/* Flags in the host TX descriptor */ +#define PCIE_DESC_TX_CSUM BIT(7) +#define PCIE_DESC_TX_IP4_CSUM BIT(6) +#define PCIE_DESC_TX_TCP_CSUM BIT(5) +#define PCIE_DESC_TX_UDP_CSUM BIT(4) +#define PCIE_DESC_TX_VLAN BIT(3) +#define PCIE_DESC_TX_LSO BIT(2) +#define PCIE_DESC_TX_ENCAP BIT(1) +#define PCIE_DESC_TX_O_IP4_CSUM BIT(0) + +struct nfp_net_tx_desc { + union { + struct { + u8 dma_addr_hi; /* High bits of host buf address */ + __le16 dma_len; /* Length to DMA for this desc */ + u8 offset_eop; /* Offset in buf where pkt starts + + * highest bit is eop flag. + */ + __le32 dma_addr_lo; /* Low 32bit of host buf addr */ + + __le16 mss; /* MSS to be used for LSO */ + u8 l4_offset; /* LSO, where the L4 data starts */ + u8 flags; /* TX Flags, see @PCIE_DESC_TX_* */ + + __le16 vlan; /* VLAN tag to add if indicated */ + __le16 data_len; /* Length of frame + meta data */ + } __packed; + __le32 vals[4]; + }; +}; + +/** + * struct nfp_net_tx_buf - software TX buffer descriptor + * @skb: sk_buff associated with this buffer + * @dma_addr: DMA mapping address of the buffer + * @fidx: Fragment index (-1 for the head and [0..nr_frags-1] for frags) + * @pkt_cnt: Number of packets to be produced out of the skb associated + * with this buffer (valid only on the head's buffer). + * Will be 1 for all non-TSO packets. + * @real_len: Number of bytes which to be produced out of the skb (valid only + * on the head's buffer). Equal to skb->len for non-TSO packets. + */ +struct nfp_net_tx_buf { + struct sk_buff *skb; + dma_addr_t dma_addr; + short int fidx; + u16 pkt_cnt; + u32 real_len; +}; + +/** + * struct nfp_net_tx_ring - TX ring structure + * @r_vec: Back pointer to ring vector structure + * @idx: Ring index from Linux's perspective + * @qcidx: Queue Controller Peripheral (QCP) queue index for the TX queue + * @qcp_q: Pointer to base of the QCP TX queue + * @cnt: Size of the queue in number of descriptors + * @wr_p: TX ring write pointer (free running) + * @rd_p: TX ring read pointer (free running) + * @qcp_rd_p: Local copy of QCP TX queue read pointer + * @wr_ptr_add: Accumulated number of buffers to add to QCP write pointer + * (used for .xmit_more delayed kick) + * @txbufs: Array of transmitted TX buffers, to free on transmit + * @txds: Virtual address of TX ring in host memory + * @dma: DMA address of the TX ring + * @size: Size, in bytes, of the TX ring (needed to free) + */ +struct nfp_net_tx_ring { + struct nfp_net_r_vector *r_vec; + + u32 idx; + int qcidx; + u8 __iomem *qcp_q; + + u32 cnt; + u32 wr_p; + u32 rd_p; + u32 qcp_rd_p; + + u32 wr_ptr_add; + + struct nfp_net_tx_buf *txbufs; + struct nfp_net_tx_desc *txds; + + dma_addr_t dma; + unsigned int size; +} ____cacheline_aligned; + +/* RX and freelist descriptor format */ + +#define PCIE_DESC_RX_DD BIT(7) +#define PCIE_DESC_RX_META_LEN_MASK GENMASK(6, 0) + +/* Flags in the RX descriptor */ +#define PCIE_DESC_RX_RSS cpu_to_le16(BIT(15)) +#define PCIE_DESC_RX_I_IP4_CSUM cpu_to_le16(BIT(14)) +#define PCIE_DESC_RX_I_IP4_CSUM_OK cpu_to_le16(BIT(13)) +#define PCIE_DESC_RX_I_TCP_CSUM cpu_to_le16(BIT(12)) +#define PCIE_DESC_RX_I_TCP_CSUM_OK cpu_to_le16(BIT(11)) +#define PCIE_DESC_RX_I_UDP_CSUM cpu_to_le16(BIT(10)) +#define PCIE_DESC_RX_I_UDP_CSUM_OK cpu_to_le16(BIT(9)) +#define PCIE_DESC_RX_SPARE cpu_to_le16(BIT(8)) +#define PCIE_DESC_RX_EOP cpu_to_le16(BIT(7)) +#define PCIE_DESC_RX_IP4_CSUM cpu_to_le16(BIT(6)) +#define PCIE_DESC_RX_IP4_CSUM_OK cpu_to_le16(BIT(5)) +#define PCIE_DESC_RX_TCP_CSUM cpu_to_le16(BIT(4)) +#define PCIE_DESC_RX_TCP_CSUM_OK cpu_to_le16(BIT(3)) +#define PCIE_DESC_RX_UDP_CSUM cpu_to_le16(BIT(2)) +#define PCIE_DESC_RX_UDP_CSUM_OK cpu_to_le16(BIT(1)) +#define PCIE_DESC_RX_VLAN cpu_to_le16(BIT(0)) + +#define PCIE_DESC_RX_CSUM_ALL (PCIE_DESC_RX_IP4_CSUM | \ + PCIE_DESC_RX_TCP_CSUM | \ + PCIE_DESC_RX_UDP_CSUM | \ + PCIE_DESC_RX_I_IP4_CSUM | \ + PCIE_DESC_RX_I_TCP_CSUM | \ + PCIE_DESC_RX_I_UDP_CSUM) +#define PCIE_DESC_RX_CSUM_OK_SHIFT 1 +#define __PCIE_DESC_RX_CSUM_ALL le16_to_cpu(PCIE_DESC_RX_CSUM_ALL) +#define __PCIE_DESC_RX_CSUM_ALL_OK (__PCIE_DESC_RX_CSUM_ALL >> \ + PCIE_DESC_RX_CSUM_OK_SHIFT) + +struct nfp_net_rx_desc { + union { + struct { + u8 dma_addr_hi; /* High bits of the buf address */ + __le16 reserved; /* Must be zero */ + u8 meta_len_dd; /* Must be zero */ + + __le32 dma_addr_lo; /* Low bits of the buffer address */ + } __packed fld; + + struct { + __le16 data_len; /* Length of the frame + meta data */ + u8 reserved; + u8 meta_len_dd; /* Length of meta data prepended + + * descriptor done flag. + */ + + __le16 flags; /* RX flags. See @PCIE_DESC_RX_* */ + __le16 vlan; /* VLAN if stripped */ + } __packed rxd; + + __le32 vals[2]; + }; +}; + +struct nfp_net_rx_hash { + __be32 hash_type; + __be32 hash; +}; + +/** + * struct nfp_net_rx_buf - software RX buffer descriptor + * @skb: sk_buff associated with this buffer + * @dma_addr: DMA mapping address of the buffer + */ +struct nfp_net_rx_buf { + struct sk_buff *skb; + dma_addr_t dma_addr; +}; + +/** + * struct nfp_net_rx_ring - RX ring structure + * @r_vec: Back pointer to ring vector structure + * @cnt: Size of the queue in number of descriptors + * @wr_p: FL/RX ring write pointer (free running) + * @rd_p: FL/RX ring read pointer (free running) + * @idx: Ring index from Linux's perspective + * @fl_qcidx: Queue Controller Peripheral (QCP) queue index for the freelist + * @rx_qcidx: Queue Controller Peripheral (QCP) queue index for the RX queue + * @qcp_fl: Pointer to base of the QCP freelist queue + * @qcp_rx: Pointer to base of the QCP RX queue + * @wr_ptr_add: Accumulated number of buffers to add to QCP write pointer + * (used for free list batching) + * @rxbufs: Array of transmitted FL/RX buffers + * @rxds: Virtual address of FL/RX ring in host memory + * @dma: DMA address of the FL/RX ring + * @size: Size, in bytes, of the FL/RX ring (needed to free) + */ +struct nfp_net_rx_ring { + struct nfp_net_r_vector *r_vec; + + u32 cnt; + u32 wr_p; + u32 rd_p; + + u16 idx; + u16 wr_ptr_add; + + int fl_qcidx; + int rx_qcidx; + u8 __iomem *qcp_fl; + u8 __iomem *qcp_rx; + + struct nfp_net_rx_buf *rxbufs; + struct nfp_net_rx_desc *rxds; + + dma_addr_t dma; + unsigned int size; +} ____cacheline_aligned; + +/** + * struct nfp_net_r_vector - Per ring interrupt vector configuration + * @nfp_net: Backpointer to nfp_net structure + * @napi: NAPI structure for this ring vec + * @tx_ring: Pointer to TX ring + * @rx_ring: Pointer to RX ring + * @irq_idx: Index into MSI-X table + * @rx_sync: Seqlock for atomic updates of RX stats + * @rx_pkts: Number of received packets + * @rx_bytes: Number of received bytes + * @rx_drops: Number of packets dropped on RX due to lack of resources + * @hw_csum_rx_ok: Counter of packets where the HW checksum was OK + * @hw_csum_rx_inner_ok: Counter of packets where the inner HW checksum was OK + * @hw_csum_rx_error: Counter of packets with bad checksums + * @tx_sync: Seqlock for atomic updates of TX stats + * @tx_pkts: Number of Transmitted packets + * @tx_bytes: Number of Transmitted bytes + * @hw_csum_tx: Counter of packets with TX checksum offload requested + * @hw_csum_tx_inner: Counter of inner TX checksum offload requests + * @tx_gather: Counter of packets with Gather DMA + * @tx_lso: Counter of LSO packets sent + * @tx_errors: How many TX errors were encountered + * @tx_busy: How often was TX busy (no space)? + * @handler: Interrupt handler for this ring vector + * @name: Name of the interrupt vector + * @affinity_mask: SMP affinity mask for this vector + * + * This structure ties RX and TX rings to interrupt vectors and a NAPI + * context. This currently only supports one RX and TX ring per + * interrupt vector but might be extended in the future to allow + * association of multiple rings per vector. + */ +struct nfp_net_r_vector { + struct nfp_net *nfp_net; + struct napi_struct napi; + + struct nfp_net_tx_ring *tx_ring; + struct nfp_net_rx_ring *rx_ring; + + int irq_idx; + + struct u64_stats_sync rx_sync; + u64 rx_pkts; + u64 rx_bytes; + u64 rx_drops; + u64 hw_csum_rx_ok; + u64 hw_csum_rx_inner_ok; + u64 hw_csum_rx_error; + + struct u64_stats_sync tx_sync; + u64 tx_pkts; + u64 tx_bytes; + u64 hw_csum_tx; + u64 hw_csum_tx_inner; + u64 tx_gather; + u64 tx_lso; + u64 tx_errors; + u64 tx_busy; + + irq_handler_t handler; + char name[IFNAMSIZ + 8]; + cpumask_t affinity_mask; +} ____cacheline_aligned; + +/* Firmware version as it is written in the 32bit value in the BAR */ +struct nfp_net_fw_version { + u8 minor; + u8 major; + u8 class; + u8 resv; +} __packed; + +static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, + u8 resv, u8 class, u8 major, u8 minor) +{ + return fw_ver->resv == resv && + fw_ver->class == class && + fw_ver->major == major && + fw_ver->minor == minor; +} + +/** + * struct nfp_net - NFP network device structure + * @pdev: Backpointer to PCI device + * @netdev: Backpointer to net_device structure + * @nfp_fallback: Is the driver used in fallback mode? + * @is_vf: Is the driver attached to a VF? + * @is_nfp3200: Is the driver for a NFP-3200 card? + * @fw_loaded: Is the firmware loaded? + * @ctrl: Local copy of the control register/word. + * @fl_bufsz: Currently configured size of the freelist buffers + * @rx_offset: Offset in the RX buffers where packet data starts + * @cpp: Pointer to the CPP handle + * @nfp_dev_cpp: Pointer to the NFP Device handle + * @ctrl_area: Pointer to the CPP area for the control BAR + * @tx_area: Pointer to the CPP area for the TX queues + * @rx_area: Pointer to the CPP area for the FL/RX queues + * @fw_ver: Firmware version + * @cap: Capabilities advertised by the Firmware + * @max_mtu: Maximum support MTU advertised by the Firmware + * @rss_cfg: RSS configuration + * @rss_key: RSS secret key + * @rss_itbl: RSS indirection table + * @max_tx_rings: Maximum number of TX rings supported by the Firmware + * @max_rx_rings: Maximum number of RX rings supported by the Firmware + * @num_tx_rings: Currently configured number of TX rings + * @num_rx_rings: Currently configured number of RX rings + * @txd_cnt: Size of the TX ring in number of descriptors + * @rxd_cnt: Size of the RX ring in number of descriptors + * @tx_rings: Array of pre-allocated TX ring structures + * @rx_rings: Array of pre-allocated RX ring structures + * @num_irqs: Number of allocated interrupt vectors + * @num_r_vecs: Number of used ring vectors + * @r_vecs: Pre-allocated array of ring vectors + * @irq_entries: Pre-allocated array of MSI-X entries + * @lsc_handler: Handler for Link State Change interrupt + * @lsc_name: Name for Link State Change interrupt + * @exn_handler: Handler for Exception interrupt + * @exn_name: Name for Exception interrupt + * @shared_handler: Handler for shared interrupts + * @shared_name: Name for shared interrupt + * @me_freq_mhz: ME clock_freq (MHz) + * @reconfig_lock: Protects HW reconfiguration request regs/machinery + * @link_up: Is the link up? + * @link_status_lock: Protects @link_up and ensures atomicity with BAR reading + * @rx_coalesce_usecs: RX interrupt moderation usecs delay parameter + * @rx_coalesce_max_frames: RX interrupt moderation frame count parameter + * @tx_coalesce_usecs: TX interrupt moderation usecs delay parameter + * @tx_coalesce_max_frames: TX interrupt moderation frame count parameter + * @vxlan_ports: VXLAN ports for RX inner csum offload communicated to HW + * @vxlan_usecnt: IPv4/IPv6 VXLAN port use counts + * @qcp_cfg: Pointer to QCP queue used for configuration notification + * @ctrl_bar: Pointer to mapped control BAR + * @tx_bar: Pointer to mapped TX queues + * @rx_bar: Pointer to mapped FL/RX queues + * @debugfs_dir: Device directory in debugfs + */ +struct nfp_net { + struct pci_dev *pdev; + struct net_device *netdev; + + unsigned nfp_fallback:1; + unsigned is_vf:1; + unsigned is_nfp3200:1; + unsigned fw_loaded:1; + + u32 ctrl; + u32 fl_bufsz; + + u32 rx_offset; + +#ifdef CONFIG_PCI_IOV + unsigned int num_vfs; + struct vf_data_storage *vfinfo; + int vf_rate_link_speed; +#endif + + struct nfp_cpp *cpp; + struct platform_device *nfp_dev_cpp; + struct nfp_cpp_area *ctrl_area; + struct nfp_cpp_area *tx_area; + struct nfp_cpp_area *rx_area; + + struct nfp_net_fw_version fw_ver; + u32 cap; + u32 max_mtu; + + u32 rss_cfg; + u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ]; + u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ]; + + int max_tx_rings; + int max_rx_rings; + + int num_tx_rings; + int num_rx_rings; + + int stride_tx; + int stride_rx; + + int txd_cnt; + int rxd_cnt; + + struct nfp_net_tx_ring tx_rings[NFP_NET_MAX_TX_RINGS]; + struct nfp_net_rx_ring rx_rings[NFP_NET_MAX_RX_RINGS]; + + u8 num_irqs; + u8 num_r_vecs; + struct nfp_net_r_vector r_vecs[NFP_NET_MAX_TX_RINGS]; + struct msix_entry irq_entries[NFP_NET_NON_Q_VECTORS + + NFP_NET_MAX_TX_RINGS]; + + irq_handler_t lsc_handler; + char lsc_name[IFNAMSIZ + 8]; + + irq_handler_t exn_handler; + char exn_name[IFNAMSIZ + 8]; + + irq_handler_t shared_handler; + char shared_name[IFNAMSIZ + 8]; + + u32 me_freq_mhz; + + bool link_up; + spinlock_t link_status_lock; + + spinlock_t reconfig_lock; + + u32 rx_coalesce_usecs; + u32 rx_coalesce_max_frames; + u32 tx_coalesce_usecs; + u32 tx_coalesce_max_frames; + + __be16 vxlan_ports[NFP_NET_N_VXLAN_PORTS]; + u8 vxlan_usecnt[NFP_NET_N_VXLAN_PORTS]; + + u8 __iomem *qcp_cfg; + + u8 __iomem *ctrl_bar; + u8 __iomem *q_bar; + u8 __iomem *tx_bar; + u8 __iomem *rx_bar; + + struct dentry *debugfs_dir; +}; + +/* Functions to read/write from/to a BAR + * Performs any endian conversion necessary. + */ +static inline void nn_writeb(struct nfp_net *nn, int off, u8 val) +{ + writeb(val, nn->ctrl_bar + off); +} + +/* NFP-3200 can't handle 16-bit accesses too well - hence no readw/writew */ + +static inline u32 nn_readl(struct nfp_net *nn, int off) +{ + return readl(nn->ctrl_bar + off); +} + +static inline void nn_writel(struct nfp_net *nn, int off, u32 val) +{ + writel(val, nn->ctrl_bar + off); +} + +static inline u64 nn_readq(struct nfp_net *nn, int off) +{ + return readq(nn->ctrl_bar + off); +} + +static inline void nn_writeq(struct nfp_net *nn, int off, u64 val) +{ + writeq(val, nn->ctrl_bar + off); +} + +/* Flush posted PCI writes by reading something without side effects */ +static inline void nn_pci_flush(struct nfp_net *nn) +{ + nn_readl(nn, NFP_NET_CFG_VERSION); +} + +/* Queue Controller Peripheral access functions and definitions. + * + * Some of the BARs of the NFP are mapped to portions of the Queue + * Controller Peripheral (QCP) address space on the NFP. A QCP queue + * has a read and a write pointer (as well as a size and flags, + * indicating overflow etc). The QCP offers a number of different + * operation on queue pointers, but here we only offer function to + * either add to a pointer or to read the pointer value. + */ +#define NFP_QCP_QUEUE_ADDR_SZ 0x800 +#define NFP_QCP_QUEUE_OFF(_x) ((_x) * NFP_QCP_QUEUE_ADDR_SZ) +#define NFP_QCP_QUEUE_ADD_RPTR 0x0000 +#define NFP_QCP_QUEUE_ADD_WPTR 0x0004 +#define NFP_QCP_QUEUE_STS_LO 0x0008 +#define NFP_QCP_QUEUE_STS_LO_READPTR_mask 0x3ffff +#define NFP_QCP_QUEUE_STS_HI 0x000c +#define NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask 0x3ffff + +/* The offset of a QCP queues in the PCIe Target (same on NFP3200 and NFP6000 */ +#define NFP_PCIE_QUEUE(_q) (0x80000 + (NFP_QCP_QUEUE_ADDR_SZ * ((_q) & 0xff))) + +/* nfp_qcp_ptr - Read or Write Pointer of a queue */ +enum nfp_qcp_ptr { + NFP_QCP_READ_PTR = 0, + NFP_QCP_WRITE_PTR +}; + +/* There appear to be an *undocumented* upper limit on the value which + * one can add to a queue and that value is either 0x3f or 0x7f. We + * go with 0x3f as a conservative measure. + */ +#define NFP_QCP_MAX_ADD 0x3f + +static inline void _nfp_qcp_ptr_add(u8 __iomem *q, + enum nfp_qcp_ptr ptr, u32 val) +{ + u32 off; + + if (ptr == NFP_QCP_READ_PTR) + off = NFP_QCP_QUEUE_ADD_RPTR; + else + off = NFP_QCP_QUEUE_ADD_WPTR; + + while (val > NFP_QCP_MAX_ADD) { + writel(NFP_QCP_MAX_ADD, q + off); + val -= NFP_QCP_MAX_ADD; + } + + writel(val, q + off); +} + +/** + * nfp_qcp_rd_ptr_add() - Add the value to the read pointer of a queue + * + * @q: Base address for queue structure + * @val: Value to add to the queue pointer + * + * If @val is greater than @NFP_QCP_MAX_ADD multiple writes are performed. + */ +static inline void nfp_qcp_rd_ptr_add(u8 __iomem *q, u32 val) +{ + _nfp_qcp_ptr_add(q, NFP_QCP_READ_PTR, val); +} + +/** + * nfp_qcp_wr_ptr_add() - Add the value to the write pointer of a queue + * + * @q: Base address for queue structure + * @val: Value to add to the queue pointer + * + * If @val is greater than @NFP_QCP_MAX_ADD multiple writes are performed. + */ +static inline void nfp_qcp_wr_ptr_add(u8 __iomem *q, u32 val) +{ + _nfp_qcp_ptr_add(q, NFP_QCP_WRITE_PTR, val); +} + +static inline u32 _nfp_qcp_read(u8 __iomem *q, enum nfp_qcp_ptr ptr) +{ + u32 off; + u32 val; + + if (ptr == NFP_QCP_READ_PTR) + off = NFP_QCP_QUEUE_STS_LO; + else + off = NFP_QCP_QUEUE_STS_HI; + + val = readl(q + off); + + if (ptr == NFP_QCP_READ_PTR) + return val & NFP_QCP_QUEUE_STS_LO_READPTR_mask; + else + return val & NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask; +} + +/** + * nfp_qcp_rd_ptr_read() - Read the current read pointer value for a queue + * @q: Base address for queue structure + * + * Return: Value read. + */ +static inline u32 nfp_qcp_rd_ptr_read(u8 __iomem *q) +{ + return _nfp_qcp_read(q, NFP_QCP_READ_PTR); +} + +/** + * nfp_qcp_wr_ptr_read() - Read the current write pointer value for a queue + * @q: Base address for queue structure + * + * Return: Value read. + */ +static inline u32 nfp_qcp_wr_ptr_read(u8 __iomem *q) +{ + return _nfp_qcp_read(q, NFP_QCP_WRITE_PTR); +} + +/* Globals */ +extern const char nfp_net_driver_name[]; +extern const char nfp_net_driver_version[]; + +/* Prototypes */ +void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, + void __iomem *ctrl_bar); + +struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, + int max_tx_rings, int max_rx_rings); +void nfp_net_netdev_free(struct nfp_net *nn); +int nfp_net_netdev_init(struct net_device *netdev); +void nfp_net_netdev_clean(struct net_device *netdev); +void nfp_net_set_ethtool_ops(struct net_device *netdev); +void nfp_net_info(struct nfp_net *nn); +int nfp_net_reconfig(struct nfp_net *nn, u32 update); +void nfp_net_rss_write_itbl(struct nfp_net *nn); +void nfp_net_rss_write_key(struct nfp_net *nn); +void nfp_net_coalesce_write_cfg(struct nfp_net *nn); +int nfp_net_irqs_alloc(struct nfp_net *nn); +void nfp_net_irqs_disable(struct nfp_net *nn); + +#ifdef CONFIG_NFP_NET_DEBUG +void nfp_net_debugfs_create(void); +void nfp_net_debugfs_destroy(void); +void nfp_net_debugfs_adapter_add(struct nfp_net *nn); +void nfp_net_debugfs_adapter_del(struct nfp_net *nn); +#else +static inline void nfp_net_debugfs_create(void) +{ +} + +static inline void nfp_net_debugfs_destroy(void) +{ +} + +static inline void nfp_net_debugfs_adapter_add(struct nfp_net *nn) +{ +} + +static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn) +{ +} +#endif /* CONFIG_NFP_NET_DEBUG */ + +#endif /* _NFP_NET_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c new file mode 100644 index 000000000000..038ac6b14a60 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -0,0 +1,2432 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_net_common.c + * Netronome network device driver: Common functions between PF and VF + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + * Brad Petrus <brad.petrus@netronome.com> + * Chris Telfer <chris.telfer@netronome.com> + */ + +#include <linux/version.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/interrupt.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/pci.h> +#include <linux/pci_regs.h> +#include <linux/msi.h> +#include <linux/ethtool.h> +#include <linux/log2.h> +#include <linux/if_vlan.h> +#include <linux/random.h> + +#include <linux/ktime.h> + +#include <net/vxlan.h> + +#include "nfp_net_ctrl.h" +#include "nfp_net.h" + +/** + * nfp_net_get_fw_version() - Read and parse the FW version + * @fw_ver: Output fw_version structure to read to + * @ctrl_bar: Mapped address of the control BAR + */ +void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, + void __iomem *ctrl_bar) +{ + u32 reg; + + reg = readl(ctrl_bar + NFP_NET_CFG_VERSION); + put_unaligned_le32(reg, fw_ver); +} + +/** + * nfp_net_reconfig() - Reconfigure the firmware + * @nn: NFP Net device to reconfigure + * @update: The value for the update field in the BAR config + * + * Write the update word to the BAR and ping the reconfig queue. The + * poll until the firmware has acknowledged the update by zeroing the + * update word. + * + * Return: Negative errno on error, 0 on success + */ +int nfp_net_reconfig(struct nfp_net *nn, u32 update) +{ + int cnt, ret = 0; + u32 new; + + spin_lock_bh(&nn->reconfig_lock); + + nn_writel(nn, NFP_NET_CFG_UPDATE, update); + /* ensure update is written before pinging HW */ + nn_pci_flush(nn); + nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1); + + /* Poll update field, waiting for NFP to ack the config */ + for (cnt = 0; ; cnt++) { + new = nn_readl(nn, NFP_NET_CFG_UPDATE); + if (new == 0) + break; + if (new & NFP_NET_CFG_UPDATE_ERR) { + nn_err(nn, "Reconfig error: 0x%08x\n", new); + ret = -EIO; + break; + } else if (cnt >= NFP_NET_POLL_TIMEOUT) { + nn_err(nn, "Reconfig timeout for 0x%08x after %dms\n", + update, cnt); + ret = -EIO; + break; + } + mdelay(1); + } + + spin_unlock_bh(&nn->reconfig_lock); + return ret; +} + +/* Interrupt configuration and handling + */ + +/** + * nfp_net_irq_unmask_msix() - Unmask MSI-X after automasking + * @nn: NFP Network structure + * @entry_nr: MSI-X table entry + * + * Clear the MSI-X table mask bit for the given entry bypassing Linux irq + * handling subsystem. Use *only* to reenable automasked vectors. + */ +static void nfp_net_irq_unmask_msix(struct nfp_net *nn, unsigned int entry_nr) +{ + struct list_head *msi_head = &nn->pdev->dev.msi_list; + struct msi_desc *entry; + u32 off; + + /* All MSI-Xs have the same mask_base */ + entry = list_first_entry(msi_head, struct msi_desc, list); + + off = (PCI_MSIX_ENTRY_SIZE * entry_nr) + + PCI_MSIX_ENTRY_VECTOR_CTRL; + writel(0, entry->mask_base + off); + readl(entry->mask_base); +} + +/** + * nfp_net_irq_unmask() - Unmask automasked interrupt + * @nn: NFP Network structure + * @entry_nr: MSI-X table entry + * + * If MSI-X auto-masking is enabled clear the mask bit, otherwise + * clear the ICR for the entry. + */ +static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr) +{ + if (nn->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) { + nfp_net_irq_unmask_msix(nn, entry_nr); + return; + } + + nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED); + nn_pci_flush(nn); +} + +/** + * nfp_net_msix_alloc() - Try to allocate MSI-X irqs + * @nn: NFP Network structure + * @nr_vecs: Number of MSI-X vectors to allocate + * + * For MSI-X we want at least NFP_NET_NON_Q_VECTORS + 1 vectors. + * + * Return: Number of MSI-X vectors obtained or 0 on error. + */ +static int nfp_net_msix_alloc(struct nfp_net *nn, int nr_vecs) +{ + struct pci_dev *pdev = nn->pdev; + int nvecs; + int i; + + for (i = 0; i < nr_vecs; i++) + nn->irq_entries[i].entry = i; + + nvecs = pci_enable_msix_range(pdev, nn->irq_entries, + NFP_NET_NON_Q_VECTORS + 1, nr_vecs); + if (nvecs < 0) { + nn_warn(nn, "Failed to enable MSI-X. Wanted %d-%d (err=%d)\n", + NFP_NET_NON_Q_VECTORS + 1, nr_vecs, nvecs); + return 0; + } + + return nvecs; +} + +/** + * nfp_net_irqs_wanted() - Work out how many interrupt vectors we want + * @nn: NFP Network structure + * + * We want a vector per CPU (or ring), whatever is smaller plus + * NFP_NET_NON_Q_VECTORS for LSC etc. + * + * Return: Number of interrupts wanted + */ +static int nfp_net_irqs_wanted(struct nfp_net *nn) +{ + int ncpus; + int vecs; + + ncpus = num_online_cpus(); + + vecs = max_t(int, nn->num_tx_rings, nn->num_rx_rings); + vecs = min_t(int, vecs, ncpus); + + return vecs + NFP_NET_NON_Q_VECTORS; +} + +/** + * nfp_net_irqs_alloc() - allocates MSI-X irqs + * @nn: NFP Network structure + * + * Return: Number of irqs obtained or 0 on error. + */ +int nfp_net_irqs_alloc(struct nfp_net *nn) +{ + int wanted_irqs; + + wanted_irqs = nfp_net_irqs_wanted(nn); + + nn->num_irqs = nfp_net_msix_alloc(nn, wanted_irqs); + if (nn->num_irqs == 0) { + nn_err(nn, "Failed to allocate MSI-X IRQs\n"); + return 0; + } + + nn->num_r_vecs = nn->num_irqs - NFP_NET_NON_Q_VECTORS; + + if (nn->num_irqs < wanted_irqs) + nn_warn(nn, "Unable to allocate %d vectors. Got %d instead\n", + wanted_irqs, nn->num_irqs); + + return nn->num_irqs; +} + +/** + * nfp_net_irqs_disable() - Disable interrupts + * @nn: NFP Network structure + * + * Undoes what @nfp_net_irqs_alloc() does. + */ +void nfp_net_irqs_disable(struct nfp_net *nn) +{ + pci_disable_msix(nn->pdev); +} + +/** + * nfp_net_irq_rxtx() - Interrupt service routine for RX/TX rings. + * @irq: Interrupt + * @data: Opaque data structure + * + * Return: Indicate if the interrupt has been handled. + */ +static irqreturn_t nfp_net_irq_rxtx(int irq, void *data) +{ + struct nfp_net_r_vector *r_vec = data; + + napi_schedule_irqoff(&r_vec->napi); + + /* The FW auto-masks any interrupt, either via the MASK bit in + * the MSI-X table or via the per entry ICR field. So there + * is no need to disable interrupts here. + */ + return IRQ_HANDLED; +} + +/** + * nfp_net_read_link_status() - Reread link status from control BAR + * @nn: NFP Network structure + */ +static void nfp_net_read_link_status(struct nfp_net *nn) +{ + unsigned long flags; + bool link_up; + u32 sts; + + spin_lock_irqsave(&nn->link_status_lock, flags); + + sts = nn_readl(nn, NFP_NET_CFG_STS); + link_up = !!(sts & NFP_NET_CFG_STS_LINK); + + if (nn->link_up == link_up) + goto out; + + nn->link_up = link_up; + + if (nn->link_up) { + netif_carrier_on(nn->netdev); + netdev_info(nn->netdev, "NIC Link is Up\n"); + } else { + netif_carrier_off(nn->netdev); + netdev_info(nn->netdev, "NIC Link is Down\n"); + } +out: + spin_unlock_irqrestore(&nn->link_status_lock, flags); +} + +/** + * nfp_net_irq_lsc() - Interrupt service routine for link state changes + * @irq: Interrupt + * @data: Opaque data structure + * + * Return: Indicate if the interrupt has been handled. + */ +static irqreturn_t nfp_net_irq_lsc(int irq, void *data) +{ + struct nfp_net *nn = data; + + nfp_net_read_link_status(nn); + + nfp_net_irq_unmask(nn, NFP_NET_IRQ_LSC_IDX); + + return IRQ_HANDLED; +} + +/** + * nfp_net_irq_exn() - Interrupt service routine for exceptions + * @irq: Interrupt + * @data: Opaque data structure + * + * Return: Indicate if the interrupt has been handled. + */ +static irqreturn_t nfp_net_irq_exn(int irq, void *data) +{ + struct nfp_net *nn = data; + + nn_err(nn, "%s: UNIMPLEMENTED.\n", __func__); + /* XXX TO BE IMPLEMENTED */ + return IRQ_HANDLED; +} + +/** + * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring + * @tx_ring: TX ring structure + */ +static void nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + + tx_ring->qcidx = tx_ring->idx * nn->stride_tx; + tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx); +} + +/** + * nfp_net_rx_ring_init() - Fill in the boilerplate for a RX ring + * @rx_ring: RX ring structure + */ +static void nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + + rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx; + rx_ring->rx_qcidx = rx_ring->fl_qcidx + (nn->stride_rx - 1); + + rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx); + rx_ring->qcp_rx = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->rx_qcidx); +} + +/** + * nfp_net_irqs_assign() - Assign IRQs and setup rvecs. + * @netdev: netdev structure + */ +static void nfp_net_irqs_assign(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_r_vector *r_vec; + int r; + + /* Assumes nn->num_tx_rings == nn->num_rx_rings */ + if (nn->num_tx_rings > nn->num_r_vecs) { + nn_warn(nn, "More rings (%d) than vectors (%d).\n", + nn->num_tx_rings, nn->num_r_vecs); + nn->num_tx_rings = nn->num_r_vecs; + nn->num_rx_rings = nn->num_r_vecs; + } + + nn->lsc_handler = nfp_net_irq_lsc; + nn->exn_handler = nfp_net_irq_exn; + + for (r = 0; r < nn->num_r_vecs; r++) { + r_vec = &nn->r_vecs[r]; + r_vec->nfp_net = nn; + r_vec->handler = nfp_net_irq_rxtx; + r_vec->irq_idx = NFP_NET_NON_Q_VECTORS + r; + + cpumask_set_cpu(r, &r_vec->affinity_mask); + + r_vec->tx_ring = &nn->tx_rings[r]; + nn->tx_rings[r].idx = r; + nn->tx_rings[r].r_vec = r_vec; + nfp_net_tx_ring_init(r_vec->tx_ring); + + r_vec->rx_ring = &nn->rx_rings[r]; + nn->rx_rings[r].idx = r; + nn->rx_rings[r].r_vec = r_vec; + nfp_net_rx_ring_init(r_vec->rx_ring); + } +} + +/** + * nfp_net_aux_irq_request() - Request an auxiliary interrupt (LSC or EXN) + * @nn: NFP Network structure + * @ctrl_offset: Control BAR offset where IRQ configuration should be written + * @format: printf-style format to construct the interrupt name + * @name: Pointer to allocated space for interrupt name + * @name_sz: Size of space for interrupt name + * @vector_idx: Index of MSI-X vector used for this interrupt + * @handler: IRQ handler to register for this interrupt + */ +static int +nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset, + const char *format, char *name, size_t name_sz, + unsigned int vector_idx, irq_handler_t handler) +{ + struct msix_entry *entry; + int err; + + entry = &nn->irq_entries[vector_idx]; + + snprintf(name, name_sz, format, netdev_name(nn->netdev)); + err = request_irq(entry->vector, handler, 0, name, nn); + if (err) { + nn_err(nn, "Failed to request IRQ %d (err=%d).\n", + entry->vector, err); + return err; + } + nn_writeb(nn, ctrl_offset, vector_idx); + + return 0; +} + +/** + * nfp_net_aux_irq_free() - Free an auxiliary interrupt (LSC or EXN) + * @nn: NFP Network structure + * @ctrl_offset: Control BAR offset where IRQ configuration should be written + * @vector_idx: Index of MSI-X vector used for this interrupt + */ +static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset, + unsigned int vector_idx) +{ + nn_writeb(nn, ctrl_offset, 0xff); + free_irq(nn->irq_entries[vector_idx].vector, nn); +} + +/* Transmit + * + * One queue controller peripheral queue is used for transmit. The + * driver en-queues packets for transmit by advancing the write + * pointer. The device indicates that packets have transmitted by + * advancing the read pointer. The driver maintains a local copy of + * the read and write pointer in @struct nfp_net_tx_ring. The driver + * keeps @wr_p in sync with the queue controller write pointer and can + * determine how many packets have been transmitted by comparing its + * copy of the read pointer @rd_p with the read pointer maintained by + * the queue controller peripheral. + */ + +/** + * nfp_net_tx_full() - Check if the TX ring is full + * @tx_ring: TX ring to check + * @dcnt: Number of descriptors that need to be enqueued (must be >= 1) + * + * This function checks, based on the *host copy* of read/write + * pointer if a given TX ring is full. The real TX queue may have + * some newly made available slots. + * + * Return: True if the ring is full. + */ +static inline int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt) +{ + return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt); +} + +/* Wrappers for deciding when to stop and restart TX queues */ +static int nfp_net_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring) +{ + return !nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4); +} + +static int nfp_net_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring) +{ + return nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1); +} + +/** + * nfp_net_tx_ring_stop() - stop tx ring + * @nd_q: netdev queue + * @tx_ring: driver tx queue structure + * + * Safely stop TX ring. Remember that while we are running .start_xmit() + * someone else may be cleaning the TX ring completions so we need to be + * extra careful here. + */ +static void nfp_net_tx_ring_stop(struct netdev_queue *nd_q, + struct nfp_net_tx_ring *tx_ring) +{ + netif_tx_stop_queue(nd_q); + + /* We can race with the TX completion out of NAPI so recheck */ + smp_mb(); + if (unlikely(nfp_net_tx_ring_should_wake(tx_ring))) + netif_tx_start_queue(nd_q); +} + +/** + * nfp_net_tx_tso() - Set up Tx descriptor for LSO + * @nn: NFP Net device + * @r_vec: per-ring structure + * @txbuf: Pointer to driver soft TX descriptor + * @txd: Pointer to HW TX descriptor + * @skb: Pointer to SKB + * + * Set up Tx descriptor for LSO, do nothing for non-LSO skbs. + * Return error on packet header greater than maximum supported LSO header size. + */ +static void nfp_net_tx_tso(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct nfp_net_tx_buf *txbuf, + struct nfp_net_tx_desc *txd, struct sk_buff *skb) +{ + u32 hdrlen; + u16 mss; + + if (!skb_is_gso(skb)) + return; + + if (!skb->encapsulation) + hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb); + else + hdrlen = skb_inner_transport_header(skb) - skb->data + + inner_tcp_hdrlen(skb); + + txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs; + txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1); + + mss = skb_shinfo(skb)->gso_size & PCIE_DESC_TX_MSS_MASK; + txd->l4_offset = hdrlen; + txd->mss = cpu_to_le16(mss); + txd->flags |= PCIE_DESC_TX_LSO; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_lso++; + u64_stats_update_end(&r_vec->tx_sync); +} + +/** + * nfp_net_tx_csum() - Set TX CSUM offload flags in TX descriptor + * @nn: NFP Net device + * @r_vec: per-ring structure + * @txbuf: Pointer to driver soft TX descriptor + * @txd: Pointer to TX descriptor + * @skb: Pointer to SKB + * + * This function sets the TX checksum flags in the TX descriptor based + * on the configuration and the protocol of the packet to be transmitted. + */ +static void nfp_net_tx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct nfp_net_tx_buf *txbuf, + struct nfp_net_tx_desc *txd, struct sk_buff *skb) +{ + struct ipv6hdr *ipv6h; + struct iphdr *iph; + u8 l4_hdr; + + if (!(nn->ctrl & NFP_NET_CFG_CTRL_TXCSUM)) + return; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return; + + txd->flags |= PCIE_DESC_TX_CSUM; + if (skb->encapsulation) + txd->flags |= PCIE_DESC_TX_ENCAP; + + iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); + ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); + + if (iph->version == 4) { + txd->flags |= PCIE_DESC_TX_IP4_CSUM; + l4_hdr = iph->protocol; + } else if (ipv6h->version == 6) { + l4_hdr = ipv6h->nexthdr; + } else { + nn_warn_ratelimit(nn, "partial checksum but ipv=%x!\n", + iph->version); + return; + } + + switch (l4_hdr) { + case IPPROTO_TCP: + txd->flags |= PCIE_DESC_TX_TCP_CSUM; + break; + case IPPROTO_UDP: + txd->flags |= PCIE_DESC_TX_UDP_CSUM; + break; + default: + nn_warn_ratelimit(nn, "partial checksum but l4 proto=%x!\n", + l4_hdr); + return; + } + + u64_stats_update_begin(&r_vec->tx_sync); + if (skb->encapsulation) + r_vec->hw_csum_tx_inner += txbuf->pkt_cnt; + else + r_vec->hw_csum_tx += txbuf->pkt_cnt; + u64_stats_update_end(&r_vec->tx_sync); +} + +/** + * nfp_net_tx() - Main transmit entry point + * @skb: SKB to transmit + * @netdev: netdev structure + * + * Return: NETDEV_TX_OK on success. + */ +static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + const struct skb_frag_struct *frag; + struct nfp_net_r_vector *r_vec; + struct nfp_net_tx_desc *txd, txdg; + struct nfp_net_tx_buf *txbuf; + struct nfp_net_tx_ring *tx_ring; + struct netdev_queue *nd_q; + dma_addr_t dma_addr; + unsigned int fsize; + int f, nr_frags; + int wr_idx; + u16 qidx; + + qidx = skb_get_queue_mapping(skb); + tx_ring = &nn->tx_rings[qidx]; + r_vec = tx_ring->r_vec; + nd_q = netdev_get_tx_queue(nn->netdev, qidx); + + nr_frags = skb_shinfo(skb)->nr_frags; + + if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) { + nn_warn_ratelimit(nn, "TX ring %d busy. wrp=%u rdp=%u\n", + qidx, tx_ring->wr_p, tx_ring->rd_p); + netif_tx_stop_queue(nd_q); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_busy++; + u64_stats_update_end(&r_vec->tx_sync); + return NETDEV_TX_BUSY; + } + + /* Start with the head skbuf */ + dma_addr = dma_map_single(&nn->pdev->dev, skb->data, skb_headlen(skb), + DMA_TO_DEVICE); + if (dma_mapping_error(&nn->pdev->dev, dma_addr)) + goto err_free; + + wr_idx = tx_ring->wr_p % tx_ring->cnt; + + /* Stash the soft descriptor of the head then initialize it */ + txbuf = &tx_ring->txbufs[wr_idx]; + txbuf->skb = skb; + txbuf->dma_addr = dma_addr; + txbuf->fidx = -1; + txbuf->pkt_cnt = 1; + txbuf->real_len = skb->len; + + /* Build TX descriptor */ + txd = &tx_ring->txds[wr_idx]; + txd->offset_eop = (nr_frags == 0) ? PCIE_DESC_TX_EOP : 0; + txd->dma_len = cpu_to_le16(skb_headlen(skb)); + nfp_desc_set_dma_addr(txd, dma_addr); + txd->data_len = cpu_to_le16(skb->len); + + txd->flags = 0; + txd->mss = 0; + txd->l4_offset = 0; + + nfp_net_tx_tso(nn, r_vec, txbuf, txd, skb); + + nfp_net_tx_csum(nn, r_vec, txbuf, txd, skb); + + if (skb_vlan_tag_present(skb) && nn->ctrl & NFP_NET_CFG_CTRL_TXVLAN) { + txd->flags |= PCIE_DESC_TX_VLAN; + txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb)); + } + + /* Gather DMA */ + if (nr_frags > 0) { + /* all descs must match except for in addr, length and eop */ + txdg = *txd; + + for (f = 0; f < nr_frags; f++) { + frag = &skb_shinfo(skb)->frags[f]; + fsize = skb_frag_size(frag); + + dma_addr = skb_frag_dma_map(&nn->pdev->dev, frag, 0, + fsize, DMA_TO_DEVICE); + if (dma_mapping_error(&nn->pdev->dev, dma_addr)) + goto err_unmap; + + wr_idx = (wr_idx + 1) % tx_ring->cnt; + tx_ring->txbufs[wr_idx].skb = skb; + tx_ring->txbufs[wr_idx].dma_addr = dma_addr; + tx_ring->txbufs[wr_idx].fidx = f; + + txd = &tx_ring->txds[wr_idx]; + *txd = txdg; + txd->dma_len = cpu_to_le16(fsize); + nfp_desc_set_dma_addr(txd, dma_addr); + txd->offset_eop = + (f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0; + } + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_gather++; + u64_stats_update_end(&r_vec->tx_sync); + } + + netdev_tx_sent_queue(nd_q, txbuf->real_len); + + tx_ring->wr_p += nr_frags + 1; + if (nfp_net_tx_ring_should_stop(tx_ring)) + nfp_net_tx_ring_stop(nd_q, tx_ring); + + tx_ring->wr_ptr_add += nr_frags + 1; + if (!skb->xmit_more || netif_xmit_stopped(nd_q)) { + /* force memory write before we let HW know */ + wmb(); + nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add); + tx_ring->wr_ptr_add = 0; + } + + skb_tx_timestamp(skb); + + return NETDEV_TX_OK; + +err_unmap: + --f; + while (f >= 0) { + frag = &skb_shinfo(skb)->frags[f]; + dma_unmap_page(&nn->pdev->dev, + tx_ring->txbufs[wr_idx].dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + tx_ring->txbufs[wr_idx].skb = NULL; + tx_ring->txbufs[wr_idx].dma_addr = 0; + tx_ring->txbufs[wr_idx].fidx = -2; + wr_idx = wr_idx - 1; + if (wr_idx < 0) + wr_idx += tx_ring->cnt; + } + dma_unmap_single(&nn->pdev->dev, tx_ring->txbufs[wr_idx].dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + tx_ring->txbufs[wr_idx].skb = NULL; + tx_ring->txbufs[wr_idx].dma_addr = 0; + tx_ring->txbufs[wr_idx].fidx = -2; +err_free: + nn_warn_ratelimit(nn, "Failed to map DMA TX buffer\n"); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_errors++; + u64_stats_update_end(&r_vec->tx_sync); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +/** + * nfp_net_tx_complete() - Handled completed TX packets + * @tx_ring: TX ring structure + * + * Return: Number of completed TX descriptors + */ +static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + const struct skb_frag_struct *frag; + struct netdev_queue *nd_q; + u32 done_pkts = 0, done_bytes = 0; + struct sk_buff *skb; + int todo, nr_frags; + u32 qcp_rd_p; + int fidx; + int idx; + + /* Work out how many descriptors have been transmitted */ + qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); + + if (qcp_rd_p == tx_ring->qcp_rd_p) + return; + + if (qcp_rd_p > tx_ring->qcp_rd_p) + todo = qcp_rd_p - tx_ring->qcp_rd_p; + else + todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p; + + while (todo--) { + idx = tx_ring->rd_p % tx_ring->cnt; + tx_ring->rd_p++; + + skb = tx_ring->txbufs[idx].skb; + if (!skb) + continue; + + nr_frags = skb_shinfo(skb)->nr_frags; + fidx = tx_ring->txbufs[idx].fidx; + + if (fidx == -1) { + /* unmap head */ + dma_unmap_single(&nn->pdev->dev, + tx_ring->txbufs[idx].dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + + done_pkts += tx_ring->txbufs[idx].pkt_cnt; + done_bytes += tx_ring->txbufs[idx].real_len; + } else { + /* unmap fragment */ + frag = &skb_shinfo(skb)->frags[fidx]; + dma_unmap_page(&nn->pdev->dev, + tx_ring->txbufs[idx].dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + } + + /* check for last gather fragment */ + if (fidx == nr_frags - 1) + dev_kfree_skb_any(skb); + + tx_ring->txbufs[idx].dma_addr = 0; + tx_ring->txbufs[idx].skb = NULL; + tx_ring->txbufs[idx].fidx = -2; + } + + tx_ring->qcp_rd_p = qcp_rd_p; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_bytes += done_bytes; + r_vec->tx_pkts += done_pkts; + u64_stats_update_end(&r_vec->tx_sync); + + nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx); + netdev_tx_completed_queue(nd_q, done_pkts, done_bytes); + if (nfp_net_tx_ring_should_wake(tx_ring)) { + /* Make sure TX thread will see updated tx_ring->rd_p */ + smp_mb(); + + if (unlikely(netif_tx_queue_stopped(nd_q))) + netif_tx_wake_queue(nd_q); + } + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); +} + +/** + * nfp_net_tx_flush() - Free any untransmitted buffers currently on the TX ring + * @tx_ring: TX ring structure + * + * Assumes that the device is stopped + */ +static void nfp_net_tx_flush(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + const struct skb_frag_struct *frag; + struct netdev_queue *nd_q; + struct sk_buff *skb; + int nr_frags; + int fidx; + int idx; + + while (tx_ring->rd_p != tx_ring->wr_p) { + idx = tx_ring->rd_p % tx_ring->cnt; + + skb = tx_ring->txbufs[idx].skb; + if (skb) { + nr_frags = skb_shinfo(skb)->nr_frags; + fidx = tx_ring->txbufs[idx].fidx; + + if (fidx == -1) { + /* unmap head */ + dma_unmap_single(&pdev->dev, + tx_ring->txbufs[idx].dma_addr, + skb_headlen(skb), + DMA_TO_DEVICE); + } else { + /* unmap fragment */ + frag = &skb_shinfo(skb)->frags[fidx]; + dma_unmap_page(&pdev->dev, + tx_ring->txbufs[idx].dma_addr, + skb_frag_size(frag), + DMA_TO_DEVICE); + } + + /* check for last gather fragment */ + if (fidx == nr_frags - 1) + dev_kfree_skb_any(skb); + + tx_ring->txbufs[idx].dma_addr = 0; + tx_ring->txbufs[idx].skb = NULL; + tx_ring->txbufs[idx].fidx = -2; + } + + memset(&tx_ring->txds[idx], 0, sizeof(tx_ring->txds[idx])); + + tx_ring->qcp_rd_p++; + tx_ring->rd_p++; + } + + nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx); + netdev_tx_reset_queue(nd_q); +} + +static void nfp_net_tx_timeout(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int i; + + for (i = 0; i < nn->num_tx_rings; i++) { + if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i))) + continue; + nn_warn(nn, "TX timeout on ring: %d\n", i); + } + nn_warn(nn, "TX watchdog timeout\n"); +} + +/* Receive processing + */ + +/** + * nfp_net_rx_space() - return the number of free slots on the RX ring + * @rx_ring: RX ring structure + * + * Make sure we leave at least one slot free. + * + * Return: True if there is space on the RX ring + */ +static inline int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring) +{ + return (rx_ring->cnt - 1) - (rx_ring->wr_p - rx_ring->rd_p); +} + +/** + * nfp_net_rx_alloc_one() - Allocate and map skb for RX + * @rx_ring: RX ring structure of the skb + * @dma_addr: Pointer to storage for DMA address (output param) + * + * This function will allcate a new skb, map it for DMA. + * + * Return: allocated skb or NULL on failure. + */ +static struct sk_buff * +nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr) +{ + struct nfp_net *nn = rx_ring->r_vec->nfp_net; + struct sk_buff *skb; + + skb = netdev_alloc_skb(nn->netdev, nn->fl_bufsz); + if (!skb) { + nn_warn_ratelimit(nn, "Failed to alloc receive SKB\n"); + return NULL; + } + + *dma_addr = dma_map_single(&nn->pdev->dev, skb->data, + nn->fl_bufsz, DMA_FROM_DEVICE); + if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) { + dev_kfree_skb_any(skb); + nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n"); + return NULL; + } + + return skb; +} + +/** + * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings + * @rx_ring: RX ring structure + * @skb: Skb to put on rings + * @dma_addr: DMA address of skb mapping + */ +static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring, + struct sk_buff *skb, dma_addr_t dma_addr) +{ + unsigned int wr_idx; + + wr_idx = rx_ring->wr_p % rx_ring->cnt; + + /* Stash SKB and DMA address away */ + rx_ring->rxbufs[wr_idx].skb = skb; + rx_ring->rxbufs[wr_idx].dma_addr = dma_addr; + + /* Fill freelist descriptor */ + rx_ring->rxds[wr_idx].fld.reserved = 0; + rx_ring->rxds[wr_idx].fld.meta_len_dd = 0; + nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld, dma_addr); + + rx_ring->wr_p++; + rx_ring->wr_ptr_add++; + if (rx_ring->wr_ptr_add >= NFP_NET_FL_BATCH) { + /* Update write pointer of the freelist queue. Make + * sure all writes are flushed before telling the hardware. + */ + wmb(); + nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, rx_ring->wr_ptr_add); + rx_ring->wr_ptr_add = 0; + } +} + +/** + * nfp_net_rx_flush() - Free any buffers currently on the RX ring + * @rx_ring: RX ring to remove buffers from + * + * Assumes that the device is stopped + */ +static void nfp_net_rx_flush(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net *nn = rx_ring->r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + int idx; + + while (rx_ring->rd_p != rx_ring->wr_p) { + idx = rx_ring->rd_p % rx_ring->cnt; + + if (rx_ring->rxbufs[idx].skb) { + dma_unmap_single(&pdev->dev, + rx_ring->rxbufs[idx].dma_addr, + nn->fl_bufsz, DMA_FROM_DEVICE); + dev_kfree_skb_any(rx_ring->rxbufs[idx].skb); + rx_ring->rxbufs[idx].dma_addr = 0; + rx_ring->rxbufs[idx].skb = NULL; + } + + memset(&rx_ring->rxds[idx], 0, sizeof(rx_ring->rxds[idx])); + + rx_ring->rd_p++; + } +} + +/** + * nfp_net_rx_fill_freelist() - Attempt filling freelist with RX buffers + * @rx_ring: RX ring to fill + * + * Try to fill as many buffers as possible into freelist. Return + * number of buffers added. + * + * Return: Number of freelist buffers added. + */ +static int nfp_net_rx_fill_freelist(struct nfp_net_rx_ring *rx_ring) +{ + struct sk_buff *skb; + dma_addr_t dma_addr; + + while (nfp_net_rx_space(rx_ring)) { + skb = nfp_net_rx_alloc_one(rx_ring, &dma_addr); + if (!skb) { + nfp_net_rx_flush(rx_ring); + return -ENOMEM; + } + nfp_net_rx_give_one(rx_ring, skb, dma_addr); + } + + return 0; +} + +/** + * nfp_net_rx_csum_has_errors() - group check if rxd has any csum errors + * @flags: RX descriptor flags field in CPU byte order + */ +static int nfp_net_rx_csum_has_errors(u16 flags) +{ + u16 csum_all_checked, csum_all_ok; + + csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL; + csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK; + + return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT); +} + +/** + * nfp_net_rx_csum() - set SKB checksum field based on RX descriptor flags + * @nn: NFP Net device + * @r_vec: per-ring structure + * @rxd: Pointer to RX descriptor + * @skb: Pointer to SKB + */ +static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct nfp_net_rx_desc *rxd, struct sk_buff *skb) +{ + skb_checksum_none_assert(skb); + + if (!(nn->netdev->features & NETIF_F_RXCSUM)) + return; + + if (nfp_net_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) { + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_error++; + u64_stats_update_end(&r_vec->rx_sync); + return; + } + + /* Assume that the firmware will never report inner CSUM_OK unless outer + * L4 headers were successfully parsed. FW will always report zero UDP + * checksum as CSUM_OK. + */ + if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK || + rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) { + __skb_incr_checksum_unnecessary(skb); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_ok++; + u64_stats_update_end(&r_vec->rx_sync); + } + + if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK || + rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) { + __skb_incr_checksum_unnecessary(skb); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_inner_ok++; + u64_stats_update_end(&r_vec->rx_sync); + } +} + +/** + * nfp_net_set_hash() - Set SKB hash data + * @netdev: adapter's net_device structure + * @skb: SKB to set the hash data on + * @rxd: RX descriptor + * + * The RSS hash and hash-type are pre-pended to the packet data. + * Extract and decode it and set the skb fields. + */ +static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb, + struct nfp_net_rx_desc *rxd) +{ + struct nfp_net_rx_hash *rx_hash; + + if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) || + !(netdev->features & NETIF_F_RXHASH)) + return; + + rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash)); + + switch (be32_to_cpu(rx_hash->hash_type)) { + case NFP_NET_RSS_IPV4: + case NFP_NET_RSS_IPV6: + case NFP_NET_RSS_IPV6_EX: + skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3); + break; + default: + skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4); + break; + } +} + +/** + * nfp_net_rx() - receive up to @budget packets on @rx_ring + * @rx_ring: RX ring to receive from + * @budget: NAPI budget + * + * Note, this function is separated out from the napi poll function to + * more cleanly separate packet receive code from other bookkeeping + * functions performed in the napi poll function. + * + * There are differences between the NFP-3200 firmware and the + * NFP-6000 firmware. The NFP-3200 firmware uses a dedicated RX queue + * to indicate that new packets have arrived. The NFP-6000 does not + * have this queue and uses the DD bit in the RX descriptor. This + * method cannot be used on the NFP-3200 as it causes a race + * condition: The RX ring write pointer on the NFP-3200 is updated + * after packets (and descriptors) have been DMAed. If the DD bit is + * used and subsequently the read pointer is updated this may lead to + * the RX queue to underflow (if the firmware has not yet update the + * write pointer). Therefore we use slightly ugly conditional code + * below to handle the differences. We may, in the future update the + * NFP-3200 firmware to behave the same as the firmware on the + * NFP-6000. + * + * Return: Number of packets received. + */ +static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + unsigned int data_len, meta_len; + int avail = 0, pkts_polled = 0; + struct sk_buff *skb, *new_skb; + struct nfp_net_rx_desc *rxd; + dma_addr_t new_dma_addr; + u32 qcp_wr_p; + int idx; + + if (nn->is_nfp3200) { + /* Work out how many packets arrived */ + qcp_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_rx); + idx = rx_ring->rd_p % rx_ring->cnt; + + if (qcp_wr_p == idx) + /* No new packets */ + return 0; + + if (qcp_wr_p > idx) + avail = qcp_wr_p - idx; + else + avail = qcp_wr_p + rx_ring->cnt - idx; + } else { + avail = budget + 1; + } + + while (avail > 0 && pkts_polled < budget) { + idx = rx_ring->rd_p % rx_ring->cnt; + + rxd = &rx_ring->rxds[idx]; + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) { + if (nn->is_nfp3200) + nn_dbg(nn, "RX descriptor not valid (DD)%d:%u rxd[0]=%#x rxd[1]=%#x\n", + rx_ring->idx, idx, + rxd->vals[0], rxd->vals[1]); + break; + } + /* Memory barrier to ensure that we won't do other reads + * before the DD bit. + */ + dma_rmb(); + + rx_ring->rd_p++; + pkts_polled++; + avail--; + + skb = rx_ring->rxbufs[idx].skb; + + new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr); + if (!new_skb) { + nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[idx].skb, + rx_ring->rxbufs[idx].dma_addr); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + u64_stats_update_end(&r_vec->rx_sync); + continue; + } + + dma_unmap_single(&nn->pdev->dev, + rx_ring->rxbufs[idx].dma_addr, + nn->fl_bufsz, DMA_FROM_DEVICE); + + nfp_net_rx_give_one(rx_ring, new_skb, new_dma_addr); + + meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; + data_len = le16_to_cpu(rxd->rxd.data_len); + + if (WARN_ON_ONCE(data_len > nn->fl_bufsz)) { + dev_kfree_skb_any(skb); + continue; + } + + if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) { + /* The packet data starts after the metadata */ + skb_reserve(skb, meta_len); + } else { + /* The packet data starts at a fixed offset */ + skb_reserve(skb, nn->rx_offset); + } + + /* Adjust the SKB for the dynamic meta data pre-pended */ + skb_put(skb, data_len - meta_len); + + nfp_net_set_hash(nn->netdev, skb, rxd); + + /* Pad small frames to minimum */ + if (skb_put_padto(skb, 60)) + break; + + /* Stats update */ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_pkts++; + r_vec->rx_bytes += skb->len; + u64_stats_update_end(&r_vec->rx_sync); + + skb_record_rx_queue(skb, rx_ring->idx); + skb->protocol = eth_type_trans(skb, nn->netdev); + + nfp_net_rx_csum(nn, r_vec, rxd, skb); + + if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + le16_to_cpu(rxd->rxd.vlan)); + + napi_gro_receive(&rx_ring->r_vec->napi, skb); + } + + if (nn->is_nfp3200) + nfp_qcp_rd_ptr_add(rx_ring->qcp_rx, pkts_polled); + + return pkts_polled; +} + +/** + * nfp_net_poll() - napi poll function + * @napi: NAPI structure + * @budget: NAPI budget + * + * Return: number of packets polled. + */ +static int nfp_net_poll(struct napi_struct *napi, int budget) +{ + struct nfp_net_r_vector *r_vec = + container_of(napi, struct nfp_net_r_vector, napi); + struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; + struct nfp_net_tx_ring *tx_ring = r_vec->tx_ring; + struct nfp_net *nn = r_vec->nfp_net; + struct netdev_queue *txq; + unsigned int pkts_polled; + + tx_ring = &nn->tx_rings[rx_ring->idx]; + txq = netdev_get_tx_queue(nn->netdev, tx_ring->idx); + nfp_net_tx_complete(tx_ring); + + pkts_polled = nfp_net_rx(rx_ring, budget); + + if (pkts_polled < budget) { + napi_complete_done(napi, pkts_polled); + nfp_net_irq_unmask(nn, r_vec->irq_idx); + } + + return pkts_polled; +} + +/* Setup and Configuration + */ + +/** + * nfp_net_tx_ring_free() - Free resources allocated to a TX ring + * @tx_ring: TX ring to free + */ +static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + + nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(tx_ring->idx), 0); + nn_writeb(nn, NFP_NET_CFG_TXR_SZ(tx_ring->idx), 0); + nn_writeb(nn, NFP_NET_CFG_TXR_VEC(tx_ring->idx), 0); + + kfree(tx_ring->txbufs); + + if (tx_ring->txds) + dma_free_coherent(&pdev->dev, tx_ring->size, + tx_ring->txds, tx_ring->dma); + + tx_ring->cnt = 0; + tx_ring->wr_p = 0; + tx_ring->rd_p = 0; + tx_ring->qcp_rd_p = 0; + + tx_ring->txbufs = NULL; + tx_ring->txds = NULL; + tx_ring->dma = 0; + tx_ring->size = 0; +} + +/** + * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring + * @tx_ring: TX Ring structure to allocate + * + * Return: 0 on success, negative errno otherwise. + */ +static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + int sz; + + tx_ring->cnt = nn->txd_cnt; + + tx_ring->size = sizeof(*tx_ring->txds) * tx_ring->cnt; + tx_ring->txds = dma_zalloc_coherent(&pdev->dev, tx_ring->size, + &tx_ring->dma, GFP_KERNEL); + if (!tx_ring->txds) + goto err_alloc; + + sz = sizeof(*tx_ring->txbufs) * tx_ring->cnt; + tx_ring->txbufs = kzalloc(sz, GFP_KERNEL); + if (!tx_ring->txbufs) + goto err_alloc; + + /* Write the DMA address, size and MSI-X info to the device */ + nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(tx_ring->idx), tx_ring->dma); + nn_writeb(nn, NFP_NET_CFG_TXR_SZ(tx_ring->idx), ilog2(tx_ring->cnt)); + nn_writeb(nn, NFP_NET_CFG_TXR_VEC(tx_ring->idx), r_vec->irq_idx); + + netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask, tx_ring->idx); + + nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p\n", + tx_ring->idx, tx_ring->qcidx, + tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds); + + return 0; + +err_alloc: + nfp_net_tx_ring_free(tx_ring); + return -ENOMEM; +} + +/** + * nfp_net_rx_ring_free() - Free resources allocated to a RX ring + * @rx_ring: RX ring to free + */ +static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + + nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(rx_ring->idx), 0); + nn_writeb(nn, NFP_NET_CFG_RXR_SZ(rx_ring->idx), 0); + nn_writeb(nn, NFP_NET_CFG_RXR_VEC(rx_ring->idx), 0); + + kfree(rx_ring->rxbufs); + + if (rx_ring->rxds) + dma_free_coherent(&pdev->dev, rx_ring->size, + rx_ring->rxds, rx_ring->dma); + + rx_ring->cnt = 0; + rx_ring->wr_p = 0; + rx_ring->rd_p = 0; + + rx_ring->rxbufs = NULL; + rx_ring->rxds = NULL; + rx_ring->dma = 0; + rx_ring->size = 0; +} + +/** + * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring + * @rx_ring: RX ring to allocate + * + * Return: 0 on success, negative errno otherwise. + */ +static int nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + int sz; + + rx_ring->cnt = nn->rxd_cnt; + + rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt; + rx_ring->rxds = dma_zalloc_coherent(&pdev->dev, rx_ring->size, + &rx_ring->dma, GFP_KERNEL); + if (!rx_ring->rxds) + goto err_alloc; + + sz = sizeof(*rx_ring->rxbufs) * rx_ring->cnt; + rx_ring->rxbufs = kzalloc(sz, GFP_KERNEL); + if (!rx_ring->rxbufs) + goto err_alloc; + + /* Write the DMA address, size and MSI-X info to the device */ + nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(rx_ring->idx), rx_ring->dma); + nn_writeb(nn, NFP_NET_CFG_RXR_SZ(rx_ring->idx), ilog2(rx_ring->cnt)); + nn_writeb(nn, NFP_NET_CFG_RXR_VEC(rx_ring->idx), r_vec->irq_idx); + + nn_dbg(nn, "RxQ%02d: FlQCidx=%02d RxQCidx=%02d cnt=%d dma=%#llx host=%p\n", + rx_ring->idx, rx_ring->fl_qcidx, rx_ring->rx_qcidx, + rx_ring->cnt, (unsigned long long)rx_ring->dma, rx_ring->rxds); + + return 0; + +err_alloc: + nfp_net_rx_ring_free(rx_ring); + return -ENOMEM; +} + +static void __nfp_net_free_rings(struct nfp_net *nn, unsigned int n_free) +{ + struct nfp_net_r_vector *r_vec; + struct msix_entry *entry; + + while (n_free--) { + r_vec = &nn->r_vecs[n_free]; + entry = &nn->irq_entries[r_vec->irq_idx]; + + nfp_net_rx_ring_free(r_vec->rx_ring); + nfp_net_tx_ring_free(r_vec->tx_ring); + + irq_set_affinity_hint(entry->vector, NULL); + free_irq(entry->vector, r_vec); + + netif_napi_del(&r_vec->napi); + } +} + +/** + * nfp_net_free_rings() - Free all ring resources + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_free_rings(struct nfp_net *nn) +{ + __nfp_net_free_rings(nn, nn->num_r_vecs); +} + +/** + * nfp_net_alloc_rings() - Allocate resources for RX and TX rings + * @nn: NFP Net device to reconfigure + * + * Return: 0 on success or negative errno on error. + */ +static int nfp_net_alloc_rings(struct nfp_net *nn) +{ + struct nfp_net_r_vector *r_vec; + struct msix_entry *entry; + int err; + int r; + + for (r = 0; r < nn->num_r_vecs; r++) { + r_vec = &nn->r_vecs[r]; + entry = &nn->irq_entries[r_vec->irq_idx]; + + /* Setup NAPI */ + netif_napi_add(nn->netdev, &r_vec->napi, + nfp_net_poll, NAPI_POLL_WEIGHT); + + snprintf(r_vec->name, sizeof(r_vec->name), + "%s-rxtx-%d", nn->netdev->name, r); + err = request_irq(entry->vector, r_vec->handler, 0, + r_vec->name, r_vec); + if (err) { + nn_dbg(nn, "Error requesting IRQ %d\n", entry->vector); + goto err_napi_del; + } + + irq_set_affinity_hint(entry->vector, &r_vec->affinity_mask); + + nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", + r, entry->vector, entry->entry); + + /* Allocate TX ring resources */ + err = nfp_net_tx_ring_alloc(r_vec->tx_ring); + if (err) + goto err_free_irq; + + /* Allocate RX ring resources */ + err = nfp_net_rx_ring_alloc(r_vec->rx_ring); + if (err) + goto err_free_tx; + } + + return 0; + +err_free_tx: + nfp_net_tx_ring_free(r_vec->tx_ring); +err_free_irq: + irq_set_affinity_hint(entry->vector, NULL); + free_irq(entry->vector, r_vec); +err_napi_del: + netif_napi_del(&r_vec->napi); + __nfp_net_free_rings(nn, r); + return err; +} + +/** + * nfp_net_rss_write_itbl() - Write RSS indirection table to device + * @nn: NFP Net device to reconfigure + */ +void nfp_net_rss_write_itbl(struct nfp_net *nn) +{ + int i; + + for (i = 0; i < NFP_NET_CFG_RSS_ITBL_SZ; i += 4) + nn_writel(nn, NFP_NET_CFG_RSS_ITBL + i, + get_unaligned_le32(nn->rss_itbl + i)); +} + +/** + * nfp_net_rss_write_key() - Write RSS hash key to device + * @nn: NFP Net device to reconfigure + */ +void nfp_net_rss_write_key(struct nfp_net *nn) +{ + int i; + + for (i = 0; i < NFP_NET_CFG_RSS_KEY_SZ; i += 4) + nn_writel(nn, NFP_NET_CFG_RSS_KEY + i, + get_unaligned_le32(nn->rss_key + i)); +} + +/** + * nfp_net_coalesce_write_cfg() - Write irq coalescence configuration to HW + * @nn: NFP Net device to reconfigure + */ +void nfp_net_coalesce_write_cfg(struct nfp_net *nn) +{ + u8 i; + u32 factor; + u32 value; + + /* Compute factor used to convert coalesce '_usecs' parameters to + * ME timestamp ticks. There are 16 ME clock cycles for each timestamp + * count. + */ + factor = nn->me_freq_mhz / 16; + + /* copy RX interrupt coalesce parameters */ + value = (nn->rx_coalesce_max_frames << 16) | + (factor * nn->rx_coalesce_usecs); + for (i = 0; i < nn->num_r_vecs; i++) + nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value); + + /* copy TX interrupt coalesce parameters */ + value = (nn->tx_coalesce_max_frames << 16) | + (factor * nn->tx_coalesce_usecs); + for (i = 0; i < nn->num_r_vecs; i++) + nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value); +} + +/** + * nfp_net_write_mac_addr() - Write mac address to device registers + * @nn: NFP Net device to reconfigure + * @mac: Six-byte MAC address to be written + * + * We do a bit of byte swapping dance because firmware is LE. + */ +static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *mac) +{ + nn_writel(nn, NFP_NET_CFG_MACADDR + 0, + get_unaligned_be32(nn->netdev->dev_addr)); + /* We can't do writew for NFP-3200 compatibility */ + nn_writel(nn, NFP_NET_CFG_MACADDR + 4, + get_unaligned_be16(nn->netdev->dev_addr + 4) << 16); +} + +/** + * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_clear_config_and_disable(struct nfp_net *nn) +{ + u32 new_ctrl, update; + int err; + + new_ctrl = nn->ctrl; + new_ctrl &= ~NFP_NET_CFG_CTRL_ENABLE; + update = NFP_NET_CFG_UPDATE_GEN; + update |= NFP_NET_CFG_UPDATE_MSIX; + update |= NFP_NET_CFG_UPDATE_RING; + + if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG) + new_ctrl &= ~NFP_NET_CFG_CTRL_RINGCFG; + + nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0); + nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0); + + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + err = nfp_net_reconfig(nn, update); + if (err) { + nn_err(nn, "Could not disable device: %d\n", err); + return; + } + + nn->ctrl = new_ctrl; +} + +/** + * nfp_net_start_vec() - Start ring vector + * @nn: NFP Net device structure + * @r_vec: Ring vector to be started + */ +static int nfp_net_start_vec(struct nfp_net *nn, struct nfp_net_r_vector *r_vec) +{ + unsigned int irq_vec; + int err = 0; + + irq_vec = nn->irq_entries[r_vec->irq_idx].vector; + + disable_irq(irq_vec); + + err = nfp_net_rx_fill_freelist(r_vec->rx_ring); + if (err) { + nn_err(nn, "RV%02d: couldn't allocate enough buffers\n", + r_vec->irq_idx); + goto out; + } + + napi_enable(&r_vec->napi); +out: + enable_irq(irq_vec); + + return err; +} + +static int nfp_net_netdev_open(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int err, r; + u32 update = 0; + u32 new_ctrl; + + if (nn->ctrl & NFP_NET_CFG_CTRL_ENABLE) { + nn_err(nn, "Dev is already enabled: 0x%08x\n", nn->ctrl); + return -EBUSY; + } + + new_ctrl = nn->ctrl; + + /* Step 1: Allocate resources for rings and the like + * - Request interrupts + * - Allocate RX and TX ring resources + * - Setup initial RSS table + */ + err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_EXN, "%s-exn", + nn->exn_name, sizeof(nn->exn_name), + NFP_NET_IRQ_EXN_IDX, nn->exn_handler); + if (err) + return err; + + err = nfp_net_alloc_rings(nn); + if (err) + goto err_free_exn; + + err = netif_set_real_num_tx_queues(netdev, nn->num_tx_rings); + if (err) + goto err_free_rings; + + err = netif_set_real_num_rx_queues(netdev, nn->num_rx_rings); + if (err) + goto err_free_rings; + + if (nn->cap & NFP_NET_CFG_CTRL_RSS) { + nfp_net_rss_write_key(nn); + nfp_net_rss_write_itbl(nn); + nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg); + update |= NFP_NET_CFG_UPDATE_RSS; + } + + if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) { + nfp_net_coalesce_write_cfg(nn); + + new_ctrl |= NFP_NET_CFG_CTRL_IRQMOD; + update |= NFP_NET_CFG_UPDATE_IRQMOD; + } + + /* Step 2: Configure the NFP + * - Enable rings from 0 to tx_rings/rx_rings - 1. + * - Write MAC address (in case it changed) + * - Set the MTU + * - Set the Freelist buffer size + * - Enable the FW + */ + nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ? + 0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1); + + nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->num_rx_rings == 64 ? + 0xffffffffffffffffULL : ((u64)1 << nn->num_rx_rings) - 1); + + nfp_net_write_mac_addr(nn, netdev->dev_addr); + + nn_writel(nn, NFP_NET_CFG_MTU, netdev->mtu); + nn_writel(nn, NFP_NET_CFG_FLBUFSZ, nn->fl_bufsz); + + /* Enable device */ + new_ctrl |= NFP_NET_CFG_CTRL_ENABLE; + update |= NFP_NET_CFG_UPDATE_GEN; + update |= NFP_NET_CFG_UPDATE_MSIX; + update |= NFP_NET_CFG_UPDATE_RING; + if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG) + new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG; + + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + err = nfp_net_reconfig(nn, update); + if (err) + goto err_clear_config; + + nn->ctrl = new_ctrl; + + /* Since reconfiguration requests while NFP is down are ignored we + * have to wipe the entire VXLAN configuration and reinitialize it. + */ + if (nn->ctrl & NFP_NET_CFG_CTRL_VXLAN) { + memset(&nn->vxlan_ports, 0, sizeof(nn->vxlan_ports)); + memset(&nn->vxlan_usecnt, 0, sizeof(nn->vxlan_usecnt)); + vxlan_get_rx_port(netdev); + } + + /* Step 3: Enable for kernel + * - put some freelist descriptors on each RX ring + * - enable NAPI on each ring + * - enable all TX queues + * - set link state + */ + for (r = 0; r < nn->num_r_vecs; r++) { + err = nfp_net_start_vec(nn, &nn->r_vecs[r]); + if (err) + goto err_disable_napi; + } + + netif_tx_wake_all_queues(netdev); + + err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_LSC, "%s-lsc", + nn->lsc_name, sizeof(nn->lsc_name), + NFP_NET_IRQ_LSC_IDX, nn->lsc_handler); + if (err) + goto err_stop_tx; + nfp_net_read_link_status(nn); + + return 0; + +err_stop_tx: + netif_tx_disable(netdev); + for (r = 0; r < nn->num_r_vecs; r++) + nfp_net_tx_flush(nn->r_vecs[r].tx_ring); +err_disable_napi: + while (r--) { + napi_disable(&nn->r_vecs[r].napi); + nfp_net_rx_flush(nn->r_vecs[r].rx_ring); + } +err_clear_config: + nfp_net_clear_config_and_disable(nn); +err_free_rings: + nfp_net_free_rings(nn); +err_free_exn: + nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX); + return err; +} + +/** + * nfp_net_netdev_close() - Called when the device is downed + * @netdev: netdev structure + */ +static int nfp_net_netdev_close(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int r; + + if (!(nn->ctrl & NFP_NET_CFG_CTRL_ENABLE)) { + nn_err(nn, "Dev is not up: 0x%08x\n", nn->ctrl); + return 0; + } + + /* Step 1: Disable RX and TX rings from the Linux kernel perspective + */ + nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX); + netif_carrier_off(netdev); + nn->link_up = false; + + for (r = 0; r < nn->num_r_vecs; r++) + napi_disable(&nn->r_vecs[r].napi); + + netif_tx_disable(netdev); + + /* Step 2: Tell NFP + */ + nfp_net_clear_config_and_disable(nn); + + /* Step 3: Free resources + */ + for (r = 0; r < nn->num_r_vecs; r++) { + nfp_net_rx_flush(nn->r_vecs[r].rx_ring); + nfp_net_tx_flush(nn->r_vecs[r].tx_ring); + } + + nfp_net_free_rings(nn); + nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX); + + nn_dbg(nn, "%s down", netdev->name); + return 0; +} + +static void nfp_net_set_rx_mode(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + u32 new_ctrl; + + new_ctrl = nn->ctrl; + + if (netdev->flags & IFF_PROMISC) { + if (nn->cap & NFP_NET_CFG_CTRL_PROMISC) + new_ctrl |= NFP_NET_CFG_CTRL_PROMISC; + else + nn_warn(nn, "FW does not support promiscuous mode\n"); + } else { + new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC; + } + + if (new_ctrl == nn->ctrl) + return; + + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + if (nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN)) + return; + + nn->ctrl = new_ctrl; +} + +static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct nfp_net *nn = netdev_priv(netdev); + u32 tmp; + + nn_dbg(nn, "New MTU = %d\n", new_mtu); + + if (new_mtu < 68 || new_mtu > nn->max_mtu) { + nn_err(nn, "New MTU (%d) is not valid\n", new_mtu); + return -EINVAL; + } + + netdev->mtu = new_mtu; + + /* Freelist buffer size rounded up to the nearest 1K */ + tmp = new_mtu + ETH_HLEN + VLAN_HLEN + NFP_NET_MAX_PREPEND; + nn->fl_bufsz = roundup(tmp, 1024); + + /* restart if running */ + if (netif_running(netdev)) { + nfp_net_netdev_close(netdev); + nfp_net_netdev_open(netdev); + } + + return 0; +} + +static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct nfp_net *nn = netdev_priv(netdev); + int r; + + for (r = 0; r < nn->num_r_vecs; r++) { + struct nfp_net_r_vector *r_vec = &nn->r_vecs[r]; + u64 data[3]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&r_vec->rx_sync); + data[0] = r_vec->rx_pkts; + data[1] = r_vec->rx_bytes; + data[2] = r_vec->rx_drops; + } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + stats->rx_packets += data[0]; + stats->rx_bytes += data[1]; + stats->rx_dropped += data[2]; + + do { + start = u64_stats_fetch_begin(&r_vec->tx_sync); + data[0] = r_vec->tx_pkts; + data[1] = r_vec->tx_bytes; + data[2] = r_vec->tx_errors; + } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + stats->tx_packets += data[0]; + stats->tx_bytes += data[1]; + stats->tx_errors += data[2]; + } + + return stats; +} + +static int nfp_net_set_features(struct net_device *netdev, + netdev_features_t features) +{ + netdev_features_t changed = netdev->features ^ features; + struct nfp_net *nn = netdev_priv(netdev); + u32 new_ctrl; + int err; + + /* Assume this is not called with features we have not advertised */ + + new_ctrl = nn->ctrl; + + if (changed & NETIF_F_RXCSUM) { + if (features & NETIF_F_RXCSUM) + new_ctrl |= NFP_NET_CFG_CTRL_RXCSUM; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_RXCSUM; + } + + if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) { + if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) + new_ctrl |= NFP_NET_CFG_CTRL_TXCSUM; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_TXCSUM; + } + + if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) { + if (features & (NETIF_F_TSO | NETIF_F_TSO6)) + new_ctrl |= NFP_NET_CFG_CTRL_LSO; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_LSO; + } + + if (changed & NETIF_F_HW_VLAN_CTAG_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) + new_ctrl |= NFP_NET_CFG_CTRL_RXVLAN; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_RXVLAN; + } + + if (changed & NETIF_F_HW_VLAN_CTAG_TX) { + if (features & NETIF_F_HW_VLAN_CTAG_TX) + new_ctrl |= NFP_NET_CFG_CTRL_TXVLAN; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_TXVLAN; + } + + if (changed & NETIF_F_SG) { + if (features & NETIF_F_SG) + new_ctrl |= NFP_NET_CFG_CTRL_GATHER; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER; + } + + nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n", + netdev->features, features, changed); + + if (new_ctrl == nn->ctrl) + return 0; + + nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->ctrl, new_ctrl); + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); + if (err) + return err; + + nn->ctrl = new_ctrl; + + return 0; +} + +static netdev_features_t +nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, + netdev_features_t features) +{ + u8 l4_hdr; + + /* We can't do TSO over double tagged packets (802.1AD) */ + features &= vlan_features_check(skb, features); + + if (!skb->encapsulation) + return features; + + /* Ensure that inner L4 header offset fits into TX descriptor field */ + if (skb_is_gso(skb)) { + u32 hdrlen; + + hdrlen = skb_inner_transport_header(skb) - skb->data + + inner_tcp_hdrlen(skb); + + if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ)) + features &= ~NETIF_F_GSO_MASK; + } + + /* VXLAN/GRE check */ + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + l4_hdr = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + l4_hdr = ipv6_hdr(skb)->nexthdr; + break; + default: + return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + } + + if (skb->inner_protocol_type != ENCAP_TYPE_ETHER || + skb->inner_protocol != htons(ETH_P_TEB) || + (l4_hdr != IPPROTO_UDP && l4_hdr != IPPROTO_GRE) || + (l4_hdr == IPPROTO_UDP && + (skb_inner_mac_header(skb) - skb_transport_header(skb) != + sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) + return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + + return features; +} + +/** + * nfp_net_set_vxlan_port() - set vxlan port in SW and reconfigure HW + * @nn: NFP Net device to reconfigure + * @idx: Index into the port table where new port should be written + * @port: UDP port to configure (pass zero to remove VXLAN port) + */ +static void nfp_net_set_vxlan_port(struct nfp_net *nn, int idx, __be16 port) +{ + int i; + + nn->vxlan_ports[idx] = port; + + if (!(nn->ctrl & NFP_NET_CFG_CTRL_VXLAN)) + return; + + BUILD_BUG_ON(NFP_NET_N_VXLAN_PORTS & 1); + for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i += 2) + nn_writel(nn, NFP_NET_CFG_VXLAN_PORT + i * sizeof(port), + be16_to_cpu(nn->vxlan_ports[i + 1]) << 16 | + be16_to_cpu(nn->vxlan_ports[i])); + + nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_VXLAN); +} + +/** + * nfp_net_find_vxlan_idx() - find table entry of the port or a free one + * @nn: NFP Network structure + * @port: UDP port to look for + * + * Return: if the port is already in the table -- it's position; + * if the port is not in the table -- free position to use; + * if the table is full -- -ENOSPC. + */ +static int nfp_net_find_vxlan_idx(struct nfp_net *nn, __be16 port) +{ + int i, free_idx = -ENOSPC; + + for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i++) { + if (nn->vxlan_ports[i] == port) + return i; + if (!nn->vxlan_usecnt[i]) + free_idx = i; + } + + return free_idx; +} + +static void nfp_net_add_vxlan_port(struct net_device *netdev, + sa_family_t sa_family, __be16 port) +{ + struct nfp_net *nn = netdev_priv(netdev); + int idx; + + idx = nfp_net_find_vxlan_idx(nn, port); + if (idx == -ENOSPC) + return; + + if (!nn->vxlan_usecnt[idx]++) + nfp_net_set_vxlan_port(nn, idx, port); +} + +static void nfp_net_del_vxlan_port(struct net_device *netdev, + sa_family_t sa_family, __be16 port) +{ + struct nfp_net *nn = netdev_priv(netdev); + int idx; + + idx = nfp_net_find_vxlan_idx(nn, port); + if (!nn->vxlan_usecnt[idx] || idx == -ENOSPC) + return; + + if (!--nn->vxlan_usecnt[idx]) + nfp_net_set_vxlan_port(nn, idx, 0); +} + +static const struct net_device_ops nfp_net_netdev_ops = { + .ndo_open = nfp_net_netdev_open, + .ndo_stop = nfp_net_netdev_close, + .ndo_start_xmit = nfp_net_tx, + .ndo_get_stats64 = nfp_net_stat64, + .ndo_tx_timeout = nfp_net_tx_timeout, + .ndo_set_rx_mode = nfp_net_set_rx_mode, + .ndo_change_mtu = nfp_net_change_mtu, + .ndo_set_mac_address = eth_mac_addr, + .ndo_set_features = nfp_net_set_features, + .ndo_features_check = nfp_net_features_check, + .ndo_add_vxlan_port = nfp_net_add_vxlan_port, + .ndo_del_vxlan_port = nfp_net_del_vxlan_port, +}; + +/** + * nfp_net_info() - Print general info about the NIC + * @nn: NFP Net device to reconfigure + */ +void nfp_net_info(struct nfp_net *nn) +{ + nn_info(nn, "Netronome %s %sNetdev: TxQs=%d/%d RxQs=%d/%d\n", + nn->is_nfp3200 ? "NFP-32xx" : "NFP-6xxx", + nn->is_vf ? "VF " : "", + nn->num_tx_rings, nn->max_tx_rings, + nn->num_rx_rings, nn->max_rx_rings); + nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n", + nn->fw_ver.resv, nn->fw_ver.class, + nn->fw_ver.major, nn->fw_ver.minor, + nn->max_mtu); + nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + nn->cap, + nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "", + nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "", + nn->cap & NFP_NET_CFG_CTRL_L2MC ? "L2MCFILT " : "", + nn->cap & NFP_NET_CFG_CTRL_RXCSUM ? "RXCSUM " : "", + nn->cap & NFP_NET_CFG_CTRL_TXCSUM ? "TXCSUM " : "", + nn->cap & NFP_NET_CFG_CTRL_RXVLAN ? "RXVLAN " : "", + nn->cap & NFP_NET_CFG_CTRL_TXVLAN ? "TXVLAN " : "", + nn->cap & NFP_NET_CFG_CTRL_SCATTER ? "SCATTER " : "", + nn->cap & NFP_NET_CFG_CTRL_GATHER ? "GATHER " : "", + nn->cap & NFP_NET_CFG_CTRL_LSO ? "TSO " : "", + nn->cap & NFP_NET_CFG_CTRL_RSS ? "RSS " : "", + nn->cap & NFP_NET_CFG_CTRL_L2SWITCH ? "L2SWITCH " : "", + nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "", + nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "", + nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "", + nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : ""); +} + +/** + * nfp_net_netdev_alloc() - Allocate netdev and related structure + * @pdev: PCI device + * @max_tx_rings: Maximum number of TX rings supported by device + * @max_rx_rings: Maximum number of RX rings supported by device + * + * This function allocates a netdev device and fills in the initial + * part of the @struct nfp_net structure. + * + * Return: NFP Net device structure, or ERR_PTR on error. + */ +struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, + int max_tx_rings, int max_rx_rings) +{ + struct net_device *netdev; + struct nfp_net *nn; + int nqs; + + netdev = alloc_etherdev_mqs(sizeof(struct nfp_net), + max_tx_rings, max_rx_rings); + if (!netdev) + return ERR_PTR(-ENOMEM); + + SET_NETDEV_DEV(netdev, &pdev->dev); + nn = netdev_priv(netdev); + + nn->netdev = netdev; + nn->pdev = pdev; + + nn->max_tx_rings = max_tx_rings; + nn->max_rx_rings = max_rx_rings; + + nqs = netif_get_num_default_rss_queues(); + nn->num_tx_rings = min_t(int, nqs, max_tx_rings); + nn->num_rx_rings = min_t(int, nqs, max_rx_rings); + + nn->txd_cnt = NFP_NET_TX_DESCS_DEFAULT; + nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; + + spin_lock_init(&nn->reconfig_lock); + spin_lock_init(&nn->link_status_lock); + + return nn; +} + +/** + * nfp_net_netdev_free() - Undo what @nfp_net_netdev_alloc() did + * @nn: NFP Net device to reconfigure + */ +void nfp_net_netdev_free(struct nfp_net *nn) +{ + free_netdev(nn->netdev); +} + +/** + * nfp_net_rss_init() - Set the initial RSS parameters + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_rss_init(struct nfp_net *nn) +{ + int i; + + netdev_rss_key_fill(nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ); + + for (i = 0; i < sizeof(nn->rss_itbl); i++) + nn->rss_itbl[i] = + ethtool_rxfh_indir_default(i, nn->num_rx_rings); + + /* Enable IPv4/IPv6 TCP by default */ + nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP | + NFP_NET_CFG_RSS_IPV6_TCP | + NFP_NET_CFG_RSS_TOEPLITZ | + NFP_NET_CFG_RSS_MASK; +} + +/** + * nfp_net_irqmod_init() - Set the initial IRQ moderation parameters + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_irqmod_init(struct nfp_net *nn) +{ + nn->rx_coalesce_usecs = 50; + nn->rx_coalesce_max_frames = 64; + nn->tx_coalesce_usecs = 50; + nn->tx_coalesce_max_frames = 64; +} + +/** + * nfp_net_netdev_init() - Initialise/finalise the netdev structure + * @netdev: netdev structure + * + * Return: 0 on success or negative errno on error. + */ +int nfp_net_netdev_init(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int err; + + /* Get some of the read-only fields from the BAR */ + nn->cap = nn_readl(nn, NFP_NET_CFG_CAP); + nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU); + + nfp_net_write_mac_addr(nn, nn->netdev->dev_addr); + + /* Set default MTU and Freelist buffer size */ + if (nn->max_mtu < NFP_NET_DEFAULT_MTU) + netdev->mtu = nn->max_mtu; + else + netdev->mtu = NFP_NET_DEFAULT_MTU; + nn->fl_bufsz = NFP_NET_DEFAULT_RX_BUFSZ; + + /* Advertise/enable offloads based on capabilities + * + * Note: netdev->features show the currently enabled features + * and netdev->hw_features advertises which features are + * supported. By default we enable most features. + */ + netdev->hw_features = NETIF_F_HIGHDMA; + if (nn->cap & NFP_NET_CFG_CTRL_RXCSUM) { + netdev->hw_features |= NETIF_F_RXCSUM; + nn->ctrl |= NFP_NET_CFG_CTRL_RXCSUM; + } + if (nn->cap & NFP_NET_CFG_CTRL_TXCSUM) { + netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + nn->ctrl |= NFP_NET_CFG_CTRL_TXCSUM; + } + if (nn->cap & NFP_NET_CFG_CTRL_GATHER) { + netdev->hw_features |= NETIF_F_SG; + nn->ctrl |= NFP_NET_CFG_CTRL_GATHER; + } + if ((nn->cap & NFP_NET_CFG_CTRL_LSO) && nn->fw_ver.major > 2) { + netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; + nn->ctrl |= NFP_NET_CFG_CTRL_LSO; + } + if (nn->cap & NFP_NET_CFG_CTRL_RSS) { + netdev->hw_features |= NETIF_F_RXHASH; + nfp_net_rss_init(nn); + nn->ctrl |= NFP_NET_CFG_CTRL_RSS; + } + if (nn->cap & NFP_NET_CFG_CTRL_VXLAN && + nn->cap & NFP_NET_CFG_CTRL_NVGRE) { + if (nn->cap & NFP_NET_CFG_CTRL_LSO) + netdev->hw_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL; + nn->ctrl |= NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE; + + netdev->hw_enc_features = netdev->hw_features; + } + + netdev->vlan_features = netdev->hw_features; + + if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN) { + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + nn->ctrl |= NFP_NET_CFG_CTRL_RXVLAN; + } + if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN) { + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; + nn->ctrl |= NFP_NET_CFG_CTRL_TXVLAN; + } + + netdev->features = netdev->hw_features; + + /* Advertise but disable TSO by default. */ + netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); + + /* Allow L2 Broadcast and Multicast through by default, if supported */ + if (nn->cap & NFP_NET_CFG_CTRL_L2BC) + nn->ctrl |= NFP_NET_CFG_CTRL_L2BC; + if (nn->cap & NFP_NET_CFG_CTRL_L2MC) + nn->ctrl |= NFP_NET_CFG_CTRL_L2MC; + + /* Allow IRQ moderation, if supported */ + if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) { + nfp_net_irqmod_init(nn); + nn->ctrl |= NFP_NET_CFG_CTRL_IRQMOD; + } + + /* On NFP-3200 enable MSI-X auto-masking, if supported and the + * interrupts are not shared. + */ + if (nn->is_nfp3200 && nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO) + nn->ctrl |= NFP_NET_CFG_CTRL_MSIXAUTO; + + /* On NFP4000/NFP6000, determine RX packet/metadata boundary offset */ + if (nn->fw_ver.major >= 2) + nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET); + else + nn->rx_offset = NFP_NET_RX_OFFSET; + + /* Stash the re-configuration queue away. First odd queue in TX Bar */ + nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ; + + /* Make sure the FW knows the netdev is supposed to be disabled here */ + nn_writel(nn, NFP_NET_CFG_CTRL, 0); + nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0); + nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RING | + NFP_NET_CFG_UPDATE_GEN); + if (err) + return err; + + /* Finalise the netdev setup */ + ether_setup(netdev); + netdev->netdev_ops = &nfp_net_netdev_ops; + netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000); + + nfp_net_set_ethtool_ops(netdev); + nfp_net_irqs_assign(netdev); + + return register_netdev(netdev); +} + +/** + * nfp_net_netdev_clean() - Undo what nfp_net_netdev_init() did. + * @netdev: netdev structure + */ +void nfp_net_netdev_clean(struct net_device *netdev) +{ + unregister_netdev(netdev); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h new file mode 100644 index 000000000000..8692003aeed8 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -0,0 +1,323 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_net_ctrl.h + * Netronome network device driver: Control BAR layout + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + * Brad Petrus <brad.petrus@netronome.com> + */ + +#ifndef _NFP_NET_CTRL_H_ +#define _NFP_NET_CTRL_H_ + +/* IMPORTANT: This header file is shared with the FW, + * no OS specific constructs, please! + */ + +/** + * Configuration BAR size. + * + * The configuration BAR is 8K in size, but on the NFP6000, due to + * THB-350, 32k needs to be reserved. + */ +#define NFP_NET_CFG_BAR_SZ (32 * 1024) + +/** + * Offset in Freelist buffer where packet starts on RX + */ +#define NFP_NET_RX_OFFSET 32 + +/** + * Maximum header size supported for LSO frames + */ +#define NFP_NET_LSO_MAX_HDR_SZ 255 + +/** + * Hash type pre-pended when a RSS hash was computed + */ +#define NFP_NET_RSS_NONE 0 +#define NFP_NET_RSS_IPV4 1 +#define NFP_NET_RSS_IPV6 2 +#define NFP_NET_RSS_IPV6_EX 3 +#define NFP_NET_RSS_IPV4_TCP 4 +#define NFP_NET_RSS_IPV6_TCP 5 +#define NFP_NET_RSS_IPV6_EX_TCP 6 +#define NFP_NET_RSS_IPV4_UDP 7 +#define NFP_NET_RSS_IPV6_UDP 8 +#define NFP_NET_RSS_IPV6_EX_UDP 9 + +/** + * @NFP_NET_TXR_MAX: Maximum number of TX rings + * @NFP_NET_TXR_MASK: Mask for TX rings + * @NFP_NET_RXR_MAX: Maximum number of RX rings + * @NFP_NET_RXR_MASK: Mask for RX rings + */ +#define NFP_NET_TXR_MAX 64 +#define NFP_NET_TXR_MASK (NFP_NET_TXR_MAX - 1) +#define NFP_NET_RXR_MAX 64 +#define NFP_NET_RXR_MASK (NFP_NET_RXR_MAX - 1) + +/** + * Read/Write config words (0x0000 - 0x002c) + * @NFP_NET_CFG_CTRL: Global control + * @NFP_NET_CFG_UPDATE: Indicate which fields are updated + * @NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings + * @NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings + * @NFP_NET_CFG_MTU: Set MTU size + * @NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU) + * @NFP_NET_CFG_EXN: MSI-X table entry for exceptions + * @NFP_NET_CFG_LSC: MSI-X table entry for link state changes + * @NFP_NET_CFG_MACADDR: MAC address + * + * TODO: + * - define Error details in UPDATE + */ +#define NFP_NET_CFG_CTRL 0x0000 +#define NFP_NET_CFG_CTRL_ENABLE (0x1 << 0) /* Global enable */ +#define NFP_NET_CFG_CTRL_PROMISC (0x1 << 1) /* Enable Promisc mode */ +#define NFP_NET_CFG_CTRL_L2BC (0x1 << 2) /* Allow L2 Broadcast */ +#define NFP_NET_CFG_CTRL_L2MC (0x1 << 3) /* Allow L2 Multicast */ +#define NFP_NET_CFG_CTRL_RXCSUM (0x1 << 4) /* Enable RX Checksum */ +#define NFP_NET_CFG_CTRL_TXCSUM (0x1 << 5) /* Enable TX Checksum */ +#define NFP_NET_CFG_CTRL_RXVLAN (0x1 << 6) /* Enable VLAN strip */ +#define NFP_NET_CFG_CTRL_TXVLAN (0x1 << 7) /* Enable VLAN insert */ +#define NFP_NET_CFG_CTRL_SCATTER (0x1 << 8) /* Scatter DMA */ +#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */ +#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO */ +#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */ +#define NFP_NET_CFG_CTRL_RSS (0x1 << 17) /* RSS */ +#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */ +#define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */ +#define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */ +#define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/ +#define NFP_NET_CFG_CTRL_L2SWITCH (0x1 << 22) /* L2 Switch */ +#define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */ +#define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */ +#define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */ +#define NFP_NET_CFG_UPDATE 0x0004 +#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */ +#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */ +#define NFP_NET_CFG_UPDATE_RSS (0x1 << 2) /* RSS config change */ +#define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */ +#define NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */ +#define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */ +#define NFP_NET_CFG_UPDATE_L2SWITCH (0x1 << 6) /* Switch changes */ +#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */ +#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */ +#define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */ +#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */ +#define NFP_NET_CFG_TXRS_ENABLE 0x0008 +#define NFP_NET_CFG_RXRS_ENABLE 0x0010 +#define NFP_NET_CFG_MTU 0x0018 +#define NFP_NET_CFG_FLBUFSZ 0x001c +#define NFP_NET_CFG_EXN 0x001f +#define NFP_NET_CFG_LSC 0x0020 +#define NFP_NET_CFG_MACADDR 0x0024 + +/** + * Read-only words (0x0030 - 0x0050): + * @NFP_NET_CFG_VERSION: Firmware version number + * @NFP_NET_CFG_STS: Status + * @NFP_NET_CFG_CAP: Capabilities (same bits as @NFP_NET_CFG_CTRL) + * @NFP_NET_MAX_TXRINGS: Maximum number of TX rings + * @NFP_NET_MAX_RXRINGS: Maximum number of RX rings + * @NFP_NET_MAX_MTU: Maximum support MTU + * @NFP_NET_CFG_START_TXQ: Start Queue Control Queue to use for TX (PF only) + * @NFP_NET_CFG_START_RXQ: Start Queue Control Queue to use for RX (PF only) + * + * TODO: + * - define more STS bits + */ +#define NFP_NET_CFG_VERSION 0x0030 +#define NFP_NET_CFG_VERSION_RESERVED_MASK (0xff << 24) +#define NFP_NET_CFG_VERSION_CLASS_MASK (0xff << 16) +#define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16) +#define NFP_NET_CFG_VERSION_CLASS_GENERIC 0 +#define NFP_NET_CFG_VERSION_MAJOR_MASK (0xff << 8) +#define NFP_NET_CFG_VERSION_MAJOR(x) (((x) & 0xff) << 8) +#define NFP_NET_CFG_VERSION_MINOR_MASK (0xff << 0) +#define NFP_NET_CFG_VERSION_MINOR(x) (((x) & 0xff) << 0) +#define NFP_NET_CFG_STS 0x0034 +#define NFP_NET_CFG_STS_LINK (0x1 << 0) /* Link up or down */ +#define NFP_NET_CFG_CAP 0x0038 +#define NFP_NET_CFG_MAX_TXRINGS 0x003c +#define NFP_NET_CFG_MAX_RXRINGS 0x0040 +#define NFP_NET_CFG_MAX_MTU 0x0044 +/* Next two words are being used by VFs for solving THB350 issue */ +#define NFP_NET_CFG_START_TXQ 0x0048 +#define NFP_NET_CFG_START_RXQ 0x004c + +/** + * NFP-3200 workaround (0x0050 - 0x0058) + * @NFP_NET_CFG_SPARE_ADDR: DMA address for ME code to use (e.g. YDS-155 fix) + */ +#define NFP_NET_CFG_SPARE_ADDR 0x0050 +/** + * NFP6000/NFP4000 - Prepend configuration + */ +#define NFP_NET_CFG_RX_OFFSET 0x0050 +#define NFP_NET_CFG_RX_OFFSET_DYNAMIC 0 /* Prepend mode */ + +/** + * NFP6000/NFP4000 - VXLAN/UDP encap configuration + * @NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports + * @NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes + */ +#define NFP_NET_CFG_VXLAN_PORT 0x0060 +#define NFP_NET_CFG_VXLAN_SZ 0x0008 + +/** + * 64B reserved for future use (0x0080 - 0x00c0) + */ +#define NFP_NET_CFG_RESERVED 0x0080 +#define NFP_NET_CFG_RESERVED_SZ 0x0040 + +/** + * RSS configuration (0x0100 - 0x01ac): + * Used only when NFP_NET_CFG_CTRL_RSS is enabled + * @NFP_NET_CFG_RSS_CFG: RSS configuration word + * @NFP_NET_CFG_RSS_KEY: RSS "secret" key + * @NFP_NET_CFG_RSS_ITBL: RSS indirection table + */ +#define NFP_NET_CFG_RSS_BASE 0x0100 +#define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE +#define NFP_NET_CFG_RSS_MASK (0x7f) +#define NFP_NET_CFG_RSS_MASK_of(_x) ((_x) & 0x7f) +#define NFP_NET_CFG_RSS_IPV4 (1 << 8) /* RSS for IPv4 */ +#define NFP_NET_CFG_RSS_IPV6 (1 << 9) /* RSS for IPv6 */ +#define NFP_NET_CFG_RSS_IPV4_TCP (1 << 10) /* RSS for IPv4/TCP */ +#define NFP_NET_CFG_RSS_IPV4_UDP (1 << 11) /* RSS for IPv4/UDP */ +#define NFP_NET_CFG_RSS_IPV6_TCP (1 << 12) /* RSS for IPv6/TCP */ +#define NFP_NET_CFG_RSS_IPV6_UDP (1 << 13) /* RSS for IPv6/UDP */ +#define NFP_NET_CFG_RSS_TOEPLITZ (1 << 24) /* Use Toeplitz hash */ +#define NFP_NET_CFG_RSS_KEY (NFP_NET_CFG_RSS_BASE + 0x4) +#define NFP_NET_CFG_RSS_KEY_SZ 0x28 +#define NFP_NET_CFG_RSS_ITBL (NFP_NET_CFG_RSS_BASE + 0x4 + \ + NFP_NET_CFG_RSS_KEY_SZ) +#define NFP_NET_CFG_RSS_ITBL_SZ 0x80 + +/** + * TX ring configuration (0x200 - 0x800) + * @NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration + * @NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries) + * @NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries) + * @NFP_NET_CFG_TXR_SZ: Per TX ring ring size (1B entries) + * @NFP_NET_CFG_TXR_VEC: Per TX ring MSI-X table entry (1B entries) + * @NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries) + * @NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet + */ +#define NFP_NET_CFG_TXR_BASE 0x0200 +#define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8)) +#define NFP_NET_CFG_TXR_WB_ADDR(_x) (NFP_NET_CFG_TXR_BASE + 0x200 + \ + ((_x) * 0x8)) +#define NFP_NET_CFG_TXR_SZ(_x) (NFP_NET_CFG_TXR_BASE + 0x400 + (_x)) +#define NFP_NET_CFG_TXR_VEC(_x) (NFP_NET_CFG_TXR_BASE + 0x440 + (_x)) +#define NFP_NET_CFG_TXR_PRIO(_x) (NFP_NET_CFG_TXR_BASE + 0x480 + (_x)) +#define NFP_NET_CFG_TXR_IRQ_MOD(_x) (NFP_NET_CFG_TXR_BASE + 0x500 + \ + ((_x) * 0x4)) + +/** + * RX ring configuration (0x0800 - 0x0c00) + * @NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration + * @NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries) + * @NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries) + * @NFP_NET_CFG_RXR_VEC: Per RX ring MSI-X table entry (1B entries) + * @NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries) + * @NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries) + */ +#define NFP_NET_CFG_RXR_BASE 0x0800 +#define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8)) +#define NFP_NET_CFG_RXR_SZ(_x) (NFP_NET_CFG_RXR_BASE + 0x200 + (_x)) +#define NFP_NET_CFG_RXR_VEC(_x) (NFP_NET_CFG_RXR_BASE + 0x240 + (_x)) +#define NFP_NET_CFG_RXR_PRIO(_x) (NFP_NET_CFG_RXR_BASE + 0x280 + (_x)) +#define NFP_NET_CFG_RXR_IRQ_MOD(_x) (NFP_NET_CFG_RXR_BASE + 0x300 + \ + ((_x) * 0x4)) + +/** + * Interrupt Control/Cause registers (0x0c00 - 0x0d00) + * These registers are only used when MSI-X auto-masking is not + * enabled (@NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index + * by MSI-X entry and are 1B in size. If an entry is zero, the + * corresponding entry is enabled. If the FW generates an interrupt, + * it writes a cause into the corresponding field. This also masks + * the MSI-X entry and the host driver must clear the register to + * re-enable the interrupt. + */ +#define NFP_NET_CFG_ICR_BASE 0x0c00 +#define NFP_NET_CFG_ICR(_x) (NFP_NET_CFG_ICR_BASE + (_x)) +#define NFP_NET_CFG_ICR_UNMASKED 0x0 +#define NFP_NET_CFG_ICR_RXTX 0x1 +#define NFP_NET_CFG_ICR_LSC 0x2 + +/** + * General device stats (0x0d00 - 0x0d90) + * all counters are 64bit. + */ +#define NFP_NET_CFG_STATS_BASE 0x0d00 +#define NFP_NET_CFG_STATS_RX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x00) +#define NFP_NET_CFG_STATS_RX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x08) +#define NFP_NET_CFG_STATS_RX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x10) +#define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18) +#define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20) +#define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28) +#define NFP_NET_CFG_STATS_RX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x30) +#define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38) +#define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40) + +#define NFP_NET_CFG_STATS_TX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x48) +#define NFP_NET_CFG_STATS_TX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x50) +#define NFP_NET_CFG_STATS_TX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x58) +#define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60) +#define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68) +#define NFP_NET_CFG_STATS_TX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x70) +#define NFP_NET_CFG_STATS_TX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x78) +#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80) +#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88) + +/** + * Per ring stats (0x1000 - 0x1800) + * options, 64bit per entry + * @NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count) + * @NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count) + */ +#define NFP_NET_CFG_TXR_STATS_BASE 0x1000 +#define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \ + ((_x) * 0x10)) +#define NFP_NET_CFG_RXR_STATS_BASE 0x1400 +#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \ + ((_x) * 0x10)) + +#endif /* _NFP_NET_CTRL_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c new file mode 100644 index 000000000000..4c97c713121c --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/rtnetlink.h> + +#include "nfp_net.h" + +static struct dentry *nfp_dir; + +static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data) +{ + struct nfp_net_rx_ring *rx_ring = file->private; + int fl_rd_p, fl_wr_p, rx_rd_p, rx_wr_p, rxd_cnt; + struct nfp_net_rx_desc *rxd; + struct sk_buff *skb; + struct nfp_net *nn; + int i; + + rtnl_lock(); + + if (!rx_ring->r_vec || !rx_ring->r_vec->nfp_net) + goto out; + nn = rx_ring->r_vec->nfp_net; + if (!netif_running(nn->netdev)) + goto out; + + rxd_cnt = rx_ring->cnt; + + fl_rd_p = nfp_qcp_rd_ptr_read(rx_ring->qcp_fl); + fl_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_fl); + rx_rd_p = nfp_qcp_rd_ptr_read(rx_ring->qcp_rx); + rx_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_rx); + + seq_printf(file, "RX[%02d]: H_RD=%d H_WR=%d FL_RD=%d FL_WR=%d RX_RD=%d RX_WR=%d\n", + rx_ring->idx, rx_ring->rd_p, rx_ring->wr_p, + fl_rd_p, fl_wr_p, rx_rd_p, rx_wr_p); + + for (i = 0; i < rxd_cnt; i++) { + rxd = &rx_ring->rxds[i]; + seq_printf(file, "%04d: 0x%08x 0x%08x", i, + rxd->vals[0], rxd->vals[1]); + + skb = READ_ONCE(rx_ring->rxbufs[i].skb); + if (skb) + seq_printf(file, " skb->head=%p skb->data=%p", + skb->head, skb->data); + + if (rx_ring->rxbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &rx_ring->rxbufs[i].dma_addr); + + if (i == rx_ring->rd_p % rxd_cnt) + seq_puts(file, " H_RD "); + if (i == rx_ring->wr_p % rxd_cnt) + seq_puts(file, " H_WR "); + if (i == fl_rd_p % rxd_cnt) + seq_puts(file, " FL_RD"); + if (i == fl_wr_p % rxd_cnt) + seq_puts(file, " FL_WR"); + if (i == rx_rd_p % rxd_cnt) + seq_puts(file, " RX_RD"); + if (i == rx_wr_p % rxd_cnt) + seq_puts(file, " RX_WR"); + + seq_putc(file, '\n'); + } +out: + rtnl_unlock(); + return 0; +} + +static int nfp_net_debugfs_rx_q_open(struct inode *inode, struct file *f) +{ + return single_open(f, nfp_net_debugfs_rx_q_read, inode->i_private); +} + +static const struct file_operations nfp_rx_q_fops = { + .owner = THIS_MODULE, + .open = nfp_net_debugfs_rx_q_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek +}; + +static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data) +{ + struct nfp_net_tx_ring *tx_ring = file->private; + struct nfp_net_tx_desc *txd; + int d_rd_p, d_wr_p, txd_cnt; + struct sk_buff *skb; + struct nfp_net *nn; + int i; + + rtnl_lock(); + + if (!tx_ring->r_vec || !tx_ring->r_vec->nfp_net) + goto out; + nn = tx_ring->r_vec->nfp_net; + if (!netif_running(nn->netdev)) + goto out; + + txd_cnt = tx_ring->cnt; + + d_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); + d_wr_p = nfp_qcp_wr_ptr_read(tx_ring->qcp_q); + + seq_printf(file, "TX[%02d]: H_RD=%d H_WR=%d D_RD=%d D_WR=%d\n", + tx_ring->idx, tx_ring->rd_p, tx_ring->wr_p, d_rd_p, d_wr_p); + + for (i = 0; i < txd_cnt; i++) { + txd = &tx_ring->txds[i]; + seq_printf(file, "%04d: 0x%08x 0x%08x 0x%08x 0x%08x", i, + txd->vals[0], txd->vals[1], + txd->vals[2], txd->vals[3]); + + skb = READ_ONCE(tx_ring->txbufs[i].skb); + if (skb) + seq_printf(file, " skb->head=%p skb->data=%p", + skb->head, skb->data); + if (tx_ring->txbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &tx_ring->txbufs[i].dma_addr); + + if (i == tx_ring->rd_p % txd_cnt) + seq_puts(file, " H_RD"); + if (i == tx_ring->wr_p % txd_cnt) + seq_puts(file, " H_WR"); + if (i == d_rd_p % txd_cnt) + seq_puts(file, " D_RD"); + if (i == d_wr_p % txd_cnt) + seq_puts(file, " D_WR"); + + seq_putc(file, '\n'); + } +out: + rtnl_unlock(); + return 0; +} + +static int nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f) +{ + return single_open(f, nfp_net_debugfs_tx_q_read, inode->i_private); +} + +static const struct file_operations nfp_tx_q_fops = { + .owner = THIS_MODULE, + .open = nfp_net_debugfs_tx_q_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek +}; + +void nfp_net_debugfs_adapter_add(struct nfp_net *nn) +{ + static struct dentry *queues, *tx, *rx; + char int_name[16]; + int i; + + if (IS_ERR_OR_NULL(nfp_dir)) + return; + + nn->debugfs_dir = debugfs_create_dir(pci_name(nn->pdev), nfp_dir); + if (IS_ERR_OR_NULL(nn->debugfs_dir)) + return; + + /* Create queue debugging sub-tree */ + queues = debugfs_create_dir("queue", nn->debugfs_dir); + if (IS_ERR_OR_NULL(nn->debugfs_dir)) + return; + + rx = debugfs_create_dir("rx", queues); + tx = debugfs_create_dir("tx", queues); + if (IS_ERR_OR_NULL(rx) || IS_ERR_OR_NULL(tx)) + return; + + for (i = 0; i < nn->num_rx_rings; i++) { + sprintf(int_name, "%d", i); + debugfs_create_file(int_name, S_IRUSR, rx, + &nn->rx_rings[i], &nfp_rx_q_fops); + } + + for (i = 0; i < nn->num_tx_rings; i++) { + sprintf(int_name, "%d", i); + debugfs_create_file(int_name, S_IRUSR, tx, + &nn->tx_rings[i], &nfp_tx_q_fops); + } +} + +void nfp_net_debugfs_adapter_del(struct nfp_net *nn) +{ + debugfs_remove_recursive(nn->debugfs_dir); + nn->debugfs_dir = NULL; +} + +void nfp_net_debugfs_create(void) +{ + nfp_dir = debugfs_create_dir("nfp_net", NULL); +} + +void nfp_net_debugfs_destroy(void) +{ + debugfs_remove_recursive(nfp_dir); + nfp_dir = NULL; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c new file mode 100644 index 000000000000..9a4084a68db5 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -0,0 +1,640 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_net_ethtool.c + * Netronome network device driver: ethtool support + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + * Brad Petrus <brad.petrus@netronome.com> + */ + +#include <linux/version.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/ethtool.h> + +#include "nfp_net_ctrl.h" +#include "nfp_net.h" + +/* Support for stats. Returns netdev, driver, and device stats */ +enum { NETDEV_ET_STATS, NFP_NET_DRV_ET_STATS, NFP_NET_DEV_ET_STATS }; +struct _nfp_net_et_stats { + char name[ETH_GSTRING_LEN]; + int type; + int sz; + int off; +}; + +#define NN_ET_NETDEV_STAT(m) NETDEV_ET_STATS, \ + FIELD_SIZEOF(struct net_device_stats, m), \ + offsetof(struct net_device_stats, m) +/* For stats in the control BAR (other than Q stats) */ +#define NN_ET_DEV_STAT(m) NFP_NET_DEV_ET_STATS, \ + sizeof(u64), \ + (m) +static const struct _nfp_net_et_stats nfp_net_et_stats[] = { + /* netdev stats */ + {"rx_packets", NN_ET_NETDEV_STAT(rx_packets)}, + {"tx_packets", NN_ET_NETDEV_STAT(tx_packets)}, + {"rx_bytes", NN_ET_NETDEV_STAT(rx_bytes)}, + {"tx_bytes", NN_ET_NETDEV_STAT(tx_bytes)}, + {"rx_errors", NN_ET_NETDEV_STAT(rx_errors)}, + {"tx_errors", NN_ET_NETDEV_STAT(tx_errors)}, + {"rx_dropped", NN_ET_NETDEV_STAT(rx_dropped)}, + {"tx_dropped", NN_ET_NETDEV_STAT(tx_dropped)}, + {"multicast", NN_ET_NETDEV_STAT(multicast)}, + {"collisions", NN_ET_NETDEV_STAT(collisions)}, + {"rx_over_errors", NN_ET_NETDEV_STAT(rx_over_errors)}, + {"rx_crc_errors", NN_ET_NETDEV_STAT(rx_crc_errors)}, + {"rx_frame_errors", NN_ET_NETDEV_STAT(rx_frame_errors)}, + {"rx_fifo_errors", NN_ET_NETDEV_STAT(rx_fifo_errors)}, + {"rx_missed_errors", NN_ET_NETDEV_STAT(rx_missed_errors)}, + {"tx_aborted_errors", NN_ET_NETDEV_STAT(tx_aborted_errors)}, + {"tx_carrier_errors", NN_ET_NETDEV_STAT(tx_carrier_errors)}, + {"tx_fifo_errors", NN_ET_NETDEV_STAT(tx_fifo_errors)}, + /* Stats from the device */ + {"dev_rx_discards", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_DISCARDS)}, + {"dev_rx_errors", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_ERRORS)}, + {"dev_rx_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_OCTETS)}, + {"dev_rx_uc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_UC_OCTETS)}, + {"dev_rx_mc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_MC_OCTETS)}, + {"dev_rx_bc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_BC_OCTETS)}, + {"dev_rx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_FRAMES)}, + {"dev_rx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_MC_FRAMES)}, + {"dev_rx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_BC_FRAMES)}, + + {"dev_tx_discards", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_DISCARDS)}, + {"dev_tx_errors", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_ERRORS)}, + {"dev_tx_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_OCTETS)}, + {"dev_tx_uc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_UC_OCTETS)}, + {"dev_tx_mc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_OCTETS)}, + {"dev_tx_bc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_OCTETS)}, + {"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)}, + {"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)}, + {"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)}, +}; + +#define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) +#define NN_ET_RVEC_STATS_LEN (nn->num_r_vecs * 3) +#define NN_ET_RVEC_GATHER_STATS 7 +#define NN_ET_QUEUE_STATS_LEN ((nn->num_tx_rings + nn->num_rx_rings) * 2) +#define NN_ET_STATS_LEN (NN_ET_GLOBAL_STATS_LEN + NN_ET_RVEC_GATHER_STATS + \ + NN_ET_RVEC_STATS_LEN + NN_ET_QUEUE_STATS_LEN) + +static void nfp_net_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct nfp_net *nn = netdev_priv(netdev); + + strlcpy(drvinfo->driver, nfp_net_driver_name, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, nfp_net_driver_version, + sizeof(drvinfo->version)); + + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d.%d", + nn->fw_ver.resv, nn->fw_ver.class, + nn->fw_ver.major, nn->fw_ver.minor); + strlcpy(drvinfo->bus_info, pci_name(nn->pdev), + sizeof(drvinfo->bus_info)); + + drvinfo->n_stats = NN_ET_STATS_LEN; + drvinfo->regdump_len = NFP_NET_CFG_BAR_SZ; +} + +static void nfp_net_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct nfp_net *nn = netdev_priv(netdev); + + ring->rx_max_pending = NFP_NET_MAX_RX_DESCS; + ring->tx_max_pending = NFP_NET_MAX_TX_DESCS; + ring->rx_pending = nn->rxd_cnt; + ring->tx_pending = nn->txd_cnt; +} + +static int nfp_net_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct nfp_net *nn = netdev_priv(netdev); + u32 rxd_cnt, txd_cnt; + + if (netif_running(netdev)) { + /* Some NIC drivers allow reconfiguration on the fly, + * some down the interface, change and then up it + * again. For now we don't allow changes when the + * device is up. + */ + nn_warn(nn, "Can't change rings while device is up\n"); + return -EBUSY; + } + + /* We don't have separate queues/rings for small/large frames. */ + if (ring->rx_mini_pending || ring->rx_jumbo_pending) + return -EINVAL; + + /* Round up to supported values */ + rxd_cnt = roundup_pow_of_two(ring->rx_pending); + rxd_cnt = max_t(u32, rxd_cnt, NFP_NET_MIN_RX_DESCS); + rxd_cnt = min_t(u32, rxd_cnt, NFP_NET_MAX_RX_DESCS); + + txd_cnt = roundup_pow_of_two(ring->tx_pending); + txd_cnt = max_t(u32, txd_cnt, NFP_NET_MIN_TX_DESCS); + txd_cnt = min_t(u32, txd_cnt, NFP_NET_MAX_TX_DESCS); + + if (nn->rxd_cnt != rxd_cnt || nn->txd_cnt != txd_cnt) + nn_dbg(nn, "Change ring size: RxQ %u->%u, TxQ %u->%u\n", + nn->rxd_cnt, rxd_cnt, nn->txd_cnt, txd_cnt); + + nn->rxd_cnt = rxd_cnt; + nn->txd_cnt = txd_cnt; + + return 0; +} + +static void nfp_net_get_strings(struct net_device *netdev, + u32 stringset, u8 *data) +{ + struct nfp_net *nn = netdev_priv(netdev); + u8 *p = data; + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++) { + memcpy(p, nfp_net_et_stats[i].name, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < nn->num_r_vecs; i++) { + sprintf(p, "rvec_%u_rx_pkts", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rvec_%u_tx_pkts", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rvec_%u_tx_busy", i); + p += ETH_GSTRING_LEN; + } + strncpy(p, "hw_rx_csum_ok", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "hw_rx_csum_inner_ok", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "hw_rx_csum_err", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "hw_tx_csum", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "hw_tx_inner_csum", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "tx_gather", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "tx_lso", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + for (i = 0; i < nn->num_tx_rings; i++) { + sprintf(p, "txq_%u_pkts", i); + p += ETH_GSTRING_LEN; + sprintf(p, "txq_%u_bytes", i); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < nn->num_rx_rings; i++) { + sprintf(p, "rxq_%u_pkts", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rxq_%u_bytes", i); + p += ETH_GSTRING_LEN; + } + break; + } +} + +static void nfp_net_get_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + u64 gathered_stats[NN_ET_RVEC_GATHER_STATS] = {}; + struct nfp_net *nn = netdev_priv(netdev); + struct rtnl_link_stats64 *netdev_stats; + struct rtnl_link_stats64 temp = {}; + u64 tmp[NN_ET_RVEC_GATHER_STATS]; + u8 __iomem *io_p; + int i, j, k; + u8 *p; + + netdev_stats = dev_get_stats(netdev, &temp); + + for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++) { + switch (nfp_net_et_stats[i].type) { + case NETDEV_ET_STATS: + p = (char *)netdev_stats + nfp_net_et_stats[i].off; + data[i] = nfp_net_et_stats[i].sz == sizeof(u64) ? + *(u64 *)p : *(u32 *)p; + break; + + case NFP_NET_DEV_ET_STATS: + io_p = nn->ctrl_bar + nfp_net_et_stats[i].off; + data[i] = readq(io_p); + break; + } + } + for (j = 0; j < nn->num_r_vecs; j++) { + unsigned int start; + + do { + start = u64_stats_fetch_begin(&nn->r_vecs[j].rx_sync); + data[i++] = nn->r_vecs[j].rx_pkts; + tmp[0] = nn->r_vecs[j].hw_csum_rx_ok; + tmp[1] = nn->r_vecs[j].hw_csum_rx_inner_ok; + tmp[2] = nn->r_vecs[j].hw_csum_rx_error; + } while (u64_stats_fetch_retry(&nn->r_vecs[j].rx_sync, start)); + + do { + start = u64_stats_fetch_begin(&nn->r_vecs[j].tx_sync); + data[i++] = nn->r_vecs[j].tx_pkts; + data[i++] = nn->r_vecs[j].tx_busy; + tmp[3] = nn->r_vecs[j].hw_csum_tx; + tmp[4] = nn->r_vecs[j].hw_csum_tx_inner; + tmp[5] = nn->r_vecs[j].tx_gather; + tmp[6] = nn->r_vecs[j].tx_lso; + } while (u64_stats_fetch_retry(&nn->r_vecs[j].tx_sync, start)); + + for (k = 0; k < NN_ET_RVEC_GATHER_STATS; k++) + gathered_stats[k] += tmp[k]; + } + for (j = 0; j < NN_ET_RVEC_GATHER_STATS; j++) + data[i++] = gathered_stats[j]; + for (j = 0; j < nn->num_tx_rings; j++) { + io_p = nn->ctrl_bar + NFP_NET_CFG_TXR_STATS(j); + data[i++] = readq(io_p); + io_p = nn->ctrl_bar + NFP_NET_CFG_TXR_STATS(j) + 8; + data[i++] = readq(io_p); + } + for (j = 0; j < nn->num_rx_rings; j++) { + io_p = nn->ctrl_bar + NFP_NET_CFG_RXR_STATS(j); + data[i++] = readq(io_p); + io_p = nn->ctrl_bar + NFP_NET_CFG_RXR_STATS(j) + 8; + data[i++] = readq(io_p); + } +} + +static int nfp_net_get_sset_count(struct net_device *netdev, int sset) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (sset) { + case ETH_SS_STATS: + return NN_ET_STATS_LEN; + default: + return -EOPNOTSUPP; + } +} + +/* RX network flow classification (RSS, filters, etc) + */ +static u32 ethtool_flow_to_nfp_flag(u32 flow_type) +{ + static const u32 xlate_ethtool_to_nfp[IPV6_FLOW + 1] = { + [TCP_V4_FLOW] = NFP_NET_CFG_RSS_IPV4_TCP, + [TCP_V6_FLOW] = NFP_NET_CFG_RSS_IPV6_TCP, + [UDP_V4_FLOW] = NFP_NET_CFG_RSS_IPV4_UDP, + [UDP_V6_FLOW] = NFP_NET_CFG_RSS_IPV6_UDP, + [IPV4_FLOW] = NFP_NET_CFG_RSS_IPV4, + [IPV6_FLOW] = NFP_NET_CFG_RSS_IPV6, + }; + + if (flow_type >= ARRAY_SIZE(xlate_ethtool_to_nfp)) + return 0; + + return xlate_ethtool_to_nfp[flow_type]; +} + +static int nfp_net_get_rss_hash_opts(struct nfp_net *nn, + struct ethtool_rxnfc *cmd) +{ + u32 nfp_rss_flag; + + cmd->data = 0; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS)) + return -EOPNOTSUPP; + + nfp_rss_flag = ethtool_flow_to_nfp_flag(cmd->flow_type); + if (!nfp_rss_flag) + return -EINVAL; + + cmd->data |= RXH_IP_SRC | RXH_IP_DST; + if (nn->rss_cfg & nfp_rss_flag) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + + return 0; +} + +static int nfp_net_get_rxnfc(struct net_device *netdev, + struct ethtool_rxnfc *cmd, u32 *rule_locs) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = nn->num_rx_rings; + return 0; + case ETHTOOL_GRXFH: + return nfp_net_get_rss_hash_opts(nn, cmd); + default: + return -EOPNOTSUPP; + } +} + +static int nfp_net_set_rss_hash_opt(struct nfp_net *nn, + struct ethtool_rxnfc *nfc) +{ + u32 new_rss_cfg = nn->rss_cfg; + u32 nfp_rss_flag; + int err; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS)) + return -EOPNOTSUPP; + + /* RSS only supports IP SA/DA and L4 src/dst ports */ + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EINVAL; + + /* We need at least the IP SA/DA fields for hashing */ + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + + nfp_rss_flag = ethtool_flow_to_nfp_flag(nfc->flow_type); + if (!nfp_rss_flag) + return -EINVAL; + + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + new_rss_cfg &= ~nfp_rss_flag; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + new_rss_cfg |= nfp_rss_flag; + break; + default: + return -EINVAL; + } + + new_rss_cfg |= NFP_NET_CFG_RSS_TOEPLITZ; + new_rss_cfg |= NFP_NET_CFG_RSS_MASK; + + if (new_rss_cfg == nn->rss_cfg) + return 0; + + writel(new_rss_cfg, nn->ctrl_bar + NFP_NET_CFG_RSS_CTRL); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RSS); + if (err) + return err; + + nn->rss_cfg = new_rss_cfg; + + nn_dbg(nn, "Changed RSS config to 0x%x\n", nn->rss_cfg); + return 0; +} + +static int nfp_net_set_rxnfc(struct net_device *netdev, + struct ethtool_rxnfc *cmd) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (cmd->cmd) { + case ETHTOOL_SRXFH: + return nfp_net_set_rss_hash_opt(nn, cmd); + default: + return -EOPNOTSUPP; + } +} + +static u32 nfp_net_get_rxfh_indir_size(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS)) + return 0; + + return ARRAY_SIZE(nn->rss_itbl); +} + +static u32 nfp_net_get_rxfh_key_size(struct net_device *netdev) +{ + return NFP_NET_CFG_RSS_KEY_SZ; +} + +static int nfp_net_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct nfp_net *nn = netdev_priv(netdev); + int i; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS)) + return -EOPNOTSUPP; + + if (indir) + for (i = 0; i < ARRAY_SIZE(nn->rss_itbl); i++) + indir[i] = nn->rss_itbl[i]; + if (key) + memcpy(key, nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ); + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + return 0; +} + +static int nfp_net_set_rxfh(struct net_device *netdev, + const u32 *indir, const u8 *key, + const u8 hfunc) +{ + struct nfp_net *nn = netdev_priv(netdev); + int i; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS) || + !(hfunc == ETH_RSS_HASH_NO_CHANGE || hfunc == ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + + if (!key && !indir) + return 0; + + if (key) { + memcpy(nn->rss_key, key, NFP_NET_CFG_RSS_KEY_SZ); + nfp_net_rss_write_key(nn); + } + if (indir) { + for (i = 0; i < ARRAY_SIZE(nn->rss_itbl); i++) + nn->rss_itbl[i] = indir[i]; + + nfp_net_rss_write_itbl(nn); + } + + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RSS); +} + +/* Dump BAR registers + */ +static int nfp_net_get_regs_len(struct net_device *netdev) +{ + return NFP_NET_CFG_BAR_SZ; +} + +static void nfp_net_get_regs(struct net_device *netdev, + struct ethtool_regs *regs, void *p) +{ + struct nfp_net *nn = netdev_priv(netdev); + u32 *regs_buf = p; + int i; + + regs->version = nn_readl(nn, NFP_NET_CFG_VERSION); + + for (i = 0; i < NFP_NET_CFG_BAR_SZ / sizeof(u32); i++) + regs_buf[i] = readl(nn->ctrl_bar + (i * sizeof(u32))); +} + +static int nfp_net_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (!(nn->cap & NFP_NET_CFG_CTRL_IRQMOD)) + return -EINVAL; + + ec->rx_coalesce_usecs = nn->rx_coalesce_usecs; + ec->rx_max_coalesced_frames = nn->rx_coalesce_max_frames; + ec->tx_coalesce_usecs = nn->tx_coalesce_usecs; + ec->tx_max_coalesced_frames = nn->tx_coalesce_max_frames; + + return 0; +} + +static int nfp_net_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct nfp_net *nn = netdev_priv(netdev); + unsigned int factor; + + if (ec->rx_coalesce_usecs_irq || + ec->rx_max_coalesced_frames_irq || + ec->tx_coalesce_usecs_irq || + ec->tx_max_coalesced_frames_irq || + ec->stats_block_coalesce_usecs || + ec->use_adaptive_rx_coalesce || + ec->use_adaptive_tx_coalesce || + ec->pkt_rate_low || + ec->rx_coalesce_usecs_low || + ec->rx_max_coalesced_frames_low || + ec->tx_coalesce_usecs_low || + ec->tx_max_coalesced_frames_low || + ec->pkt_rate_high || + ec->rx_coalesce_usecs_high || + ec->rx_max_coalesced_frames_high || + ec->tx_coalesce_usecs_high || + ec->tx_max_coalesced_frames_high || + ec->rate_sample_interval) + return -ENOTSUPP; + + /* Compute factor used to convert coalesce '_usecs' parameters to + * ME timestamp ticks. There are 16 ME clock cycles for each timestamp + * count. + */ + factor = nn->me_freq_mhz / 16; + + /* Each pair of (usecs, max_frames) fields specifies that interrupts + * should be coalesced until + * (usecs > 0 && time_since_first_completion >= usecs) || + * (max_frames > 0 && completed_frames >= max_frames) + * + * It is illegal to set both usecs and max_frames to zero as this would + * cause interrupts to never be generated. To disable coalescing, set + * usecs = 0 and max_frames = 1. + * + * Some implementations ignore the value of max_frames and use the + * condition time_since_first_completion >= usecs + */ + + if (!(nn->cap & NFP_NET_CFG_CTRL_IRQMOD)) + return -EINVAL; + + /* ensure valid configuration */ + if (!ec->rx_coalesce_usecs && !ec->rx_max_coalesced_frames) + return -EINVAL; + + if (!ec->tx_coalesce_usecs && !ec->tx_max_coalesced_frames) + return -EINVAL; + + if (ec->rx_coalesce_usecs * factor >= ((1 << 16) - 1)) + return -EINVAL; + + if (ec->tx_coalesce_usecs * factor >= ((1 << 16) - 1)) + return -EINVAL; + + if (ec->rx_max_coalesced_frames >= ((1 << 16) - 1)) + return -EINVAL; + + if (ec->tx_max_coalesced_frames >= ((1 << 16) - 1)) + return -EINVAL; + + /* configuration is valid */ + nn->rx_coalesce_usecs = ec->rx_coalesce_usecs; + nn->rx_coalesce_max_frames = ec->rx_max_coalesced_frames; + nn->tx_coalesce_usecs = ec->tx_coalesce_usecs; + nn->tx_coalesce_max_frames = ec->tx_max_coalesced_frames; + + /* write configuration to device */ + nfp_net_coalesce_write_cfg(nn); + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD); +} + +static const struct ethtool_ops nfp_net_ethtool_ops = { + .get_drvinfo = nfp_net_get_drvinfo, + .get_ringparam = nfp_net_get_ringparam, + .set_ringparam = nfp_net_set_ringparam, + .get_strings = nfp_net_get_strings, + .get_ethtool_stats = nfp_net_get_stats, + .get_sset_count = nfp_net_get_sset_count, + .get_rxnfc = nfp_net_get_rxnfc, + .set_rxnfc = nfp_net_set_rxnfc, + .get_rxfh_indir_size = nfp_net_get_rxfh_indir_size, + .get_rxfh_key_size = nfp_net_get_rxfh_key_size, + .get_rxfh = nfp_net_get_rxfh, + .set_rxfh = nfp_net_set_rxfh, + .get_regs_len = nfp_net_get_regs_len, + .get_regs = nfp_net_get_regs, + .get_coalesce = nfp_net_get_coalesce, + .set_coalesce = nfp_net_set_coalesce, +}; + +void nfp_net_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &nfp_net_ethtool_ops; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c new file mode 100644 index 000000000000..e2b22b8a20f1 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -0,0 +1,385 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_netvf_main.c + * Netronome virtual function network device driver: Main entry point + * Author: Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + */ + +#include <linux/version.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/etherdevice.h> + +#include "nfp_net_ctrl.h" +#include "nfp_net.h" + +const char nfp_net_driver_name[] = "nfp_netvf"; +const char nfp_net_driver_version[] = "0.1"; +#define PCI_DEVICE_NFP6000VF 0x6003 +static const struct pci_device_id nfp_netvf_pci_device_ids[] = { + { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_NFP6000VF, + PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID, + PCI_ANY_ID, 0, + }, + { 0, } /* Required last entry. */ +}; +MODULE_DEVICE_TABLE(pci, nfp_netvf_pci_device_ids); + +static void nfp_netvf_get_mac_addr(struct nfp_net *nn) +{ + u8 mac_addr[ETH_ALEN]; + + put_unaligned_be32(nn_readl(nn, NFP_NET_CFG_MACADDR + 0), &mac_addr[0]); + /* We can't do readw for NFP-3200 compatibility */ + put_unaligned_be16(nn_readl(nn, NFP_NET_CFG_MACADDR + 4) >> 16, + &mac_addr[4]); + + if (!is_valid_ether_addr(mac_addr)) { + eth_hw_addr_random(nn->netdev); + return; + } + + ether_addr_copy(nn->netdev->dev_addr, mac_addr); + ether_addr_copy(nn->netdev->perm_addr, mac_addr); +} + +static int nfp_netvf_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *pci_id) +{ + struct nfp_net_fw_version fw_ver; + int max_tx_rings, max_rx_rings; + u32 tx_bar_off, rx_bar_off; + u32 tx_bar_sz, rx_bar_sz; + int tx_bar_no, rx_bar_no; + u8 __iomem *ctrl_bar; + struct nfp_net *nn; + int is_nfp3200; + u32 startq; + int stride; + int err; + + err = pci_enable_device_mem(pdev); + if (err) + return err; + + err = pci_request_regions(pdev, nfp_net_driver_name); + if (err) { + dev_err(&pdev->dev, "Unable to allocate device memory.\n"); + goto err_pci_disable; + } + + switch (pdev->device) { + case PCI_DEVICE_NFP6000VF: + is_nfp3200 = 0; + break; + default: + err = -ENODEV; + goto err_pci_regions; + } + + pci_set_master(pdev); + + err = dma_set_mask_and_coherent(&pdev->dev, + DMA_BIT_MASK(NFP_NET_MAX_DMA_BITS)); + if (err) + goto err_pci_regions; + + /* Map the Control BAR. + * + * Irrespective of the advertised BAR size we only map the + * first NFP_NET_CFG_BAR_SZ of the BAR. This keeps the code + * the identical for PF and VF drivers. + */ + ctrl_bar = ioremap_nocache(pci_resource_start(pdev, NFP_NET_CRTL_BAR), + NFP_NET_CFG_BAR_SZ); + if (!ctrl_bar) { + dev_err(&pdev->dev, + "Failed to map resource %d\n", NFP_NET_CRTL_BAR); + err = -EIO; + goto err_pci_regions; + } + + nfp_net_get_fw_version(&fw_ver, ctrl_bar); + if (fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) { + dev_err(&pdev->dev, "Unknown Firmware ABI %d.%d.%d.%d\n", + fw_ver.resv, fw_ver.class, fw_ver.major, fw_ver.minor); + err = -EINVAL; + goto err_ctrl_unmap; + } + + /* Determine stride */ + if (nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 0) || + nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 1) || + nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0x12, 0x48)) { + stride = 2; + tx_bar_no = NFP_NET_Q0_BAR; + rx_bar_no = NFP_NET_Q1_BAR; + dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n"); + } else { + switch (fw_ver.major) { + case 1 ... 3: + if (is_nfp3200) { + stride = 2; + tx_bar_no = NFP_NET_Q0_BAR; + rx_bar_no = NFP_NET_Q1_BAR; + } else { + stride = 4; + tx_bar_no = NFP_NET_Q0_BAR; + rx_bar_no = tx_bar_no; + } + break; + default: + dev_err(&pdev->dev, "Unsupported Firmware ABI %d.%d.%d.%d\n", + fw_ver.resv, fw_ver.class, + fw_ver.major, fw_ver.minor); + err = -EINVAL; + goto err_ctrl_unmap; + } + } + + /* Find out how many rings are supported */ + max_tx_rings = readl(ctrl_bar + NFP_NET_CFG_MAX_TXRINGS); + max_rx_rings = readl(ctrl_bar + NFP_NET_CFG_MAX_RXRINGS); + + tx_bar_sz = NFP_QCP_QUEUE_ADDR_SZ * max_tx_rings * stride; + rx_bar_sz = NFP_QCP_QUEUE_ADDR_SZ * max_rx_rings * stride; + + /* Sanity checks */ + if (tx_bar_sz > pci_resource_len(pdev, tx_bar_no)) { + dev_err(&pdev->dev, + "TX BAR too small for number of TX rings. Adjusting\n"); + tx_bar_sz = pci_resource_len(pdev, tx_bar_no); + max_tx_rings = (tx_bar_sz / NFP_QCP_QUEUE_ADDR_SZ) / 2; + } + if (rx_bar_sz > pci_resource_len(pdev, rx_bar_no)) { + dev_err(&pdev->dev, + "RX BAR too small for number of RX rings. Adjusting\n"); + rx_bar_sz = pci_resource_len(pdev, rx_bar_no); + max_rx_rings = (rx_bar_sz / NFP_QCP_QUEUE_ADDR_SZ) / 2; + } + + /* XXX Implement a workaround for THB-350 here. Ideally, we + * have a different PCI ID for A rev VFs. + */ + switch (pdev->device) { + case PCI_DEVICE_NFP6000VF: + startq = readl(ctrl_bar + NFP_NET_CFG_START_TXQ); + tx_bar_off = NFP_PCIE_QUEUE(startq); + startq = readl(ctrl_bar + NFP_NET_CFG_START_RXQ); + rx_bar_off = NFP_PCIE_QUEUE(startq); + break; + default: + err = -ENODEV; + goto err_ctrl_unmap; + } + + /* Allocate and initialise the netdev */ + nn = nfp_net_netdev_alloc(pdev, max_tx_rings, max_rx_rings); + if (IS_ERR(nn)) { + err = PTR_ERR(nn); + goto err_ctrl_unmap; + } + + nn->fw_ver = fw_ver; + nn->ctrl_bar = ctrl_bar; + nn->is_vf = 1; + nn->is_nfp3200 = is_nfp3200; + nn->stride_tx = stride; + nn->stride_rx = stride; + + if (rx_bar_no == tx_bar_no) { + u32 bar_off, bar_sz; + resource_size_t map_addr; + + /* Make a single overlapping BAR mapping */ + if (tx_bar_off < rx_bar_off) + bar_off = tx_bar_off; + else + bar_off = rx_bar_off; + + if ((tx_bar_off + tx_bar_sz) > (rx_bar_off + rx_bar_sz)) + bar_sz = (tx_bar_off + tx_bar_sz) - bar_off; + else + bar_sz = (rx_bar_off + rx_bar_sz) - bar_off; + + map_addr = pci_resource_start(pdev, tx_bar_no) + bar_off; + nn->q_bar = ioremap_nocache(map_addr, bar_sz); + if (!nn->q_bar) { + nn_err(nn, "Failed to map resource %d\n", tx_bar_no); + err = -EIO; + goto err_netdev_free; + } + + /* TX queues */ + nn->tx_bar = nn->q_bar + (tx_bar_off - bar_off); + /* RX queues */ + nn->rx_bar = nn->q_bar + (rx_bar_off - bar_off); + } else { + resource_size_t map_addr; + + /* TX queues */ + map_addr = pci_resource_start(pdev, tx_bar_no) + tx_bar_off; + nn->tx_bar = ioremap_nocache(map_addr, tx_bar_sz); + if (!nn->tx_bar) { + nn_err(nn, "Failed to map resource %d\n", tx_bar_no); + err = -EIO; + goto err_netdev_free; + } + + /* RX queues */ + map_addr = pci_resource_start(pdev, rx_bar_no) + rx_bar_off; + nn->rx_bar = ioremap_nocache(map_addr, rx_bar_sz); + if (!nn->rx_bar) { + nn_err(nn, "Failed to map resource %d\n", rx_bar_no); + err = -EIO; + goto err_unmap_tx; + } + } + + nfp_netvf_get_mac_addr(nn); + + err = nfp_net_irqs_alloc(nn); + if (!err) { + nn_warn(nn, "Unable to allocate MSI-X Vectors. Exiting\n"); + err = -EIO; + goto err_unmap_rx; + } + + /* Get ME clock frequency from ctrl BAR + * XXX for now frequency is hardcoded until we figure out how + * to get the value from nfp-hwinfo into ctrl bar + */ + nn->me_freq_mhz = 1200; + + err = nfp_net_netdev_init(nn->netdev); + if (err) + goto err_irqs_disable; + + pci_set_drvdata(pdev, nn); + + nfp_net_info(nn); + nfp_net_debugfs_adapter_add(nn); + + return 0; + +err_irqs_disable: + nfp_net_irqs_disable(nn); +err_unmap_rx: + if (!nn->q_bar) + iounmap(nn->rx_bar); +err_unmap_tx: + if (!nn->q_bar) + iounmap(nn->tx_bar); + else + iounmap(nn->q_bar); +err_netdev_free: + pci_set_drvdata(pdev, NULL); + nfp_net_netdev_free(nn); +err_ctrl_unmap: + iounmap(ctrl_bar); +err_pci_regions: + pci_release_regions(pdev); +err_pci_disable: + pci_disable_device(pdev); + return err; +} + +static void nfp_netvf_pci_remove(struct pci_dev *pdev) +{ + struct nfp_net *nn = pci_get_drvdata(pdev); + + /* Note, the order is slightly different from above as we need + * to keep the nn pointer around till we have freed everything. + */ + nfp_net_debugfs_adapter_del(nn); + + nfp_net_netdev_clean(nn->netdev); + + nfp_net_irqs_disable(nn); + + if (!nn->q_bar) { + iounmap(nn->rx_bar); + iounmap(nn->tx_bar); + } else { + iounmap(nn->q_bar); + } + iounmap(nn->ctrl_bar); + + pci_set_drvdata(pdev, NULL); + + nfp_net_netdev_free(nn); + + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static struct pci_driver nfp_netvf_pci_driver = { + .name = nfp_net_driver_name, + .id_table = nfp_netvf_pci_device_ids, + .probe = nfp_netvf_pci_probe, + .remove = nfp_netvf_pci_remove, +}; + +static int __init nfp_netvf_init(void) +{ + int err; + + pr_info("%s: NFP VF Network driver, Copyright (C) 2014-2015 Netronome Systems\n", + nfp_net_driver_name); + + nfp_net_debugfs_create(); + err = pci_register_driver(&nfp_netvf_pci_driver); + if (err) { + nfp_net_debugfs_destroy(); + return err; + } + + return 0; +} + +static void __exit nfp_netvf_exit(void) +{ + pci_unregister_driver(&nfp_netvf_pci_driver); + nfp_net_debugfs_destroy(); +} + +module_init(nfp_netvf_init); +module_exit(nfp_netvf_exit); + +MODULE_AUTHOR("Netronome Systems <oss-drivers@netronome.com>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("NFP VF network device driver"); diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index b159ef8303cc..057665180f13 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1326,7 +1326,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) /* Get platform resources */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); irq = platform_get_irq(pdev, 0); - if ((!res) || (irq < 0) || (irq >= NR_IRQS)) { + if (!res || irq < 0) { dev_err(&pdev->dev, "error getting resources.\n"); ret = -ENXIO; goto err_exit; diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index b2f8e854dfd1..264e954675d1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -3993,6 +3993,8 @@ struct public_drv_mb { #define DRV_MSG_CODE_PHY_CORE_WRITE 0x000e0000 #define DRV_MSG_CODE_SET_VERSION 0x000f0000 +#define DRV_MSG_CODE_SET_LED_MODE 0x00200000 + #define DRV_MSG_SEQ_NUMBER_MASK 0x0000ffff u32 drv_mb_param; @@ -4044,6 +4046,10 @@ struct public_drv_mb { #define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_SHIFT 8 #define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_MASK 0x0000FF00 +#define DRV_MB_PARAM_SET_LED_MODE_OPER 0x0 +#define DRV_MB_PARAM_SET_LED_MODE_ON 0x1 +#define DRV_MB_PARAM_SET_LED_MODE_OFF 0x2 + u32 fw_mb_header; #define FW_MSG_CODE_MASK 0xffff0000 #define FW_MSG_CODE_DRV_LOAD_ENGINE 0x10100000 diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 947c7af72b25..6b02e1134360 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1135,6 +1135,23 @@ static int qed_drain(struct qed_dev *cdev) return 0; } +static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *ptt; + int status = 0; + + ptt = qed_ptt_acquire(hwfn); + if (!ptt) + return -EAGAIN; + + status = qed_mcp_set_led(hwfn, ptt, mode); + + qed_ptt_release(hwfn, ptt); + + return status; +} + const struct qed_common_ops qed_common_ops_pass = { .probe = &qed_probe, .remove = &qed_remove, @@ -1155,6 +1172,7 @@ const struct qed_common_ops qed_common_ops_pass = { .update_msglvl = &qed_init_dp, .chain_alloc = &qed_chain_alloc, .chain_free = &qed_chain_free, + .set_led = &qed_set_led, }; u32 qed_get_protocol_version(enum qed_protocol protocol) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 20d048cdcb88..ba1b1f1ef789 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -858,3 +858,30 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, return 0; } + +int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + enum qed_led_mode mode) +{ + u32 resp = 0, param = 0, drv_mb_param; + int rc; + + switch (mode) { + case QED_LED_MODE_ON: + drv_mb_param = DRV_MB_PARAM_SET_LED_MODE_ON; + break; + case QED_LED_MODE_OFF: + drv_mb_param = DRV_MB_PARAM_SET_LED_MODE_OFF; + break; + case QED_LED_MODE_RESTORE: + drv_mb_param = DRV_MB_PARAM_SET_LED_MODE_OPER; + break; + default: + DP_NOTICE(p_hwfn, "Invalid LED mode %d\n", mode); + return -EINVAL; + } + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_SET_LED_MODE, + drv_mb_param, &resp, ¶m); + + return rc; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index dbaae586b4a7..506197d5c3dd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -224,6 +224,19 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_mcp_drv_version *p_ver); +/** + * @brief Set LED status + * + * @param p_hwfn + * @param p_ptt + * @param mode - LED mode + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_set_led(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_led_mode mode); + /* Using hwfn number (and not pf_num) is required since in CMT mode, * same pf_num may be used by two different hwfn * TODO - this shouldn't really be in .h file, but until all fields diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index ea00d5f3bab4..7c6caf7f6612 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -116,6 +116,7 @@ struct qede_dev { (edev)->dev_info.num_tc) struct qede_fastpath *fp_array; + u16 req_rss; u16 num_rss; u8 num_tc; #define QEDE_RSS_CNT(edev) ((edev)->num_rss) @@ -269,13 +270,13 @@ int qede_change_mtu(struct net_device *dev, int new_mtu); void qede_fill_by_demand_stats(struct qede_dev *edev); #define RX_RING_SIZE_POW 13 -#define RX_RING_SIZE BIT(RX_RING_SIZE_POW) +#define RX_RING_SIZE ((u16)BIT(RX_RING_SIZE_POW)) #define NUM_RX_BDS_MAX (RX_RING_SIZE - 1) #define NUM_RX_BDS_MIN 128 #define NUM_RX_BDS_DEF NUM_RX_BDS_MAX #define TX_RING_SIZE_POW 13 -#define TX_RING_SIZE BIT(TX_RING_SIZE_POW) +#define TX_RING_SIZE ((u16)BIT(TX_RING_SIZE_POW)) #define NUM_TX_BDS_MAX (TX_RING_SIZE - 1) #define NUM_TX_BDS_MIN 128 #define NUM_TX_BDS_DEF NUM_TX_BDS_MAX diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 3a362476a22c..e442b85c9a5e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -322,6 +322,30 @@ static void qede_set_msglevel(struct net_device *ndev, u32 level) dp_module, dp_level); } +static int qede_nway_reset(struct net_device *dev) +{ + struct qede_dev *edev = netdev_priv(dev); + struct qed_link_output current_link; + struct qed_link_params link_params; + + if (!netif_running(dev)) + return 0; + + memset(¤t_link, 0, sizeof(current_link)); + edev->ops->common->get_link(edev->cdev, ¤t_link); + if (!current_link.link_up) + return 0; + + /* Toggle the link */ + memset(&link_params, 0, sizeof(link_params)); + link_params.link_up = false; + edev->ops->common->set_link(edev->cdev, &link_params); + link_params.link_up = true; + edev->ops->common->set_link(edev->cdev, &link_params); + + return 0; +} + static u32 qede_get_link(struct net_device *dev) { struct qede_dev *edev = netdev_priv(dev); @@ -333,6 +357,106 @@ static u32 qede_get_link(struct net_device *dev) return current_link.link_up; } +static void qede_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *ering) +{ + struct qede_dev *edev = netdev_priv(dev); + + ering->rx_max_pending = NUM_RX_BDS_MAX; + ering->rx_pending = edev->q_num_rx_buffers; + ering->tx_max_pending = NUM_TX_BDS_MAX; + ering->tx_pending = edev->q_num_tx_buffers; +} + +static int qede_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *ering) +{ + struct qede_dev *edev = netdev_priv(dev); + + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "Set ring params command parameters: rx_pending = %d, tx_pending = %d\n", + ering->rx_pending, ering->tx_pending); + + /* Validate legality of configuration */ + if (ering->rx_pending > NUM_RX_BDS_MAX || + ering->rx_pending < NUM_RX_BDS_MIN || + ering->tx_pending > NUM_TX_BDS_MAX || + ering->tx_pending < NUM_TX_BDS_MIN) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "Can only support Rx Buffer size [0%08x,...,0x%08x] and Tx Buffer size [0x%08x,...,0x%08x]\n", + NUM_RX_BDS_MIN, NUM_RX_BDS_MAX, + NUM_TX_BDS_MIN, NUM_TX_BDS_MAX); + return -EINVAL; + } + + /* Change ring size and re-load */ + edev->q_num_rx_buffers = ering->rx_pending; + edev->q_num_tx_buffers = ering->tx_pending; + + if (netif_running(edev->ndev)) + qede_reload(edev, NULL, NULL); + + return 0; +} + +static void qede_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *epause) +{ + struct qede_dev *edev = netdev_priv(dev); + struct qed_link_output current_link; + + memset(¤t_link, 0, sizeof(current_link)); + edev->ops->common->get_link(edev->cdev, ¤t_link); + + if (current_link.pause_config & QED_LINK_PAUSE_AUTONEG_ENABLE) + epause->autoneg = true; + if (current_link.pause_config & QED_LINK_PAUSE_RX_ENABLE) + epause->rx_pause = true; + if (current_link.pause_config & QED_LINK_PAUSE_TX_ENABLE) + epause->tx_pause = true; + + DP_VERBOSE(edev, QED_MSG_DEBUG, + "ethtool_pauseparam: cmd %d autoneg %d rx_pause %d tx_pause %d\n", + epause->cmd, epause->autoneg, epause->rx_pause, + epause->tx_pause); +} + +static int qede_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *epause) +{ + struct qede_dev *edev = netdev_priv(dev); + struct qed_link_params params; + struct qed_link_output current_link; + + if (!edev->dev_info.common.is_mf) { + DP_INFO(edev, + "Pause parameters can not be updated in non-default mode\n"); + return -EOPNOTSUPP; + } + + memset(¤t_link, 0, sizeof(current_link)); + edev->ops->common->get_link(edev->cdev, ¤t_link); + + memset(¶ms, 0, sizeof(params)); + params.override_flags |= QED_LINK_OVERRIDE_PAUSE_CONFIG; + if (epause->autoneg) { + if (!(current_link.supported_caps & SUPPORTED_Autoneg)) { + DP_INFO(edev, "autoneg not supported\n"); + return -EINVAL; + } + params.pause_config |= QED_LINK_PAUSE_AUTONEG_ENABLE; + } + if (epause->rx_pause) + params.pause_config |= QED_LINK_PAUSE_RX_ENABLE; + if (epause->tx_pause) + params.pause_config |= QED_LINK_PAUSE_TX_ENABLE; + + params.link_up = true; + edev->ops->common->set_link(edev->cdev, ¶ms); + + return 0; +} + static void qede_update_mtu(struct qede_dev *edev, union qede_reload_args *args) { edev->ndev->mtu = args->mtu; @@ -366,17 +490,104 @@ int qede_change_mtu(struct net_device *ndev, int new_mtu) return 0; } +static void qede_get_channels(struct net_device *dev, + struct ethtool_channels *channels) +{ + struct qede_dev *edev = netdev_priv(dev); + + channels->max_combined = QEDE_MAX_RSS_CNT(edev); + channels->combined_count = QEDE_RSS_CNT(edev); +} + +static int qede_set_channels(struct net_device *dev, + struct ethtool_channels *channels) +{ + struct qede_dev *edev = netdev_priv(dev); + + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "set-channels command parameters: rx = %d, tx = %d, other = %d, combined = %d\n", + channels->rx_count, channels->tx_count, + channels->other_count, channels->combined_count); + + /* We don't support separate rx / tx, nor `other' channels. */ + if (channels->rx_count || channels->tx_count || + channels->other_count || (channels->combined_count == 0) || + (channels->combined_count > QEDE_MAX_RSS_CNT(edev))) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "command parameters not supported\n"); + return -EINVAL; + } + + /* Check if there was a change in the active parameters */ + if (channels->combined_count == QEDE_RSS_CNT(edev)) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "No change in active parameters\n"); + return 0; + } + + /* We need the number of queues to be divisible between the hwfns */ + if (channels->combined_count % edev->dev_info.common.num_hwfns) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "Number of channels must be divisable by %04x\n", + edev->dev_info.common.num_hwfns); + return -EINVAL; + } + + /* Set number of queues and reload if necessary */ + edev->req_rss = channels->combined_count; + if (netif_running(dev)) + qede_reload(edev, NULL, NULL); + + return 0; +} + +static int qede_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + struct qede_dev *edev = netdev_priv(dev); + u8 led_state = 0; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + return 1; /* cycle on/off once per second */ + + case ETHTOOL_ID_ON: + led_state = QED_LED_MODE_ON; + break; + + case ETHTOOL_ID_OFF: + led_state = QED_LED_MODE_OFF; + break; + + case ETHTOOL_ID_INACTIVE: + led_state = QED_LED_MODE_RESTORE; + break; + } + + edev->ops->common->set_led(edev->cdev, led_state); + + return 0; +} + static const struct ethtool_ops qede_ethtool_ops = { .get_settings = qede_get_settings, .set_settings = qede_set_settings, .get_drvinfo = qede_get_drvinfo, .get_msglevel = qede_get_msglevel, .set_msglevel = qede_set_msglevel, + .nway_reset = qede_nway_reset, .get_link = qede_get_link, + .get_ringparam = qede_get_ringparam, + .set_ringparam = qede_set_ringparam, + .get_pauseparam = qede_get_pauseparam, + .set_pauseparam = qede_set_pauseparam, .get_strings = qede_get_strings, + .set_phys_id = qede_set_phys_id, .get_ethtool_stats = qede_get_ethtool_stats, .get_sset_count = qede_get_sset_count, + .get_channels = qede_get_channels, + .set_channels = qede_set_channels, }; void qede_set_ethtool_ops(struct net_device *dev) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index f4657a2e730a..6237f10b5119 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1502,8 +1502,11 @@ static int qede_set_num_queues(struct qede_dev *edev) u16 rss_num; /* Setup queues according to possible resources*/ - rss_num = netif_get_num_default_rss_queues() * - edev->dev_info.common.num_hwfns; + if (edev->req_rss) + rss_num = edev->req_rss; + else + rss_num = netif_get_num_default_rss_queues() * + edev->dev_info.common.num_hwfns; rss_num = min_t(u16, QEDE_MAX_RSS_CNT(edev), rss_num); diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index f9dee7436e81..9fbe92ac225b 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -206,6 +206,7 @@ enum CCC_BIT { CCC_OPC_RESET = 0x00000000, CCC_OPC_CONFIG = 0x00000001, CCC_OPC_OPERATION = 0x00000002, + CCC_GAC = 0x00000080, CCC_DTSR = 0x00000100, CCC_CSEL = 0x00030000, CCC_CSEL_HPB = 0x00010000, diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 990dc55cdada..1cf12264861c 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -115,12 +115,15 @@ static void ravb_read_mac_address(struct net_device *ndev, const u8 *mac) if (mac) { ether_addr_copy(ndev->dev_addr, mac); } else { - ndev->dev_addr[0] = (ravb_read(ndev, MAHR) >> 24); - ndev->dev_addr[1] = (ravb_read(ndev, MAHR) >> 16) & 0xFF; - ndev->dev_addr[2] = (ravb_read(ndev, MAHR) >> 8) & 0xFF; - ndev->dev_addr[3] = (ravb_read(ndev, MAHR) >> 0) & 0xFF; - ndev->dev_addr[4] = (ravb_read(ndev, MALR) >> 8) & 0xFF; - ndev->dev_addr[5] = (ravb_read(ndev, MALR) >> 0) & 0xFF; + u32 mahr = ravb_read(ndev, MAHR); + u32 malr = ravb_read(ndev, MALR); + + ndev->dev_addr[0] = (mahr >> 24) & 0xFF; + ndev->dev_addr[1] = (mahr >> 16) & 0xFF; + ndev->dev_addr[2] = (mahr >> 8) & 0xFF; + ndev->dev_addr[3] = (mahr >> 0) & 0xFF; + ndev->dev_addr[4] = (malr >> 8) & 0xFF; + ndev->dev_addr[5] = (malr >> 0) & 0xFF; } } @@ -1227,11 +1230,12 @@ static int ravb_open(struct net_device *ndev) /* Device init */ error = ravb_dmac_init(ndev); if (error) - goto out_free_irq; + goto out_free_irq2; ravb_emac_init(ndev); /* Initialise PTP Clock driver */ - ravb_ptp_init(ndev, priv->pdev); + if (priv->chip_id == RCAR_GEN2) + ravb_ptp_init(ndev, priv->pdev); netif_tx_start_all_queues(ndev); @@ -1244,10 +1248,13 @@ static int ravb_open(struct net_device *ndev) out_ptp_stop: /* Stop PTP Clock driver */ - ravb_ptp_stop(ndev); + if (priv->chip_id == RCAR_GEN2) + ravb_ptp_stop(ndev); +out_free_irq2: + if (priv->chip_id == RCAR_GEN3) + free_irq(priv->emac_irq, ndev); out_free_irq: free_irq(ndev->irq, ndev); - free_irq(priv->emac_irq, ndev); out_napi_off: napi_disable(&priv->napi[RAVB_NC]); napi_disable(&priv->napi[RAVB_BE]); @@ -1476,7 +1483,8 @@ static int ravb_close(struct net_device *ndev) ravb_write(ndev, 0, TIC); /* Stop PTP Clock driver */ - ravb_ptp_stop(ndev); + if (priv->chip_id == RCAR_GEN2) + ravb_ptp_stop(ndev); /* Set the config mode to stop the AVB-DMAC's processes */ if (ravb_stop_dma(ndev) < 0) @@ -1656,7 +1664,9 @@ static int ravb_mdio_release(struct ravb_private *priv) static const struct of_device_id ravb_match_table[] = { { .compatible = "renesas,etheravb-r8a7790", .data = (void *)RCAR_GEN2 }, { .compatible = "renesas,etheravb-r8a7794", .data = (void *)RCAR_GEN2 }, + { .compatible = "renesas,etheravb-rcar-gen2", .data = (void *)RCAR_GEN2 }, { .compatible = "renesas,etheravb-r8a7795", .data = (void *)RCAR_GEN3 }, + { .compatible = "renesas,etheravb-rcar-gen3", .data = (void *)RCAR_GEN3 }, { } }; MODULE_DEVICE_TABLE(of, ravb_match_table); @@ -1781,8 +1791,16 @@ static int ravb_probe(struct platform_device *pdev) ndev->ethtool_ops = &ravb_ethtool_ops; /* Set AVB config mode */ - ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | CCC_OPC_CONFIG, - CCC); + if (chip_id == RCAR_GEN2) { + ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | + CCC_OPC_CONFIG, CCC); + /* Set CSEL value */ + ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) | + CCC_CSEL_HPB, CCC); + } else { + ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | + CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB, CCC); + } /* Set CSEL value */ ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) | CCC_CSEL_HPB, @@ -1814,6 +1832,10 @@ static int ravb_probe(struct platform_device *pdev) /* Initialise HW timestamp list */ INIT_LIST_HEAD(&priv->ts_skb_list); + /* Initialise PTP Clock driver */ + if (chip_id != RCAR_GEN2) + ravb_ptp_init(ndev, pdev); + /* Debug message level */ priv->msg_enable = RAVB_DEF_MSG_ENABLE; @@ -1855,6 +1877,10 @@ out_napi_del: out_dma_free: dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, priv->desc_bat_dma); + + /* Stop PTP Clock driver */ + if (chip_id != RCAR_GEN2) + ravb_ptp_stop(ndev); out_release: if (ndev) free_netdev(ndev); @@ -1869,6 +1895,10 @@ static int ravb_remove(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct ravb_private *priv = netdev_priv(ndev); + /* Stop PTP Clock driver */ + if (priv->chip_id != RCAR_GEN2) + ravb_ptp_stop(ndev); + dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, priv->desc_bat_dma); /* Set reset mode */ diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index e7bab7909ed9..67cd24312c11 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -449,6 +449,109 @@ static void sh_eth_set_duplex(struct net_device *ndev) sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_DM, ECMR); } +static void sh_eth_chip_reset(struct net_device *ndev) +{ + struct sh_eth_private *mdp = netdev_priv(ndev); + + /* reset device */ + sh_eth_tsu_write(mdp, ARSTR_ARSTR, ARSTR); + mdelay(1); +} + +static void sh_eth_set_rate_gether(struct net_device *ndev) +{ + struct sh_eth_private *mdp = netdev_priv(ndev); + + switch (mdp->speed) { + case 10: /* 10BASE */ + sh_eth_write(ndev, GECMR_10, GECMR); + break; + case 100:/* 100BASE */ + sh_eth_write(ndev, GECMR_100, GECMR); + break; + case 1000: /* 1000BASE */ + sh_eth_write(ndev, GECMR_1000, GECMR); + break; + default: + break; + } +} + +#ifdef CONFIG_OF +/* R7S72100 */ +static struct sh_eth_cpu_data r7s72100_data = { + .chip_reset = sh_eth_chip_reset, + .set_duplex = sh_eth_set_duplex, + + .register_type = SH_ETH_REG_FAST_RZ, + + .ecsr_value = ECSR_ICD, + .ecsipr_value = ECSIPR_ICDIP, + .eesipr_value = 0xff7f009f, + + .tx_check = EESR_TC1 | EESR_FTC, + .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT | + EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE | + EESR_TDE | EESR_ECI, + .fdr_value = 0x0000070f, + + .no_psr = 1, + .apr = 1, + .mpr = 1, + .tpauser = 1, + .hw_swap = 1, + .rpadir = 1, + .rpadir_value = 2 << 16, + .no_trimd = 1, + .no_ade = 1, + .hw_crc = 1, + .tsu = 1, + .shift_rd0 = 1, +}; + +static void sh_eth_chip_reset_r8a7740(struct net_device *ndev) +{ + struct sh_eth_private *mdp = netdev_priv(ndev); + + /* reset device */ + sh_eth_tsu_write(mdp, ARSTR_ARSTR, ARSTR); + mdelay(1); + + sh_eth_select_mii(ndev); +} + +/* R8A7740 */ +static struct sh_eth_cpu_data r8a7740_data = { + .chip_reset = sh_eth_chip_reset_r8a7740, + .set_duplex = sh_eth_set_duplex, + .set_rate = sh_eth_set_rate_gether, + + .register_type = SH_ETH_REG_GIGABIT, + + .ecsr_value = ECSR_ICD | ECSR_MPD, + .ecsipr_value = ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP, + .eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff, + + .tx_check = EESR_TC1 | EESR_FTC, + .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT | + EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE | + EESR_TDE | EESR_ECI, + .fdr_value = 0x0000070f, + + .apr = 1, + .mpr = 1, + .tpauser = 1, + .bculr = 1, + .hw_swap = 1, + .rpadir = 1, + .rpadir_value = 2 << 16, + .no_trimd = 1, + .no_ade = 1, + .tsu = 1, + .select_mii = 1, + .shift_rd0 = 1, +}; + /* There is CPU dependent code */ static void sh_eth_set_rate_r8a777x(struct net_device *ndev) { @@ -514,6 +617,7 @@ static struct sh_eth_cpu_data r8a779x_data = { .hw_swap = 1, .rmiimode = 1, }; +#endif /* CONFIG_OF */ static void sh_eth_set_rate_sh7724(struct net_device *ndev) { @@ -671,34 +775,6 @@ static struct sh_eth_cpu_data sh7757_data_giga = { .tsu = 1, }; -static void sh_eth_chip_reset(struct net_device *ndev) -{ - struct sh_eth_private *mdp = netdev_priv(ndev); - - /* reset device */ - sh_eth_tsu_write(mdp, ARSTR_ARSTR, ARSTR); - mdelay(1); -} - -static void sh_eth_set_rate_gether(struct net_device *ndev) -{ - struct sh_eth_private *mdp = netdev_priv(ndev); - - switch (mdp->speed) { - case 10: /* 10BASE */ - sh_eth_write(ndev, GECMR_10, GECMR); - break; - case 100:/* 100BASE */ - sh_eth_write(ndev, GECMR_100, GECMR); - break; - case 1000: /* 1000BASE */ - sh_eth_write(ndev, GECMR_1000, GECMR); - break; - default: - break; - } -} - /* SH7734 */ static struct sh_eth_cpu_data sh7734_data = { .chip_reset = sh_eth_chip_reset, @@ -756,80 +832,6 @@ static struct sh_eth_cpu_data sh7763_data = { .irq_flags = IRQF_SHARED, }; -static void sh_eth_chip_reset_r8a7740(struct net_device *ndev) -{ - struct sh_eth_private *mdp = netdev_priv(ndev); - - /* reset device */ - sh_eth_tsu_write(mdp, ARSTR_ARSTR, ARSTR); - mdelay(1); - - sh_eth_select_mii(ndev); -} - -/* R8A7740 */ -static struct sh_eth_cpu_data r8a7740_data = { - .chip_reset = sh_eth_chip_reset_r8a7740, - .set_duplex = sh_eth_set_duplex, - .set_rate = sh_eth_set_rate_gether, - - .register_type = SH_ETH_REG_GIGABIT, - - .ecsr_value = ECSR_ICD | ECSR_MPD, - .ecsipr_value = ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP, - .eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff, - - .tx_check = EESR_TC1 | EESR_FTC, - .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT | - EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE | - EESR_TDE | EESR_ECI, - .fdr_value = 0x0000070f, - - .apr = 1, - .mpr = 1, - .tpauser = 1, - .bculr = 1, - .hw_swap = 1, - .rpadir = 1, - .rpadir_value = 2 << 16, - .no_trimd = 1, - .no_ade = 1, - .tsu = 1, - .select_mii = 1, - .shift_rd0 = 1, -}; - -/* R7S72100 */ -static struct sh_eth_cpu_data r7s72100_data = { - .chip_reset = sh_eth_chip_reset, - .set_duplex = sh_eth_set_duplex, - - .register_type = SH_ETH_REG_FAST_RZ, - - .ecsr_value = ECSR_ICD, - .ecsipr_value = ECSIPR_ICDIP, - .eesipr_value = 0xff7f009f, - - .tx_check = EESR_TC1 | EESR_FTC, - .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT | - EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE | - EESR_TDE | EESR_ECI, - .fdr_value = 0x0000070f, - - .no_psr = 1, - .apr = 1, - .mpr = 1, - .tpauser = 1, - .hw_swap = 1, - .rpadir = 1, - .rpadir_value = 2 << 16, - .no_trimd = 1, - .no_ade = 1, - .hw_crc = 1, - .tsu = 1, - .shift_rd0 = 1, -}; - static struct sh_eth_cpu_data sh7619_data = { .register_type = SH_ETH_REG_FAST_SH3_SH2, @@ -987,12 +989,15 @@ static void read_mac_address(struct net_device *ndev, unsigned char *mac) if (mac[0] || mac[1] || mac[2] || mac[3] || mac[4] || mac[5]) { memcpy(ndev->dev_addr, mac, ETH_ALEN); } else { - ndev->dev_addr[0] = (sh_eth_read(ndev, MAHR) >> 24); - ndev->dev_addr[1] = (sh_eth_read(ndev, MAHR) >> 16) & 0xFF; - ndev->dev_addr[2] = (sh_eth_read(ndev, MAHR) >> 8) & 0xFF; - ndev->dev_addr[3] = (sh_eth_read(ndev, MAHR) & 0xFF); - ndev->dev_addr[4] = (sh_eth_read(ndev, MALR) >> 8) & 0xFF; - ndev->dev_addr[5] = (sh_eth_read(ndev, MALR) & 0xFF); + u32 mahr = sh_eth_read(ndev, MAHR); + u32 malr = sh_eth_read(ndev, MALR); + + ndev->dev_addr[0] = (mahr >> 24) & 0xFF; + ndev->dev_addr[1] = (mahr >> 16) & 0xFF; + ndev->dev_addr[2] = (mahr >> 8) & 0xFF; + ndev->dev_addr[3] = (mahr >> 0) & 0xFF; + ndev->dev_addr[4] = (malr >> 8) & 0xFF; + ndev->dev_addr[5] = (malr >> 0) & 0xFF; } } @@ -1008,56 +1013,34 @@ struct bb_info { void (*set_gate)(void *addr); struct mdiobb_ctrl ctrl; void *addr; - u32 mmd_msk;/* MMD */ - u32 mdo_msk; - u32 mdi_msk; - u32 mdc_msk; }; -/* PHY bit set */ -static void bb_set(void *addr, u32 msk) +static void sh_mdio_ctrl(struct mdiobb_ctrl *ctrl, u32 mask, int set) { - iowrite32(ioread32(addr) | msk, addr); -} + struct bb_info *bitbang = container_of(ctrl, struct bb_info, ctrl); + u32 pir; -/* PHY bit clear */ -static void bb_clr(void *addr, u32 msk) -{ - iowrite32((ioread32(addr) & ~msk), addr); -} + if (bitbang->set_gate) + bitbang->set_gate(bitbang->addr); -/* PHY bit read */ -static int bb_read(void *addr, u32 msk) -{ - return (ioread32(addr) & msk) != 0; + pir = ioread32(bitbang->addr); + if (set) + pir |= mask; + else + pir &= ~mask; + iowrite32(pir, bitbang->addr); } /* Data I/O pin control */ static void sh_mmd_ctrl(struct mdiobb_ctrl *ctrl, int bit) { - struct bb_info *bitbang = container_of(ctrl, struct bb_info, ctrl); - - if (bitbang->set_gate) - bitbang->set_gate(bitbang->addr); - - if (bit) - bb_set(bitbang->addr, bitbang->mmd_msk); - else - bb_clr(bitbang->addr, bitbang->mmd_msk); + sh_mdio_ctrl(ctrl, PIR_MMD, bit); } /* Set bit data*/ static void sh_set_mdio(struct mdiobb_ctrl *ctrl, int bit) { - struct bb_info *bitbang = container_of(ctrl, struct bb_info, ctrl); - - if (bitbang->set_gate) - bitbang->set_gate(bitbang->addr); - - if (bit) - bb_set(bitbang->addr, bitbang->mdo_msk); - else - bb_clr(bitbang->addr, bitbang->mdo_msk); + sh_mdio_ctrl(ctrl, PIR_MDO, bit); } /* Get bit data*/ @@ -1068,21 +1051,13 @@ static int sh_get_mdio(struct mdiobb_ctrl *ctrl) if (bitbang->set_gate) bitbang->set_gate(bitbang->addr); - return bb_read(bitbang->addr, bitbang->mdi_msk); + return (ioread32(bitbang->addr) & PIR_MDI) != 0; } /* MDC pin control */ static void sh_mdc_ctrl(struct mdiobb_ctrl *ctrl, int bit) { - struct bb_info *bitbang = container_of(ctrl, struct bb_info, ctrl); - - if (bitbang->set_gate) - bitbang->set_gate(bitbang->addr); - - if (bit) - bb_set(bitbang->addr, bitbang->mdc_msk); - else - bb_clr(bitbang->addr, bitbang->mdc_msk); + sh_mdio_ctrl(ctrl, PIR_MDC, bit); } /* mdio bus control struct */ @@ -2894,10 +2869,6 @@ static int sh_mdio_init(struct sh_eth_private *mdp, /* bitbang init */ bitbang->addr = mdp->addr + mdp->reg_offset[PIR]; bitbang->set_gate = pd->set_mdio_gate; - bitbang->mdi_msk = PIR_MDI; - bitbang->mdo_msk = PIR_MDO; - bitbang->mmd_msk = PIR_MMD; - bitbang->mdc_msk = PIR_MDC; bitbang->ctrl.ops = &bb_ops; /* MII controller setting */ @@ -3277,13 +3248,6 @@ static struct platform_device_id sh_eth_id_table[] = { { "sh7757-ether", (kernel_ulong_t)&sh7757_data }, { "sh7757-gether", (kernel_ulong_t)&sh7757_data_giga }, { "sh7763-gether", (kernel_ulong_t)&sh7763_data }, - { "r7s72100-ether", (kernel_ulong_t)&r7s72100_data }, - { "r8a7740-gether", (kernel_ulong_t)&r8a7740_data }, - { "r8a777x-ether", (kernel_ulong_t)&r8a777x_data }, - { "r8a7790-ether", (kernel_ulong_t)&r8a779x_data }, - { "r8a7791-ether", (kernel_ulong_t)&r8a779x_data }, - { "r8a7793-ether", (kernel_ulong_t)&r8a779x_data }, - { "r8a7794-ether", (kernel_ulong_t)&r8a779x_data }, { } }; MODULE_DEVICE_TABLE(platform, sh_eth_id_table); diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index bc6d21b471be..c4a0e8a967dd 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -181,13 +181,6 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) MCDI_WORD(outbuf, GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID); if (!(nic_data->datapath_caps & - (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN))) { - netif_err(efx, drv, efx->net_dev, - "current firmware does not support TSO\n"); - return -ENODEV; - } - - if (!(nic_data->datapath_caps & (1 << MC_CMD_GET_CAPABILITIES_OUT_RX_PREFIX_LEN_14_LBN))) { netif_err(efx, probe, efx->net_dev, "current firmware does not support an RX prefix\n"); @@ -1797,6 +1790,12 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload, ESF_DZ_TX_OPTION_IP_CSUM, csum_offload); tx_queue->write_count = 1; + + if (nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) { + tx_queue->tso_version = 1; + } + wmb(); efx_ef10_push_tx_desc(tx_queue, txd); @@ -2375,8 +2374,19 @@ static int efx_ef10_ev_init(struct efx_channel *channel) 1 << MC_CMD_WORKAROUND_EXT_OUT_FLR_DONE_LBN) { netif_info(efx, drv, efx->net_dev, "other functions on NIC have been reset\n"); - /* MC's boot count has incremented */ - ++nic_data->warm_boot_count; + + /* With MCFW v4.6.x and earlier, the + * boot count will have incremented, + * so re-read the warm_boot_count + * value now to ensure this function + * doesn't think it has changed next + * time it checks. + */ + rc = efx_ef10_get_warm_boot_count(efx); + if (rc >= 0) { + nic_data->warm_boot_count = rc; + rc = 0; + } } nic_data->workaround_26807 = true; } else if (rc == -EPERM) { diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 4e82bcfbe3e0..b405349a570c 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2784,6 +2784,12 @@ static const struct pci_device_id efx_pci_table[] = { .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923), /* SFC9140 PF */ .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, + {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1923), /* SFC9140 VF */ + .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, + {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0a03), /* SFC9220 PF */ + .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, + {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1a03), /* SFC9220 VF */ + .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, {0} /* end of list */ }; diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index a8ddd122f685..38c422321cda 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -182,6 +182,7 @@ struct efx_tx_buffer { * * @efx: The associated Efx NIC * @queue: DMA queue number + * @tso_version: Version of TSO in use for this queue. * @channel: The associated channel * @core_txq: The networking core TX queue structure * @buffer: The software buffer ring @@ -228,6 +229,7 @@ struct efx_tx_queue { /* Members which don't change on the fast path */ struct efx_nic *efx ____cacheline_aligned_in_smp; unsigned queue; + unsigned int tso_version; struct efx_channel *channel; struct netdev_queue *core_txq; struct efx_tx_buffer *buffer; @@ -1502,8 +1504,9 @@ static inline struct efx_rx_buffer *efx_rx_buffer(struct efx_rx_queue *rx_queue, * same cycle, the XMAC can miss the IPG altogether. We work around * this by adding a further 16 bytes. */ +#define EFX_FRAME_PAD 16 #define EFX_MAX_FRAME_LEN(mtu) \ - ((((mtu) + ETH_HLEN + VLAN_HLEN + 4/* FCS */ + 7) & ~7) + 16) + (ALIGN(((mtu) + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN + EFX_FRAME_PAD), 8)) static inline bool efx_xmit_with_hwtstamp(struct sk_buff *skb) { diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 67f6afaa022f..f7a0ec1bca97 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -1010,13 +1010,17 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, /* Parse the SKB header and initialise state. */ static int tso_start(struct tso_state *st, struct efx_nic *efx, + struct efx_tx_queue *tx_queue, const struct sk_buff *skb) { - bool use_opt_desc = efx_nic_rev(efx) >= EFX_REV_HUNT_A0; struct device *dma_dev = &efx->pci_dev->dev; unsigned int header_len, in_len; + bool use_opt_desc = false; dma_addr_t dma_addr; + if (tx_queue->tso_version == 1) + use_opt_desc = true; + st->ip_off = skb_network_header(skb) - skb->data; st->tcp_off = skb_transport_header(skb) - skb->data; header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u); @@ -1271,7 +1275,7 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, /* Find the packet protocol and sanity-check it */ state.protocol = efx_tso_check_protocol(skb); - rc = tso_start(&state, efx, skb); + rc = tso_start(&state, efx, tx_queue, skb); if (rc) goto mem_err; diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 623c6ed8764a..f4518bc2cd28 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -137,6 +137,31 @@ struct stmmac_extra_stats { unsigned long pcs_link; unsigned long pcs_duplex; unsigned long pcs_speed; + /* debug register */ + unsigned long mtl_tx_status_fifo_full; + unsigned long mtl_tx_fifo_not_empty; + unsigned long mmtl_fifo_ctrl; + unsigned long mtl_tx_fifo_read_ctrl_write; + unsigned long mtl_tx_fifo_read_ctrl_wait; + unsigned long mtl_tx_fifo_read_ctrl_read; + unsigned long mtl_tx_fifo_read_ctrl_idle; + unsigned long mac_tx_in_pause; + unsigned long mac_tx_frame_ctrl_xfer; + unsigned long mac_tx_frame_ctrl_idle; + unsigned long mac_tx_frame_ctrl_wait; + unsigned long mac_tx_frame_ctrl_pause; + unsigned long mac_gmii_tx_proto_engine; + unsigned long mtl_rx_fifo_fill_level_full; + unsigned long mtl_rx_fifo_fill_above_thresh; + unsigned long mtl_rx_fifo_fill_below_thresh; + unsigned long mtl_rx_fifo_fill_level_empty; + unsigned long mtl_rx_fifo_read_ctrl_flush; + unsigned long mtl_rx_fifo_read_ctrl_read_data; + unsigned long mtl_rx_fifo_read_ctrl_status; + unsigned long mtl_rx_fifo_read_ctrl_idle; + unsigned long mtl_rx_fifo_ctrl_active; + unsigned long mac_rx_frame_ctrl_fifo; + unsigned long mac_gmii_rx_proto_engine; }; /* CSR Frequency Access Defines*/ @@ -408,6 +433,7 @@ struct stmmac_ops { void (*set_eee_pls)(struct mac_device_info *hw, int link); void (*ctrl_ane)(struct mac_device_info *hw, bool restart); void (*get_adv)(struct mac_device_info *hw, struct rgmii_adv *adv); + void (*debug)(void __iomem *ioaddr, struct stmmac_extra_stats *x); }; /* PTP and HW Timer helpers */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 82de68b1a452..36d3355f2fb0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -198,19 +198,19 @@ static int ipq806x_gmac_set_speed(struct ipq806x_gmac *gmac, unsigned int speed) return 0; } -static void *ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac) +static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac) { struct device *dev = &gmac->pdev->dev; gmac->phy_mode = of_get_phy_mode(dev->of_node); if (gmac->phy_mode < 0) { dev_err(dev, "missing phy mode property\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } if (of_property_read_u32(dev->of_node, "qcom,id", &gmac->id) < 0) { dev_err(dev, "missing qcom id property\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } /* The GMACs are called 1 to 4 in the documentation, but to simplify the @@ -219,13 +219,13 @@ static void *ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac) */ if (gmac->id < 0 || gmac->id > 3) { dev_err(dev, "invalid gmac id\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } gmac->core_clk = devm_clk_get(dev, "stmmaceth"); if (IS_ERR(gmac->core_clk)) { dev_err(dev, "missing stmmaceth clk property\n"); - return gmac->core_clk; + return PTR_ERR(gmac->core_clk); } clk_set_rate(gmac->core_clk, 266000000); @@ -234,18 +234,16 @@ static void *ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac) "qcom,nss-common"); if (IS_ERR(gmac->nss_common)) { dev_err(dev, "missing nss-common node\n"); - return gmac->nss_common; + return PTR_ERR(gmac->nss_common); } /* Setup the register map for the qsgmii csr registers */ gmac->qsgmii_csr = syscon_regmap_lookup_by_phandle(dev->of_node, "qcom,qsgmii-csr"); - if (IS_ERR(gmac->qsgmii_csr)) { + if (IS_ERR(gmac->qsgmii_csr)) dev_err(dev, "missing qsgmii-csr node\n"); - return gmac->qsgmii_csr; - } - return NULL; + return PTR_ERR_OR_ZERO(gmac->qsgmii_csr); } static void ipq806x_gmac_fix_mac_speed(void *priv, unsigned int speed) @@ -262,7 +260,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct ipq806x_gmac *gmac; int val; - void *err; + int err; val = stmmac_get_platform_resources(pdev, &stmmac_res); if (val) @@ -279,9 +277,9 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) gmac->pdev = pdev; err = ipq806x_gmac_of_parse(gmac); - if (IS_ERR(err)) { + if (err) { dev_err(dev, "device tree parsing error\n"); - return PTR_ERR(err); + return err; } regmap_write(gmac->qsgmii_csr, QSGMII_PCS_CAL_LCKDT_CTL, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index 7f6f4a4fcc70..58c05acc2aab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -299,16 +299,17 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, if (IS_PHY_IF_MODE_GBIT(dwmac->interface)) { const char *rs; + dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN; + err = of_property_read_string(np, "st,tx-retime-src", &rs); if (err < 0) { dev_warn(dev, "Use internal clock source\n"); - dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN; - } else if (!strcasecmp(rs, "clk_125")) { - dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125; - } else if (!strcasecmp(rs, "txclk")) { - dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK; + } else { + if (!strcasecmp(rs, "clk_125")) + dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125; + else if (!strcasecmp(rs, "txclk")) + dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK; } - dwmac->speed = SPEED_1000; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index b3fe0575ff6b..8831a053ac13 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -34,6 +34,7 @@ #define GMAC_FLOW_CTRL 0x00000018 /* Flow Control */ #define GMAC_VLAN_TAG 0x0000001c /* VLAN Tag */ #define GMAC_VERSION 0x00000020 /* GMAC CORE Version */ +#define GMAC_DEBUG 0x00000024 /* GMAC debug register */ #define GMAC_WAKEUP_FILTER 0x00000028 /* Wake-up Frame Filter */ #define GMAC_INT_STATUS 0x00000038 /* interrupt status register */ @@ -177,6 +178,47 @@ enum inter_frame_gap { #define GMAC_FLOW_CTRL_TFE 0x00000002 /* Tx Flow Control Enable */ #define GMAC_FLOW_CTRL_FCB_BPA 0x00000001 /* Flow Control Busy ... */ +/* DEBUG Register defines */ +/* MTL TxStatus FIFO */ +#define GMAC_DEBUG_TXSTSFSTS BIT(25) /* MTL TxStatus FIFO Full Status */ +#define GMAC_DEBUG_TXFSTS BIT(24) /* MTL Tx FIFO Not Empty Status */ +#define GMAC_DEBUG_TWCSTS BIT(22) /* MTL Tx FIFO Write Controller */ +/* MTL Tx FIFO Read Controller Status */ +#define GMAC_DEBUG_TRCSTS_MASK GENMASK(21, 20) +#define GMAC_DEBUG_TRCSTS_SHIFT 20 +#define GMAC_DEBUG_TRCSTS_IDLE 0 +#define GMAC_DEBUG_TRCSTS_READ 1 +#define GMAC_DEBUG_TRCSTS_TXW 2 +#define GMAC_DEBUG_TRCSTS_WRITE 3 +#define GMAC_DEBUG_TXPAUSED BIT(19) /* MAC Transmitter in PAUSE */ +/* MAC Transmit Frame Controller Status */ +#define GMAC_DEBUG_TFCSTS_MASK GENMASK(18, 17) +#define GMAC_DEBUG_TFCSTS_SHIFT 17 +#define GMAC_DEBUG_TFCSTS_IDLE 0 +#define GMAC_DEBUG_TFCSTS_WAIT 1 +#define GMAC_DEBUG_TFCSTS_GEN_PAUSE 2 +#define GMAC_DEBUG_TFCSTS_XFER 3 +/* MAC GMII or MII Transmit Protocol Engine Status */ +#define GMAC_DEBUG_TPESTS BIT(16) +#define GMAC_DEBUG_RXFSTS_MASK GENMASK(9, 8) /* MTL Rx FIFO Fill-level */ +#define GMAC_DEBUG_RXFSTS_SHIFT 8 +#define GMAC_DEBUG_RXFSTS_EMPTY 0 +#define GMAC_DEBUG_RXFSTS_BT 1 +#define GMAC_DEBUG_RXFSTS_AT 2 +#define GMAC_DEBUG_RXFSTS_FULL 3 +#define GMAC_DEBUG_RRCSTS_MASK GENMASK(6, 5) /* MTL Rx FIFO Read Controller */ +#define GMAC_DEBUG_RRCSTS_SHIFT 5 +#define GMAC_DEBUG_RRCSTS_IDLE 0 +#define GMAC_DEBUG_RRCSTS_RDATA 1 +#define GMAC_DEBUG_RRCSTS_RSTAT 2 +#define GMAC_DEBUG_RRCSTS_FLUSH 3 +#define GMAC_DEBUG_RWCSTS BIT(4) /* MTL Rx FIFO Write Controller Active */ +/* MAC Receive Frame Controller FIFO Status */ +#define GMAC_DEBUG_RFCFCSTS_MASK GENMASK(2, 1) +#define GMAC_DEBUG_RFCFCSTS_SHIFT 1 +/* MAC GMII or MII Receive Protocol Engine Status */ +#define GMAC_DEBUG_RPESTS BIT(0) + /*--- DMA BLOCK defines ---*/ /* DMA Bus Mode register defines */ #define DMA_BUS_MODE_SFT_RESET 0x00000001 /* Software Reset */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 371a669d69fd..c2941172f6d1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -397,6 +397,80 @@ static void dwmac1000_get_adv(struct mac_device_info *hw, struct rgmii_adv *adv) adv->lp_pause = (value & GMAC_ANE_PSE) >> GMAC_ANE_PSE_SHIFT; } +static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x) +{ + u32 value = readl(ioaddr + GMAC_DEBUG); + + if (value & GMAC_DEBUG_TXSTSFSTS) + x->mtl_tx_status_fifo_full++; + if (value & GMAC_DEBUG_TXFSTS) + x->mtl_tx_fifo_not_empty++; + if (value & GMAC_DEBUG_TWCSTS) + x->mmtl_fifo_ctrl++; + if (value & GMAC_DEBUG_TRCSTS_MASK) { + u32 trcsts = (value & GMAC_DEBUG_TRCSTS_MASK) + >> GMAC_DEBUG_TRCSTS_SHIFT; + if (trcsts == GMAC_DEBUG_TRCSTS_WRITE) + x->mtl_tx_fifo_read_ctrl_write++; + else if (trcsts == GMAC_DEBUG_TRCSTS_TXW) + x->mtl_tx_fifo_read_ctrl_wait++; + else if (trcsts == GMAC_DEBUG_TRCSTS_READ) + x->mtl_tx_fifo_read_ctrl_read++; + else + x->mtl_tx_fifo_read_ctrl_idle++; + } + if (value & GMAC_DEBUG_TXPAUSED) + x->mac_tx_in_pause++; + if (value & GMAC_DEBUG_TFCSTS_MASK) { + u32 tfcsts = (value & GMAC_DEBUG_TFCSTS_MASK) + >> GMAC_DEBUG_TFCSTS_SHIFT; + + if (tfcsts == GMAC_DEBUG_TFCSTS_XFER) + x->mac_tx_frame_ctrl_xfer++; + else if (tfcsts == GMAC_DEBUG_TFCSTS_GEN_PAUSE) + x->mac_tx_frame_ctrl_pause++; + else if (tfcsts == GMAC_DEBUG_TFCSTS_WAIT) + x->mac_tx_frame_ctrl_wait++; + else + x->mac_tx_frame_ctrl_idle++; + } + if (value & GMAC_DEBUG_TPESTS) + x->mac_gmii_tx_proto_engine++; + if (value & GMAC_DEBUG_RXFSTS_MASK) { + u32 rxfsts = (value & GMAC_DEBUG_RXFSTS_MASK) + >> GMAC_DEBUG_RRCSTS_SHIFT; + + if (rxfsts == GMAC_DEBUG_RXFSTS_FULL) + x->mtl_rx_fifo_fill_level_full++; + else if (rxfsts == GMAC_DEBUG_RXFSTS_AT) + x->mtl_rx_fifo_fill_above_thresh++; + else if (rxfsts == GMAC_DEBUG_RXFSTS_BT) + x->mtl_rx_fifo_fill_below_thresh++; + else + x->mtl_rx_fifo_fill_level_empty++; + } + if (value & GMAC_DEBUG_RRCSTS_MASK) { + u32 rrcsts = (value & GMAC_DEBUG_RRCSTS_MASK) >> + GMAC_DEBUG_RRCSTS_SHIFT; + + if (rrcsts == GMAC_DEBUG_RRCSTS_FLUSH) + x->mtl_rx_fifo_read_ctrl_flush++; + else if (rrcsts == GMAC_DEBUG_RRCSTS_RSTAT) + x->mtl_rx_fifo_read_ctrl_read_data++; + else if (rrcsts == GMAC_DEBUG_RRCSTS_RDATA) + x->mtl_rx_fifo_read_ctrl_status++; + else + x->mtl_rx_fifo_read_ctrl_idle++; + } + if (value & GMAC_DEBUG_RWCSTS) + x->mtl_rx_fifo_ctrl_active++; + if (value & GMAC_DEBUG_RFCFCSTS_MASK) + x->mac_rx_frame_ctrl_fifo = (value & GMAC_DEBUG_RFCFCSTS_MASK) + >> GMAC_DEBUG_RFCFCSTS_SHIFT; + if (value & GMAC_DEBUG_RPESTS) + x->mac_gmii_rx_proto_engine++; +} + static const struct stmmac_ops dwmac1000_ops = { .core_init = dwmac1000_core_init, .rx_ipc = dwmac1000_rx_ipc_enable, @@ -413,6 +487,7 @@ static const struct stmmac_ops dwmac1000_ops = { .set_eee_pls = dwmac1000_set_eee_pls, .ctrl_ane = dwmac1000_ctrl_ane, .get_adv = dwmac1000_get_adv, + .debug = dwmac1000_debug, }; struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 2e51b816a7e8..4c6486cc80fb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -136,6 +136,31 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = { STMMAC_STAT(irq_pcs_ane_n), STMMAC_STAT(irq_pcs_link_n), STMMAC_STAT(irq_rgmii_n), + /* DEBUG */ + STMMAC_STAT(mtl_tx_status_fifo_full), + STMMAC_STAT(mtl_tx_fifo_not_empty), + STMMAC_STAT(mmtl_fifo_ctrl), + STMMAC_STAT(mtl_tx_fifo_read_ctrl_write), + STMMAC_STAT(mtl_tx_fifo_read_ctrl_wait), + STMMAC_STAT(mtl_tx_fifo_read_ctrl_read), + STMMAC_STAT(mtl_tx_fifo_read_ctrl_idle), + STMMAC_STAT(mac_tx_in_pause), + STMMAC_STAT(mac_tx_frame_ctrl_xfer), + STMMAC_STAT(mac_tx_frame_ctrl_idle), + STMMAC_STAT(mac_tx_frame_ctrl_wait), + STMMAC_STAT(mac_tx_frame_ctrl_pause), + STMMAC_STAT(mac_gmii_tx_proto_engine), + STMMAC_STAT(mtl_rx_fifo_fill_level_full), + STMMAC_STAT(mtl_rx_fifo_fill_above_thresh), + STMMAC_STAT(mtl_rx_fifo_fill_below_thresh), + STMMAC_STAT(mtl_rx_fifo_fill_level_empty), + STMMAC_STAT(mtl_rx_fifo_read_ctrl_flush), + STMMAC_STAT(mtl_rx_fifo_read_ctrl_read_data), + STMMAC_STAT(mtl_rx_fifo_read_ctrl_status), + STMMAC_STAT(mtl_rx_fifo_read_ctrl_idle), + STMMAC_STAT(mtl_rx_fifo_ctrl_active), + STMMAC_STAT(mac_rx_frame_ctrl_fifo), + STMMAC_STAT(mac_gmii_rx_proto_engine), }; #define STMMAC_STATS_LEN ARRAY_SIZE(stmmac_gstrings_stats) @@ -497,6 +522,11 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, if (val) priv->xstats.phy_eee_wakeup_error_n = val; } + + if ((priv->hw->mac->debug) && + (priv->synopsys_id >= DWMAC_CORE_3_50)) + priv->hw->mac->debug(priv->ioaddr, + (void *)&priv->xstats); } for (i = 0; i < STMMAC_STATS_LEN; i++) { char *p = (char *)priv + stmmac_gstrings_stats[i].stat_offset; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 64d8aa4e0cad..3c6549aee11d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -185,7 +185,7 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv) priv->clk_csr = STMMAC_CSR_100_150M; else if ((clk_rate >= CSR_F_150M) && (clk_rate < CSR_F_250M)) priv->clk_csr = STMMAC_CSR_150_250M; - else if ((clk_rate >= CSR_F_250M) && (clk_rate < CSR_F_300M)) + else if ((clk_rate >= CSR_F_250M) && (clk_rate <= CSR_F_300M)) priv->clk_csr = STMMAC_CSR_250_300M; } } @@ -2232,6 +2232,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) frame_len = priv->hw->desc->get_rx_frame_len(p, coe); + /* check if frame_len fits the preallocated memory */ + if (frame_len > priv->dma_buf_sz) { + priv->dev->stats.rx_length_errors++; + break; + } + /* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3 * Type frames (LLC/LLC-SNAP) */ @@ -3102,6 +3108,7 @@ int stmmac_resume(struct net_device *ndev) init_dma_desc_rings(ndev, GFP_ATOMIC); stmmac_hw_setup(ndev, false); stmmac_init_tx_coalesce(priv); + stmmac_set_rx_mode(ndev); napi_enable(&priv->napi); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index ebf6abc4853f..bba670c42e37 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -138,7 +138,6 @@ int stmmac_mdio_reset(struct mii_bus *bus) #ifdef CONFIG_OF if (priv->device->of_node) { - int reset_gpio, active_low; if (data->reset_gpio < 0) { struct device_node *np = priv->device->of_node; @@ -154,24 +153,23 @@ int stmmac_mdio_reset(struct mii_bus *bus) "snps,reset-active-low"); of_property_read_u32_array(np, "snps,reset-delays-us", data->delays, 3); - } - reset_gpio = data->reset_gpio; - active_low = data->active_low; + if (gpio_request(data->reset_gpio, "mdio-reset")) + return 0; + } - if (!gpio_request(reset_gpio, "mdio-reset")) { - gpio_direction_output(reset_gpio, active_low ? 1 : 0); - if (data->delays[0]) - msleep(DIV_ROUND_UP(data->delays[0], 1000)); + gpio_direction_output(data->reset_gpio, + data->active_low ? 1 : 0); + if (data->delays[0]) + msleep(DIV_ROUND_UP(data->delays[0], 1000)); - gpio_set_value(reset_gpio, active_low ? 0 : 1); - if (data->delays[1]) - msleep(DIV_ROUND_UP(data->delays[1], 1000)); + gpio_set_value(data->reset_gpio, data->active_low ? 0 : 1); + if (data->delays[1]) + msleep(DIV_ROUND_UP(data->delays[1], 1000)); - gpio_set_value(reset_gpio, active_low ? 1 : 0); - if (data->delays[2]) - msleep(DIV_ROUND_UP(data->delays[2], 1000)); - } + gpio_set_value(data->reset_gpio, data->active_low ? 1 : 0); + if (data->delays[2]) + msleep(DIV_ROUND_UP(data->delays[2], 1000)); } #endif diff --git a/drivers/net/ethernet/ti/cpsw-common.c b/drivers/net/ethernet/ti/cpsw-common.c index c08be62bceba..1562ab4151e1 100644 --- a/drivers/net/ethernet/ti/cpsw-common.c +++ b/drivers/net/ethernet/ti/cpsw-common.c @@ -78,6 +78,9 @@ static int cpsw_am33xx_cm_get_macid(struct device *dev, u16 offset, int slave, int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr) { + if (of_machine_is_compatible("ti,dm8148")) + return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr); + if (of_machine_is_compatible("ti,am33xx")) return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr); diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h index bb1bb72121c0..17a26a429b71 100644 --- a/drivers/net/ethernet/ti/netcp.h +++ b/drivers/net/ethernet/ti/netcp.h @@ -113,7 +113,7 @@ struct netcp_intf { #define NETCP_PSDATA_LEN KNAV_DMA_NUM_PS_WORDS struct netcp_packet { struct sk_buff *skb; - u32 *epib; + __le32 *epib; u32 *psdata; unsigned int psdata_len; struct netcp_intf *netcp; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index e5e20e734f21..92d08eb262c2 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -109,69 +109,80 @@ module_param(netcp_debug_level, int, 0); MODULE_PARM_DESC(netcp_debug_level, "Netcp debug level (NETIF_MSG bits) (0=none,...,16=all)"); /* Helper functions - Get/Set */ -static void get_pkt_info(u32 *buff, u32 *buff_len, u32 *ndesc, +static void get_pkt_info(dma_addr_t *buff, u32 *buff_len, dma_addr_t *ndesc, struct knav_dma_desc *desc) { - *buff_len = desc->buff_len; - *buff = desc->buff; - *ndesc = desc->next_desc; + *buff_len = le32_to_cpu(desc->buff_len); + *buff = le32_to_cpu(desc->buff); + *ndesc = le32_to_cpu(desc->next_desc); } -static void get_pad_info(u32 *pad0, u32 *pad1, struct knav_dma_desc *desc) +static void get_pad_info(u32 *pad0, u32 *pad1, u32 *pad2, struct knav_dma_desc *desc) { - *pad0 = desc->pad[0]; - *pad1 = desc->pad[1]; + *pad0 = le32_to_cpu(desc->pad[0]); + *pad1 = le32_to_cpu(desc->pad[1]); + *pad2 = le32_to_cpu(desc->pad[2]); } -static void get_org_pkt_info(u32 *buff, u32 *buff_len, +static void get_pad_ptr(void **padptr, struct knav_dma_desc *desc) +{ + u64 pad64; + + pad64 = le32_to_cpu(desc->pad[0]) + + ((u64)le32_to_cpu(desc->pad[1]) << 32); + *padptr = (void *)(uintptr_t)pad64; +} + +static void get_org_pkt_info(dma_addr_t *buff, u32 *buff_len, struct knav_dma_desc *desc) { - *buff = desc->orig_buff; - *buff_len = desc->orig_len; + *buff = le32_to_cpu(desc->orig_buff); + *buff_len = le32_to_cpu(desc->orig_len); } -static void get_words(u32 *words, int num_words, u32 *desc) +static void get_words(dma_addr_t *words, int num_words, __le32 *desc) { int i; for (i = 0; i < num_words; i++) - words[i] = desc[i]; + words[i] = le32_to_cpu(desc[i]); } -static void set_pkt_info(u32 buff, u32 buff_len, u32 ndesc, +static void set_pkt_info(dma_addr_t buff, u32 buff_len, u32 ndesc, struct knav_dma_desc *desc) { - desc->buff_len = buff_len; - desc->buff = buff; - desc->next_desc = ndesc; + desc->buff_len = cpu_to_le32(buff_len); + desc->buff = cpu_to_le32(buff); + desc->next_desc = cpu_to_le32(ndesc); } static void set_desc_info(u32 desc_info, u32 pkt_info, struct knav_dma_desc *desc) { - desc->desc_info = desc_info; - desc->packet_info = pkt_info; + desc->desc_info = cpu_to_le32(desc_info); + desc->packet_info = cpu_to_le32(pkt_info); } -static void set_pad_info(u32 pad0, u32 pad1, struct knav_dma_desc *desc) +static void set_pad_info(u32 pad0, u32 pad1, u32 pad2, struct knav_dma_desc *desc) { - desc->pad[0] = pad0; - desc->pad[1] = pad1; + desc->pad[0] = cpu_to_le32(pad0); + desc->pad[1] = cpu_to_le32(pad1); + desc->pad[2] = cpu_to_le32(pad1); } -static void set_org_pkt_info(u32 buff, u32 buff_len, +static void set_org_pkt_info(dma_addr_t buff, u32 buff_len, struct knav_dma_desc *desc) { - desc->orig_buff = buff; - desc->orig_len = buff_len; + desc->orig_buff = cpu_to_le32(buff); + desc->orig_len = cpu_to_le32(buff_len); } -static void set_words(u32 *words, int num_words, u32 *desc) +static void set_words(u32 *words, int num_words, __le32 *desc) { int i; for (i = 0; i < num_words; i++) - desc[i] = words[i]; + desc[i] = cpu_to_le32(words[i]); } /* Read the e-fuse value as 32 bit values to be endian independent */ @@ -570,7 +581,7 @@ static void netcp_free_rx_desc_chain(struct netcp_intf *netcp, dma_addr_t dma_desc, dma_buf; unsigned int buf_len, dma_sz = sizeof(*ndesc); void *buf_ptr; - u32 tmp; + u32 pad[2]; get_words(&dma_desc, 1, &desc->next_desc); @@ -580,14 +591,15 @@ static void netcp_free_rx_desc_chain(struct netcp_intf *netcp, dev_err(netcp->ndev_dev, "failed to unmap Rx desc\n"); break; } - get_pkt_info(&dma_buf, &tmp, &dma_desc, ndesc); - get_pad_info((u32 *)&buf_ptr, &tmp, ndesc); + get_pad_ptr(&buf_ptr, ndesc); dma_unmap_page(netcp->dev, dma_buf, PAGE_SIZE, DMA_FROM_DEVICE); __free_page(buf_ptr); knav_pool_desc_put(netcp->rx_pool, desc); } - get_pad_info((u32 *)&buf_ptr, &buf_len, desc); + get_pad_info(&pad[0], &pad[1], &buf_len, desc); + buf_ptr = (void *)(uintptr_t)(pad[0] + ((u64)pad[1] << 32)); + if (buf_ptr) netcp_frag_free(buf_len <= PAGE_SIZE, buf_ptr); knav_pool_desc_put(netcp->rx_pool, desc); @@ -626,7 +638,6 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp) struct netcp_packet p_info; struct sk_buff *skb; void *org_buf_ptr; - u32 tmp; dma_desc = knav_queue_pop(netcp->rx_queue, &dma_sz); if (!dma_desc) @@ -639,7 +650,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp) } get_pkt_info(&dma_buff, &buf_len, &dma_desc, desc); - get_pad_info((u32 *)&org_buf_ptr, &org_buf_len, desc); + get_pad_ptr(&org_buf_ptr, desc); if (unlikely(!org_buf_ptr)) { dev_err(netcp->ndev_dev, "NULL bufptr in desc\n"); @@ -664,6 +675,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp) /* Fill in the page fragment list */ while (dma_desc) { struct page *page; + void *ptr; ndesc = knav_pool_desc_unmap(netcp->rx_pool, dma_desc, dma_sz); if (unlikely(!ndesc)) { @@ -672,14 +684,15 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp) } get_pkt_info(&dma_buff, &buf_len, &dma_desc, ndesc); - get_pad_info((u32 *)&page, &tmp, ndesc); + get_pad_ptr(ptr, ndesc); + page = ptr; if (likely(dma_buff && buf_len && page)) { dma_unmap_page(netcp->dev, dma_buff, PAGE_SIZE, DMA_FROM_DEVICE); } else { - dev_err(netcp->ndev_dev, "Bad Rx desc dma_buff(%p), len(%d), page(%p)\n", - (void *)dma_buff, buf_len, page); + dev_err(netcp->ndev_dev, "Bad Rx desc dma_buff(%pad), len(%d), page(%p)\n", + &dma_buff, buf_len, page); goto free_desc; } @@ -750,7 +763,6 @@ static void netcp_free_rx_buf(struct netcp_intf *netcp, int fdq) unsigned int buf_len, dma_sz; dma_addr_t dma; void *buf_ptr; - u32 tmp; /* Allocate descriptor */ while ((dma = knav_queue_pop(netcp->rx_fdq[fdq], &dma_sz))) { @@ -761,7 +773,7 @@ static void netcp_free_rx_buf(struct netcp_intf *netcp, int fdq) } get_org_pkt_info(&dma, &buf_len, desc); - get_pad_info((u32 *)&buf_ptr, &tmp, desc); + get_pad_ptr(buf_ptr, desc); if (unlikely(!dma)) { dev_err(netcp->ndev_dev, "NULL orig_buff in desc\n"); @@ -813,7 +825,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) struct page *page; dma_addr_t dma; void *bufptr; - u32 pad[2]; + u32 pad[3]; /* Allocate descriptor */ hwdesc = knav_pool_desc_get(netcp->rx_pool); @@ -830,7 +842,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); bufptr = netdev_alloc_frag(primary_buf_len); - pad[1] = primary_buf_len; + pad[2] = primary_buf_len; if (unlikely(!bufptr)) { dev_warn_ratelimited(netcp->ndev_dev, @@ -842,7 +854,8 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) if (unlikely(dma_mapping_error(netcp->dev, dma))) goto fail; - pad[0] = (u32)bufptr; + pad[0] = lower_32_bits((uintptr_t)bufptr); + pad[1] = upper_32_bits((uintptr_t)bufptr); } else { /* Allocate a secondary receive queue entry */ @@ -853,8 +866,9 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) } buf_len = PAGE_SIZE; dma = dma_map_page(netcp->dev, page, 0, buf_len, DMA_TO_DEVICE); - pad[0] = (u32)page; - pad[1] = 0; + pad[0] = lower_32_bits(dma); + pad[1] = upper_32_bits(dma); + pad[2] = 0; } desc_info = KNAV_DMA_DESC_PS_INFO_IN_DESC; @@ -864,7 +878,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) pkt_info |= (netcp->rx_queue_id & KNAV_DMA_DESC_RETQ_MASK) << KNAV_DMA_DESC_RETQ_SHIFT; set_org_pkt_info(dma, buf_len, hwdesc); - set_pad_info(pad[0], pad[1], hwdesc); + set_pad_info(pad[0], pad[1], pad[2], hwdesc); set_desc_info(desc_info, pkt_info, hwdesc); /* Push to FDQs */ @@ -935,8 +949,8 @@ static void netcp_free_tx_desc_chain(struct netcp_intf *netcp, dma_unmap_single(netcp->dev, dma_buf, buf_len, DMA_TO_DEVICE); else - dev_warn(netcp->ndev_dev, "bad Tx desc buf(%p), len(%d)\n", - (void *)dma_buf, buf_len); + dev_warn(netcp->ndev_dev, "bad Tx desc buf(%pad), len(%d)\n", + &dma_buf, buf_len); knav_pool_desc_put(netcp->tx_pool, ndesc); ndesc = NULL; @@ -953,11 +967,11 @@ static int netcp_process_tx_compl_packets(struct netcp_intf *netcp, unsigned int budget) { struct knav_dma_desc *desc; + void *ptr; struct sk_buff *skb; unsigned int dma_sz; dma_addr_t dma; int pkts = 0; - u32 tmp; while (budget--) { dma = knav_queue_pop(netcp->tx_compl_q, &dma_sz); @@ -970,7 +984,8 @@ static int netcp_process_tx_compl_packets(struct netcp_intf *netcp, continue; } - get_pad_info((u32 *)&skb, &tmp, desc); + get_pad_ptr(&ptr, desc); + skb = ptr; netcp_free_tx_desc_chain(netcp, desc, dma_sz); if (!skb) { dev_err(netcp->ndev_dev, "No skb in Tx desc\n"); @@ -1059,6 +1074,7 @@ netcp_tx_map_skb(struct sk_buff *skb, struct netcp_intf *netcp) u32 page_offset = frag->page_offset; u32 buf_len = skb_frag_size(frag); dma_addr_t desc_dma; + u32 desc_dma_32; u32 pkt_info; dma_addr = dma_map_page(dev, page, page_offset, buf_len, @@ -1075,13 +1091,13 @@ netcp_tx_map_skb(struct sk_buff *skb, struct netcp_intf *netcp) goto free_descs; } - desc_dma = knav_pool_desc_virt_to_dma(netcp->tx_pool, - (void *)ndesc); + desc_dma = knav_pool_desc_virt_to_dma(netcp->tx_pool, ndesc); pkt_info = (netcp->tx_compl_qid & KNAV_DMA_DESC_RETQ_MASK) << KNAV_DMA_DESC_RETQ_SHIFT; set_pkt_info(dma_addr, buf_len, 0, ndesc); - set_words(&desc_dma, 1, &pdesc->next_desc); + desc_dma_32 = (u32)desc_dma; + set_words(&desc_dma_32, 1, &pdesc->next_desc); pkt_len += buf_len; if (pdesc != desc) knav_pool_desc_map(netcp->tx_pool, pdesc, @@ -1129,8 +1145,8 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp, p_info.ts_context = NULL; p_info.txtstamp_complete = NULL; p_info.epib = desc->epib; - p_info.psdata = desc->psdata; - memset(p_info.epib, 0, KNAV_DMA_NUM_EPIB_WORDS * sizeof(u32)); + p_info.psdata = (u32 __force *)desc->psdata; + memset(p_info.epib, 0, KNAV_DMA_NUM_EPIB_WORDS * sizeof(__le32)); /* Find out where to inject the packet for transmission */ list_for_each_entry(tx_hook, &netcp->txhook_list_head, list) { @@ -1154,11 +1170,12 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp, /* update descriptor */ if (p_info.psdata_len) { - u32 *psdata = p_info.psdata; + /* psdata points to both native-endian and device-endian data */ + __le32 *psdata = (void __force *)p_info.psdata; memmove(p_info.psdata, p_info.psdata + p_info.psdata_len, p_info.psdata_len); - set_words(psdata, p_info.psdata_len, psdata); + set_words(p_info.psdata, p_info.psdata_len, psdata); tmp |= (p_info.psdata_len & KNAV_DMA_DESC_PSLEN_MASK) << KNAV_DMA_DESC_PSLEN_SHIFT; } @@ -1173,11 +1190,14 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp, } set_words(&tmp, 1, &desc->packet_info); - set_words((u32 *)&skb, 1, &desc->pad[0]); + tmp = lower_32_bits((uintptr_t)&skb); + set_words(&tmp, 1, &desc->pad[0]); + tmp = upper_32_bits((uintptr_t)&skb); + set_words(&tmp, 1, &desc->pad[1]); if (tx_pipe->flags & SWITCH_TO_PORT_IN_TAGINFO) { tmp = tx_pipe->switch_to_port; - set_words((u32 *)&tmp, 1, &desc->tag_info); + set_words(&tmp, 1, &desc->tag_info); } /* submit packet descriptor */ |