217 files changed, 20612 insertions, 4433 deletions
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index 4547a1b8b958..7677c745fb30 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -562,7 +562,7 @@ static void el3_common_remove (struct net_device *dev)
 }
 
 #ifdef CONFIG_EISA
-static int __init el3_eisa_probe (struct device *device)
+static int el3_eisa_probe(struct device *device)
 {
 	short i;
 	int ioaddr, irq, if_port;
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 2839af00f20c..1c5f3b273e6a 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -907,7 +907,7 @@ static struct eisa_device_id vortex_eisa_ids[] = {
 };
 MODULE_DEVICE_TABLE(eisa, vortex_eisa_ids);
 
-static int __init vortex_eisa_probe(struct device *device)
+static int vortex_eisa_probe(struct device *device)
 {
 	void __iomem *ioaddr;
 	struct eisa_device *edev;
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 955d06b9cdba..0b13af8e4070 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -29,6 +29,7 @@ source "drivers/net/ethernet/apm/Kconfig"
 source "drivers/net/ethernet/apple/Kconfig"
 source "drivers/net/ethernet/arc/Kconfig"
 source "drivers/net/ethernet/atheros/Kconfig"
+source "drivers/net/ethernet/aurora/Kconfig"
 source "drivers/net/ethernet/cadence/Kconfig"
 source "drivers/net/ethernet/adi/Kconfig"
 source "drivers/net/ethernet/broadcom/Kconfig"
@@ -121,6 +122,7 @@ config FEALNX
 	  cards. <http://www.myson.com.tw/>
 
 source "drivers/net/ethernet/natsemi/Kconfig"
+source "drivers/net/ethernet/netronome/Kconfig"
 source "drivers/net/ethernet/8390/Kconfig"
 
 config NET_NETX
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 4a2ee98738f0..38dc1a776a2b 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_NET_XGENE) += apm/
 obj-$(CONFIG_NET_VENDOR_APPLE) += apple/
 obj-$(CONFIG_NET_VENDOR_ARC) += arc/
 obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/
+obj-$(CONFIG_NET_VENDOR_AURORA) += aurora/
 obj-$(CONFIG_NET_CADENCE) += cadence/
 obj-$(CONFIG_NET_BFIN) += adi/
 obj-$(CONFIG_NET_VENDOR_BROADCOM) += broadcom/
@@ -52,6 +53,7 @@ obj-$(CONFIG_NET_VENDOR_MOXART) += moxa/
 obj-$(CONFIG_NET_VENDOR_MYRI) += myricom/
 obj-$(CONFIG_FEALNX) += fealnx.o
 obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/
+obj-$(CONFIG_NET_VENDOR_NETRONOME) += netronome/
 obj-$(CONFIG_NET_NETX) += netx-eth.o
 obj-$(CONFIG_NET_VENDOR_NUVOTON) += nuvoton/
 obj-$(CONFIG_NET_VENDOR_NVIDIA) += nvidia/
diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c
index 096531a73124..e0e95a15cab0 100644
--- a/drivers/net/ethernet/adi/bfin_mac.c
+++ b/drivers/net/ethernet/adi/bfin_mac.c
@@ -1912,21 +1912,21 @@ static struct platform_driver bfin_mac_driver = {
 	},
 };
 
+static struct platform_driver * const drivers[] = {
+	&bfin_mii_bus_driver,
+	&bfin_mac_driver,
+};
+
 static int __init bfin_mac_init(void)
 {
-	int ret;
-	ret = platform_driver_register(&bfin_mii_bus_driver);
-	if (!ret)
-		return platform_driver_register(&bfin_mac_driver);
-	return -ENODEV;
+	return platform_register_drivers(drivers, ARRAY_SIZE(drivers));
 }
 
 module_init(bfin_mac_init);
 
 static void __exit bfin_mac_cleanup(void)
 {
-	platform_driver_unregister(&bfin_mac_driver);
-	platform_driver_unregister(&bfin_mii_bus_driver);
+	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
 }
 
 module_exit(bfin_mac_cleanup);
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index e2afabf3a465..7ccebae9cb48 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -1500,10 +1500,11 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent)
 		return -ENODEV;
 	}
 
-	if (!pci_set_dma_mask(pdev, PCNET32_DMA_MASK)) {
+	err = pci_set_dma_mask(pdev, PCNET32_DMA_MASK);
+	if (err) {
 		if (pcnet32_debug & NETIF_MSG_PROBE)
 			pr_err("architecture does not support 32bit PCI busmaster DMA\n");
-		return -ENODEV;
+		return err;
 	}
 	if (!request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci")) {
 		if (pcnet32_debug & NETIF_MSG_PROBE)
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index c31e691d11fc..db55c9f6e8e1 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -869,7 +869,7 @@ void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata)
 	pdata->mdio_bus = NULL;
 }
 
-struct xgene_mac_ops xgene_gmac_ops = {
+const struct xgene_mac_ops xgene_gmac_ops = {
 	.init = xgene_gmac_init,
 	.reset = xgene_gmac_reset,
 	.rx_enable = xgene_gmac_rx_enable,
@@ -879,7 +879,7 @@ struct xgene_mac_ops xgene_gmac_ops = {
 	.set_mac_addr = xgene_gmac_set_mac_addr,
 };
 
-struct xgene_port_ops xgene_gport_ops = {
+const struct xgene_port_ops xgene_gport_ops = {
 	.reset = xgene_enet_reset,
 	.cle_bypass = xgene_enet_cle_bypass,
 	.shutdown = xgene_gport_shutdown,
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
index c153a1dc5ff7..8a9091039ab4 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
@@ -340,8 +340,8 @@ int xgene_enet_mdio_config(struct xgene_enet_pdata *pdata);
 void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata);
 bool xgene_ring_mgr_init(struct xgene_enet_pdata *p);
 
-extern struct xgene_mac_ops xgene_gmac_ops;
-extern struct xgene_port_ops xgene_gport_ops;
+extern const struct xgene_mac_ops xgene_gmac_ops;
+extern const struct xgene_port_ops xgene_gport_ops;
 extern struct xgene_ring_ops xgene_ring1_ops;
 
 #endif /* __XGENE_ENET_HW_H__ */
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 991412ce6f48..2394191ad28e 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -450,12 +450,12 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb,
 		return NETDEV_TX_OK;
 	}
 
-	pdata->ring_ops->wr_cmd(tx_ring, count);
 	skb_tx_timestamp(skb);
 
 	pdata->stats.tx_packets++;
 	pdata->stats.tx_bytes += skb->len;
 
+	pdata->ring_ops->wr_cmd(tx_ring, count);
 	return NETDEV_TX_OK;
 }
 
@@ -682,16 +682,16 @@ static void xgene_enet_napi_disable(struct xgene_enet_pdata *pdata)
 static int xgene_enet_open(struct net_device *ndev)
 {
 	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
-	struct xgene_mac_ops *mac_ops = pdata->mac_ops;
+	const struct xgene_mac_ops *mac_ops = pdata->mac_ops;
 	int ret;
 
 	mac_ops->tx_enable(pdata);
 	mac_ops->rx_enable(pdata);
 
+	xgene_enet_napi_enable(pdata);
 	ret = xgene_enet_register_irq(ndev);
 	if (ret)
 		return ret;
-	xgene_enet_napi_enable(pdata);
 
 	if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII)
 		phy_start(pdata->phy_dev);
@@ -706,7 +706,7 @@ static int xgene_enet_open(struct net_device *ndev)
 static int xgene_enet_close(struct net_device *ndev)
 {
 	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
-	struct xgene_mac_ops *mac_ops = pdata->mac_ops;
+	const struct xgene_mac_ops *mac_ops = pdata->mac_ops;
 
 	netif_stop_queue(ndev);
 
@@ -715,13 +715,13 @@ static int xgene_enet_close(struct net_device *ndev)
 	else
 		cancel_delayed_work_sync(&pdata->link_work);
 
-	xgene_enet_napi_disable(pdata);
-	xgene_enet_free_irq(ndev);
-	xgene_enet_process_ring(pdata->rx_ring, -1);
-
 	mac_ops->tx_disable(pdata);
 	mac_ops->rx_disable(pdata);
 
+	xgene_enet_free_irq(ndev);
+	xgene_enet_napi_disable(pdata);
+	xgene_enet_process_ring(pdata->rx_ring, -1);
+
 	return 0;
 }
 
@@ -1084,7 +1084,7 @@ static const struct net_device_ops xgene_ndev_ops = {
 };
 
 #ifdef CONFIG_ACPI
-static int xgene_get_port_id_acpi(struct device *dev,
+static void xgene_get_port_id_acpi(struct device *dev,
 				  struct xgene_enet_pdata *pdata)
 {
 	acpi_status status;
@@ -1097,24 +1097,19 @@ static int xgene_get_port_id_acpi(struct device *dev,
 		pdata->port_id = temp;
 	}
 
-	return 0;
+	return;
 }
 #endif
 
-static int xgene_get_port_id_dt(struct device *dev, struct xgene_enet_pdata *pdata)
+static void xgene_get_port_id_dt(struct device *dev, struct xgene_enet_pdata *pdata)
 {
 	u32 id = 0;
-	int ret;
 
-	ret = of_property_read_u32(dev->of_node, "port-id", &id);
-	if (ret) {
-		pdata->port_id = 0;
-		ret = 0;
-	} else {
-		pdata->port_id = id & BIT(0);
-	}
+	of_property_read_u32(dev->of_node, "port-id", &id);
 
-	return ret;
+	pdata->port_id = id & BIT(0);
+
+	return;
 }
 
 static int xgene_get_tx_delay(struct xgene_enet_pdata *pdata)
@@ -1209,13 +1204,11 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
 	}
 
 	if (dev->of_node)
-		ret = xgene_get_port_id_dt(dev, pdata);
+		xgene_get_port_id_dt(dev, pdata);
 #ifdef CONFIG_ACPI
 	else
-		ret = xgene_get_port_id_acpi(dev, pdata);
+		xgene_get_port_id_acpi(dev, pdata);
 #endif
-	if (ret)
-		return ret;
 
 	if (!device_get_mac_address(dev, ndev->dev_addr, ETH_ALEN))
 		eth_hw_addr_random(ndev);
@@ -1423,7 +1416,7 @@ static int xgene_enet_probe(struct platform_device *pdev)
 	struct net_device *ndev;
 	struct xgene_enet_pdata *pdata;
 	struct device *dev = &pdev->dev;
-	struct xgene_mac_ops *mac_ops;
+	const struct xgene_mac_ops *mac_ops;
 	const struct of_device_id *of_id;
 	int ret;
 
@@ -1474,15 +1467,15 @@ static int xgene_enet_probe(struct platform_device *pdev)
 	}
 	ndev->hw_features = ndev->features;
 
-	ret = register_netdev(ndev);
+	ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64));
 	if (ret) {
-		netdev_err(ndev, "Failed to register netdev\n");
+		netdev_err(ndev, "No usable DMA configuration\n");
 		goto err;
 	}
 
-	ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	ret = register_netdev(ndev);
 	if (ret) {
-		netdev_err(ndev, "No usable DMA configuration\n");
+		netdev_err(ndev, "Failed to register netdev\n");
 		goto err;
 	}
 
@@ -1490,14 +1483,17 @@ static int xgene_enet_probe(struct platform_device *pdev)
 	if (ret)
 		goto err;
 
-	xgene_enet_napi_add(pdata);
 	mac_ops = pdata->mac_ops;
-	if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII)
+	if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) {
 		ret = xgene_enet_mdio_config(pdata);
-	else
+		if (ret)
+			goto err;
+	} else {
 		INIT_DELAYED_WORK(&pdata->link_work, mac_ops->link_state);
+	}
 
-	return ret;
+	xgene_enet_napi_add(pdata);
+	return 0;
 err:
 	unregister_netdev(ndev);
 	free_netdev(ndev);
@@ -1507,7 +1503,7 @@ err:
 static int xgene_enet_remove(struct platform_device *pdev)
 {
 	struct xgene_enet_pdata *pdata;
-	struct xgene_mac_ops *mac_ops;
+	const struct xgene_mac_ops *mac_ops;
 	struct net_device *ndev;
 
 	pdata = platform_get_drvdata(pdev);
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
index a6e56b88c0a0..054caf055f0a 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
@@ -174,8 +174,8 @@ struct xgene_enet_pdata {
 	int phy_mode;
 	enum xgene_enet_rm rm;
 	struct rtnl_link_stats64 stats;
-	struct xgene_mac_ops *mac_ops;
-	struct xgene_port_ops *port_ops;
+	const struct xgene_mac_ops *mac_ops;
+	const struct xgene_port_ops *port_ops;
 	struct xgene_ring_ops *ring_ops;
 	struct delayed_work link_work;
 	u32 port_id;
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
index 05b817e56fde..78475512b683 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
@@ -405,7 +405,7 @@ static void xgene_enet_link_state(struct work_struct *work)
 	schedule_delayed_work(&p->link_work, poll_interval);
 }
 
-struct xgene_mac_ops xgene_sgmac_ops = {
+const struct xgene_mac_ops xgene_sgmac_ops = {
 	.init		= xgene_sgmac_init,
 	.reset		= xgene_sgmac_reset,
 	.rx_enable	= xgene_sgmac_rx_enable,
@@ -416,7 +416,7 @@ struct xgene_mac_ops xgene_sgmac_ops = {
 	.link_state	= xgene_enet_link_state
 };
 
-struct xgene_port_ops xgene_sgport_ops = {
+const struct xgene_port_ops xgene_sgport_ops = {
 	.reset		= xgene_enet_reset,
 	.cle_bypass	= xgene_enet_cle_bypass,
 	.shutdown	= xgene_enet_shutdown
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h
index de432465009c..29a71b4dcc44 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h
@@ -35,7 +35,7 @@
 #define MPA_IDLE_WITH_QMI_EMPTY		BIT(12)
 #define SG_RX_DV_GATE_REG_0_ADDR	0x0dfc
 
-extern struct xgene_mac_ops xgene_sgmac_ops;
-extern struct xgene_port_ops xgene_sgport_ops;
+extern const struct xgene_mac_ops xgene_sgmac_ops;
+extern const struct xgene_port_ops xgene_sgport_ops;
 
 #endif  /* __XGENE_ENET_SGMAC_H__ */
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
index 7a28a48cb2c7..ba030dc1940b 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
@@ -326,7 +326,7 @@ static void xgene_enet_link_state(struct work_struct *work)
 	schedule_delayed_work(&pdata->link_work, poll_interval);
 }
 
-struct xgene_mac_ops xgene_xgmac_ops = {
+const struct xgene_mac_ops xgene_xgmac_ops = {
 	.init = xgene_xgmac_init,
 	.reset = xgene_xgmac_reset,
 	.rx_enable = xgene_xgmac_rx_enable,
@@ -338,7 +338,7 @@ struct xgene_mac_ops xgene_xgmac_ops = {
 	.link_state = xgene_enet_link_state
 };
 
-struct xgene_port_ops xgene_xgport_ops = {
+const struct xgene_port_ops xgene_xgport_ops = {
 	.reset = xgene_enet_reset,
 	.cle_bypass = xgene_enet_xgcle_bypass,
 	.shutdown = xgene_enet_shutdown,
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
index f8f908dbf51c..0a2dca8a1725 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
@@ -69,7 +69,7 @@
 #define XG_ENET_SPARE_CFG_REG_1_ADDR	0x0410
 #define XGENET_RX_DV_GATE_REG_0_ADDR	0x0804
 
-extern struct xgene_mac_ops xgene_xgmac_ops;
-extern struct xgene_port_ops xgene_xgport_ops;
+extern const struct xgene_mac_ops xgene_xgmac_ops;
+extern const struct xgene_port_ops xgene_xgport_ops;
 
 #endif /* __XGENE_ENET_XGMAC_H__ */
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index c8af3ce3ea38..d3763bc2c561 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -577,7 +577,6 @@ static int alx_alloc_rings(struct alx_priv *alx)
 
 	alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
 	alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
-	alx->tx_ringsz = alx->tx_ringsz;
 
 	netif_napi_add(alx->dev, &alx->napi, alx_poll, 64);
 
@@ -1534,6 +1533,8 @@ static const struct pci_device_id alx_pci_tbl[] = {
 	  .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
 	{ PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2200),
 	  .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
+	{ PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2400),
+	  .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
 	{ PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8162),
 	  .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
 	{ PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8171) },
diff --git a/drivers/net/ethernet/atheros/alx/reg.h b/drivers/net/ethernet/atheros/alx/reg.h
index af006b44b2a6..0959e6824cb6 100644
--- a/drivers/net/ethernet/atheros/alx/reg.h
+++ b/drivers/net/ethernet/atheros/alx/reg.h
@@ -37,6 +37,7 @@
 
 #define ALX_DEV_ID_AR8161				0x1091
 #define ALX_DEV_ID_E2200				0xe091
+#define ALX_DEV_ID_E2400				0xe0a1
 #define ALX_DEV_ID_AR8162				0x1090
 #define ALX_DEV_ID_AR8171				0x10A1
 #define ALX_DEV_ID_AR8172				0x10A0
diff --git a/drivers/net/ethernet/aurora/Kconfig b/drivers/net/ethernet/aurora/Kconfig
new file mode 100644
index 000000000000..a3c7106fdf85
--- /dev/null
+++ b/drivers/net/ethernet/aurora/Kconfig
@@ -0,0 +1,20 @@
+config NET_VENDOR_AURORA
+	bool "Aurora VLSI devices"
+	help
+	  If you have a network (Ethernet) device belonging to this class,
+	  say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  questions about Aurora devices. If you say Y, you will be asked
+	  for your specific device in the following questions.
+
+if NET_VENDOR_AURORA
+
+config AURORA_NB8800
+	tristate "Aurora AU-NB8800 support"
+	select PHYLIB
+	help
+	 Support for the AU-NB8800 gigabit Ethernet controller.
+
+endif
diff --git a/drivers/net/ethernet/aurora/Makefile b/drivers/net/ethernet/aurora/Makefile
new file mode 100644
index 000000000000..6cb528a2fc26
--- /dev/null
+++ b/drivers/net/ethernet/aurora/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_AURORA_NB8800) += nb8800.o
diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
new file mode 100644
index 000000000000..ecc4a334c507
--- /dev/null
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -0,0 +1,1552 @@
+/*
+ * Copyright (C) 2015 Mans Rullgard <mans@mansr.com>
+ *
+ * Mostly rewritten, based on driver from Sigma Designs.  Original
+ * copyright notice below.
+ *
+ *
+ * Driver for tangox SMP864x/SMP865x/SMP867x/SMP868x builtin Ethernet Mac.
+ *
+ * Copyright (C) 2005 Maxime Bizon <mbizon@freebox.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/of_device.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/dma-mapping.h>
+#include <linux/phy.h>
+#include <linux/cache.h>
+#include <linux/jiffies.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <asm/barrier.h>
+
+#include "nb8800.h"
+
+static void nb8800_tx_done(struct net_device *dev);
+static int nb8800_dma_stop(struct net_device *dev);
+
+/* Read one byte at offset @reg from priv->base using readb_relaxed(). */
+static inline u8 nb8800_readb(struct nb8800_priv *priv, int reg)
+{
+	void __iomem *addr = priv->base + reg;
+
+	return readb_relaxed(addr);
+}
+
+/* Read a 32-bit word at offset @reg from priv->base using readl_relaxed(). */
+static inline u32 nb8800_readl(struct nb8800_priv *priv, int reg)
+{
+	void __iomem *addr = priv->base + reg;
+
+	return readl_relaxed(addr);
+}
+
+/* Write the byte @val at offset @reg from priv->base using writeb_relaxed(). */
+static inline void nb8800_writeb(struct nb8800_priv *priv, int reg, u8 val)
+{
+	void __iomem *addr = priv->base + reg;
+
+	writeb_relaxed(val, addr);
+}
+
+/* Write the 16-bit @val at offset @reg from priv->base using writew_relaxed(). */
+static inline void nb8800_writew(struct nb8800_priv *priv, int reg, u16 val)
+{
+	void __iomem *addr = priv->base + reg;
+
+	writew_relaxed(val, addr);
+}
+
+/* Write the 32-bit @val at offset @reg from priv->base using writel_relaxed(). */
+static inline void nb8800_writel(struct nb8800_priv *priv, int reg, u32 val)
+{
+	void __iomem *addr = priv->base + reg;
+
+	writel_relaxed(val, addr);
+}
+
+/* Read-modify-write of the byte register @reg.
+ *
+ * The bits selected by @mask are replaced with the corresponding bits of
+ * @val; all other bits keep their current value.  The register is only
+ * written back when the resulting value differs from what was read.
+ */
+static inline void nb8800_maskb(struct nb8800_priv *priv, int reg,
+				u32 mask, u32 val)
+{
+	u32 current_val = nb8800_readb(priv, reg);
+	u32 updated = current_val & ~mask;
+
+	updated |= val & mask;
+	if (updated == current_val)
+		return;
+
+	nb8800_writeb(priv, reg, updated);
+}
+
+/* Read-modify-write of the 32-bit register @reg.
+ *
+ * The bits selected by @mask are replaced with the corresponding bits of
+ * @val; all other bits keep their current value.  The register is only
+ * written back when the resulting value differs from what was read.
+ */
+static inline void nb8800_maskl(struct nb8800_priv *priv, int reg,
+				u32 mask, u32 val)
+{
+	u32 current_val = nb8800_readl(priv, reg);
+	u32 updated = current_val & ~mask;
+
+	updated |= val & mask;
+	if (updated == current_val)
+		return;
+
+	nb8800_writel(priv, reg, updated);
+}
+
+/* Set (@set true) or clear (@set false) @bits in the byte register @reg. */
+static inline void nb8800_modb(struct nb8800_priv *priv, int reg, u8 bits,
+			       bool set)
+{
+	u8 val = 0;
+
+	if (set)
+		val = bits;
+
+	nb8800_maskb(priv, reg, bits, val);
+}
+
+/* Set @bits in the byte register @reg, leaving the other bits untouched. */
+static inline void nb8800_setb(struct nb8800_priv *priv, int reg, u8 bits)
+{
+	nb8800_modb(priv, reg, bits, true);
+}
+
+/* Clear @bits in the byte register @reg, leaving the other bits untouched. */
+static inline void nb8800_clearb(struct nb8800_priv *priv, int reg, u8 bits)
+{
+	nb8800_modb(priv, reg, bits, false);
+}
+
+/* Set (@set true) or clear (@set false) @bits in the 32-bit register @reg. */
+static inline void nb8800_modl(struct nb8800_priv *priv, int reg, u32 bits,
+			       bool set)
+{
+	u32 val = 0;
+
+	if (set)
+		val = bits;
+
+	nb8800_maskl(priv, reg, bits, val);
+}
+
+/* Set @bits in the 32-bit register @reg, leaving the other bits untouched. */
+static inline void nb8800_setl(struct nb8800_priv *priv, int reg, u32 bits)
+{
+	nb8800_modl(priv, reg, bits, true);
+}
+
+/* Clear @bits in the 32-bit register @reg, leaving the other bits untouched. */
+static inline void nb8800_clearl(struct nb8800_priv *priv, int reg, u32 bits)
+{
+	nb8800_modl(priv, reg, bits, false);
+}
+
+/* Poll NB8800_MDIO_CMD until the MDIO_CMD_GO bit reads as clear.
+ *
+ * Uses readl_poll_timeout_atomic() with a 1 us delay between reads and a
+ * 1000 us timeout, and returns that helper's result unchanged.
+ */
+static int nb8800_mdio_wait(struct mii_bus *bus)
+{
+	struct nb8800_priv *priv = bus->priv;
+	void __iomem *cmd_reg = priv->base + NB8800_MDIO_CMD;
+	u32 cmd;
+	int ret;
+
+	ret = readl_poll_timeout_atomic(cmd_reg, cmd,
+					(cmd & MDIO_CMD_GO) == 0, 1, 1000);
+
+	return ret;
+}
+
+/* Issue the MDIO command word @cmd and wait for it to finish.
+ *
+ * Waits for any previous command to finish, writes @cmd without the GO
+ * bit, delays 10 us, then writes it again with MDIO_CMD_GO set and waits
+ * for GO to clear.  Returns 0 on success or the error from
+ * nb8800_mdio_wait().
+ */
+static int nb8800_mdio_cmd(struct mii_bus *bus, u32 cmd)
+{
+	struct nb8800_priv *priv = bus->priv;
+	int ret = nb8800_mdio_wait(bus);
+
+	if (ret)
+		return ret;
+
+	/* program the command first, then trigger it in a second write */
+	nb8800_writel(priv, NB8800_MDIO_CMD, cmd);
+	udelay(10);
+	nb8800_writel(priv, NB8800_MDIO_CMD, cmd | MDIO_CMD_GO);
+
+	ret = nb8800_mdio_wait(bus);
+
+	return ret;
+}
+
+/* mii_bus read callback: read register @reg of the PHY at address @phy_id.
+ *
+ * Returns the low 16 bits of NB8800_MDIO_STS on success, 0xffff when the
+ * status register has MDIO_STS_ERR set, or a negative error code if the
+ * command could not be issued or did not complete.
+ */
+static int nb8800_mdio_read(struct mii_bus *bus, int phy_id, int reg)
+{
+	struct nb8800_priv *priv = bus->priv;
+	u32 cmd = MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg);
+	u32 status;
+	int ret;
+
+	ret = nb8800_mdio_cmd(bus, cmd);
+	if (ret)
+		return ret;
+
+	status = nb8800_readl(priv, NB8800_MDIO_STS);
+
+	return (status & MDIO_STS_ERR) ? 0xffff : (status & 0xffff);
+}
+
+/* mii_bus write callback: write @val to register @reg of the PHY at
+ * address @phy_id.  Returns the result of nb8800_mdio_cmd().
+ */
+static int nb8800_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val)
+{
+	u32 cmd = MDIO_CMD_WR;
+
+	cmd |= MDIO_CMD_ADDR(phy_id);
+	cmd |= MDIO_CMD_REG(reg);
+	cmd |= MDIO_CMD_DATA(val);
+
+	return nb8800_mdio_cmd(bus, cmd);
+}
+
+/* Enable or disable the MAC transmitter.
+ *
+ * Busy-waits (without a timeout) until TCR_EN is clear in NB8800_TXC_CR,
+ * then sets or clears TX_EN in NB8800_TX_CTL1 according to @enable.
+ */
+static void nb8800_mac_tx(struct net_device *dev, bool enable)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	for (;;) {
+		if (!(nb8800_readl(priv, NB8800_TXC_CR) & TCR_EN))
+			break;
+		cpu_relax();
+	}
+
+	nb8800_modb(priv, NB8800_TX_CTL1, TX_EN, enable);
+}
+
+/* Set or clear RX_EN in NB8800_RX_CTL according to @enable. */
+static void nb8800_mac_rx(struct net_device *dev, bool enable)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	nb8800_modb(priv, NB8800_RX_CTL, RX_EN, enable);
+}
+
+/* Set or clear RX_AF_EN in NB8800_RX_CTL according to @enable. */
+static void nb8800_mac_af(struct net_device *dev, bool enable)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	nb8800_modb(priv, NB8800_RX_CTL, RX_AF_EN, enable);
+}
+
+/* Set RCR_EN in NB8800_RXC_CR (written only if the bit is not already set). */
+static void nb8800_start_rx(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	nb8800_setl(priv, NB8800_RXC_CR, RCR_EN);
+}
+
+/* Allocate and DMA-map a fresh receive buffer for RX ring slot @i.
+ *
+ * The fragment is obtained with napi_alloc_frag() when @napi is true and
+ * with netdev_alloc_frag() otherwise.  On success the backing page and the
+ * offset inside it are recorded in priv->rx_bufs[i] and the DMA address is
+ * stored in the descriptor's s_addr field.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation or the mapping fails.
+ */
+static int nb8800_alloc_rx(struct net_device *dev, unsigned int i, bool napi)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	int alloc_size = L1_CACHE_ALIGN(RX_BUF_SIZE);
+	unsigned long off;
+	dma_addr_t mapping;
+	struct page *pg;
+	void *buf;
+
+	if (napi)
+		buf = napi_alloc_frag(alloc_size);
+	else
+		buf = netdev_alloc_frag(alloc_size);
+
+	if (!buf)
+		return -ENOMEM;
+
+	/* the fragment may live inside a compound page; map via its head */
+	pg = virt_to_head_page(buf);
+	off = buf - page_address(pg);
+
+	mapping = dma_map_page(&dev->dev, pg, off, RX_BUF_SIZE,
+			       DMA_FROM_DEVICE);
+	if (dma_mapping_error(&dev->dev, mapping)) {
+		skb_free_frag(buf);
+		return -ENOMEM;
+	}
+
+	priv->rx_bufs[i].page = pg;
+	priv->rx_bufs[i].offset = off;
+	priv->rx_descs[i].desc.s_addr = mapping;
+
+	return 0;
+}
+
+/* Pass the frame held in RX ring slot @i (@len bytes) up the stack.
+ *
+ * Frames of at most RX_COPYBREAK bytes are copied into a freshly allocated
+ * skb and the ring buffer is kept in place for reuse.  Larger frames get
+ * only the first RX_COPYHDR bytes copied; the remainder is attached as a
+ * page fragment and the ring slot is refilled with a new buffer first.
+ *
+ * If either the skb or the replacement buffer cannot be allocated the
+ * frame is dropped (rx_dropped is incremented) and the existing ring
+ * buffer stays in the slot.
+ */
+static void nb8800_receive(struct net_device *dev, unsigned int i,
+			   unsigned int len)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct nb8800_rx_desc *rxd = &priv->rx_descs[i];
+	struct page *page = priv->rx_bufs[i].page;
+	int offset = priv->rx_bufs[i].offset;
+	void *data = page_address(page) + offset;
+	dma_addr_t dma = rxd->desc.s_addr;
+	struct sk_buff *skb;
+	unsigned int size;
+	int err;
+
+	size = len <= RX_COPYBREAK ? len : RX_COPYHDR;
+
+	skb = napi_alloc_skb(&priv->napi, size);
+	if (!skb) {
+		netdev_err(dev, "rx skb allocation failed\n");
+		dev->stats.rx_dropped++;
+		return;
+	}
+
+	if (len <= RX_COPYBREAK) {
+		dma_sync_single_for_cpu(&dev->dev, dma, len, DMA_FROM_DEVICE);
+		memcpy(skb_put(skb, len), data, len);
+		dma_sync_single_for_device(&dev->dev, dma, len,
+					   DMA_FROM_DEVICE);
+	} else {
+		/* page, offset and dma were sampled above, so refilling the
+		 * slot here does not lose track of the buffer being received
+		 */
+		err = nb8800_alloc_rx(dev, i, true);
+		if (err) {
+			netdev_err(dev, "rx buffer allocation failed\n");
+			dev->stats.rx_dropped++;
+			/* the skb allocated above is not handed to anyone */
+			kfree_skb(skb);
+			return;
+		}
+
+		dma_unmap_page(&dev->dev, dma, RX_BUF_SIZE, DMA_FROM_DEVICE);
+		memcpy(skb_put(skb, RX_COPYHDR), data, RX_COPYHDR);
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+				offset + RX_COPYHDR, len - RX_COPYHDR,
+				RX_BUF_SIZE);
+	}
+
+	skb->protocol = eth_type_trans(skb, dev);
+	napi_gro_receive(&priv->napi, skb);
+}
+
+/* Account a receive error described by the descriptor status word @report.
+ *
+ * rx_errors is always incremented; the more specific counters are bumped
+ * for each matching error bit in @report.
+ */
+static void nb8800_rx_error(struct net_device *dev, u32 report)
+{
+	struct net_device_stats *stats = &dev->stats;
+
+	stats->rx_errors++;
+
+	if (report & RX_LENGTH_ERR)
+		stats->rx_length_errors++;
+	if (report & RX_FCS_ERR)
+		stats->rx_crc_errors++;
+	if (report & RX_FIFO_OVERRUN)
+		stats->rx_fifo_errors++;
+	if (report & RX_ALIGNMENT_ERROR)
+		stats->rx_frame_errors++;
+}
+
+/* NAPI poll callback.
+ *
+ * First reclaims completed transmit descriptors via nb8800_tx_done(), then
+ * walks the RX ring starting at the slot after priv->rx_eoc, processing
+ * descriptors whose report word is non-zero until @budget is reached or an
+ * unreported descriptor is found.  Afterwards the DESC_EOC marker is moved
+ * to the last processed descriptor and RCR_EN is set again.
+ *
+ * Returns the number of RX descriptors processed.
+ */
+static int nb8800_poll(struct napi_struct *napi, int budget)
+{
+	struct net_device *dev = napi->dev;
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct nb8800_rx_desc *rxd;
+	unsigned int last = priv->rx_eoc;
+	unsigned int next;
+	int work = 0;
+
+	nb8800_tx_done(dev);
+
+again:
+	while (work < budget) {
+		struct nb8800_rx_buf *rxb;
+		unsigned int len;
+
+		next = (last + 1) % RX_DESC_COUNT;
+
+		/* NOTE(review): rxb is assigned but not otherwise used here */
+		rxb = &priv->rx_bufs[next];
+		rxd = &priv->rx_descs[next];
+
+		/* a zero report means this descriptor has not completed yet */
+		if (!rxd->report)
+			break;
+
+		len = RX_BYTES_TRANSFERRED(rxd->report);
+
+		if (IS_RX_ERROR(rxd->report))
+			nb8800_rx_error(dev, rxd->report);
+		else
+			nb8800_receive(dev, next, len);
+
+		/* packet/byte counters are bumped for errored frames too */
+		dev->stats.rx_packets++;
+		dev->stats.rx_bytes += len;
+
+		if (rxd->report & RX_MULTICAST_PKT)
+			dev->stats.multicast++;
+
+		/* clear the report so the slot reads as pending next time */
+		rxd->report = 0;
+		last = next;
+		work++;
+	}
+
+	if (work) {
+		priv->rx_descs[last].desc.config |= DESC_EOC;
+		wmb();	/* ensure new EOC is written before clearing old */
+		priv->rx_descs[priv->rx_eoc].desc.config &= ~DESC_EOC;
+		priv->rx_eoc = last;
+		nb8800_start_rx(dev);
+	}
+
+	if (work < budget) {
+		nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq);
+
+		/* If a packet arrived after we last checked but
+		 * before writing RX_ITR, the interrupt will be
+		 * delayed, so we retrieve it now.
+		 */
+		if (priv->rx_descs[next].report)
+			goto again;
+
+		napi_complete_done(napi, work);
+	}
+
+	return work;
+}
+
+/* Start TX DMA on the chain at priv->tx_queue, if one is ready.
+ *
+ * Does nothing when the buffer at tx_queue is not marked ready or when
+ * TCR_EN is already set in NB8800_TXC_CR.  Otherwise programs the chain's
+ * first descriptor address, sets TCR_EN and advances tx_queue past the
+ * chain.  Both callers in this file invoke it with priv->tx_lock held.
+ */
+static void __nb8800_tx_dma_start(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct nb8800_tx_buf *head = &priv->tx_bufs[priv->tx_queue];
+	u32 ctrl;
+
+	if (!head->ready)
+		return;
+
+	ctrl = nb8800_readl(priv, NB8800_TXC_CR);
+	if (ctrl & TCR_EN)
+		return;
+
+	nb8800_writel(priv, NB8800_TX_DESC_ADDR, head->dma_desc);
+	wmb();		/* ensure desc addr is written before starting DMA */
+	nb8800_writel(priv, NB8800_TXC_CR, ctrl | TCR_EN);
+
+	priv->tx_queue = (priv->tx_queue + head->chain_len) % TX_DESC_COUNT;
+}
+
+/* Start TX DMA under priv->tx_lock, taken with spin_lock_irq().
+ * Called from nb8800_xmit().
+ */
+static void nb8800_tx_dma_start(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	spinlock_t *lock = &priv->tx_lock;
+
+	spin_lock_irq(lock);
+	__nb8800_tx_dma_start(dev);
+	spin_unlock_irq(lock);
+}
+
+/* Start TX DMA under priv->tx_lock, taken with plain spin_lock().
+ * Called from the interrupt handler nb8800_irq().
+ */
+static void nb8800_tx_dma_start_irq(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	spinlock_t *lock = &priv->tx_lock;
+
+	spin_lock(lock);
+	__nb8800_tx_dma_start(dev);
+	spin_unlock(lock);
+}
+
+/* Queue @skb for transmission.
+ *
+ * Returns NETDEV_TX_BUSY (after stopping the queue) when the number of
+ * free descriptors is at or below NB8800_DESC_LOW, otherwise NETDEV_TX_OK.
+ * A DMA mapping failure drops the packet and still returns NETDEV_TX_OK.
+ *
+ * Packets sent with xmit_more set are linked into a chain; the chain is
+ * marked ready and DMA is kicked only once a packet without xmit_more
+ * arrives.
+ */
+static int nb8800_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct nb8800_tx_desc *txd;
+	struct nb8800_tx_buf *txb;
+	struct nb8800_dma_desc *desc;
+	dma_addr_t dma_addr;
+	unsigned int dma_len;
+	unsigned int align;
+	unsigned int next;
+
+	if (atomic_read(&priv->tx_free) <= NB8800_DESC_LOW) {
+		netif_stop_queue(dev);
+		return NETDEV_TX_BUSY;
+	}
+
+	/* number of leading bytes before the next 8-byte boundary */
+	align = (8 - (uintptr_t)skb->data) & 7;
+
+	/* only the 8-byte aligned remainder of the packet is DMA mapped */
+	dma_len = skb->len - align;
+	dma_addr = dma_map_single(&dev->dev, skb->data + align,
+				  dma_len, DMA_TO_DEVICE);
+
+	if (dma_mapping_error(&dev->dev, dma_addr)) {
+		netdev_err(dev, "tx dma mapping error\n");
+		kfree_skb(skb);
+		dev->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
+
+	if (atomic_dec_return(&priv->tx_free) <= NB8800_DESC_LOW) {
+		netif_stop_queue(dev);
+		/* queue is stopped, so force the pending chain to be started */
+		skb->xmit_more = 0;
+	}
+
+	next = priv->tx_next;
+	txb = &priv->tx_bufs[next];
+	txd = &priv->tx_descs[next];
+	desc = &txd->desc[0];
+
+	next = (next + 1) % TX_DESC_COUNT;
+
+	if (align) {
+		/* unaligned head bytes are copied into the descriptor's own
+		 * buffer and sent through an extra leading DMA descriptor
+		 */
+		memcpy(txd->buf, skb->data, align);
+
+		desc->s_addr =
+			txb->dma_desc + offsetof(struct nb8800_tx_desc, buf);
+		desc->n_addr = txb->dma_desc + sizeof(txd->desc[0]);
+		desc->config = DESC_BTS(2) | DESC_DS | align;
+
+		desc++;
+	}
+
+	desc->s_addr = dma_addr;
+	desc->n_addr = priv->tx_bufs[next].dma_desc;
+	desc->config = DESC_BTS(2) | DESC_DS | DESC_EOF | dma_len;
+
+	/* the last packet of a chain carries the end-of-chain flag */
+	if (!skb->xmit_more)
+		desc->config |= DESC_EOC;
+
+	txb->skb = skb;
+	txb->dma_addr = dma_addr;
+	txb->dma_len = dma_len;
+
+	/* the first buffer of a chain tracks the chain's length */
+	if (!priv->tx_chain) {
+		txb->chain_len = 1;
+		priv->tx_chain = txb;
+	} else {
+		priv->tx_chain->chain_len++;
+	}
+
+	netdev_sent_queue(dev, skb->len);
+
+	priv->tx_next = next;
+
+	if (!skb->xmit_more) {
+		/* barrier precedes setting ready, which
+		 * __nb8800_tx_dma_start() tests before starting DMA
+		 */
+		smp_wmb();
+		priv->tx_chain->ready = true;
+		priv->tx_chain = NULL;
+		nb8800_tx_dma_start(dev);
+	}
+
+	return NETDEV_TX_OK;
+}
+
+/* Account a transmit error described by the descriptor status word @report.
+ *
+ * tx_errors is always incremented; the more specific counters are bumped
+ * for each matching error bit in @report.
+ */
+static void nb8800_tx_error(struct net_device *dev, u32 report)
+{
+	struct net_device_stats *stats = &dev->stats;
+
+	stats->tx_errors++;
+
+	if (report & TX_LATE_COLLISION)
+		stats->collisions++;
+	if (report & TX_PACKET_DROPPED)
+		stats->tx_dropped++;
+	if (report & TX_FIFO_UNDERRUN)
+		stats->tx_fifo_errors++;
+}
+
+/* Reclaim transmit descriptors that have completed.
+ *
+ * Walks the ring from priv->tx_done up to the value of priv->tx_next
+ * sampled on entry, stopping at the first descriptor whose report word is
+ * still zero.  Each completed entry is unmapped, its skb released and the
+ * statistics updated.  If anything was reclaimed, the slots are returned
+ * to tx_free and the queue is woken.
+ */
+static void nb8800_tx_done(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	unsigned int limit = priv->tx_next;
+	unsigned int done = priv->tx_done;
+	unsigned int packets = 0;
+	unsigned int len = 0;
+
+	while (done != limit) {
+		struct nb8800_tx_desc *txd = &priv->tx_descs[done];
+		struct nb8800_tx_buf *txb = &priv->tx_bufs[done];
+		struct sk_buff *skb;
+
+		/* a zero report means this descriptor has not completed yet */
+		if (!txd->report)
+			break;
+
+		skb = txb->skb;
+		len += skb->len;
+
+		dma_unmap_single(&dev->dev, txb->dma_addr, txb->dma_len,
+				 DMA_TO_DEVICE);
+
+		if (IS_TX_ERROR(txd->report)) {
+			nb8800_tx_error(dev, txd->report);
+			kfree_skb(skb);
+		} else {
+			consume_skb(skb);
+		}
+
+		/* these counters are updated for errored packets as well */
+		dev->stats.tx_packets++;
+		dev->stats.tx_bytes += TX_BYTES_TRANSFERRED(txd->report);
+		dev->stats.collisions += TX_EARLY_COLLISIONS(txd->report);
+
+		/* reset the slot so it can be reused by nb8800_xmit() */
+		txb->skb = NULL;
+		txb->ready = false;
+		txd->report = 0;
+
+		done = (done + 1) % TX_DESC_COUNT;
+		packets++;
+	}
+
+	if (packets) {
+		/* order the slot resets above before the tx_free update */
+		smp_mb__before_atomic();
+		atomic_add(packets, &priv->tx_free);
+		netdev_completed_queue(dev, packets, len);
+		netif_wake_queue(dev);
+		priv->tx_done = done;
+	}
+}
+
+/* Interrupt handler.
+ *
+ * Reads the TX and RX channel status registers in turn.  For each non-zero
+ * status the value read is written back to the same register (presumably
+ * acknowledging the reported bits) and the individual events are handled.
+ *
+ * Returns IRQ_HANDLED if either status register was non-zero, IRQ_NONE
+ * otherwise.
+ */
+static irqreturn_t nb8800_irq(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct nb8800_priv *priv = netdev_priv(dev);
+	bool handled = false;
+	u32 tx_status;
+	u32 rx_status;
+
+	/* tx interrupt */
+	tx_status = nb8800_readl(priv, NB8800_TXC_SR);
+	if (tx_status) {
+		handled = true;
+		nb8800_writel(priv, NB8800_TXC_SR, tx_status);
+
+		if (tx_status & TSR_DI)
+			nb8800_tx_dma_start_irq(dev);
+
+		if (tx_status & TSR_TI)
+			napi_schedule_irqoff(&priv->napi);
+
+		if (unlikely(tx_status & TSR_DE))
+			netdev_err(dev, "TX DMA error\n");
+
+		/* should never happen with automatic status retrieval */
+		if (unlikely(tx_status & TSR_TO))
+			netdev_err(dev, "TX Status FIFO overflow\n");
+	}
+
+	/* rx interrupt */
+	rx_status = nb8800_readl(priv, NB8800_RXC_SR);
+	if (rx_status) {
+		handled = true;
+		nb8800_writel(priv, NB8800_RXC_SR, rx_status);
+
+		if (likely(rx_status & (RSR_RI | RSR_DI))) {
+			/* switch RX_ITR to its polling value while NAPI runs */
+			nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_poll);
+			napi_schedule_irqoff(&priv->napi);
+		}
+
+		if (unlikely(rx_status & RSR_DE))
+			netdev_err(dev, "RX DMA error\n");
+
+		/* should never happen with automatic status retrieval */
+		if (unlikely(rx_status & RSR_RO))
+			netdev_err(dev, "RX Status FIFO overflow\n");
+	}
+
+	return handled ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/* Program the MAC for the speed and duplex currently stored in priv.
+ *
+ * Updates the RGMII_MODE, HALF_DUPLEX and GMAC_MODE bits of
+ * NB8800_MAC_MODE, and writes NB8800_SLOT_TIME and NB8800_IC_THRESHOLD.
+ * The threshold is the PHY clock (125 MHz at gigabit speed, 25 MHz
+ * otherwise) divided by the rate of priv->clk, rounded up.
+ */
+static void nb8800_mac_config(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	const u32 mode_mask = RGMII_MODE | HALF_DUPLEX | GMAC_MODE;
+	u32 mode = priv->duplex ? 0 : HALF_DUPLEX;
+	u32 phy_clk = 25000000;
+	u32 slot_time = 128;
+	u32 ict;
+
+	if (priv->speed == SPEED_1000) {
+		mode |= GMAC_MODE;
+		if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII)
+			mode |= RGMII_MODE;
+
+		phy_clk = 125000000;
+
+		/* Should be 512 but register is only 8 bits */
+		slot_time = 255;
+	}
+
+	ict = DIV_ROUND_UP(phy_clk, clk_get_rate(priv->clk));
+
+	nb8800_writeb(priv, NB8800_IC_THRESHOLD, ict);
+	nb8800_writeb(priv, NB8800_SLOT_TIME, slot_time);
+	nb8800_maskb(priv, NB8800_MAC_MODE, mode_mask, mode);
+}
+
+/* Apply the pause (flow control) settings to the hardware.
+ *
+ * With pause autonegotiation enabled the rx/tx settings are first taken
+ * from the PHY; nothing is done if there is no PHY or no link.  The
+ * RCR_FL bit is only rewritten when it differs from the wanted tx pause
+ * state, and on a running interface that rewrite is bracketed by a DMA
+ * stop and an rx restart.
+ */
+static void nb8800_pause_config(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct phy_device *phydev = priv->phydev;
+	bool fl_set;
+
+	if (priv->pause_aneg) {
+		if (!phydev || !phydev->link)
+			return;
+
+		priv->pause_rx = phydev->pause;
+		priv->pause_tx = phydev->pause ^ phydev->asym_pause;
+	}
+
+	nb8800_modb(priv, NB8800_RX_CTL, RX_PAUSE_EN, priv->pause_rx);
+
+	fl_set = !!(nb8800_readl(priv, NB8800_RXC_CR) & RCR_FL);
+	if (fl_set == priv->pause_tx)
+		return;
+
+	if (!netif_running(dev)) {
+		nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx);
+		return;
+	}
+
+	napi_disable(&priv->napi);
+	netif_tx_lock_bh(dev);
+	nb8800_dma_stop(dev);
+	nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx);
+	nb8800_start_rx(dev);
+	netif_tx_unlock_bh(dev);
+	napi_enable(&priv->napi);
+}
+
+/* PHY link-change callback (passed to of_phy_connect() in nb8800_open()).
+ *
+ * While the link is up, copies changed speed/duplex into priv, reprograms
+ * the MAC if either changed and refreshes the pause configuration.  The
+ * link status is printed whenever speed, duplex or link state changed.
+ */
+static void nb8800_link_reconfigure(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct phy_device *phydev = priv->phydev;
+	bool changed = false;
+
+	if (phydev->link) {
+		if (priv->speed != phydev->speed) {
+			priv->speed = phydev->speed;
+			changed = true;
+		}
+
+		if (priv->duplex != phydev->duplex) {
+			priv->duplex = phydev->duplex;
+			changed = true;
+		}
+
+		if (changed)
+			nb8800_mac_config(dev);
+
+		nb8800_pause_config(dev);
+	}
+
+	if (priv->link != phydev->link) {
+		priv->link = phydev->link;
+		changed = true;
+	}
+
+	if (changed)
+		phy_print_status(phydev);
+}
+
+/* Copy dev->dev_addr byte by byte into the SRC_ADDR registers and then
+ * into the UC_ADDR registers.
+ */
+static void nb8800_update_mac_addr(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	const unsigned char *mac = dev->dev_addr;
+	int n;
+
+	for (n = 0; n != ETH_ALEN; n++)
+		nb8800_writeb(priv, NB8800_SRC_ADDR(n), mac[n]);
+
+	for (n = 0; n != ETH_ALEN; n++)
+		nb8800_writeb(priv, NB8800_UC_ADDR(n), mac[n]);
+}
+
+/* ndo_set_mac_address: change the interface MAC address.
+ *
+ * @dev:  network device
+ * @addr: struct sockaddr holding the new address in sa_data
+ *
+ * Returns 0 on success, -EBUSY if the interface is running, or
+ * -EADDRNOTAVAIL if the requested address is not a valid unicast
+ * Ethernet address.
+ */
+static int nb8800_set_mac_address(struct net_device *dev, void *addr)
+{
+	struct sockaddr *sock = addr;
+
+	if (netif_running(dev))
+		return -EBUSY;
+
+	/* Reject multicast and all-zero addresses before touching either
+	 * dev->dev_addr or the hardware address registers.
+	 */
+	if (!is_valid_ether_addr(sock->sa_data))
+		return -EADDRNOTAVAIL;
+
+	ether_addr_copy(dev->dev_addr, sock->sa_data);
+	nb8800_update_mac_addr(dev);
+
+	return 0;
+}
+
+/* Write @val to the MC_INIT register and poll until the register reads
+ * back as zero.  The result of the poll (success or timeout) is not
+ * checked.
+ */
+static void nb8800_mc_init(struct net_device *dev, int val)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	void __iomem *mc_init = priv->base + NB8800_MC_INIT;
+
+	nb8800_writeb(priv, NB8800_MC_INIT, val);
+
+	readb_poll_timeout_atomic(mc_init, val, val == 0, 1, 1000);
+}
+
+/* ndo_set_rx_mode: configure receive address filtering.
+ *
+ * Filtering is switched off entirely for promiscuous/all-multicast
+ * mode.  Otherwise it is switched on, nb8800_mc_init(dev, 0) is issued
+ * and each multicast address on the device list is written to the
+ * MC_ADDR registers followed by nb8800_mc_init(dev, 0xff).
+ */
+static void nb8800_set_rx_mode(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct netdev_hw_addr *entry;
+	int byte;
+
+	if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
+		nb8800_mac_af(dev, false);
+	} else {
+		nb8800_mac_af(dev, true);
+		nb8800_mc_init(dev, 0);
+
+		netdev_for_each_mc_addr(entry, dev) {
+			for (byte = 0; byte != ETH_ALEN; byte++)
+				nb8800_writeb(priv, NB8800_MC_ADDR(byte),
+					      entry->addr[byte]);
+
+			nb8800_mc_init(dev, 0xff);
+		}
+	}
+}
+
+/* Sizes in bytes of the rx and tx descriptor arrays, as passed to
+ * dma_alloc_coherent()/dma_free_coherent().
+ */
+#define RX_DESC_SIZE (RX_DESC_COUNT * sizeof(struct nb8800_rx_desc))
+#define TX_DESC_SIZE (TX_DESC_COUNT * sizeof(struct nb8800_tx_desc))
+
+/* Release everything nb8800_dma_init() may have allocated.
+ *
+ * Every pointer is checked before use and set to NULL afterwards, so
+ * the function copes with partially completed initialisation and with
+ * being called more than once.
+ */
+static void nb8800_dma_free(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct device *parent = dev->dev.parent;
+	unsigned int n;
+
+	if (priv->rx_bufs) {
+		for (n = 0; n < RX_DESC_COUNT; n++) {
+			struct page *page = priv->rx_bufs[n].page;
+
+			if (page)
+				put_page(page);
+		}
+
+		kfree(priv->rx_bufs);
+		priv->rx_bufs = NULL;
+	}
+
+	if (priv->tx_bufs) {
+		for (n = 0; n < TX_DESC_COUNT; n++)
+			kfree_skb(priv->tx_bufs[n].skb);
+
+		kfree(priv->tx_bufs);
+		priv->tx_bufs = NULL;
+	}
+
+	if (priv->rx_descs) {
+		dma_free_coherent(parent, RX_DESC_SIZE, priv->rx_descs,
+				  priv->rx_desc_dma);
+		priv->rx_descs = NULL;
+	}
+
+	if (priv->tx_descs) {
+		dma_free_coherent(parent, TX_DESC_SIZE, priv->tx_descs,
+				  priv->tx_desc_dma);
+		priv->tx_descs = NULL;
+	}
+}
+
+/* Put both descriptor rings back into their initial state.
+ *
+ * The rx descriptors are linked into a circular chain with the
+ * end-of-chain flag on the last entry, every tx descriptor gets its
+ * report address pointed at its own report word, the tx ring indices
+ * and free counter are reset, and the rx descriptor base address is
+ * written to the hardware.
+ */
+static void nb8800_dma_reset(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct nb8800_rx_desc *rxd;
+	struct nb8800_tx_desc *txd;
+	unsigned int i;
+
+	for (i = 0; i < RX_DESC_COUNT; i++) {
+		dma_addr_t rx_dma = priv->rx_desc_dma + i * sizeof(*rxd);
+
+		rxd = &priv->rx_descs[i];
+		/* next descriptor immediately follows this one */
+		rxd->desc.n_addr = rx_dma + sizeof(*rxd);
+		rxd->desc.r_addr =
+			rx_dma + offsetof(struct nb8800_rx_desc, report);
+		rxd->desc.config = priv->rx_dma_config;
+		rxd->report = 0;
+	}
+
+	/* rxd still points at the last descriptor: wrap it back to the
+	 * start of the ring and mark it as end of chain
+	 */
+	rxd->desc.n_addr = priv->rx_desc_dma;
+	rxd->desc.config |= DESC_EOC;
+
+	priv->rx_eoc = RX_DESC_COUNT - 1;
+
+	for (i = 0; i < TX_DESC_COUNT; i++) {
+		struct nb8800_tx_buf *txb = &priv->tx_bufs[i];
+		dma_addr_t r_dma = txb->dma_desc +
+			offsetof(struct nb8800_tx_desc, report);
+
+		txd = &priv->tx_descs[i];
+		/* both hardware descriptors share the one report word */
+		txd->desc[0].r_addr = r_dma;
+		txd->desc[1].r_addr = r_dma;
+		txd->report = 0;
+	}
+
+	priv->tx_next = 0;
+	priv->tx_queue = 0;
+	priv->tx_done = 0;
+	atomic_set(&priv->tx_free, TX_DESC_COUNT);
+
+	nb8800_writel(priv, NB8800_RX_DESC_ADDR, priv->rx_desc_dma);
+
+	wmb();		/* ensure all setup is written before starting */
+}
+
+/* Allocate the rx/tx descriptor rings and their bookkeeping arrays,
+ * populate every rx slot via nb8800_alloc_rx() and reset the rings.
+ *
+ * Returns 0 on success.  Any failure frees whatever was allocated so
+ * far and yields -ENOMEM.
+ */
+static int nb8800_dma_init(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct device *parent = dev->dev.parent;
+	unsigned int n;
+
+	priv->rx_descs = dma_alloc_coherent(parent, RX_DESC_SIZE,
+					    &priv->rx_desc_dma, GFP_KERNEL);
+	if (!priv->rx_descs)
+		goto fail;
+
+	priv->rx_bufs = kcalloc(RX_DESC_COUNT, sizeof(*priv->rx_bufs),
+				GFP_KERNEL);
+	if (!priv->rx_bufs)
+		goto fail;
+
+	for (n = 0; n < RX_DESC_COUNT; n++) {
+		if (nb8800_alloc_rx(dev, n, false))
+			goto fail;
+	}
+
+	priv->tx_descs = dma_alloc_coherent(parent, TX_DESC_SIZE,
+					    &priv->tx_desc_dma, GFP_KERNEL);
+	if (!priv->tx_descs)
+		goto fail;
+
+	priv->tx_bufs = kcalloc(TX_DESC_COUNT, sizeof(*priv->tx_bufs),
+				GFP_KERNEL);
+	if (!priv->tx_bufs)
+		goto fail;
+
+	/* remember the bus address of each tx descriptor */
+	for (n = 0; n < TX_DESC_COUNT; n++) {
+		struct nb8800_tx_buf *txb = &priv->tx_bufs[n];
+
+		txb->dma_desc =
+			priv->tx_desc_dma + n * sizeof(struct nb8800_tx_desc);
+	}
+
+	nb8800_dma_reset(dev);
+
+	return 0;
+
+fail:
+	nb8800_dma_free(dev);
+
+	return -ENOMEM;
+}
+
+/* Bring tx and rx DMA to a halt and reset the descriptor rings.
+ *
+ * Returns 0 on success, the error from the initial tx poll if tx does
+ * not become idle, or -ETIMEDOUT if rx DMA has not stopped after all
+ * retries.  In the first case the function returns before anything
+ * else is touched; in the latter the filter/loopback settings are
+ * still restored and the rings are still reset.
+ */
+static int nb8800_dma_stop(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct nb8800_tx_buf *txb = &priv->tx_bufs[0];
+	struct nb8800_tx_desc *txd = &priv->tx_descs[0];
+	int retry = 5;
+	u32 txcr;
+	u32 rxcr;
+	int err;
+	unsigned int i;
+
+	/* wait for tx to finish */
+	err = readl_poll_timeout_atomic(priv->base + NB8800_TXC_CR, txcr,
+					!(txcr & TCR_EN) &&
+					priv->tx_done == priv->tx_next,
+					1000, 1000000);
+	if (err)
+		return err;
+
+	/* The rx DMA only stops if it reaches the end of chain.
+	 * To make this happen, we set the EOC flag on all rx
+	 * descriptors, put the device in loopback mode, and send
+	 * a few dummy frames.  The interrupt handler will ignore
+	 * these since NAPI is disabled and no real frames are in
+	 * the tx queue.
+	 */
+
+	for (i = 0; i < RX_DESC_COUNT; i++)
+		priv->rx_descs[i].desc.config |= DESC_EOC;
+
+	/* Build the dummy frame in tx descriptor 0 using its zeroed
+	 * bounce buffer; the trailing 8 matches sizeof(txd->buf) and is
+	 * presumably the transfer length - confirm against the datasheet.
+	 */
+	txd->desc[0].s_addr =
+		txb->dma_desc + offsetof(struct nb8800_tx_desc, buf);
+	txd->desc[0].config = DESC_BTS(2) | DESC_DS | DESC_EOF | DESC_EOC | 8;
+	memset(txd->buf, 0, sizeof(txd->buf));
+
+	nb8800_mac_af(dev, false);
+	nb8800_setb(priv, NB8800_MAC_MODE, LOOPBACK_EN);
+
+	/* resend the dummy frame until RCR_EN clears or retries run out */
+	do {
+		nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc);
+		wmb();
+		nb8800_writel(priv, NB8800_TXC_CR, txcr | TCR_EN);
+
+		err = readl_poll_timeout_atomic(priv->base + NB8800_RXC_CR,
+						rxcr, !(rxcr & RCR_EN),
+						1000, 100000);
+	} while (err && --retry);
+
+	nb8800_mac_af(dev, true);
+	nb8800_clearb(priv, NB8800_MAC_MODE, LOOPBACK_EN);
+	nb8800_dma_reset(dev);
+
+	/* retry only reaches zero when every attempt timed out */
+	return retry ? 0 : -ETIMEDOUT;
+}
+
+/* Update the pause bits the PHY advertises from priv->pause_rx/tx.
+ *
+ * Mapping of (rx, tx) to advertisement bits:
+ *   rx and tx -> Pause
+ *   rx only   -> Pause | Asym_Pause
+ *   tx only   -> Asym_Pause
+ *   neither   -> none
+ *
+ * Does nothing when no PHY is attached.
+ */
+static void nb8800_pause_adv(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	const u32 pause_bits = ADVERTISED_Pause | ADVERTISED_Asym_Pause;
+	u32 adv = 0;
+
+	if (!priv->phydev)
+		return;
+
+	if (priv->pause_rx)
+		adv |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
+	if (priv->pause_tx)
+		adv ^= ADVERTISED_Asym_Pause;
+
+	priv->phydev->supported |= adv;
+
+	/* Drop whatever pause bits an earlier call advertised; merely
+	 * OR-ing the new bits in would leave stale ones set after the
+	 * user changes the pause configuration.
+	 */
+	priv->phydev->advertising &= ~pause_bits;
+	priv->phydev->advertising |= adv;
+}
+
+/* ndo_open: bring the interface up.
+ *
+ * Clears pending interrupt status, allocates the DMA rings, requests
+ * the interrupt, enables the MAC, connects and starts the PHY, and
+ * enables NAPI, the tx queue and rx DMA.
+ *
+ * Returns 0 on success or a negative errno; -ENODEV if the PHY cannot
+ * be connected.  On failure the interrupt and DMA rings are released
+ * again.
+ */
+static int nb8800_open(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	int err;
+
+	/* clear any pending interrupts */
+	nb8800_writel(priv, NB8800_RXC_SR, 0xf);
+	nb8800_writel(priv, NB8800_TXC_SR, 0xf);
+
+	err = nb8800_dma_init(dev);
+	if (err)
+		return err;
+
+	err = request_irq(dev->irq, nb8800_irq, 0, dev_name(&dev->dev), dev);
+	if (err)
+		goto err_free_dma;
+
+	nb8800_mac_rx(dev, true);
+	nb8800_mac_tx(dev, true);
+
+	priv->phydev = of_phy_connect(dev, priv->phy_node,
+				      nb8800_link_reconfigure, 0,
+				      priv->phy_mode);
+	if (!priv->phydev) {
+		/* err is still 0 from request_irq(); without an explicit
+		 * error code the failed open would be reported as success
+		 */
+		err = -ENODEV;
+		goto err_free_irq;
+	}
+
+	nb8800_pause_adv(dev);
+
+	netdev_reset_queue(dev);
+	napi_enable(&priv->napi);
+	netif_start_queue(dev);
+
+	nb8800_start_rx(dev);
+	phy_start(priv->phydev);
+
+	return 0;
+
+err_free_irq:
+	free_irq(dev->irq, dev);
+err_free_dma:
+	nb8800_dma_free(dev);
+
+	return err;
+}
+
+/* ndo_stop: take the interface down.
+ *
+ * Tears things down in this order: PHY stopped, tx queue and NAPI
+ * stopped, DMA stopped, MAC rx/tx disabled, PHY disconnected, interrupt
+ * freed and DMA memory released.  The return value of nb8800_dma_stop()
+ * is not checked.  Always returns 0.
+ */
+static int nb8800_stop(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	phy_stop(priv->phydev);
+
+	netif_stop_queue(dev);
+	napi_disable(&priv->napi);
+
+	nb8800_dma_stop(dev);
+	nb8800_mac_rx(dev, false);
+	nb8800_mac_tx(dev, false);
+
+	phy_disconnect(priv->phydev);
+	/* other entry points test this pointer to see if a PHY is attached */
+	priv->phydev = NULL;
+
+	free_irq(dev->irq, dev);
+
+	nb8800_dma_free(dev);
+
+	return 0;
+}
+
+/* ndo_do_ioctl: forward MII ioctls to the attached PHY.
+ *
+ * Returns -ENODEV when no PHY is attached, otherwise whatever
+ * phy_mii_ioctl() returns.
+ */
+static int nb8800_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	/* The PHY is only connected between nb8800_open() and
+	 * nb8800_stop(); outside that window the pointer is NULL.
+	 */
+	if (!priv->phydev)
+		return -ENODEV;
+
+	return phy_mii_ioctl(priv->phydev, rq, cmd);
+}
+
+/* Net device callbacks; installed on the net_device in nb8800_probe().
+ * MTU changes and address validation use the generic Ethernet helpers.
+ */
+static const struct net_device_ops nb8800_netdev_ops = {
+	.ndo_open		= nb8800_open,
+	.ndo_stop		= nb8800_stop,
+	.ndo_start_xmit		= nb8800_xmit,
+	.ndo_set_mac_address	= nb8800_set_mac_address,
+	.ndo_set_rx_mode	= nb8800_set_rx_mode,
+	.ndo_do_ioctl		= nb8800_ioctl,
+	.ndo_change_mtu		= eth_change_mtu,
+	.ndo_validate_addr	= eth_validate_addr,
+};
+
+/* ethtool get_settings: read link settings from the attached PHY.
+ * Returns -ENODEV when no PHY is attached.
+ */
+static int nb8800_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct phy_device *phydev = priv->phydev;
+
+	return phydev ? phy_ethtool_gset(phydev, cmd) : -ENODEV;
+}
+
+/* ethtool set_settings: apply link settings through the attached PHY.
+ * Returns -ENODEV when no PHY is attached.
+ */
+static int nb8800_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct phy_device *phydev = priv->phydev;
+
+	return phydev ? phy_ethtool_sset(phydev, cmd) : -ENODEV;
+}
+
+/* ethtool nway_reset: restart autonegotiation on the attached PHY.
+ * Returns -ENODEV when no PHY is attached.
+ */
+static int nb8800_nway_reset(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct phy_device *phydev = priv->phydev;
+
+	return phydev ? genphy_restart_aneg(phydev) : -ENODEV;
+}
+
+/* ethtool get_pauseparam: report the pause settings stored in priv. */
+static void nb8800_get_pauseparam(struct net_device *dev,
+				  struct ethtool_pauseparam *pp)
+{
+	const struct nb8800_priv *priv = netdev_priv(dev);
+
+	pp->tx_pause = priv->pause_tx;
+	pp->rx_pause = priv->pause_rx;
+	pp->autoneg = priv->pause_aneg;
+}
+
+/* ethtool set_pauseparam: store the requested pause settings, update
+ * the PHY advertisement, then either restart autonegotiation (pause
+ * autoneg on, PHY attached) or program the hardware directly (pause
+ * autoneg off).  Always returns 0.
+ */
+static int nb8800_set_pauseparam(struct net_device *dev,
+				 struct ethtool_pauseparam *pp)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	priv->pause_aneg = pp->autoneg;
+	priv->pause_rx = pp->rx_pause;
+	priv->pause_tx = pp->tx_pause;
+
+	nb8800_pause_adv(dev);
+
+	if (priv->pause_aneg) {
+		if (priv->phydev)
+			phy_start_aneg(priv->phydev);
+	} else {
+		nb8800_pause_config(dev);
+	}
+
+	return 0;
+}
+
+/* Names of the ethtool statistics.
+ *
+ * nb8800_get_ethtool_stats() fills the first half of its output from
+ * statistic indices 0..N-1 (rx) and the second half from the same
+ * indices with bit 7 set (tx), so the rx and tx groups below must stay
+ * the same length and in hardware index order.
+ */
+static const char nb8800_stats_names[][ETH_GSTRING_LEN] = {
+	"rx_bytes_ok",
+	"rx_frames_ok",
+	"rx_undersize_frames",
+	"rx_fragment_frames",
+	"rx_64_byte_frames",
+	"rx_127_byte_frames",
+	"rx_255_byte_frames",
+	"rx_511_byte_frames",
+	"rx_1023_byte_frames",
+	"rx_max_size_frames",
+	"rx_oversize_frames",
+	"rx_bad_fcs_frames",
+	"rx_broadcast_frames",
+	"rx_multicast_frames",
+	"rx_control_frames",
+	"rx_pause_frames",
+	"rx_unsup_control_frames",
+	"rx_align_error_frames",
+	"rx_overrun_frames",
+	"rx_jabber_frames",
+	"rx_bytes",
+	"rx_frames",
+
+	"tx_bytes_ok",
+	"tx_frames_ok",
+	"tx_64_byte_frames",
+	"tx_127_byte_frames",
+	"tx_255_byte_frames",
+	"tx_511_byte_frames",
+	"tx_1023_byte_frames",
+	"tx_max_size_frames",
+	"tx_oversize_frames",
+	"tx_broadcast_frames",
+	"tx_multicast_frames",
+	"tx_control_frames",
+	"tx_pause_frames",
+	"tx_underrun_frames",
+	"tx_single_collision_frames",
+	"tx_multi_collision_frames",
+	"tx_deferred_collision_frames",
+	"tx_late_collision_frames",
+	"tx_excessive_collision_frames",
+	"tx_bytes",
+	"tx_frames",
+	"tx_collisions",
+};
+
+#define NB8800_NUM_STATS ARRAY_SIZE(nb8800_stats_names)
+
+/* ethtool get_sset_count: only the statistics string set is supported;
+ * any other set yields -EOPNOTSUPP.
+ */
+static int nb8800_get_sset_count(struct net_device *dev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return NB8800_NUM_STATS;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/* ethtool get_strings: copy the statistics names into @buf; other
+ * string sets leave @buf untouched.
+ */
+static void nb8800_get_strings(struct net_device *dev, u32 sset, u8 *buf)
+{
+	if (sset != ETH_SS_STATS)
+		return;
+
+	memcpy(buf, &nb8800_stats_names, sizeof(nb8800_stats_names));
+}
+
+/* Read one hardware statistics counter: write @index to the STAT_INDEX
+ * register, then return the value read from STAT_DATA.
+ */
+static u32 nb8800_read_stat(struct net_device *dev, int index)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	/* select the counter before reading the data register */
+	nb8800_writeb(priv, NB8800_STAT_INDEX, index);
+
+	return nb8800_readl(priv, NB8800_STAT_DATA);
+}
+
+/* ethtool get_ethtool_stats: fill @st in the order of
+ * nb8800_stats_names.  The first half of the array receives the
+ * counters at indices 0..half-1, the second half the counters at the
+ * same indices with bit 7 set.
+ */
+static void nb8800_get_ethtool_stats(struct net_device *dev,
+				     struct ethtool_stats *estats, u64 *st)
+{
+	const unsigned int half = NB8800_NUM_STATS / 2;
+	u64 *rx_out = st;
+	u64 *tx_out = st + half;
+	unsigned int idx;
+
+	for (idx = 0; idx < half; idx++) {
+		u32 rx_val = nb8800_read_stat(dev, idx);
+		u32 tx_val = nb8800_read_stat(dev, idx | 0x80);
+
+		rx_out[idx] = rx_val;
+		tx_out[idx] = tx_val;
+	}
+}
+
+/* ethtool callbacks; installed on the net_device in nb8800_probe().
+ * Link state reporting uses the generic ethtool_op_get_link helper.
+ */
+static const struct ethtool_ops nb8800_ethtool_ops = {
+	.get_settings		= nb8800_get_settings,
+	.set_settings		= nb8800_set_settings,
+	.nway_reset		= nb8800_nway_reset,
+	.get_link		= ethtool_op_get_link,
+	.get_pauseparam		= nb8800_get_pauseparam,
+	.set_pauseparam		= nb8800_set_pauseparam,
+	.get_sset_count		= nb8800_get_sset_count,
+	.get_strings		= nb8800_get_strings,
+	.get_ethtool_stats	= nb8800_get_ethtool_stats,
+};
+
+/* One-time MAC and DMA controller setup, called from nb8800_probe().
+ *
+ * Besides programming registers this also initialises the software
+ * state derived from the clock rate (rx_itr_irq, rx_itr_poll), the
+ * default rx descriptor config word and the default pause settings.
+ * Always returns 0.
+ */
+static int nb8800_hw_init(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	u32 val;
+
+	val = TX_RETRY_EN | TX_PAD_EN | TX_APPEND_FCS;
+	nb8800_writeb(priv, NB8800_TX_CTL1, val);
+
+	/* Collision retry count */
+	nb8800_writeb(priv, NB8800_TX_CTL2, 5);
+
+	val = RX_PAD_STRIP | RX_AF_EN;
+	nb8800_writeb(priv, NB8800_RX_CTL, val);
+
+	/* Chosen by fair dice roll */
+	nb8800_writeb(priv, NB8800_RANDOM_SEED, 4);
+
+	/* TX cycles per deferral period */
+	nb8800_writeb(priv, NB8800_TX_SDP, 12);
+
+	/* The following three threshold values have been
+	 * experimentally determined for good results.
+	 */
+
+	/* RX/TX FIFO threshold for partial empty (64-bit entries) */
+	nb8800_writeb(priv, NB8800_PE_THRESHOLD, 0);
+
+	/* RX/TX FIFO threshold for partial full (64-bit entries) */
+	nb8800_writeb(priv, NB8800_PF_THRESHOLD, 255);
+
+	/* Buffer size for transmit (64-bit entries) */
+	nb8800_writeb(priv, NB8800_TX_BUFSIZE, 64);
+
+	/* Configure tx DMA */
+
+	val = nb8800_readl(priv, NB8800_TXC_CR);
+	val &= TCR_LE;		/* keep endian setting */
+	val |= TCR_DM;		/* DMA descriptor mode */
+	val |= TCR_RS;		/* automatically store tx status  */
+	val |= TCR_DIE;		/* interrupt on DMA chain completion */
+	val |= TCR_TFI(7);	/* interrupt after 7 frames transmitted */
+	val |= TCR_BTS(2);	/* 32-byte bus transaction size */
+	nb8800_writel(priv, NB8800_TXC_CR, val);
+
+	/* TX complete interrupt after 10 ms or 7 frames (see above) */
+	val = clk_get_rate(priv->clk) / 100;
+	nb8800_writel(priv, NB8800_TX_ITR, val);
+
+	/* Configure rx DMA */
+
+	val = nb8800_readl(priv, NB8800_RXC_CR);
+	val &= RCR_LE;		/* keep endian setting */
+	val |= RCR_DM;		/* DMA descriptor mode */
+	val |= RCR_RS;		/* automatically store rx status */
+	val |= RCR_DIE;		/* interrupt at end of DMA chain */
+	val |= RCR_RFI(7);	/* interrupt after 7 frames received */
+	val |= RCR_BTS(2);	/* 32-byte bus transaction size */
+	nb8800_writel(priv, NB8800_RXC_CR, val);
+
+	/* The rx interrupt can fire before the DMA has completed
+	 * unless a small delay is added.  50 us is hopefully enough.
+	 */
+	priv->rx_itr_irq = clk_get_rate(priv->clk) / 20000;
+
+	/* In NAPI poll mode we want to disable interrupts, but the
+	 * hardware does not permit this.  Delay 10 ms instead.
+	 */
+	priv->rx_itr_poll = clk_get_rate(priv->clk) / 100;
+
+	nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq);
+
+	/* Default config word copied into every rx descriptor by
+	 * nb8800_dma_reset()
+	 */
+	priv->rx_dma_config = RX_BUF_SIZE | DESC_BTS(2) | DESC_DS | DESC_EOF;
+
+	/* Flow control settings */
+
+	/* Pause time of 0.1 ms */
+	val = 100000 / 512;
+	/* high byte goes to PQ1, low byte to PQ2 */
+	nb8800_writeb(priv, NB8800_PQ1, val >> 8);
+	nb8800_writeb(priv, NB8800_PQ2, val & 0xff);
+
+	/* Auto-negotiate by default */
+	priv->pause_aneg = true;
+	priv->pause_rx = true;
+	priv->pause_tx = true;
+
+	nb8800_mc_init(dev, 0);
+
+	return 0;
+}
+
+/* Tango-specific init: select the pad mode matching the PHY interface
+ * mode and write it to the PAD_MODE register.
+ *
+ * Returns 0 on success or -EINVAL (after logging an error) for a PHY
+ * mode other than MII, GMII, RGMII or RGMII-TXID.
+ */
+static int nb8800_tangox_init(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	u32 pad;
+
+	switch (priv->phy_mode) {
+	case PHY_INTERFACE_MODE_MII:
+	case PHY_INTERFACE_MODE_GMII:
+		pad = PAD_MODE_MII;
+		break;
+
+	case PHY_INTERFACE_MODE_RGMII:
+		pad = PAD_MODE_RGMII;
+		break;
+
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+		pad = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
+		break;
+
+	default:
+		dev_err(dev->dev.parent, "unsupported phy mode %s\n",
+			phy_modes(priv->phy_mode));
+		return -EINVAL;
+	}
+
+	nb8800_writeb(priv, NB8800_TANGOX_PAD_MODE, pad);
+
+	return 0;
+}
+
+/* Tango-specific reset: write 0 then, after a 1-10 ms sleep, 1 to the
+ * RESET register, and program the MDIO clock divider from the core
+ * clock rate and MAX_MDC_CLOCK.  Always returns 0.
+ */
+static int nb8800_tangox_reset(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	int clk_div;
+
+	nb8800_writeb(priv, NB8800_TANGOX_RESET, 0);
+	usleep_range(1000, 10000);
+	nb8800_writeb(priv, NB8800_TANGOX_RESET, 1);
+
+	wmb();		/* ensure reset is cleared before proceeding */
+
+	/* rounded up, so the divided clock cannot exceed the limit */
+	clk_div = DIV_ROUND_UP(clk_get_rate(priv->clk), 2 * MAX_MDC_CLOCK);
+	nb8800_writew(priv, NB8800_TANGOX_MDIO_CLKDIV, clk_div);
+
+	return 0;
+}
+
+/* Platform hooks selected by the "sigma,smp8642-ethernet" compatible */
+static const struct nb8800_ops nb8800_tangox_ops = {
+	.init	= nb8800_tangox_init,
+	.reset	= nb8800_tangox_reset,
+};
+
+/* Tango4-specific init: the common tangox init plus a switch from
+ * frame-count rx interrupts to per-descriptor DMA completion
+ * interrupts.  Returns 0 or the error from nb8800_tangox_init().
+ */
+static int nb8800_tango4_init(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	int err = nb8800_tangox_init(dev);
+
+	if (err)
+		return err;
+
+	/* On tango4 interrupt on DMA completion per frame works and gives
+	 * better performance despite generating more rx interrupts.
+	 */
+
+	/* Disable unnecessary interrupt on rx completion */
+	nb8800_clearl(priv, NB8800_RXC_CR, RCR_RFI(7));
+
+	/* Request interrupt on descriptor DMA completion */
+	priv->rx_dma_config |= DESC_ID;
+
+	return 0;
+}
+
+/* Platform hooks selected by the "sigma,smp8734-ethernet" compatible;
+ * reset is shared with nb8800_tangox_ops.
+ */
+static const struct nb8800_ops nb8800_tango4_ops = {
+	.init	= nb8800_tango4_init,
+	.reset	= nb8800_tangox_reset,
+};
+
+/* Device tree match table.  The .data pointer, when present, selects
+ * the platform-specific nb8800_ops used by nb8800_probe(); the generic
+ * "aurora,nb8800" entry has none.
+ */
+static const struct of_device_id nb8800_dt_ids[] = {
+	{
+		.compatible = "aurora,nb8800",
+	},
+	{
+		.compatible = "sigma,smp8642-ethernet",
+		.data = &nb8800_tangox_ops,
+	},
+	{
+		.compatible = "sigma,smp8734-ethernet",
+		.data = &nb8800_tango4_ops,
+	},
+	{ }
+};
+/* Export the table so the module can be autoloaded from DT compatibles */
+MODULE_DEVICE_TABLE(of, nb8800_dt_ids);
+
+/* Platform driver probe.
+ *
+ * Maps the registers, allocates the net_device, enables the clock,
+ * runs the optional platform reset hook, registers the MDIO bus, looks
+ * up the PHY node, initialises the hardware and finally registers the
+ * net_device.
+ *
+ * Returns 0 on success or a negative errno.  Every error path releases
+ * what was acquired before it: the phy_node reference, the MDIO bus
+ * registration, the clock enable and the net_device.
+ */
+static int nb8800_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	const struct nb8800_ops *ops = NULL;
+	struct nb8800_priv *priv;
+	struct resource *res;
+	struct net_device *dev;
+	struct mii_bus *bus;
+	const unsigned char *mac;
+	void __iomem *base;
+	int irq;
+	int ret;
+
+	match = of_match_device(nb8800_dt_ids, &pdev->dev);
+	if (match)
+		ops = match->data;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq <= 0) {
+		dev_err(&pdev->dev, "No IRQ\n");
+		/* keep the real errno (e.g. -EPROBE_DEFER) when there is one */
+		return irq < 0 ? irq : -EINVAL;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	dev_dbg(&pdev->dev, "AU-NB8800 Ethernet at %pa\n", &res->start);
+
+	dev = alloc_etherdev(sizeof(*priv));
+	if (!dev)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, dev);
+	SET_NETDEV_DEV(dev, &pdev->dev);
+
+	priv = netdev_priv(dev);
+	priv->base = base;
+
+	priv->phy_mode = of_get_phy_mode(pdev->dev.of_node);
+	if (priv->phy_mode < 0)
+		priv->phy_mode = PHY_INTERFACE_MODE_RGMII;
+
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk)) {
+		dev_err(&pdev->dev, "failed to get clock\n");
+		ret = PTR_ERR(priv->clk);
+		goto err_free_dev;
+	}
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret)
+		goto err_free_dev;
+
+	spin_lock_init(&priv->tx_lock);
+
+	if (ops && ops->reset) {
+		ret = ops->reset(dev);
+		/* the clock is enabled by now and must be disabled again */
+		if (ret)
+			goto err_disable_clk;
+	}
+
+	bus = devm_mdiobus_alloc(&pdev->dev);
+	if (!bus) {
+		ret = -ENOMEM;
+		goto err_disable_clk;
+	}
+
+	bus->name = "nb8800-mii";
+	bus->read = nb8800_mdio_read;
+	bus->write = nb8800_mdio_write;
+	bus->parent = &pdev->dev;
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%lx.nb8800-mii",
+		 (unsigned long)res->start);
+	bus->priv = priv;
+
+	ret = of_mdiobus_register(bus, pdev->dev.of_node);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register MII bus\n");
+		goto err_disable_clk;
+	}
+
+	priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+	if (!priv->phy_node) {
+		dev_err(&pdev->dev, "no PHY specified\n");
+		ret = -ENODEV;
+		goto err_free_bus;
+	}
+
+	priv->mii_bus = bus;
+
+	ret = nb8800_hw_init(dev);
+	if (ret)
+		goto err_free_bus;
+
+	if (ops && ops->init) {
+		ret = ops->init(dev);
+		if (ret)
+			goto err_free_bus;
+	}
+
+	dev->netdev_ops = &nb8800_netdev_ops;
+	dev->ethtool_ops = &nb8800_ethtool_ops;
+	dev->flags |= IFF_MULTICAST;
+	dev->irq = irq;
+
+	mac = of_get_mac_address(pdev->dev.of_node);
+	if (mac)
+		ether_addr_copy(dev->dev_addr, mac);
+
+	if (!is_valid_ether_addr(dev->dev_addr))
+		eth_hw_addr_random(dev);
+
+	nb8800_update_mac_addr(dev);
+
+	netif_carrier_off(dev);
+
+	/* Add the NAPI context before the device becomes visible: once
+	 * register_netdev() returns the interface may be opened at any
+	 * time, and nb8800_open() enables NAPI.  On failure free_netdev()
+	 * removes the NAPI context again.
+	 */
+	netif_napi_add(dev, &priv->napi, nb8800_poll, NAPI_POLL_WEIGHT);
+
+	ret = register_netdev(dev);
+	if (ret) {
+		netdev_err(dev, "failed to register netdev\n");
+		goto err_free_dma;
+	}
+
+	netdev_info(dev, "MAC address %pM\n", dev->dev_addr);
+
+	return 0;
+
+err_free_dma:
+	nb8800_dma_free(dev);
+err_free_bus:
+	/* drop the reference taken by of_parse_phandle(); NULL is fine */
+	of_node_put(priv->phy_node);
+	mdiobus_unregister(bus);
+err_disable_clk:
+	clk_disable_unprepare(priv->clk);
+err_free_dev:
+	free_netdev(dev);
+
+	return ret;
+}
+
+/* Platform driver remove: undo nb8800_probe().
+ *
+ * Unregisters the net_device, drops the PHY node reference, unregisters
+ * the MDIO bus, disables the clock and frees the DMA memory and the
+ * net_device.  Always returns 0.
+ */
+static int nb8800_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct nb8800_priv *priv = netdev_priv(ndev);
+
+	unregister_netdev(ndev);
+
+	/* release the reference taken by of_parse_phandle() in probe */
+	of_node_put(priv->phy_node);
+
+	mdiobus_unregister(priv->mii_bus);
+
+	clk_disable_unprepare(priv->clk);
+
+	nb8800_dma_free(ndev);
+	free_netdev(ndev);
+
+	return 0;
+}
+
+/* Platform driver glue; devices are matched through nb8800_dt_ids */
+static struct platform_driver nb8800_driver = {
+	.driver = {
+		.name		= "nb8800",
+		.of_match_table	= nb8800_dt_ids,
+	},
+	.probe	= nb8800_probe,
+	.remove	= nb8800_remove,
+};
+
+module_platform_driver(nb8800_driver);
+
+MODULE_DESCRIPTION("Aurora AU-NB8800 Ethernet driver");
+MODULE_AUTHOR("Mans Rullgard <mans@mansr.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/aurora/nb8800.h b/drivers/net/ethernet/aurora/nb8800.h
new file mode 100644
index 000000000000..e5adbc2aac9f
--- /dev/null
+++ b/drivers/net/ethernet/aurora/nb8800.h
@@ -0,0 +1,316 @@
+#ifndef _NB8800_H_
+#define _NB8800_H_
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/phy.h>
+#include <linux/clk.h>
+#include <linux/bitops.h>
+
+#define RX_DESC_COUNT			256
+#define TX_DESC_COUNT			256
+
+#define NB8800_DESC_LOW			4
+
+#define RX_BUF_SIZE			1552
+
+#define RX_COPYBREAK			256
+#define RX_COPYHDR			128
+
+#define MAX_MDC_CLOCK			2500000
+
+/* Stargate Solutions SSN8800 core registers */
+#define NB8800_TX_CTL1			0x000
+#define TX_TPD				BIT(5)
+#define TX_APPEND_FCS			BIT(4)
+#define TX_PAD_EN			BIT(3)
+#define TX_RETRY_EN			BIT(2)
+#define TX_EN				BIT(0)
+
+#define NB8800_TX_CTL2			0x001
+
+#define NB8800_RX_CTL			0x004
+#define RX_BC_DISABLE			BIT(7)
+#define RX_RUNT				BIT(6)
+#define RX_AF_EN			BIT(5)
+#define RX_PAUSE_EN			BIT(3)
+#define RX_SEND_CRC			BIT(2)
+#define RX_PAD_STRIP			BIT(1)
+#define RX_EN				BIT(0)
+
+#define NB8800_RANDOM_SEED		0x008
+#define NB8800_TX_SDP			0x14
+#define NB8800_TX_TPDP1			0x18
+#define NB8800_TX_TPDP2			0x19
+#define NB8800_SLOT_TIME		0x1c
+
+#define NB8800_MDIO_CMD			0x020
+#define MDIO_CMD_GO			BIT(31)
+#define MDIO_CMD_WR			BIT(26)
+#define MDIO_CMD_ADDR(x)		((x) << 21)
+#define MDIO_CMD_REG(x)			((x) << 16)
+#define MDIO_CMD_DATA(x)		((x) <<	 0)
+
+#define NB8800_MDIO_STS			0x024
+#define MDIO_STS_ERR			BIT(31)
+
+#define NB8800_MC_ADDR(i)		(0x028 + (i))
+#define NB8800_MC_INIT			0x02e
+#define NB8800_UC_ADDR(i)		(0x03c + (i))
+
+#define NB8800_MAC_MODE			0x044
+#define RGMII_MODE			BIT(7)
+#define HALF_DUPLEX			BIT(4)
+#define BURST_EN			BIT(3)
+#define LOOPBACK_EN			BIT(2)
+#define GMAC_MODE			BIT(0)
+
+#define NB8800_IC_THRESHOLD		0x050
+#define NB8800_PE_THRESHOLD		0x051
+#define NB8800_PF_THRESHOLD		0x052
+#define NB8800_TX_BUFSIZE		0x054
+#define NB8800_FIFO_CTL			0x056
+#define NB8800_PQ1			0x060
+#define NB8800_PQ2			0x061
+#define NB8800_SRC_ADDR(i)		(0x06a + (i))
+#define NB8800_STAT_DATA		0x078
+#define NB8800_STAT_INDEX		0x07c
+#define NB8800_STAT_CLEAR		0x07d
+
+#define NB8800_SLEEP_MODE		0x07e
+#define SLEEP_MODE			BIT(0)
+
+#define NB8800_WAKEUP			0x07f
+#define WAKEUP				BIT(0)
+
+/* Aurora NB8800 host interface registers */
+#define NB8800_TXC_CR			0x100
+#define TCR_LK				BIT(12)
+#define TCR_DS				BIT(11)
+#define TCR_BTS(x)			(((x) & 0x7) << 8)
+#define TCR_DIE				BIT(7)
+#define TCR_TFI(x)			(((x) & 0x7) << 4)
+#define TCR_LE				BIT(3)
+#define TCR_RS				BIT(2)
+#define TCR_DM				BIT(1)
+#define TCR_EN				BIT(0)
+
+#define NB8800_TXC_SR			0x104
+#define TSR_DE				BIT(3)
+#define TSR_DI				BIT(2)
+#define TSR_TO				BIT(1)
+#define TSR_TI				BIT(0)
+
+#define NB8800_TX_SAR			0x108
+#define NB8800_TX_DESC_ADDR		0x10c
+
+#define NB8800_TX_REPORT_ADDR		0x110
+#define TX_BYTES_TRANSFERRED(x)		(((x) >> 16) & 0xffff)
+#define TX_FIRST_DEFERRAL		BIT(7)
+#define TX_EARLY_COLLISIONS(x)		(((x) >> 3) & 0xf)
+#define TX_LATE_COLLISION		BIT(2)
+#define TX_PACKET_DROPPED		BIT(1)
+#define TX_FIFO_UNDERRUN		BIT(0)
+#define IS_TX_ERROR(r)			((r) & 0x07)
+
+#define NB8800_TX_FIFO_SR		0x114
+#define NB8800_TX_ITR			0x118
+
+#define NB8800_RXC_CR			0x200
+#define RCR_FL				BIT(13)
+#define RCR_LK				BIT(12)
+#define RCR_DS				BIT(11)
+#define RCR_BTS(x)			(((x) & 7) << 8)
+#define RCR_DIE				BIT(7)
+#define RCR_RFI(x)			(((x) & 7) << 4)
+#define RCR_LE				BIT(3)
+#define RCR_RS				BIT(2)
+#define RCR_DM				BIT(1)
+#define RCR_EN				BIT(0)
+
+#define NB8800_RXC_SR			0x204
+#define RSR_DE				BIT(3)
+#define RSR_DI				BIT(2)
+#define RSR_RO				BIT(1)
+#define RSR_RI				BIT(0)
+
+#define NB8800_RX_SAR			0x208
+#define NB8800_RX_DESC_ADDR		0x20c
+
+#define NB8800_RX_REPORT_ADDR		0x210
+#define RX_BYTES_TRANSFERRED(x)		(((x) >> 16) & 0xFFFF)
+#define RX_MULTICAST_PKT		BIT(9)
+#define RX_BROADCAST_PKT		BIT(8)
+#define RX_LENGTH_ERR			BIT(7)
+#define RX_FCS_ERR			BIT(6)
+#define RX_RUNT_PKT			BIT(5)
+#define RX_FIFO_OVERRUN			BIT(4)
+#define RX_LATE_COLLISION		BIT(3)
+#define RX_ALIGNMENT_ERROR		BIT(2)
+#define RX_ERROR_MASK			0xfc
+#define IS_RX_ERROR(r)			((r) & RX_ERROR_MASK)
+
+#define NB8800_RX_FIFO_SR		0x214
+#define NB8800_RX_ITR			0x218
+
+/* Sigma Designs SMP86xx additional registers */
+#define NB8800_TANGOX_PAD_MODE		0x400
+#define PAD_MODE_MASK			0x7
+#define PAD_MODE_MII			0x0
+#define PAD_MODE_RGMII			0x1
+#define PAD_MODE_GTX_CLK_INV		BIT(3)
+#define PAD_MODE_GTX_CLK_DELAY		BIT(4)
+
+#define NB8800_TANGOX_MDIO_CLKDIV	0x420
+#define NB8800_TANGOX_RESET		0x424
+
+/* Hardware DMA descriptor.  All address fields are 32 bits wide. */
+struct nb8800_dma_desc {
+	u32				s_addr;	/* start address */
+	u32				n_addr;	/* next descriptor address */
+	u32				r_addr;	/* report address */
+	u32				config;	/* DESC_* flags ORed with a size value */
+} __aligned(8);
+
+#define DESC_ID				BIT(23)
+#define DESC_EOC			BIT(22)
+#define DESC_EOF			BIT(21)
+#define DESC_LK				BIT(20)
+#define DESC_DS				BIT(19)
+#define DESC_BTS(x)			(((x) & 0x7) << 16)
+
+/* DMA descriptor and associated data for rx.
+ * Allocated from coherent memory.
+ * The driver points desc.r_addr at the report field of the same entry.
+ */
+struct nb8800_rx_desc {
+	/* DMA descriptor */
+	struct nb8800_dma_desc		desc;
+
+	/* Status report filled in by hardware */
+	u32				report;
+};
+
+/* Address of buffer on rx ring */
+struct nb8800_rx_buf {
+	/* Page holding the buffer; NULL when the slot is empty */
+	struct page			*page;
+
+	/* presumably the byte offset of the buffer within page - confirm */
+	unsigned long			offset;
+};
+
+/* DMA descriptors and associated data for tx.
+ * Allocated from coherent memory.
+ * The driver points the r_addr of both descriptors at the report field
+ * of the same entry.
+ */
+struct nb8800_tx_desc {
+	/* DMA descriptor.  The second descriptor is used if packet
+	 * data is unaligned.
+	 */
+	struct nb8800_dma_desc		desc[2];
+
+	/* Status report filled in by hardware */
+	u32				report;
+
+	/* Bounce buffer for initial unaligned part of packet */
+	u8				buf[8] __aligned(8);
+};
+
+/* Packet in tx queue.  One entry exists per tx descriptor. */
+struct nb8800_tx_buf {
+	/* Currently queued skb */
+	struct sk_buff			*skb;
+
+	/* DMA address of the first descriptor; computed once when the
+	 * rings are allocated
+	 */
+	dma_addr_t			dma_desc;
+
+	/* DMA address of packet data */
+	dma_addr_t			dma_addr;
+
+	/* Length of DMA mapping, less than skb->len if alignment
+	 * buffer is used.
+	 */
+	unsigned int			dma_len;
+
+	/* Number of packets in chain starting here */
+	unsigned int			chain_len;
+
+	/* Packet chain ready to be submitted to hardware */
+	bool				ready;
+};
+
+/* Driver private data, stored in the net_device private area */
+struct nb8800_priv {
+	/* NAPI context scheduled from the interrupt handler */
+	struct napi_struct		napi;
+
+	/* Mapped base address of the device registers */
+	void __iomem			*base;
+
+	/* RX DMA descriptors */
+	struct nb8800_rx_desc		*rx_descs;
+
+	/* RX buffers referenced by DMA descriptors */
+	struct nb8800_rx_buf		*rx_bufs;
+
+	/* Current end of chain */
+	u32				rx_eoc;
+
+	/* Value for rx interrupt time register in NAPI interrupt mode */
+	u32				rx_itr_irq;
+
+	/* Value for rx interrupt time register in NAPI poll mode */
+	u32				rx_itr_poll;
+
+	/* Value for config field of rx DMA descriptors */
+	u32				rx_dma_config;
+
+	/* TX DMA descriptors */
+	struct nb8800_tx_desc		*tx_descs;
+
+	/* TX packet queue */
+	struct nb8800_tx_buf		*tx_bufs;
+
+	/* Number of free tx queue entries */
+	atomic_t			tx_free;
+
+	/* First free tx queue entry */
+	u32				tx_next;
+
+	/* Next buffer to transmit */
+	u32				tx_queue;
+
+	/* Start of current packet chain */
+	struct nb8800_tx_buf		*tx_chain;
+
+	/* Next buffer to reclaim */
+	u32				tx_done;
+
+	/* Lock for DMA activation */
+	spinlock_t			tx_lock;
+
+	/* MDIO bus registered by the driver */
+	struct mii_bus			*mii_bus;
+
+	/* Device tree node of the PHY, from the "phy-handle" property */
+	struct device_node		*phy_node;
+
+	/* Attached PHY; NULL while the interface is down */
+	struct phy_device		*phydev;
+
+	/* PHY connection type from DT */
+	int				phy_mode;
+
+	/* Current link status */
+	int				speed;
+	int				duplex;
+	int				link;
+
+	/* Pause settings */
+	bool				pause_aneg;
+	bool				pause_rx;
+	bool				pause_tx;
+
+	/* DMA base address of rx descriptors, see rx_descs above */
+	dma_addr_t			rx_desc_dma;
+
+	/* DMA base address of tx descriptors, see tx_descs above */
+	dma_addr_t			tx_desc_dma;
+
+	/* Clock whose rate is used to derive timer and divider values */
+	struct clk			*clk;
+};
+
+/* Optional platform-specific hooks, selected through the device tree
+ * match data.  Both return 0 on success or a negative errno, which
+ * makes probing fail.
+ */
+struct nb8800_ops {
+	/* Called from probe after the generic hardware init */
+	int				(*init)(struct net_device *dev);
+
+	/* Called from probe right after the clock has been enabled */
+	int				(*reset)(struct net_device *dev);
+};
+
+#endif /* _NB8800_H_ */
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 8b1929e9f698..a54bafad3538 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -2884,33 +2884,21 @@ struct platform_driver bcm63xx_enet_shared_driver = {
 	},
 };
 
+/* Platform drivers registered by bcm_enet_init() and unregistered by
+ * bcm_enet_exit().
+ */
+static struct platform_driver * const drivers[] = {
+	&bcm63xx_enet_shared_driver,
+	&bcm63xx_enet_driver,
+	&bcm63xx_enetsw_driver,
+};
+
 /* entry point */
 static int __init bcm_enet_init(void)
 {
-	int ret;
-
-	ret = platform_driver_register(&bcm63xx_enet_shared_driver);
-	if (ret)
-		return ret;
-
-	ret = platform_driver_register(&bcm63xx_enet_driver);
-	if (ret)
-		platform_driver_unregister(&bcm63xx_enet_shared_driver);
-
-	ret = platform_driver_register(&bcm63xx_enetsw_driver);
-	if (ret) {
-		platform_driver_unregister(&bcm63xx_enet_driver);
-		platform_driver_unregister(&bcm63xx_enet_shared_driver);
-	}
-
-	return ret;
+	return platform_register_drivers(drivers, ARRAY_SIZE(drivers));
 }
 
 static void __exit bcm_enet_exit(void)
 {
-	platform_driver_unregister(&bcm63xx_enet_driver);
-	platform_driver_unregister(&bcm63xx_enetsw_driver);
-	platform_driver_unregister(&bcm63xx_enet_shared_driver);
+	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
 }
 
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 0b214b5d944a..cae0956186ce 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -590,8 +590,6 @@ struct bnx2x_fastpath {
 	/* The last maximal completed SGE */
 	u16			last_max_sge;
 	__le16			*rx_cons_sb;
-	unsigned long		rx_pkt,
-				rx_calls;
 
 	/* TPA related */
 	struct bnx2x_agg_info	*tpa_info;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index d9add7c02e42..b3552dd749c4 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -558,10 +558,8 @@ static int bnx2x_alloc_rx_sge(struct bnx2x *bp, struct bnx2x_fastpath *fp,
 			put_page(pool->page);
 
 		pool->page = alloc_pages(gfp_mask, PAGES_PER_SGE_SHIFT);
-		if (unlikely(!pool->page)) {
-			BNX2X_ERR("Can't alloc sge\n");
+		if (unlikely(!pool->page))
 			return -ENOMEM;
-		}
 
 		pool->offset = 0;
 	}
@@ -745,7 +743,7 @@ static void bnx2x_gro_receive(struct bnx2x *bp, struct bnx2x_fastpath *fp,
 			bnx2x_gro_csum(bp, skb, bnx2x_gro_ipv6_csum);
 			break;
 		default:
-			BNX2X_ERR("Error: FW GRO supports only IPv4/IPv6, not 0x%04x\n",
+			WARN_ONCE(1, "Error: FW GRO supports only IPv4/IPv6, not 0x%04x\n",
 				  be16_to_cpu(skb->protocol));
 		}
 	}
@@ -1124,9 +1122,6 @@ next_cqe:
 	bnx2x_update_rx_prod(bp, fp, bd_prod_fw, sw_comp_prod,
 			     fp->rx_sge_prod);
 
-	fp->rx_pkt += rx_pkt;
-	fp->rx_calls++;
-
 	return rx_pkt;
 }
 
@@ -3206,42 +3201,32 @@ int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state)
  */
 static int bnx2x_poll(struct napi_struct *napi, int budget)
 {
-	int work_done = 0;
-	u8 cos;
 	struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath,
 						 napi);
 	struct bnx2x *bp = fp->bp;
+	int rx_work_done;
+	u8 cos;
 
-	while (1) {
 #ifdef BNX2X_STOP_ON_ERROR
-		if (unlikely(bp->panic)) {
-			napi_complete(napi);
-			return 0;
-		}
+	if (unlikely(bp->panic)) {
+		napi_complete(napi);
+		return 0;
+	}
 #endif
-		for_each_cos_in_tx_queue(fp, cos)
-			if (bnx2x_tx_queue_has_work(fp->txdata_ptr[cos]))
-				bnx2x_tx_int(bp, fp->txdata_ptr[cos]);
-
-		if (bnx2x_has_rx_work(fp)) {
-			work_done += bnx2x_rx_int(fp, budget - work_done);
-
-			/* must not complete if we consumed full budget */
-			if (work_done >= budget)
-				break;
-		}
+	for_each_cos_in_tx_queue(fp, cos)
+		if (bnx2x_tx_queue_has_work(fp->txdata_ptr[cos]))
+			bnx2x_tx_int(bp, fp->txdata_ptr[cos]);
 
-		/* Fall out from the NAPI loop if needed */
-		if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
+	rx_work_done = (bnx2x_has_rx_work(fp)) ? bnx2x_rx_int(fp, budget) : 0;
 
-			/* No need to update SB for FCoE L2 ring as long as
-			 * it's connected to the default SB and the SB
-			 * has been updated when NAPI was scheduled.
-			 */
-			if (IS_FCOE_FP(fp)) {
-				napi_complete(napi);
-				break;
-			}
+	if (rx_work_done < budget) {
+		/* No need to update SB for FCoE L2 ring as long as
+		 * it's connected to the default SB and the SB
+		 * has been updated when NAPI was scheduled.
+		 */
+		if (IS_FCOE_FP(fp)) {
+			napi_complete(napi);
+		} else {
 			bnx2x_update_fpsb_idx(fp);
 			/* bnx2x_has_rx_work() reads the status block,
 			 * thus we need to ensure that status block indices
@@ -3266,12 +3251,13 @@ static int bnx2x_poll(struct napi_struct *napi, int budget)
 				bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID,
 					     le16_to_cpu(fp->fp_hc_idx),
 					     IGU_INT_ENABLE, 1);
-				break;
+			} else {
+				rx_work_done = budget;
 			}
 		}
 	}
 
-	return work_done;
+	return rx_work_done;
 }
 
 /* we split the first BD into headers and data BDs
@@ -4444,7 +4430,6 @@ static int bnx2x_alloc_rx_bds(struct bnx2x_fastpath *fp,
 	/* Limit the CQE producer by the CQE ring size */
 	fp->rx_comp_prod = min_t(u16, NUM_RCQ_RINGS*RCQ_DESC_CNT,
 			       cqe_ring_prod);
-	fp->rx_pkt = fp->rx_calls = 0;
 
 	bnx2x_fp_stats(bp, fp)->eth_q_stats.rx_skb_alloc_failed += failure_cnt;
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index a3ce9f2a2335..820b7e04bb5f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -74,118 +74,115 @@ static const struct {
 static const struct {
 	long offset;
 	int size;
-	u32 flags;
-#define STATS_FLAGS_PORT		1
-#define STATS_FLAGS_FUNC		2
-#define STATS_FLAGS_BOTH		(STATS_FLAGS_FUNC | STATS_FLAGS_PORT)
+	bool is_port_stat;
 	char string[ETH_GSTRING_LEN];
 } bnx2x_stats_arr[] = {
 /* 1 */	{ STATS_OFFSET32(total_bytes_received_hi),
-				8, STATS_FLAGS_BOTH, "rx_bytes" },
+				8, false, "rx_bytes" },
 	{ STATS_OFFSET32(error_bytes_received_hi),
-				8, STATS_FLAGS_BOTH, "rx_error_bytes" },
+				8, false, "rx_error_bytes" },
 	{ STATS_OFFSET32(total_unicast_packets_received_hi),
-				8, STATS_FLAGS_BOTH, "rx_ucast_packets" },
+				8, false, "rx_ucast_packets" },
 	{ STATS_OFFSET32(total_multicast_packets_received_hi),
-				8, STATS_FLAGS_BOTH, "rx_mcast_packets" },
+				8, false, "rx_mcast_packets" },
 	{ STATS_OFFSET32(total_broadcast_packets_received_hi),
-				8, STATS_FLAGS_BOTH, "rx_bcast_packets" },
+				8, false, "rx_bcast_packets" },
 	{ STATS_OFFSET32(rx_stat_dot3statsfcserrors_hi),
-				8, STATS_FLAGS_PORT, "rx_crc_errors" },
+				8, true, "rx_crc_errors" },
 	{ STATS_OFFSET32(rx_stat_dot3statsalignmenterrors_hi),
-				8, STATS_FLAGS_PORT, "rx_align_errors" },
+				8, true, "rx_align_errors" },
 	{ STATS_OFFSET32(rx_stat_etherstatsundersizepkts_hi),
-				8, STATS_FLAGS_PORT, "rx_undersize_packets" },
+				8, true, "rx_undersize_packets" },
 	{ STATS_OFFSET32(etherstatsoverrsizepkts_hi),
-				8, STATS_FLAGS_PORT, "rx_oversize_packets" },
+				8, true, "rx_oversize_packets" },
 /* 10 */{ STATS_OFFSET32(rx_stat_etherstatsfragments_hi),
-				8, STATS_FLAGS_PORT, "rx_fragments" },
+				8, true, "rx_fragments" },
 	{ STATS_OFFSET32(rx_stat_etherstatsjabbers_hi),
-				8, STATS_FLAGS_PORT, "rx_jabbers" },
+				8, true, "rx_jabbers" },
 	{ STATS_OFFSET32(no_buff_discard_hi),
-				8, STATS_FLAGS_BOTH, "rx_discards" },
+				8, false, "rx_discards" },
 	{ STATS_OFFSET32(mac_filter_discard),
-				4, STATS_FLAGS_PORT, "rx_filtered_packets" },
+				4, true, "rx_filtered_packets" },
 	{ STATS_OFFSET32(mf_tag_discard),
-				4, STATS_FLAGS_PORT, "rx_mf_tag_discard" },
+				4, true, "rx_mf_tag_discard" },
 	{ STATS_OFFSET32(pfc_frames_received_hi),
-				8, STATS_FLAGS_PORT, "pfc_frames_received" },
+				8, true, "pfc_frames_received" },
 	{ STATS_OFFSET32(pfc_frames_sent_hi),
-				8, STATS_FLAGS_PORT, "pfc_frames_sent" },
+				8, true, "pfc_frames_sent" },
 	{ STATS_OFFSET32(brb_drop_hi),
-				8, STATS_FLAGS_PORT, "rx_brb_discard" },
+				8, true, "rx_brb_discard" },
 	{ STATS_OFFSET32(brb_truncate_hi),
-				8, STATS_FLAGS_PORT, "rx_brb_truncate" },
+				8, true, "rx_brb_truncate" },
 	{ STATS_OFFSET32(pause_frames_received_hi),
-				8, STATS_FLAGS_PORT, "rx_pause_frames" },
+				8, true, "rx_pause_frames" },
 	{ STATS_OFFSET32(rx_stat_maccontrolframesreceived_hi),
-				8, STATS_FLAGS_PORT, "rx_mac_ctrl_frames" },
+				8, true, "rx_mac_ctrl_frames" },
 	{ STATS_OFFSET32(nig_timer_max),
-			4, STATS_FLAGS_PORT, "rx_constant_pause_events" },
+				4, true, "rx_constant_pause_events" },
 /* 20 */{ STATS_OFFSET32(rx_err_discard_pkt),
-				4, STATS_FLAGS_BOTH, "rx_phy_ip_err_discards"},
+				4, false, "rx_phy_ip_err_discards"},
 	{ STATS_OFFSET32(rx_skb_alloc_failed),
-				4, STATS_FLAGS_BOTH, "rx_skb_alloc_discard" },
+				4, false, "rx_skb_alloc_discard" },
 	{ STATS_OFFSET32(hw_csum_err),
-				4, STATS_FLAGS_BOTH, "rx_csum_offload_errors" },
+				4, false, "rx_csum_offload_errors" },
 	{ STATS_OFFSET32(driver_xoff),
-				4, STATS_FLAGS_BOTH, "tx_exhaustion_events" },
+				4, false, "tx_exhaustion_events" },
 	{ STATS_OFFSET32(total_bytes_transmitted_hi),
-				8, STATS_FLAGS_BOTH, "tx_bytes" },
+				8, false, "tx_bytes" },
 	{ STATS_OFFSET32(tx_stat_ifhcoutbadoctets_hi),
-				8, STATS_FLAGS_PORT, "tx_error_bytes" },
+				8, true, "tx_error_bytes" },
 	{ STATS_OFFSET32(total_unicast_packets_transmitted_hi),
-				8, STATS_FLAGS_BOTH, "tx_ucast_packets" },
+				8, false, "tx_ucast_packets" },
 	{ STATS_OFFSET32(total_multicast_packets_transmitted_hi),
-				8, STATS_FLAGS_BOTH, "tx_mcast_packets" },
+				8, false, "tx_mcast_packets" },
 	{ STATS_OFFSET32(total_broadcast_packets_transmitted_hi),
-				8, STATS_FLAGS_BOTH, "tx_bcast_packets" },
+				8, false, "tx_bcast_packets" },
 	{ STATS_OFFSET32(tx_stat_dot3statsinternalmactransmiterrors_hi),
-				8, STATS_FLAGS_PORT, "tx_mac_errors" },
+				8, true, "tx_mac_errors" },
 	{ STATS_OFFSET32(rx_stat_dot3statscarriersenseerrors_hi),
-				8, STATS_FLAGS_PORT, "tx_carrier_errors" },
+				8, true, "tx_carrier_errors" },
 /* 30 */{ STATS_OFFSET32(tx_stat_dot3statssinglecollisionframes_hi),
-				8, STATS_FLAGS_PORT, "tx_single_collisions" },
+				8, true, "tx_single_collisions" },
 	{ STATS_OFFSET32(tx_stat_dot3statsmultiplecollisionframes_hi),
-				8, STATS_FLAGS_PORT, "tx_multi_collisions" },
+				8, true, "tx_multi_collisions" },
 	{ STATS_OFFSET32(tx_stat_dot3statsdeferredtransmissions_hi),
-				8, STATS_FLAGS_PORT, "tx_deferred" },
+				8, true, "tx_deferred" },
 	{ STATS_OFFSET32(tx_stat_dot3statsexcessivecollisions_hi),
-				8, STATS_FLAGS_PORT, "tx_excess_collisions" },
+				8, true, "tx_excess_collisions" },
 	{ STATS_OFFSET32(tx_stat_dot3statslatecollisions_hi),
-				8, STATS_FLAGS_PORT, "tx_late_collisions" },
+				8, true, "tx_late_collisions" },
 	{ STATS_OFFSET32(tx_stat_etherstatscollisions_hi),
-				8, STATS_FLAGS_PORT, "tx_total_collisions" },
+				8, true, "tx_total_collisions" },
 	{ STATS_OFFSET32(tx_stat_etherstatspkts64octets_hi),
-				8, STATS_FLAGS_PORT, "tx_64_byte_packets" },
+				8, true, "tx_64_byte_packets" },
 	{ STATS_OFFSET32(tx_stat_etherstatspkts65octetsto127octets_hi),
-			8, STATS_FLAGS_PORT, "tx_65_to_127_byte_packets" },
+				8, true, "tx_65_to_127_byte_packets" },
 	{ STATS_OFFSET32(tx_stat_etherstatspkts128octetsto255octets_hi),
-			8, STATS_FLAGS_PORT, "tx_128_to_255_byte_packets" },
+				8, true, "tx_128_to_255_byte_packets" },
 	{ STATS_OFFSET32(tx_stat_etherstatspkts256octetsto511octets_hi),
-			8, STATS_FLAGS_PORT, "tx_256_to_511_byte_packets" },
+				8, true, "tx_256_to_511_byte_packets" },
 /* 40 */{ STATS_OFFSET32(tx_stat_etherstatspkts512octetsto1023octets_hi),
-			8, STATS_FLAGS_PORT, "tx_512_to_1023_byte_packets" },
+				8, true, "tx_512_to_1023_byte_packets" },
 	{ STATS_OFFSET32(etherstatspkts1024octetsto1522octets_hi),
-			8, STATS_FLAGS_PORT, "tx_1024_to_1522_byte_packets" },
+				8, true, "tx_1024_to_1522_byte_packets" },
 	{ STATS_OFFSET32(etherstatspktsover1522octets_hi),
-			8, STATS_FLAGS_PORT, "tx_1523_to_9022_byte_packets" },
+				8, true, "tx_1523_to_9022_byte_packets" },
 	{ STATS_OFFSET32(pause_frames_sent_hi),
-				8, STATS_FLAGS_PORT, "tx_pause_frames" },
+				8, true, "tx_pause_frames" },
 	{ STATS_OFFSET32(total_tpa_aggregations_hi),
-			8, STATS_FLAGS_FUNC, "tpa_aggregations" },
+				8, false, "tpa_aggregations" },
 	{ STATS_OFFSET32(total_tpa_aggregated_frames_hi),
-			8, STATS_FLAGS_FUNC, "tpa_aggregated_frames"},
+				8, false, "tpa_aggregated_frames"},
 	{ STATS_OFFSET32(total_tpa_bytes_hi),
-			8, STATS_FLAGS_FUNC, "tpa_bytes"},
+				8, false, "tpa_bytes"},
 	{ STATS_OFFSET32(recoverable_error),
-			4, STATS_FLAGS_FUNC, "recoverable_errors" },
+				4, false, "recoverable_errors" },
 	{ STATS_OFFSET32(unrecoverable_error),
-			4, STATS_FLAGS_FUNC, "unrecoverable_errors" },
+				4, false, "unrecoverable_errors" },
 	{ STATS_OFFSET32(driver_filtered_tx_pkt),
-			4, STATS_FLAGS_FUNC, "driver_filtered_tx_pkt" },
+				4, false, "driver_filtered_tx_pkt" },
 	{ STATS_OFFSET32(eee_tx_lpi),
-			4, STATS_FLAGS_PORT, "Tx LPI entry count"}
+				4, true, "Tx LPI entry count"}
 };
 
 #define BNX2X_NUM_STATS		ARRAY_SIZE(bnx2x_stats_arr)
@@ -3066,9 +3063,7 @@ static void bnx2x_self_test(struct net_device *dev,
 	}
 }
 
-#define IS_PORT_STAT(i) \
-	((bnx2x_stats_arr[i].flags & STATS_FLAGS_BOTH) == STATS_FLAGS_PORT)
-#define IS_FUNC_STAT(i)		(bnx2x_stats_arr[i].flags & STATS_FLAGS_FUNC)
+#define IS_PORT_STAT(i)		(bnx2x_stats_arr[i].is_port_stat)
 #define HIDE_PORT_STAT(bp)	IS_VF(bp)
 
 /* ethtool statistics are displayed for all regular ethernet queues and the
@@ -3093,7 +3088,7 @@ static int bnx2x_get_sset_count(struct net_device *dev, int stringset)
 			num_strings = 0;
 		if (HIDE_PORT_STAT(bp)) {
 			for (i = 0; i < BNX2X_NUM_STATS; i++)
-				if (IS_FUNC_STAT(i))
+				if (!IS_PORT_STAT(i))
 					num_strings++;
 		} else
 			num_strings += BNX2X_NUM_STATS;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 2273576404b4..6c4e3a69976f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -10139,8 +10139,8 @@ static void __bnx2x_del_vxlan_port(struct bnx2x *bp, u16 port)
 		DP(BNX2X_MSG_SP, "Invalid vxlan port\n");
 		return;
 	}
-	bp->vxlan_dst_port--;
-	if (bp->vxlan_dst_port)
+	bp->vxlan_dst_port_count--;
+	if (bp->vxlan_dst_port_count)
 		return;
 
 	if (netif_running(bp->dev)) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index f2d0dc9b1c41..11446adc03cc 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3625,6 +3625,7 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp)
 		pf->fw_fid = le16_to_cpu(resp->fid);
 		pf->port_id = le16_to_cpu(resp->port_id);
 		memcpy(pf->mac_addr, resp->perm_mac_address, ETH_ALEN);
+		memcpy(bp->dev->dev_addr, pf->mac_addr, ETH_ALEN);
 		pf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx);
 		pf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings);
 		pf->max_tx_rings = le16_to_cpu(resp->max_tx_rings);
@@ -3648,8 +3649,11 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp)
 
 		vf->fw_fid = le16_to_cpu(resp->fid);
 		memcpy(vf->mac_addr, resp->perm_mac_address, ETH_ALEN);
-		if (!is_valid_ether_addr(vf->mac_addr))
-			random_ether_addr(vf->mac_addr);
+		if (is_valid_ether_addr(vf->mac_addr))
+			/* overwrite netdev dev_addr with admin VF MAC */
+			memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN);
+		else
+			random_ether_addr(bp->dev->dev_addr);
 
 		vf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx);
 		vf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings);
@@ -3880,6 +3884,8 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp)
 #endif
 }
 
+static int bnxt_cfg_rx_mode(struct bnxt *);
+
 static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
 {
 	int rc = 0;
@@ -3946,11 +3952,9 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
 		bp->vnic_info[0].rx_mask |=
 				CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
 
-	rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
-	if (rc) {
-		netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n", rc);
+	rc = bnxt_cfg_rx_mode(bp);
+	if (rc)
 		goto err_out;
-	}
 
 	rc = bnxt_hwrm_set_coal(bp);
 	if (rc)
@@ -4863,7 +4867,7 @@ static void bnxt_set_rx_mode(struct net_device *dev)
 	}
 }
 
-static void bnxt_cfg_rx_mode(struct bnxt *bp)
+static int bnxt_cfg_rx_mode(struct bnxt *bp)
 {
 	struct net_device *dev = bp->dev;
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
@@ -4912,6 +4916,7 @@ static void bnxt_cfg_rx_mode(struct bnxt *bp)
 			netdev_err(bp->dev, "HWRM vnic filter failure rc: %x\n",
 				   rc);
 			vnic->uc_filter_count = i;
+			return rc;
 		}
 	}
 
@@ -4920,6 +4925,8 @@ skip_uc:
 	if (rc)
 		netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n",
 			   rc);
+
+	return rc;
 }
 
 static netdev_features_t bnxt_fix_features(struct net_device *dev,
@@ -5210,13 +5217,27 @@ init_err:
 static int bnxt_change_mac_addr(struct net_device *dev, void *p)
 {
 	struct sockaddr *addr = p;
+	struct bnxt *bp = netdev_priv(dev);
+	int rc = 0;
 
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
+#ifdef CONFIG_BNXT_SRIOV
+	if (BNXT_VF(bp) && is_valid_ether_addr(bp->vf.mac_addr))
+		return -EADDRNOTAVAIL;
+#endif
+
+	if (ether_addr_equal(addr->sa_data, dev->dev_addr))
+		return 0;
+
 	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+	if (netif_running(dev)) {
+		bnxt_close_nic(bp, false, false);
+		rc = bnxt_open_nic(bp, false, false);
+	}
 
-	return 0;
+	return rc;
 }
 
 /* rtnl_lock held */
@@ -5684,15 +5705,12 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	bnxt_set_tpa_flags(bp);
 	bnxt_set_ring_params(bp);
 	dflt_rings = netif_get_num_default_rss_queues();
-	if (BNXT_PF(bp)) {
-		memcpy(dev->dev_addr, bp->pf.mac_addr, ETH_ALEN);
+	if (BNXT_PF(bp))
 		bp->pf.max_irqs = max_irqs;
-	} else {
 #if defined(CONFIG_BNXT_SRIOV)
-		memcpy(dev->dev_addr, bp->vf.mac_addr, ETH_ALEN);
+	else
 		bp->vf.max_irqs = max_irqs;
 #endif
-	}
 	bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings);
 	bp->rx_nr_rings = min_t(int, dflt_rings, max_rx_rings);
 	bp->tx_nr_rings_per_tc = min_t(int, dflt_rings, max_tx_rings);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index f4cf68861069..7a9af2887d8e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -804,10 +804,9 @@ void bnxt_update_vf_mac(struct bnxt *bp)
 	if (!is_valid_ether_addr(resp->perm_mac_address))
 		goto update_vf_mac_exit;
 
-	if (ether_addr_equal(resp->perm_mac_address, bp->vf.mac_addr))
-		goto update_vf_mac_exit;
-
-	memcpy(bp->vf.mac_addr, resp->perm_mac_address, ETH_ALEN);
+	if (!ether_addr_equal(resp->perm_mac_address, bp->vf.mac_addr))
+		memcpy(bp->vf.mac_addr, resp->perm_mac_address, ETH_ALEN);
+	/* overwrite netdev dev_addr with admin VF MAC */
 	memcpy(bp->dev->dev_addr, bp->vf.mac_addr, ETH_ALEN);
 update_vf_mac_exit:
 	mutex_unlock(&bp->hwrm_cmd_lock);
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 88c1e1a834f8..169059c92f80 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -1682,6 +1682,8 @@ static void macb_init_hw(struct macb *bp)
 	macb_set_hwaddr(bp);
 
 	config = macb_mdc_clk_div(bp);
+	if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII)
+		config |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL);
 	config |= MACB_BF(RBOF, NET_IP_ALIGN);	/* Make eth data aligned */
 	config |= MACB_BIT(PAE);		/* PAuse Enable */
 	config |= MACB_BIT(DRFCS);		/* Discard Rx FCS */
@@ -2416,6 +2418,8 @@ static int macb_init(struct platform_device *pdev)
 	/* Set MII management clock divider */
 	val = macb_mdc_clk_div(bp);
 	val |= macb_dbw(bp);
+	if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII)
+		val |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL);
 	macb_writel(bp, NCFGR, val);
 
 	return 0;
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 6e1faea00ca8..d83b0db77821 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -215,12 +215,17 @@
 /* GEM specific NCFGR bitfields. */
 #define GEM_GBE_OFFSET		10 /* Gigabit mode enable */
 #define GEM_GBE_SIZE		1
+#define GEM_PCSSEL_OFFSET	11
+#define GEM_PCSSEL_SIZE		1
 #define GEM_CLK_OFFSET		18 /* MDC clock division */
 #define GEM_CLK_SIZE		3
 #define GEM_DBW_OFFSET		21 /* Data bus width */
 #define GEM_DBW_SIZE		2
 #define GEM_RXCOEN_OFFSET	24
 #define GEM_RXCOEN_SIZE		1
+#define GEM_SGMIIEN_OFFSET	27
+#define GEM_SGMIIEN_SIZE	1
+
 
 /* Constants for data bus width. */
 #define GEM_DBW32		0 /* 32 bit AMBA AHB data bus width */
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index d3950b20feb9..688828865c48 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -120,10 +120,9 @@
  * Calculated for SCLK of 700Mhz
  * value written should be a 1/16th of what is expected
  *
- * 1 tick per 0.05usec = value of 2.2
- * This 10% would be covered in CQ timer thresh value
+ * 1 tick per 0.025usec
  */
-#define NICPF_CLK_PER_INT_TICK		2
+#define NICPF_CLK_PER_INT_TICK		1
 
 /* Time to wait before we decide that a SQ is stuck.
  *
@@ -266,6 +265,7 @@ struct nicvf {
 	u8			tns_mode:1;
 	u8			sqs_mode:1;
 	u8			loopback_supported:1;
+	bool			hw_tso;
 	u16			mtu;
 	struct queue_set	*qs;
 #define	MAX_SQS_PER_VF_SINGLE_NODE		5
@@ -490,6 +490,11 @@ static inline int nic_get_node_id(struct pci_dev *pdev)
 	return ((addr >> NIC_NODE_ID_SHIFT) & NIC_NODE_ID_MASK);
 }
 
+static inline bool pass1_silicon(struct pci_dev *pdev)
+{
+	return pdev->revision < 8;
+}
+
 int nicvf_set_real_num_queues(struct net_device *netdev,
 			      int tx_queues, int rx_queues);
 int nicvf_open(struct net_device *netdev);
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index c561fdcb79a7..9f80de4d5016 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -37,6 +37,7 @@ struct nicpf {
 #define	NIC_GET_BGX_FROM_VF_LMAC_MAP(map)	((map >> 4) & 0xF)
 #define	NIC_GET_LMAC_FROM_VF_LMAC_MAP(map)	(map & 0xF)
 	u8			vf_lmac_map[MAX_LMAC];
+	u8			lmac_cnt;
 	struct delayed_work     dwork;
 	struct workqueue_struct *check_link;
 	u8			link[MAX_LMAC];
@@ -54,11 +55,6 @@ struct nicpf {
 	bool			irq_allocated[NIC_PF_MSIX_VECTORS];
 };
 
-static inline bool pass1_silicon(struct nicpf *nic)
-{
-	return nic->pdev->revision < 8;
-}
-
 /* Supported devices */
 static const struct pci_device_id nic_id_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_NIC_PF) },
@@ -122,7 +118,7 @@ static void nic_send_msg_to_vf(struct nicpf *nic, int vf, union nic_mbx *mbx)
 	 * when PF writes to MBOX(1), in next revisions when
 	 * PF writes to MBOX(0)
 	 */
-	if (pass1_silicon(nic)) {
+	if (pass1_silicon(nic->pdev)) {
 		/* see the comment for nic_reg_write()/nic_reg_read()
 		 * functions above
 		 */
@@ -279,6 +275,7 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic)
 	u64 lmac_credit;
 
 	nic->num_vf_en = 0;
+	nic->lmac_cnt = 0;
 
 	for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) {
 		if (!(bgx_map & (1 << bgx)))
@@ -288,6 +285,7 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic)
 			nic->vf_lmac_map[next_bgx_lmac++] =
 						NIC_SET_VF_LMAC_MAP(bgx, lmac);
 		nic->num_vf_en += lmac_cnt;
+		nic->lmac_cnt += lmac_cnt;
 
 		/* Program LMAC credits */
 		lmac_credit = (1ull << 1); /* channel credit enable */
@@ -397,7 +395,7 @@ static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg)
 			padd = cpi % 8; /* 3 bits CS out of 6bits DSCP */
 
 		/* Leave RSS_SIZE as '0' to disable RSS */
-		if (pass1_silicon(nic)) {
+		if (pass1_silicon(nic->pdev)) {
 			nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi << 3),
 				      (vnic << 24) | (padd << 16) |
 				      (rssi_base + rssi));
@@ -467,7 +465,7 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg)
 	}
 
 	cpi_base = nic->cpi_base[cfg->vf_id];
-	if (pass1_silicon(nic))
+	if (pass1_silicon(nic->pdev))
 		idx_addr = NIC_PF_CPI_0_2047_CFG;
 	else
 		idx_addr = NIC_PF_MPI_0_2047_CFG;
@@ -715,6 +713,13 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
 	case NIC_MBOX_MSG_CFG_DONE:
 		/* Last message of VF config msg sequence */
 		nic->vf_enabled[vf] = true;
+		if (vf >= nic->lmac_cnt)
+			goto unlock;
+
+		bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+		lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+		bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, true);
 		goto unlock;
 	case NIC_MBOX_MSG_SHUTDOWN:
 		/* First msg in VF teardown sequence */
@@ -722,6 +727,14 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
 		if (vf >= nic->num_vf_en)
 			nic->sqs_used[vf - nic->num_vf_en] = false;
 		nic->pqs_vf[vf] = 0;
+
+		if (vf >= nic->lmac_cnt)
+			break;
+
+		bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+		lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+		bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, false);
 		break;
 	case NIC_MBOX_MSG_ALLOC_SQS:
 		nic_alloc_sqs(nic, &mbx.sqs_alloc);
@@ -940,7 +953,7 @@ static void nic_poll_for_link(struct work_struct *work)
 
 	mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
 
-	for (vf = 0; vf < nic->num_vf_en; vf++) {
+	for (vf = 0; vf < nic->lmac_cnt; vf++) {
 		/* Poll only if VF is UP */
 		if (!nic->vf_enabled[vf])
 			continue;
@@ -1074,8 +1087,7 @@ static void nic_remove(struct pci_dev *pdev)
 
 	if (nic->check_link) {
 		/* Destroy work Queue */
-		cancel_delayed_work(&nic->dwork);
-		flush_workqueue(nic->check_link);
+		cancel_delayed_work_sync(&nic->dwork);
 		destroy_workqueue(nic->check_link);
 	}
 
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index af54c10945c2..a12b2e38cf61 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -112,6 +112,13 @@ static int nicvf_get_settings(struct net_device *netdev,
 
 	cmd->supported = 0;
 	cmd->transceiver = XCVR_EXTERNAL;
+
+	if (!nic->link_up) {
+		cmd->duplex = DUPLEX_UNKNOWN;
+		ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
+		return 0;
+	}
+
 	if (nic->speed <= 1000) {
 		cmd->port = PORT_MII;
 		cmd->autoneg = AUTONEG_ENABLE;
@@ -125,6 +132,13 @@ static int nicvf_get_settings(struct net_device *netdev,
 	return 0;
 }
 
+static u32 nicvf_get_link(struct net_device *netdev)
+{
+	struct nicvf *nic = netdev_priv(netdev);
+
+	return nic->link_up;
+}
+
 static void nicvf_get_drvinfo(struct net_device *netdev,
 			      struct ethtool_drvinfo *info)
 {
@@ -660,7 +674,7 @@ static int nicvf_set_channels(struct net_device *dev,
 
 static const struct ethtool_ops nicvf_ethtool_ops = {
 	.get_settings		= nicvf_get_settings,
-	.get_link		= ethtool_op_get_link,
+	.get_link		= nicvf_get_link,
 	.get_drvinfo		= nicvf_get_drvinfo,
 	.get_msglevel		= nicvf_get_msglevel,
 	.set_msglevel		= nicvf_set_msglevel,
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 7f709cbdcd87..c24cb2a86a42 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -525,14 +525,22 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev,
 		   __func__, cqe_tx->sq_qs, cqe_tx->sq_idx,
 		   cqe_tx->sqe_ptr, hdr->subdesc_cnt);
 
-	nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
 	nicvf_check_cqe_tx_errs(nic, cq, cqe_tx);
 	skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr];
-	/* For TSO offloaded packets only one head SKB needs to be freed */
+	/* For TSO offloaded packets only one SQE will have a valid SKB */
 	if (skb) {
+		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
 		prefetch(skb);
 		dev_consume_skb_any(skb);
 		sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
+	} else {
+		/* In case of HW TSO, HW sends a CQE for each segment of a TSO
+		 * packet instead of a single CQE for the whole TSO packet
+		 * transmitted. Each of these CQEs points to the same SQE, so
+		 * avoid freeing the same SQE multiple times.
+		 */
+		if (!nic->hw_tso)
+			nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
 	}
 }
 
@@ -1057,6 +1065,7 @@ int nicvf_stop(struct net_device *netdev)
 
 	netif_carrier_off(netdev);
 	netif_tx_stop_all_queues(nic->netdev);
+	nic->link_up = false;
 
 	/* Teardown secondary qsets first */
 	if (!nic->sqs_mode) {
@@ -1211,9 +1220,6 @@ int nicvf_open(struct net_device *netdev)
 	nic->drv_stats.txq_stop = 0;
 	nic->drv_stats.txq_wake = 0;
 
-	netif_carrier_on(netdev);
-	netif_tx_start_all_queues(netdev);
-
 	return 0;
 cleanup:
 	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
@@ -1551,6 +1557,9 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
 
+	if (!pass1_silicon(nic->pdev))
+		nic->hw_tso = true;
+
 	netdev->netdev_ops = &nicvf_netdev_ops;
 	netdev->watchdog_timeo = NICVF_TX_TIMEOUT;
 
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index e404ea837727..d0d1b5490061 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -18,14 +18,6 @@
 #include "q_struct.h"
 #include "nicvf_queues.h"
 
-struct rbuf_info {
-	struct page *page;
-	void	*data;
-	u64	offset;
-};
-
-#define GET_RBUF_INFO(x) ((struct rbuf_info *)(x - NICVF_RCV_BUF_ALIGN_BYTES))
-
 /* Poll a register for a specific value */
 static int nicvf_poll_reg(struct nicvf *nic, int qidx,
 			  u64 reg, int bit_pos, int bits, int val)
@@ -86,8 +78,6 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
 static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
 					 u32 buf_len, u64 **rbuf)
 {
-	u64 data;
-	struct rbuf_info *rinfo;
 	int order = get_order(buf_len);
 
 	/* Check if request can be accomodated in previous allocated page */
@@ -113,46 +103,28 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
 		nic->rb_page_offset = 0;
 	}
 
-	data = (u64)page_address(nic->rb_page) + nic->rb_page_offset;
-
-	/* Align buffer addr to cache line i.e 128 bytes */
-	rinfo = (struct rbuf_info *)(data + NICVF_RCV_BUF_ALIGN_LEN(data));
-	/* Save page address for reference updation */
-	rinfo->page = nic->rb_page;
-	/* Store start address for later retrieval */
-	rinfo->data = (void *)data;
-	/* Store alignment offset */
-	rinfo->offset = NICVF_RCV_BUF_ALIGN_LEN(data);
+	*rbuf = (u64 *)((u64)page_address(nic->rb_page) + nic->rb_page_offset);
 
-	data += rinfo->offset;
-
-	/* Give next aligned address to hw for DMA */
-	*rbuf = (u64 *)(data + NICVF_RCV_BUF_ALIGN_BYTES);
 	return 0;
 }
 
-/* Retrieve actual buffer start address and build skb for received packet */
+/* Build skb around receive buffer */
 static struct sk_buff *nicvf_rb_ptr_to_skb(struct nicvf *nic,
 					   u64 rb_ptr, int len)
 {
+	void *data;
 	struct sk_buff *skb;
-	struct rbuf_info *rinfo;
 
-	rb_ptr = (u64)phys_to_virt(rb_ptr);
-	/* Get buffer start address and alignment offset */
-	rinfo = GET_RBUF_INFO(rb_ptr);
+	data = phys_to_virt(rb_ptr);
 
 	/* Now build an skb to give to stack */
-	skb = build_skb(rinfo->data, RCV_FRAG_LEN);
+	skb = build_skb(data, RCV_FRAG_LEN);
 	if (!skb) {
-		put_page(rinfo->page);
+		put_page(virt_to_page(data));
 		return NULL;
 	}
 
-	/* Set correct skb->data */
-	skb_reserve(skb, rinfo->offset + NICVF_RCV_BUF_ALIGN_BYTES);
-
-	prefetch((void *)rb_ptr);
+	prefetch(skb->data);
 	return skb;
 }
 
@@ -196,7 +168,6 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
 	int head, tail;
 	u64 buf_addr;
 	struct rbdr_entry_t *desc;
-	struct rbuf_info *rinfo;
 
 	if (!rbdr)
 		return;
@@ -212,16 +183,14 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
 	while (head != tail) {
 		desc = GET_RBDR_DESC(rbdr, head);
 		buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
-		rinfo = GET_RBUF_INFO((u64)phys_to_virt(buf_addr));
-		put_page(rinfo->page);
+		put_page(virt_to_page(phys_to_virt(buf_addr)));
 		head++;
 		head &= (rbdr->dmem.q_len - 1);
 	}
 	/* Free SKB of tail desc */
 	desc = GET_RBDR_DESC(rbdr, tail);
 	buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
-	rinfo = GET_RBUF_INFO((u64)phys_to_virt(buf_addr));
-	put_page(rinfo->page);
+	put_page(virt_to_page(phys_to_virt(buf_addr)));
 
 	/* Free RBDR ring */
 	nicvf_free_q_desc_mem(nic, &rbdr->dmem);
@@ -330,7 +299,7 @@ static int nicvf_init_cmp_queue(struct nicvf *nic,
 		return err;
 
 	cq->desc = cq->dmem.base;
-	cq->thresh = CMP_QUEUE_CQE_THRESH;
+	cq->thresh = pass1_silicon(nic->pdev) ? 0 : CMP_QUEUE_CQE_THRESH;
 	nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1;
 
 	return 0;
@@ -592,7 +561,7 @@ void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
 	/* Set threshold value for interrupt generation */
 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh);
 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2,
-			      qidx, nic->cq_coalesce_usecs);
+			      qidx, CMP_QUEUE_TIMER_THRESH);
 }
 
 /* Configures transmit queue */
@@ -956,7 +925,7 @@ static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb)
 {
 	int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
 
-	if (skb_shinfo(skb)->gso_size) {
+	if (skb_shinfo(skb)->gso_size && !nic->hw_tso) {
 		subdesc_cnt = nicvf_tso_count_subdescs(skb);
 		return subdesc_cnt;
 	}
@@ -971,7 +940,7 @@ static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb)
  * First subdescriptor for every send descriptor.
  */
 static inline void
-nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
+nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry,
 			 int subdesc_cnt, struct sk_buff *skb, int len)
 {
 	int proto;
@@ -1007,6 +976,15 @@ nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
 			break;
 		}
 	}
+
+	if (nic->hw_tso && skb_shinfo(skb)->gso_size) {
+		hdr->tso = 1;
+		hdr->tso_start = skb_transport_offset(skb) + tcp_hdrlen(skb);
+		hdr->tso_max_paysize = skb_shinfo(skb)->gso_size;
+		/* For non-tunneled pkts, point this to L2 ethertype */
+		hdr->inner_l3_offset = skb_network_offset(skb) - 2;
+		nic->drv_stats.tx_tso++;
+	}
 }
 
 /* SQ GATHER subdescriptor
@@ -1076,7 +1054,7 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
 			data_left -= size;
 			tso_build_data(skb, &tso, size);
 		}
-		nicvf_sq_add_hdr_subdesc(sq, hdr_qentry,
+		nicvf_sq_add_hdr_subdesc(nic, sq, hdr_qentry,
 					 seg_subdescs - 1, skb, seg_len);
 		sq->skbuff[hdr_qentry] = (u64)NULL;
 		qentry = nicvf_get_nxt_sqentry(sq, qentry);
@@ -1129,11 +1107,12 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb)
 	qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
 
 	/* Check if its a TSO packet */
-	if (skb_shinfo(skb)->gso_size)
+	if (skb_shinfo(skb)->gso_size && !nic->hw_tso)
 		return nicvf_sq_append_tso(nic, sq, sq_num, qentry, skb);
 
 	/* Add SQ header subdesc */
-	nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, skb, skb->len);
+	nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1,
+				 skb, skb->len);
 
 	/* Add SQ gather subdescs */
 	qentry = nicvf_get_nxt_sqentry(sq, qentry);
@@ -1234,153 +1213,93 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 	return skb;
 }
 
-/* Enable interrupt */
-void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx)
+static u64 nicvf_int_type_to_mask(int int_type, int q_idx)
 {
 	u64 reg_val;
 
-	reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
-
 	switch (int_type) {
 	case NICVF_INTR_CQ:
-		reg_val |= ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT);
+		reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT);
 		break;
 	case NICVF_INTR_SQ:
-		reg_val |= ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT);
+		reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT);
 		break;
 	case NICVF_INTR_RBDR:
-		reg_val |= ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT);
+		reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT);
 		break;
 	case NICVF_INTR_PKT_DROP:
-		reg_val |= (1ULL << NICVF_INTR_PKT_DROP_SHIFT);
+		reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT);
 		break;
 	case NICVF_INTR_TCP_TIMER:
-		reg_val |= (1ULL << NICVF_INTR_TCP_TIMER_SHIFT);
+		reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT);
 		break;
 	case NICVF_INTR_MBOX:
-		reg_val |= (1ULL << NICVF_INTR_MBOX_SHIFT);
+		reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT);
 		break;
 	case NICVF_INTR_QS_ERR:
-		reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT);
+		reg_val = (1ULL << NICVF_INTR_QS_ERR_SHIFT);
 		break;
 	default:
-		netdev_err(nic->netdev,
-			   "Failed to enable interrupt: unknown type\n");
-		break;
+		reg_val = 0;
 	}
 
-	nicvf_reg_write(nic, NIC_VF_ENA_W1S, reg_val);
+	return reg_val;
+}
+
+/* Enable interrupt */
+void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx)
+{
+	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
+
+	if (!mask) {
+		netdev_dbg(nic->netdev,
+			   "Failed to enable interrupt: unknown type\n");
+		return;
+	}
+	nicvf_reg_write(nic, NIC_VF_ENA_W1S,
+			nicvf_reg_read(nic, NIC_VF_ENA_W1S) | mask);
 }
 
 /* Disable interrupt */
 void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx)
 {
-	u64 reg_val = 0;
+	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
 
-	switch (int_type) {
-	case NICVF_INTR_CQ:
-		reg_val |= ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT);
-		break;
-	case NICVF_INTR_SQ:
-		reg_val |= ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT);
-		break;
-	case NICVF_INTR_RBDR:
-		reg_val |= ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT);
-		break;
-	case NICVF_INTR_PKT_DROP:
-		reg_val |= (1ULL << NICVF_INTR_PKT_DROP_SHIFT);
-		break;
-	case NICVF_INTR_TCP_TIMER:
-		reg_val |= (1ULL << NICVF_INTR_TCP_TIMER_SHIFT);
-		break;
-	case NICVF_INTR_MBOX:
-		reg_val |= (1ULL << NICVF_INTR_MBOX_SHIFT);
-		break;
-	case NICVF_INTR_QS_ERR:
-		reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT);
-		break;
-	default:
-		netdev_err(nic->netdev,
+	if (!mask) {
+		netdev_dbg(nic->netdev,
 			   "Failed to disable interrupt: unknown type\n");
-		break;
+		return;
 	}
 
-	nicvf_reg_write(nic, NIC_VF_ENA_W1C, reg_val);
+	nicvf_reg_write(nic, NIC_VF_ENA_W1C, mask);
 }
 
 /* Clear interrupt */
 void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx)
 {
-	u64 reg_val = 0;
+	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
 
-	switch (int_type) {
-	case NICVF_INTR_CQ:
-		reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT);
-		break;
-	case NICVF_INTR_SQ:
-		reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT);
-		break;
-	case NICVF_INTR_RBDR:
-		reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT);
-		break;
-	case NICVF_INTR_PKT_DROP:
-		reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT);
-		break;
-	case NICVF_INTR_TCP_TIMER:
-		reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT);
-		break;
-	case NICVF_INTR_MBOX:
-		reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT);
-		break;
-	case NICVF_INTR_QS_ERR:
-		reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT);
-		break;
-	default:
-		netdev_err(nic->netdev,
+	if (!mask) {
+		netdev_dbg(nic->netdev,
 			   "Failed to clear interrupt: unknown type\n");
-		break;
+		return;
 	}
 
-	nicvf_reg_write(nic, NIC_VF_INT, reg_val);
+	nicvf_reg_write(nic, NIC_VF_INT, mask);
 }
 
 /* Check if interrupt is enabled */
 int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx)
 {
-	u64 reg_val;
-	u64 mask = 0xff;
-
-	reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
-
-	switch (int_type) {
-	case NICVF_INTR_CQ:
-		mask = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT);
-		break;
-	case NICVF_INTR_SQ:
-		mask = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT);
-		break;
-	case NICVF_INTR_RBDR:
-		mask = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT);
-		break;
-	case NICVF_INTR_PKT_DROP:
-		mask = NICVF_INTR_PKT_DROP_MASK;
-		break;
-	case NICVF_INTR_TCP_TIMER:
-		mask = NICVF_INTR_TCP_TIMER_MASK;
-		break;
-	case NICVF_INTR_MBOX:
-		mask = NICVF_INTR_MBOX_MASK;
-		break;
-	case NICVF_INTR_QS_ERR:
-		mask = NICVF_INTR_QS_ERR_MASK;
-		break;
-	default:
-		netdev_err(nic->netdev,
+	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
+	/* If interrupt type is unknown, we treat it as disabled. */
+	if (!mask) {
+		netdev_dbg(nic->netdev,
 			   "Failed to check interrupt enable: unknown type\n");
-		break;
+		return 0;
 	}
 
-	return (reg_val & mask);
+	return mask & nicvf_reg_read(nic, NIC_VF_ENA_W1S);
 }
 
 void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx)
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index fb4957d09914..c5030a7f213a 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -75,18 +75,16 @@
  */
 #define CMP_QSIZE		CMP_QUEUE_SIZE2
 #define CMP_QUEUE_LEN		(1ULL << (CMP_QSIZE + 10))
-#define CMP_QUEUE_CQE_THRESH	0
-#define CMP_QUEUE_TIMER_THRESH	220 /* 10usec */
+#define CMP_QUEUE_CQE_THRESH	(NAPI_POLL_WEIGHT / 2)
+#define CMP_QUEUE_TIMER_THRESH	80 /* ~2usec */
 
 #define RBDR_SIZE		RBDR_SIZE0
 #define RCV_BUF_COUNT		(1ULL << (RBDR_SIZE + 13))
 #define MAX_RCV_BUF_COUNT	(1ULL << (RBDR_SIZE6 + 13))
 #define RBDR_THRESH		(RCV_BUF_COUNT / 2)
 #define DMA_BUFFER_LEN		2048 /* In multiples of 128bytes */
-#define RCV_FRAG_LEN	(SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
-			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + \
-			 (NICVF_RCV_BUF_ALIGN_BYTES * 2))
-#define RCV_DATA_OFFSET		NICVF_RCV_BUF_ALIGN_BYTES
+#define RCV_FRAG_LEN	 (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
+			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 
 #define MAX_CQES_FOR_TX		((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \
 				 MAX_CQE_PER_PKT_XMIT)
@@ -108,10 +106,6 @@
 #define NICVF_SQ_BASE_ALIGN_BYTES	128  /* 7 bits */
 
 #define NICVF_ALIGNED_ADDR(ADDR, ALIGN_BYTES)	ALIGN(ADDR, ALIGN_BYTES)
-#define NICVF_ADDR_ALIGN_LEN(ADDR, BYTES)\
-	(NICVF_ALIGNED_ADDR(ADDR, BYTES) - BYTES)
-#define NICVF_RCV_BUF_ALIGN_LEN(X)\
-	(NICVF_ALIGNED_ADDR(X, NICVF_RCV_BUF_ALIGN_BYTES) - X)
 
 /* Queue enable/disable */
 #define NICVF_SQ_EN		BIT_ULL(19)
diff --git a/drivers/net/ethernet/cavium/thunder/q_struct.h b/drivers/net/ethernet/cavium/thunder/q_struct.h
index 3c1de97b1add..9e6d9876bfd0 100644
--- a/drivers/net/ethernet/cavium/thunder/q_struct.h
+++ b/drivers/net/ethernet/cavium/thunder/q_struct.h
@@ -545,25 +545,28 @@ struct sq_hdr_subdesc {
 	u64    subdesc_cnt:8;
 	u64    csum_l4:2;
 	u64    csum_l3:1;
-	u64    rsvd0:5;
+	u64    csum_inner_l4:2;
+	u64    csum_inner_l3:1;
+	u64    rsvd0:2;
 	u64    l4_offset:8;
 	u64    l3_offset:8;
 	u64    rsvd1:4;
 	u64    tot_len:20; /* W0 */
 
-	u64    tso_sdc_cont:8;
-	u64    tso_sdc_first:8;
-	u64    tso_l4_offset:8;
-	u64    tso_flags_last:12;
-	u64    tso_flags_first:12;
-	u64    rsvd2:2;
+	u64    rsvd2:24;
+	u64    inner_l4_offset:8;
+	u64    inner_l3_offset:8;
+	u64    tso_start:8;
+	u64    rsvd3:2;
 	u64    tso_max_paysize:14; /* W1 */
 #elif defined(__LITTLE_ENDIAN_BITFIELD)
 	u64    tot_len:20;
 	u64    rsvd1:4;
 	u64    l3_offset:8;
 	u64    l4_offset:8;
-	u64    rsvd0:5;
+	u64    rsvd0:2;
+	u64    csum_inner_l3:1;
+	u64    csum_inner_l4:2;
 	u64    csum_l3:1;
 	u64    csum_l4:2;
 	u64    subdesc_cnt:8;
@@ -574,12 +577,11 @@ struct sq_hdr_subdesc {
 	u64    subdesc_type:4; /* W0 */
 
 	u64    tso_max_paysize:14;
-	u64    rsvd2:2;
-	u64    tso_flags_first:12;
-	u64    tso_flags_last:12;
-	u64    tso_l4_offset:8;
-	u64    tso_sdc_first:8;
-	u64    tso_sdc_cont:8; /* W1 */
+	u64    rsvd3:2;
+	u64    tso_start:8;
+	u64    inner_l3_offset:8;
+	u64    inner_l4_offset:8;
+	u64    rsvd2:24; /* W1 */
 #endif
 };
 
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 180aa9fabf48..9df26c2263bc 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -186,6 +186,23 @@ void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
 }
 EXPORT_SYMBOL(bgx_set_lmac_mac);
 
+void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
+{
+	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	u64 cfg;
+
+	if (!bgx)
+		return;
+
+	cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+	if (enable)
+		cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN;
+	else
+		cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN);
+	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+}
+EXPORT_SYMBOL(bgx_lmac_rx_tx_enable);
+
 static void bgx_sgmii_change_link_state(struct lmac *lmac)
 {
 	struct bgx *bgx = lmac->bgx;
@@ -612,6 +629,8 @@ static void bgx_poll_for_link(struct work_struct *work)
 		lmac->last_duplex = 1;
 	} else {
 		lmac->link_up = 0;
+		lmac->last_speed = SPEED_UNKNOWN;
+		lmac->last_duplex = DUPLEX_UNKNOWN;
 	}
 
 	if (lmac->last_link != lmac->link_up) {
@@ -654,8 +673,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
 	}
 
 	/* Enable lmac */
-	bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG,
-		       CMR_EN | CMR_PKT_RX_EN | CMR_PKT_TX_EN);
+	bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN);
 
 	/* Restore default cfg, incase low level firmware changed it */
 	bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, 0x03);
@@ -695,8 +713,7 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid)
 	lmac = &bgx->lmac[lmacid];
 	if (lmac->check_link) {
 		/* Destroy work queue */
-		cancel_delayed_work(&lmac->dwork);
-		flush_workqueue(lmac->check_link);
+		cancel_delayed_work_sync(&lmac->dwork);
 		destroy_workqueue(lmac->check_link);
 	}
 
@@ -1009,6 +1026,9 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct bgx *bgx = NULL;
 	u8 lmac;
 
+	/* Load octeon mdio driver */
+	octeon_mdiobus_force_mod_depencency();
+
 	bgx = devm_kzalloc(dev, sizeof(*bgx), GFP_KERNEL);
 	if (!bgx)
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 07b7ec66c60d..149e179363a1 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -182,6 +182,8 @@ enum MCAST_MODE {
 #define BCAST_ACCEPT	1
 #define CAM_ACCEPT	1
 
+void octeon_mdiobus_force_mod_depencency(void);
+void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable);
 void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac);
 unsigned bgx_get_map(int node);
 int bgx_get_lmac_count(int node, int bgx);
diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig
index a79813a17b6e..4d187f22c48b 100644
--- a/drivers/net/ethernet/chelsio/Kconfig
+++ b/drivers/net/ethernet/chelsio/Kconfig
@@ -65,13 +65,14 @@ config CHELSIO_T3
 	  will be called cxgb3.
 
 config CHELSIO_T4
-	tristate "Chelsio Communications T4/T5 Ethernet support"
+	tristate "Chelsio Communications T4/T5/T6 Ethernet support"
 	depends on PCI && (IPV6 || IPV6=n)
 	select FW_LOADER
 	select MDIO
 	---help---
-	  This driver supports Chelsio T4 and T5 based gigabit, 10Gb Ethernet
-	  adapter and T5 based 40Gb Ethernet adapter.
+	  This driver supports Chelsio T4, T5 & T6 based gigabit, 10Gb Ethernet
+	  adapters and T5/T6 based 40Gb and T6 based 25Gb, 50Gb and 100Gb
+	  Ethernet adapters.
 
 	  For general information about Chelsio and our products, visit
 	  our website at <http://www.chelsio.com>.
@@ -85,7 +86,7 @@ config CHELSIO_T4
 	  will be called cxgb4.
 
 config CHELSIO_T4_DCB
-	bool "Data Center Bridging (DCB) Support for Chelsio T4/T5 cards"
+	bool "Data Center Bridging (DCB) Support for Chelsio T4/T5/T6 cards"
 	default n
 	depends on CHELSIO_T4 && DCB
 	---help---
@@ -107,12 +108,12 @@ config CHELSIO_T4_FCOE
 	  If unsure, say N.
 
 config CHELSIO_T4VF
-	tristate "Chelsio Communications T4/T5 Virtual Function Ethernet support"
+	tristate "Chelsio Communications T4/T5/T6 Virtual Function Ethernet support"
 	depends on PCI
 	---help---
-	  This driver supports Chelsio T4 and T5 based gigabit, 10Gb Ethernet
-	  adapters and T5 based 40Gb Ethernet adapters with PCI-E SR-IOV Virtual
-	  Functions.
+	  This driver supports Chelsio T4, T5 & T6 based gigabit, 10Gb Ethernet
+	  adapters and T5/T6 based 40Gb and T6 based 25Gb, 50Gb and 100Gb
+	  Ethernet adapters with PCI-E SR-IOV Virtual Functions.
 
 	  For general information about Chelsio and our products, visit
 	  our website at <http://www.chelsio.com>.
diff --git a/drivers/net/ethernet/chelsio/cxgb/pm3393.c b/drivers/net/ethernet/chelsio/cxgb/pm3393.c
index ec5e05052d99..eb462d7db427 100644
--- a/drivers/net/ethernet/chelsio/cxgb/pm3393.c
+++ b/drivers/net/ethernet/chelsio/cxgb/pm3393.c
@@ -570,7 +570,7 @@ static void pm3393_destroy(struct cmac *cmac)
 	kfree(cmac);
 }
 
-static struct cmac_ops pm3393_ops = {
+static const struct cmac_ops pm3393_ops = {
 	.destroy                 = pm3393_destroy,
 	.reset                   = pm3393_reset,
 	.interrupt_enable        = pm3393_interrupt_enable,
diff --git a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c
index b0cb388f5e12..6f30b6f78553 100644
--- a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c
+++ b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c
@@ -666,7 +666,7 @@ static void mac_destroy(struct cmac *mac)
 	kfree(mac);
 }
 
-static struct cmac_ops vsc7326_ops = {
+static const struct cmac_ops vsc7326_ops = {
 	.destroy                  = mac_destroy,
 	.reset                    = mac_reset,
 	.interrupt_handler        = mac_intr_handler,
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 8f7aa53a4c4b..60908eab3b3a 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -701,15 +701,16 @@ static ssize_t attr_store(struct device *d,
 			  ssize_t(*set) (struct net_device *, unsigned int),
 			  unsigned int min_val, unsigned int max_val)
 {
-	char *endp;
 	ssize_t ret;
 	unsigned int val;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	val = simple_strtoul(buf, &endp, 0);
-	if (endp == buf || val < min_val || val > max_val)
+	ret = kstrtouint(buf, 0, &val);
+	if (ret)
+		return ret;
+	if (val < min_val || val > max_val)
 		return -EINVAL;
 
 	rtnl_lock();
@@ -829,14 +830,15 @@ static ssize_t tm_attr_store(struct device *d,
 	struct port_info *pi = netdev_priv(to_net_dev(d));
 	struct adapter *adap = pi->adapter;
 	unsigned int val;
-	char *endp;
 	ssize_t ret;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	val = simple_strtoul(buf, &endp, 0);
-	if (endp == buf || val > 10000000)
+	ret = kstrtouint(buf, 0, &val);
+	if (ret)
+		return ret;
+	if (val > 10000000)
 		return -EINVAL;
 
 	rtnl_lock();
diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
index a22768c94200..ee04caa6c4d8 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
@@ -709,11 +709,21 @@ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p)
 			return ret;
 	}
 
-	p->cclk = simple_strtoul(vpd.cclk_data, NULL, 10);
-	p->mclk = simple_strtoul(vpd.mclk_data, NULL, 10);
-	p->uclk = simple_strtoul(vpd.uclk_data, NULL, 10);
-	p->mdc = simple_strtoul(vpd.mdc_data, NULL, 10);
-	p->mem_timing = simple_strtoul(vpd.mt_data, NULL, 10);
+	ret = kstrtouint(vpd.cclk_data, 10, &p->cclk);
+	if (ret)
+		return ret;
+	ret = kstrtouint(vpd.mclk_data, 10, &p->mclk);
+	if (ret)
+		return ret;
+	ret = kstrtouint(vpd.uclk_data, 10, &p->uclk);
+	if (ret)
+		return ret;
+	ret = kstrtouint(vpd.mdc_data, 10, &p->mdc);
+	if (ret)
+		return ret;
+	ret = kstrtouint(vpd.mt_data, 10, &p->mem_timing);
+	if (ret)
+		return ret;
 	memcpy(p->sn, vpd.sn_data, SERNUM_LEN);
 
 	/* Old eeproms didn't have port information */
@@ -723,8 +733,12 @@ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p)
 	} else {
 		p->port_type[0] = hex_to_bin(vpd.port0_data[0]);
 		p->port_type[1] = hex_to_bin(vpd.port1_data[0]);
-		p->xauicfg[0] = simple_strtoul(vpd.xaui0cfg_data, NULL, 16);
-		p->xauicfg[1] = simple_strtoul(vpd.xaui1cfg_data, NULL, 16);
+		ret = kstrtou16(vpd.xaui0cfg_data, 16, &p->xauicfg[0]);
+		if (ret)
+			return ret;
+		ret = kstrtou16(vpd.xaui1cfg_data, 16, &p->xauicfg[1]);
+		if (ret)
+			return ret;
 	}
 
 	ret = hex2bin(p->eth_base, vpd.na_data, 6);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
index c308429dd9c7..d288dcf6062f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
@@ -118,6 +118,11 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6)
 			ret = clip6_get_mbox(dev, (const struct in6_addr *)lip);
 			if (ret) {
 				write_unlock_bh(&ctbl->lock);
+				dev_err(adap->pdev_dev,
+					"CLIP FW cmd failed with error %d, "
+					"Connections using %pI6c wont be "
+					"offloaded",
+					ret, ce->addr6.sin6_addr.s6_addr);
 				return ret;
 			}
 		} else {
@@ -127,6 +132,9 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6)
 		}
 	} else {
 		write_unlock_bh(&ctbl->lock);
+		dev_info(adap->pdev_dev, "CLIP table overflow, "
+			 "Connections using %pI6c wont be offloaded",
+			 (void *)lip);
 		return -ENOMEM;
 	}
 	write_unlock_bh(&ctbl->lock);
@@ -146,6 +154,9 @@ void cxgb4_clip_release(const struct net_device *dev, const u32 *lip, u8 v6)
 	int hash;
 	int ret = -1;
 
+	if (!ctbl)
+		return;
+
 	hash = clip_addr_hash(ctbl, addr, v6);
 
 	read_lock_bh(&ctbl->lock);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 55a47de544ea..e01e7228f607 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -483,6 +483,8 @@ struct sge_fl {                     /* SGE free-buffer queue state */
 	unsigned int pidx;          /* producer index */
 	unsigned long alloc_failed; /* # of times buffer allocation failed */
 	unsigned long large_alloc_failed;
+	unsigned long mapping_err;  /* # of RX Buffer DMA Mapping failures */
+	unsigned long low;          /* # of times momentarily starving */
 	unsigned long starving;
 	/* RO fields */
 	unsigned int cntxt_id;      /* SGE context id for the free list */
@@ -618,6 +620,7 @@ struct sge_ofld_txq {               /* state for an SGE offload Tx queue */
 	struct adapter *adap;
 	struct sk_buff_head sendq;  /* list of backpressured packets */
 	struct tasklet_struct qresume_tsk; /* restarts the queue */
+	bool service_ofldq_running; /* service_ofldq() is processing sendq */
 	u8 full;                    /* the Tx ring is full */
 	unsigned long mapping_err;  /* # of I/O MMU packet mapping errors */
 } ____cacheline_aligned_in_smp;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 4269944c5db5..0d579b192350 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2325,6 +2325,8 @@ do { \
 		TL("TxMapErr:", mapping_err);
 		RL("FLAllocErr:", fl.alloc_failed);
 		RL("FLLrgAlcErr:", fl.large_alloc_failed);
+		RL("FLMapErr:", fl.mapping_err);
+		RL("FLLow:", fl.low);
 		RL("FLStarving:", fl.starving);
 
 	} else if (iscsi_idx < iscsi_entries) {
@@ -2359,6 +2361,8 @@ do { \
 		RL("RxNoMem:", stats.nomem);
 		RL("FLAllocErr:", fl.alloc_failed);
 		RL("FLLrgAlcErr:", fl.large_alloc_failed);
+		RL("FLMapErr:", fl.mapping_err);
+		RL("FLLow:", fl.low);
 		RL("FLStarving:", fl.starving);
 
 	} else if (rdma_idx < rdma_entries) {
@@ -2388,6 +2392,8 @@ do { \
 		RL("RxNoMem:", stats.nomem);
 		RL("FLAllocErr:", fl.alloc_failed);
 		RL("FLLrgAlcErr:", fl.large_alloc_failed);
+		RL("FLMapErr:", fl.mapping_err);
+		RL("FLLow:", fl.low);
 		RL("FLStarving:", fl.starving);
 
 	} else if (ciq_idx < ciq_entries) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index a077f9476daf..2a61a42ab033 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -35,79 +35,79 @@ static void set_msglevel(struct net_device *dev, u32 val)
 }
 
 static const char stats_strings[][ETH_GSTRING_LEN] = {
-	"tx_octets_ok		",
-	"tx_frames_ok		",
-	"tx_broadcast_frames	",
-	"tx_multicast_frames	",
-	"tx_unicast_frames	",
-	"tx_error_frames	",
-
-	"tx_frames_64		",
-	"tx_frames_65_to_127	",
-	"tx_frames_128_to_255	",
-	"tx_frames_256_to_511	",
-	"tx_frames_512_to_1023	",
-	"tx_frames_1024_to_1518	",
-	"tx_frames_1519_to_max	",
-
-	"tx_frames_dropped	",
-	"tx_pause_frames	",
-	"tx_ppp0_frames		",
-	"tx_ppp1_frames		",
-	"tx_ppp2_frames		",
-	"tx_ppp3_frames		",
-	"tx_ppp4_frames		",
-	"tx_ppp5_frames		",
-	"tx_ppp6_frames		",
-	"tx_ppp7_frames		",
-
-	"rx_octets_ok		",
-	"rx_frames_ok		",
-	"rx_broadcast_frames	",
-	"rx_multicast_frames	",
-	"rx_unicast_frames	",
-
-	"rx_frames_too_long	",
-	"rx_jabber_errors	",
-	"rx_fcs_errors		",
-	"rx_length_errors	",
-	"rx_symbol_errors	",
-	"rx_runt_frames		",
-
-	"rx_frames_64		",
-	"rx_frames_65_to_127	",
-	"rx_frames_128_to_255	",
-	"rx_frames_256_to_511	",
-	"rx_frames_512_to_1023	",
-	"rx_frames_1024_to_1518	",
-	"rx_frames_1519_to_max	",
-
-	"rx_pause_frames	",
-	"rx_ppp0_frames		",
-	"rx_ppp1_frames		",
-	"rx_ppp2_frames		",
-	"rx_ppp3_frames		",
-	"rx_ppp4_frames		",
-	"rx_ppp5_frames		",
-	"rx_ppp6_frames		",
-	"rx_ppp7_frames		",
-
-	"rx_bg0_frames_dropped	",
-	"rx_bg1_frames_dropped	",
-	"rx_bg2_frames_dropped	",
-	"rx_bg3_frames_dropped	",
-	"rx_bg0_frames_trunc	",
-	"rx_bg1_frames_trunc	",
-	"rx_bg2_frames_trunc	",
-	"rx_bg3_frames_trunc	",
-
-	"tso			",
-	"tx_csum_offload	",
-	"rx_csum_good		",
-	"vlan_extractions	",
-	"vlan_insertions	",
-	"gro_packets		",
-	"gro_merged		",
+	"tx_octets_ok           ",
+	"tx_frames_ok           ",
+	"tx_broadcast_frames    ",
+	"tx_multicast_frames    ",
+	"tx_unicast_frames      ",
+	"tx_error_frames        ",
+
+	"tx_frames_64           ",
+	"tx_frames_65_to_127    ",
+	"tx_frames_128_to_255   ",
+	"tx_frames_256_to_511   ",
+	"tx_frames_512_to_1023  ",
+	"tx_frames_1024_to_1518 ",
+	"tx_frames_1519_to_max  ",
+
+	"tx_frames_dropped      ",
+	"tx_pause_frames        ",
+	"tx_ppp0_frames         ",
+	"tx_ppp1_frames         ",
+	"tx_ppp2_frames         ",
+	"tx_ppp3_frames         ",
+	"tx_ppp4_frames         ",
+	"tx_ppp5_frames         ",
+	"tx_ppp6_frames         ",
+	"tx_ppp7_frames         ",
+
+	"rx_octets_ok           ",
+	"rx_frames_ok           ",
+	"rx_broadcast_frames    ",
+	"rx_multicast_frames    ",
+	"rx_unicast_frames      ",
+
+	"rx_frames_too_long     ",
+	"rx_jabber_errors       ",
+	"rx_fcs_errors          ",
+	"rx_length_errors       ",
+	"rx_symbol_errors       ",
+	"rx_runt_frames         ",
+
+	"rx_frames_64           ",
+	"rx_frames_65_to_127    ",
+	"rx_frames_128_to_255   ",
+	"rx_frames_256_to_511   ",
+	"rx_frames_512_to_1023  ",
+	"rx_frames_1024_to_1518 ",
+	"rx_frames_1519_to_max  ",
+
+	"rx_pause_frames        ",
+	"rx_ppp0_frames         ",
+	"rx_ppp1_frames         ",
+	"rx_ppp2_frames         ",
+	"rx_ppp3_frames         ",
+	"rx_ppp4_frames         ",
+	"rx_ppp5_frames         ",
+	"rx_ppp6_frames         ",
+	"rx_ppp7_frames         ",
+
+	"rx_bg0_frames_dropped  ",
+	"rx_bg1_frames_dropped  ",
+	"rx_bg2_frames_dropped  ",
+	"rx_bg3_frames_dropped  ",
+	"rx_bg0_frames_trunc    ",
+	"rx_bg1_frames_trunc    ",
+	"rx_bg2_frames_trunc    ",
+	"rx_bg3_frames_trunc    ",
+
+	"tso                    ",
+	"tx_csum_offload        ",
+	"rx_csum_good           ",
+	"vlan_extractions       ",
+	"vlan_insertions        ",
+	"gro_packets            ",
+	"gro_merged             ",
 };
 
 static char adapter_stats_strings[][ETH_GSTRING_LEN] = {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 0d147610a06f..edd706e739fb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4865,15 +4865,25 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 #if IS_ENABLED(CONFIG_IPV6)
-	adapter->clipt = t4_init_clip_tbl(adapter->clipt_start,
-					  adapter->clipt_end);
-	if (!adapter->clipt) {
-		/* We tolerate a lack of clip_table, giving up
-		 * some functionality
+	if ((CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) &&
+	    (!(t4_read_reg(adapter, LE_DB_CONFIG_A) & ASLIPCOMPEN_F))) {
+		/* CLIP functionality is not present in hardware,
+		 * hence disable all offload features
 		 */
 		dev_warn(&pdev->dev,
-			 "could not allocate Clip table, continuing\n");
+			 "CLIP not enabled in hardware, continuing\n");
 		adapter->params.offload = 0;
+	} else {
+		adapter->clipt = t4_init_clip_tbl(adapter->clipt_start,
+						  adapter->clipt_end);
+		if (!adapter->clipt) {
+			/* We tolerate a lack of clip_table, giving up
+			 * some functionality
+			 */
+			dev_warn(&pdev->dev,
+				 "could not allocate Clip table, continuing\n");
+			adapter->params.offload = 0;
+		}
 	}
 #endif
 	if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 48d8fbb1c220..8d35ce317f67 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -406,7 +406,7 @@ static void free_tx_desc(struct adapter *adap, struct sge_txq *q,
  */
 static inline int reclaimable(const struct sge_txq *q)
 {
-	int hw_cidx = ntohs(q->stat->cidx);
+	int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx));
 	hw_cidx -= q->cidx;
 	return hw_cidx < 0 ? hw_cidx + q->size : hw_cidx;
 }
@@ -613,6 +613,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 				       PCI_DMA_FROMDEVICE);
 		if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
 			__free_pages(pg, s->fl_pg_order);
+			q->mapping_err++;
 			goto out;   /* do not try small pages for this error */
 		}
 		mapping |= RX_LARGE_PG_BUF;
@@ -642,6 +643,7 @@ alloc_small_pages:
 				       PCI_DMA_FROMDEVICE);
 		if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
 			put_page(pg);
+			q->mapping_err++;
 			goto out;
 		}
 		*d++ = cpu_to_be64(mapping);
@@ -663,6 +665,7 @@ out:	cred = q->avail - cred;
 
 	if (unlikely(fl_starving(adap, q))) {
 		smp_wmb();
+		q->low++;
 		set_bit(q->cntxt_id - adap->sge.egr_start,
 			adap->sge.starving_fl);
 	}
@@ -1029,6 +1032,30 @@ static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
 		*p = 0;
 }
 
+static void *inline_tx_skb_header(const struct sk_buff *skb,
+				  const struct sge_txq *q,  void *pos,
+				  int length)
+{
+	u64 *p;
+	int left = (void *)q->stat - pos;
+
+	if (likely(length <= left)) {
+		memcpy(pos, skb->data, length);
+		pos += length;
+	} else {
+		memcpy(pos, skb->data, left);
+		memcpy(q->desc, skb->data + left, length - left);
+		pos = (void *)q->desc + (length - left);
+	}
+	/* 0-pad to multiple of 16 */
+	p = PTR_ALIGN(pos, 8);
+	if ((uintptr_t)p & 8) {
+		*p = 0;
+		return p + 1;
+	}
+	return p;
+}
+
 /*
  * Figure out what HW csum a packet wants and return the appropriate control
  * bits.
@@ -1320,7 +1347,7 @@ out_free:	dev_kfree_skb_any(skb);
  */
 static inline void reclaim_completed_tx_imm(struct sge_txq *q)
 {
-	int hw_cidx = ntohs(q->stat->cidx);
+	int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx));
 	int reclaim = hw_cidx - q->cidx;
 
 	if (reclaim < 0)
@@ -1542,24 +1569,50 @@ static void ofldtxq_stop(struct sge_ofld_txq *q, struct sk_buff *skb)
 }
 
 /**
- *	service_ofldq - restart a suspended offload queue
+ *	service_ofldq - service/restart a suspended offload queue
  *	@q: the offload queue
  *
- *	Services an offload Tx queue by moving packets from its packet queue
- *	to the HW Tx ring.  The function starts and ends with the queue locked.
+ *	Services an offload Tx queue by moving packets from its Pending Send
+ *	Queue to the Hardware TX ring.  The function starts and ends with the
+ *	Send Queue locked, but drops the lock while putting the skb at the
+ *	head of the Send Queue onto the Hardware TX Ring.  Dropping the lock
+ *	allows more skbs to be added to the Send Queue by other threads.
+ *	The packet being processed at the head of the Pending Send Queue is
+ *	left on the queue in case we experience DMA Mapping errors, etc.
+ *	and need to give up and restart later.
+ *
+ *	service_ofldq() can be thought of as a task which opportunistically
+ *	uses other threads' execution contexts.  We use the Offload Queue
+ *	boolean "service_ofldq_running" to make sure that only one instance
+ *	is ever running at a time ...
  */
 static void service_ofldq(struct sge_ofld_txq *q)
 {
-	u64 *pos;
+	u64 *pos, *before, *end;
 	int credits;
 	struct sk_buff *skb;
+	struct sge_txq *txq;
+	unsigned int left;
 	unsigned int written = 0;
 	unsigned int flits, ndesc;
 
+	/* If another thread is currently in service_ofldq() processing the
+	 * Pending Send Queue then there's nothing to do. Otherwise, flag
+	 * that we're doing the work and continue.  Examining/modifying
+	 * the Offload Queue boolean "service_ofldq_running" must be done
+	 * while holding the Pending Send Queue Lock.
+	 */
+	if (q->service_ofldq_running)
+		return;
+	q->service_ofldq_running = true;
+
 	while ((skb = skb_peek(&q->sendq)) != NULL && !q->full) {
-		/*
-		 * We drop the lock but leave skb on sendq, thus retaining
-		 * exclusive access to the state of the queue.
+		/* We drop the lock while we're working with the skb at the
+		 * head of the Pending Send Queue.  This allows more skbs to
+		 * be added to the Pending Send Queue while we're working on
+		 * this one.  We don't need to lock to guard the TX Ring
+		 * updates because only one thread of execution is ever
+		 * allowed into service_ofldq() at a time.
 		 */
 		spin_unlock(&q->sendq.lock);
 
@@ -1583,9 +1636,32 @@ static void service_ofldq(struct sge_ofld_txq *q)
 		} else {
 			int last_desc, hdr_len = skb_transport_offset(skb);
 
-			memcpy(pos, skb->data, hdr_len);
-			write_sgl(skb, &q->q, (void *)pos + hdr_len,
-				  pos + flits, hdr_len,
+			/* The WR headers may not fit within one descriptor.
+			 * So we need to deal with wrap-around here.
+			 */
+			before = (u64 *)pos;
+			end = (u64 *)pos + flits;
+			txq = &q->q;
+			pos = (void *)inline_tx_skb_header(skb, &q->q,
+							   (void *)pos,
+							   hdr_len);
+			if (before > (u64 *)pos) {
+				left = (u8 *)end - (u8 *)txq->stat;
+				end = (void *)txq->desc + left;
+			}
+
+			/* If current position is already at the end of the
+			 * ofld queue, reset the current to point to
+			 * start of the queue and update the end ptr as well.
+			 */
+			if (pos == (u64 *)txq->stat) {
+				left = (u8 *)end - (u8 *)txq->stat;
+				end = (void *)txq->desc + left;
+				pos = (void *)txq->desc;
+			}
+
+			write_sgl(skb, &q->q, (void *)pos,
+				  end, hdr_len,
 				  (dma_addr_t *)skb->head);
 #ifdef CONFIG_NEED_DMA_MAP_STATE
 			skb->dev = q->adap->port[0];
@@ -1604,6 +1680,11 @@ static void service_ofldq(struct sge_ofld_txq *q)
 			written = 0;
 		}
 
+		/* Reacquire the Pending Send Queue Lock so we can unlink the
+		 * skb we've just successfully transferred to the TX Ring and
+		 * loop for the next skb which may be at the head of the
+		 * Pending Send Queue.
+		 */
 		spin_lock(&q->sendq.lock);
 		__skb_unlink(skb, &q->sendq);
 		if (is_ofld_imm(skb))
@@ -1611,6 +1692,11 @@ static void service_ofldq(struct sge_ofld_txq *q)
 	}
 	if (likely(written))
 		ring_tx_db(q->adap, &q->q, written);
+
+	/* Indicate that no thread is processing the Pending Send Queue
+	 * currently.
+	 */
+	q->service_ofldq_running = false;
 }
 
 /**
@@ -1624,9 +1710,19 @@ static int ofld_xmit(struct sge_ofld_txq *q, struct sk_buff *skb)
 {
 	skb->priority = calc_tx_flits_ofld(skb);       /* save for restart */
 	spin_lock(&q->sendq.lock);
+
+	/* Queue the new skb onto the Offload Queue's Pending Send Queue.  If
+	 * that results in this new skb being the only one on the queue, start
+	 * servicing it.  If there are other skbs already on the list, then
+	 * either the queue is currently being processed or it's been stopped
+	 * for some reason and it'll be restarted at a later time.  Restart
+	 * paths are triggered by events like experiencing a DMA Mapping Error
+	 * or filling the Hardware TX Ring.
+	 */
 	__skb_queue_tail(&q->sendq, skb);
 	if (q->sendq.qlen == 1)
 		service_ofldq(q);
+
 	spin_unlock(&q->sendq.lock);
 	return NET_XMIT_SUCCESS;
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
index 03ed00c49823..a8dda635456d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
@@ -162,6 +162,9 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
 	CH_PCI_ID_TABLE_FENTRY(0x5095),	/* Custom T540-CR-SO */
 	CH_PCI_ID_TABLE_FENTRY(0x5096),	/* Custom T580-CR */
 	CH_PCI_ID_TABLE_FENTRY(0x5097),	/* Custom T520-KR */
+	CH_PCI_ID_TABLE_FENTRY(0x5098),	/* Custom 2x40G QSFP */
+	CH_PCI_ID_TABLE_FENTRY(0x5099),	/* Custom 2x40G QSFP */
+	CH_PCI_ID_TABLE_FENTRY(0x509a),	/* Custom T520-CR */
 
 	/* T6 adapters:
 	 */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index fc3044c8ac1c..91b52a21a2e7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -2802,6 +2802,10 @@
 #define HASHEN_V(x) ((x) << HASHEN_S)
 #define HASHEN_F    HASHEN_V(1U)
 
+#define ASLIPCOMPEN_S    17
+#define ASLIPCOMPEN_V(x) ((x) << ASLIPCOMPEN_S)
+#define ASLIPCOMPEN_F    ASLIPCOMPEN_V(1U)
+
 #define REQQPARERR_S    16
 #define REQQPARERR_V(x) ((x) << REQQPARERR_S)
 #define REQQPARERR_F    REQQPARERR_V(1U)
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index 8966f3159bb2..3acde3b9b767 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -1990,7 +1990,7 @@ SetMulticastFilter(struct net_device *dev)
 
 static u_char de4x5_irq[] = EISA_ALLOWED_IRQ_LIST;
 
-static int __init de4x5_eisa_probe (struct device *gendev)
+static int de4x5_eisa_probe(struct device *gendev)
 {
 	struct eisa_device *edev;
 	u_long iobase;
diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index ed41559bae77..b553409e04ad 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -98,8 +98,7 @@ static int csr0 = 0x01A00000 | 0x4800;
 #elif defined(__mips__)
 static int csr0 = 0x00200000 | 0x4000;
 #else
-#warning Processor architecture undefined!
-static int csr0 = 0x00A00000 | 0x4800;
+static int csr0;
 #endif
 
 /* Operational parameters that usually are not changed. */
@@ -1982,6 +1981,12 @@ static int __init tulip_init (void)
 	pr_info("%s", version);
 #endif
 
+	if (!csr0) {
+		pr_warn("tulip: unknown CPU architecture, using default csr0\n");
+		/* default to 8 longword cache line alignment */
+		csr0 = 0x00A00000 | 0x4800;
+	}
+
 	/* copy module parms into globals */
 	tulip_rx_copybreak = rx_copybreak;
 	tulip_max_interrupt_work = max_interrupt_work;
diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c
index 9beb3d34d4ba..3c0e4d5c5fef 100644
--- a/drivers/net/ethernet/dec/tulip/winbond-840.c
+++ b/drivers/net/ethernet/dec/tulip/winbond-840.c
@@ -907,7 +907,7 @@ static void init_registers(struct net_device *dev)
 #elif defined(CONFIG_SPARC) || defined (CONFIG_PARISC) || defined(CONFIG_ARM)
 	i |= 0x4800;
 #else
-#warning Processor architecture undefined
+	dev_warn(&dev->dev, "unknown CPU architecture, using default csr0 setting\n");
 	i |= 0x4800;
 #endif
 	iowrite32(i, ioaddr + PCIBusCfg);
diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index ff76d4e9dc1b..bee32a9d9876 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -7,7 +7,8 @@ config NET_VENDOR_FREESCALE
 	default y
 	depends on FSL_SOC || QUICC_ENGINE || CPM1 || CPM2 || PPC_MPC512x || \
 		   M523x || M527x || M5272 || M528x || M520x || M532x || \
-		   ARCH_MXC || ARCH_MXS || (PPC_MPC52xx && PPC_BESTCOMM)
+		   ARCH_MXC || ARCH_MXS || (PPC_MPC52xx && PPC_BESTCOMM) || \
+		   ARCH_LAYERSCAPE
 	---help---
 	  If you have a network (Ethernet) card belonging to this class, say Y.
 
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index b2a32209ffbf..d2328fc5da57 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3277,7 +3277,6 @@ static void
 fec_enet_get_queue_num(struct platform_device *pdev, int *num_tx, int *num_rx)
 {
 	struct device_node *np = pdev->dev.of_node;
-	int err;
 
 	*num_tx = *num_rx = 1;
 
@@ -3285,13 +3284,9 @@ fec_enet_get_queue_num(struct platform_device *pdev, int *num_tx, int *num_rx)
 		return;
 
 	/* parse the num of tx and rx queues */
-	err = of_property_read_u32(np, "fsl,num-tx-queues", num_tx);
-	if (err)
-		*num_tx = 1;
+	of_property_read_u32(np, "fsl,num-tx-queues", num_tx);
 
-	err = of_property_read_u32(np, "fsl,num-rx-queues", num_rx);
-	if (err)
-		*num_rx = 1;
+	of_property_read_u32(np, "fsl,num-rx-queues", num_rx);
 
 	if (*num_tx < 1 || *num_tx > FEC_ENET_MAX_TX_QS) {
 		dev_warn(&pdev->dev, "Invalid num_tx(=%d), fall back to 1\n",
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index afe7f39cdd7c..25553ee857b4 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -1084,27 +1084,23 @@ static struct platform_driver mpc52xx_fec_driver = {
 /* Module                                                                   */
 /* ======================================================================== */
 
+static struct platform_driver * const drivers[] = {
+#ifdef CONFIG_FEC_MPC52xx_MDIO
+	&mpc52xx_fec_mdio_driver,
+#endif
+	&mpc52xx_fec_driver,
+};
+
 static int __init
 mpc52xx_fec_init(void)
 {
-#ifdef CONFIG_FEC_MPC52xx_MDIO
-	int ret;
-	ret = platform_driver_register(&mpc52xx_fec_mdio_driver);
-	if (ret) {
-		pr_err("failed to register mdio driver\n");
-		return ret;
-	}
-#endif
-	return platform_driver_register(&mpc52xx_fec_driver);
+	return platform_register_drivers(drivers, ARRAY_SIZE(drivers));
 }
 
 static void __exit
 mpc52xx_fec_exit(void)
 {
-	platform_driver_unregister(&mpc52xx_fec_driver);
-#ifdef CONFIG_FEC_MPC52xx_MDIO
-	platform_driver_unregister(&mpc52xx_fec_mdio_driver);
-#endif
+	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
 }
 
 
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 67b1850c034e..4ce60e0c8341 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -647,9 +647,9 @@ static int gfar_parse_group(struct device_node *np,
 	if (model && strcasecmp(model, "FEC")) {
 		gfar_irq(grp, RX)->irq = irq_of_parse_and_map(np, 1);
 		gfar_irq(grp, ER)->irq = irq_of_parse_and_map(np, 2);
-		if (gfar_irq(grp, TX)->irq == NO_IRQ ||
-		    gfar_irq(grp, RX)->irq == NO_IRQ ||
-		    gfar_irq(grp, ER)->irq == NO_IRQ)
+		if (!gfar_irq(grp, TX)->irq ||
+		    !gfar_irq(grp, RX)->irq ||
+		    !gfar_irq(grp, ER)->irq)
 			return -EINVAL;
 	}
 
diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c
index 664d0c261269..b40fba929d65 100644
--- a/drivers/net/ethernet/freescale/gianfar_ptp.c
+++ b/drivers/net/ethernet/freescale/gianfar_ptp.c
@@ -467,7 +467,7 @@ static int gianfar_ptp_probe(struct platform_device *dev)
 
 	etsects->irq = platform_get_irq(dev, 0);
 
-	if (etsects->irq == NO_IRQ) {
+	if (etsects->irq < 0) {
 		pr_err("irq not in device tree\n");
 		goto no_node;
 	}
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index cec95ac8687d..6ca94dc3dda3 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -35,7 +35,7 @@
 #include <linux/phy.h>
 #include <linux/types.h>
 
-#define HNAE_DRIVER_VERSION "1.3.0"
+#define HNAE_DRIVER_VERSION "2.0"
 #define HNAE_DRIVER_NAME "hns"
 #define HNAE_COPYRIGHT "Copyright(c) 2015 Huawei Corporation."
 #define HNAE_DRIVER_STRING "Hisilicon Network Subsystem Driver"
@@ -63,6 +63,7 @@ do { \
 
 #define AE_VERSION_1 ('6' << 16 | '6' << 8 | '0')
 #define AE_VERSION_2 ('1' << 24 | '6' << 16 | '1' << 8 | '0')
+#define AE_IS_VER1(ver) ((ver) == AE_VERSION_1)
 #define AE_NAME_SIZE 16
 
 /* some said the RX and TX RCB format should not be the same in the future. But
@@ -144,23 +145,61 @@ enum hnae_led_state {
 #define HNS_RXD_ASID_S 24
 #define HNS_RXD_ASID_M (0xff << HNS_RXD_ASID_S)
 
+#define HNSV2_TXD_BUFNUM_S 0
+#define HNSV2_TXD_BUFNUM_M (0x7 << HNSV2_TXD_BUFNUM_S)
+#define HNSV2_TXD_RI_B   1
+#define HNSV2_TXD_L4CS_B   2
+#define HNSV2_TXD_L3CS_B   3
+#define HNSV2_TXD_FE_B   4
+#define HNSV2_TXD_VLD_B  5
+
+#define HNSV2_TXD_TSE_B   0
+#define HNSV2_TXD_VLAN_EN_B   1
+#define HNSV2_TXD_SNAP_B   2
+#define HNSV2_TXD_IPV6_B   3
+#define HNSV2_TXD_SCTP_B   4
+
 /* hardware spec ring buffer format */
 struct __packed hnae_desc {
 	__le64 addr;
 	union {
 		struct {
-			__le16 asid_bufnum_pid;
+			union {
+				__le16 asid_bufnum_pid;
+				__le16 asid;
+			};
 			__le16 send_size;
-			__le32 flag_ipoffset;
-			__le32 reserved_3[4];
+			union {
+				__le32 flag_ipoffset;
+				struct {
+					__u8 bn_pid;
+					__u8 ra_ri_cs_fe_vld;
+					__u8 ip_offset;
+					__u8 tse_vlan_snap_v6_sctp_nth;
+				};
+			};
+			__le16 mss;
+			__u8 l4_len;
+			__u8 reserved1;
+			__le16 paylen;
+			__u8 vmid;
+			__u8 qid;
+			__le32 reserved2[2];
 		} tx;
 
 		struct {
 			__le32 ipoff_bnum_pid_flag;
 			__le16 pkt_len;
 			__le16 size;
-			__le32 vlan_pri_asid;
-			__le32 reserved_2[3];
+			union {
+				__le32 vlan_pri_asid;
+				struct {
+					__le16 asid;
+					__le16 vlan_cfi_pri;
+				};
+			};
+			__le32 rss_hash;
+			__le32 reserved_1[2];
 		} rx;
 	};
 };
@@ -302,7 +341,8 @@ struct hnae_queue {
 	void __iomem *io_base;
 	phys_addr_t phy_base;
 	struct hnae_ae_dev *dev;	/* the device who use this queue */
-	struct hnae_ring rx_ring, tx_ring;
+	struct hnae_ring rx_ring ____cacheline_internodealigned_in_smp;
+	struct hnae_ring tx_ring ____cacheline_internodealigned_in_smp;
 	struct hnae_handle *handle;
 };
 
@@ -435,6 +475,7 @@ struct hnae_ae_ops {
 	int (*set_mac_addr)(struct hnae_handle *handle, void *p);
 	int (*set_mc_addr)(struct hnae_handle *handle, void *addr);
 	int (*set_mtu)(struct hnae_handle *handle, int new_mtu);
+	void (*set_tso_stats)(struct hnae_handle *handle, int enable);
 	void (*update_stats)(struct hnae_handle *handle,
 			     struct net_device_stats *net_stats);
 	void (*get_stats)(struct hnae_handle *handle, u64 *data);
@@ -446,6 +487,12 @@ struct hnae_ae_ops {
 			  enum hnae_led_state status);
 	void (*get_regs)(struct hnae_handle *handle, void *data);
 	int (*get_regs_len)(struct hnae_handle *handle);
+	u32	(*get_rss_key_size)(struct hnae_handle *handle);
+	u32	(*get_rss_indir_size)(struct hnae_handle *handle);
+	int	(*get_rss)(struct hnae_handle *handle, u32 *indir, u8 *key,
+			   u8 *hfunc);
+	int	(*set_rss)(struct hnae_handle *handle, const u32 *indir,
+			   const u8 *key, const u8 hfunc);
 };
 
 struct hnae_ae_dev {
@@ -551,11 +598,9 @@ static inline void hnae_replace_buffer(struct hnae_ring *ring, int i,
 				       struct hnae_desc_cb *res_cb)
 {
 	struct hnae_buf_ops *bops = ring->q->handle->bops;
-	struct hnae_desc_cb tmp_cb = ring->desc_cb[i];
 
 	bops->unmap_buffer(ring, &ring->desc_cb[i]);
 	ring->desc_cb[i] = *res_cb;
-	*res_cb = tmp_cb;
 	ring->desc[i].addr = (__le64)ring->desc_cb[i].dma;
 	ring->desc[i].rx.ipoff_bnum_pid_flag = 0;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index 1a16c0307b47..522b264866b4 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -252,7 +252,7 @@ static int hns_ae_set_multicast_one(struct hnae_handle *handle, void *addr)
 	if (mac_cb->mac_type != HNAE_PORT_SERVICE)
 		return 0;
 
-	ret = hns_mac_set_multi(mac_cb, mac_cb->mac_id, mac_addr, ENABLE);
+	ret = hns_mac_set_multi(mac_cb, mac_cb->mac_id, mac_addr, true);
 	if (ret) {
 		dev_err(handle->owner_dev,
 			"mac add mul_mac:%pM port%d  fail, ret = %#x!\n",
@@ -261,7 +261,7 @@ static int hns_ae_set_multicast_one(struct hnae_handle *handle, void *addr)
 	}
 
 	ret = hns_mac_set_multi(mac_cb, DSAF_BASE_INNER_PORT_NUM,
-				mac_addr, ENABLE);
+				mac_addr, true);
 	if (ret)
 		dev_err(handle->owner_dev,
 			"mac add mul_mac:%pM port%d  fail, ret = %#x!\n",
@@ -277,12 +277,19 @@ static int hns_ae_set_mtu(struct hnae_handle *handle, int new_mtu)
 	return hns_mac_set_mtu(mac_cb, new_mtu);
 }
 
+static void hns_ae_set_tso_stats(struct hnae_handle *handle, int enable)
+{
+	struct hns_ppe_cb *ppe_cb = hns_get_ppe_cb(handle);
+
+	hns_ppe_set_tso_enable(ppe_cb, enable);
+}
+
 static int hns_ae_start(struct hnae_handle *handle)
 {
 	int ret;
 	struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
 
-	ret = hns_mac_vm_config_bc_en(mac_cb, 0, ENABLE);
+	ret = hns_mac_vm_config_bc_en(mac_cb, 0, true);
 	if (ret)
 		return ret;
 
@@ -309,7 +316,7 @@ void hns_ae_stop(struct hnae_handle *handle)
 
 	hns_ae_ring_enable_all(handle, 0);
 
-	(void)hns_mac_vm_config_bc_en(mac_cb, 0, DISABLE);
+	(void)hns_mac_vm_config_bc_en(mac_cb, 0, false);
 }
 
 static void hns_ae_reset(struct hnae_handle *handle)
@@ -334,12 +341,30 @@ void hns_ae_toggle_ring_irq(struct hnae_ring *ring, u32 mask)
 	else
 		flag = RCB_INT_FLAG_RX;
 
-	hns_rcb_int_clr_hw(ring->q, flag);
 	hns_rcb_int_ctrl_hw(ring->q, flag, mask);
 }
 
+static void hns_aev2_toggle_ring_irq(struct hnae_ring *ring, u32 mask)
+{
+	u32 flag;
+
+	if (is_tx_ring(ring))
+		flag = RCB_INT_FLAG_TX;
+	else
+		flag = RCB_INT_FLAG_RX;
+
+	hns_rcbv2_int_ctrl_hw(ring->q, flag, mask);
+}
+
 static void hns_ae_toggle_queue_status(struct hnae_queue *queue, u32 val)
 {
+	struct dsaf_device *dsaf_dev = hns_ae_get_dsaf_dev(queue->dev);
+
+	if (AE_IS_VER1(dsaf_dev->dsaf_ver))
+		hns_rcb_int_clr_hw(queue, RCB_INT_FLAG_TX | RCB_INT_FLAG_RX);
+	else
+		hns_rcbv2_int_clr_hw(queue, RCB_INT_FLAG_TX | RCB_INT_FLAG_RX);
+
 	hns_rcb_start(queue, val);
 }
 
@@ -730,6 +755,53 @@ int hns_ae_get_regs_len(struct hnae_handle *handle)
 	return total_num;
 }
 
+static u32 hns_ae_get_rss_key_size(struct hnae_handle *handle)
+{
+	return HNS_PPEV2_RSS_KEY_SIZE;
+}
+
+static u32 hns_ae_get_rss_indir_size(struct hnae_handle *handle)
+{
+	return HNS_PPEV2_RSS_IND_TBL_SIZE;
+}
+
+static int hns_ae_get_rss(struct hnae_handle *handle, u32 *indir, u8 *key,
+			  u8 *hfunc)
+{
+	struct hns_ppe_cb *ppe_cb = hns_get_ppe_cb(handle);
+
+	/* only one hash function is supported: Toeplitz (ETH_RSS_HASH_TOP) */
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	/* report the RSS key only when the caller supplied a buffer */
+	if (key)
+		memcpy(key, ppe_cb->rss_key, HNS_PPEV2_RSS_KEY_SIZE);
+
+	/* copy every u32 hash->queue entry of the shadow table, not bytes */
+	memcpy(indir, ppe_cb->rss_indir_table,
+	       HNS_PPEV2_RSS_IND_TBL_SIZE * sizeof(*indir));
+	return 0;
+}
+
+static int hns_ae_set_rss(struct hnae_handle *handle, const u32 *indir,
+			  const u8 *key, const u8 hfunc)
+{
+	struct hns_ppe_cb *ppe_cb = hns_get_ppe_cb(handle);
+
+	/* set the RSS Hash Key if specified by the user */
+	if (key)
+		hns_ppe_set_rss_key(ppe_cb, (int *)key);
+
+	/* update every u32 entry of the shadow RSS table with user qids */
+	memcpy(ppe_cb->rss_indir_table, indir,
+	       HNS_PPEV2_RSS_IND_TBL_SIZE * sizeof(*indir));
+
+	/* now update the hardware */
+	hns_ppe_set_indir_table(ppe_cb, ppe_cb->rss_indir_table);
+	return 0;
+}
+
 static struct hnae_ae_ops hns_dsaf_ops = {
 	.get_handle = hns_ae_get_handle,
 	.put_handle = hns_ae_put_handle,
@@ -758,19 +830,34 @@ static struct hnae_ae_ops hns_dsaf_ops = {
 	.set_mc_addr = hns_ae_set_multicast_one,
 	.set_mtu = hns_ae_set_mtu,
 	.update_stats = hns_ae_update_stats,
+	.set_tso_stats = hns_ae_set_tso_stats,
 	.get_stats = hns_ae_get_stats,
 	.get_strings = hns_ae_get_strings,
 	.get_sset_count = hns_ae_get_sset_count,
 	.update_led_status = hns_ae_update_led_status,
 	.set_led_id = hns_ae_cpld_set_led_id,
 	.get_regs = hns_ae_get_regs,
-	.get_regs_len = hns_ae_get_regs_len
+	.get_regs_len = hns_ae_get_regs_len,
+	.get_rss_key_size = hns_ae_get_rss_key_size,
+	.get_rss_indir_size = hns_ae_get_rss_indir_size,
+	.get_rss = hns_ae_get_rss,
+	.set_rss = hns_ae_set_rss
 };
 
 int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev)
 {
 	struct hnae_ae_dev *ae_dev = &dsaf_dev->ae_dev;
 
+	switch (dsaf_dev->dsaf_ver) {
+	case AE_VERSION_1:
+		hns_dsaf_ops.toggle_ring_irq = hns_ae_toggle_ring_irq;
+		break;
+	case AE_VERSION_2:
+		hns_dsaf_ops.toggle_ring_irq = hns_aev2_toggle_ring_irq;
+		break;
+	default:
+		break;
+	}
 	ae_dev->ops = &hns_dsaf_ops;
 	ae_dev->dev = dsaf_dev->dev;
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 026b38676cba..5ef0e96e918a 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -283,7 +283,7 @@ int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb,
 }
 
 int hns_mac_set_multi(struct hns_mac_cb *mac_cb,
-		      u32 port_num, char *addr, u8 en)
+		      u32 port_num, char *addr, bool enable)
 {
 	int ret;
 	struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev;
@@ -295,7 +295,7 @@ int hns_mac_set_multi(struct hns_mac_cb *mac_cb,
 		mac_entry.in_port_num = mac_cb->mac_id;
 		mac_entry.port_num = port_num;
 
-		if (en == DISABLE)
+		if (!enable)
 			ret = hns_dsaf_del_mac_mc_port(dsaf_dev, &mac_entry);
 		else
 			ret = hns_dsaf_add_mac_mc_port(dsaf_dev, &mac_entry);
@@ -368,7 +368,7 @@ static void hns_mac_param_get(struct mac_params *param,
  *retuen 0 - success , negative --fail
  */
 static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb,
-				     u32 port_num, u16 vlan_id, u8 en)
+				     u32 port_num, u16 vlan_id, bool enable)
 {
 	int ret;
 	struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev;
@@ -386,7 +386,7 @@ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb,
 		mac_entry.in_port_num = mac_cb->mac_id;
 		mac_entry.port_num = port_num;
 
-		if (en == DISABLE)
+		if (!enable)
 			ret = hns_dsaf_del_mac_mc_port(dsaf_dev, &mac_entry);
 		else
 			ret = hns_dsaf_add_mac_mc_port(dsaf_dev, &mac_entry);
@@ -403,7 +403,7 @@ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb,
  *@en:enable
  *retuen 0 - success , negative --fail
  */
-int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, u8 en)
+int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, bool enable)
 {
 	int ret;
 	struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev;
@@ -427,7 +427,7 @@ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, u8 en)
 			return ret;
 		mac_entry.port_num = port_num;
 
-		if (en == DISABLE)
+		if (!enable)
 			ret = hns_dsaf_del_mac_mc_port(dsaf_dev, &mac_entry);
 		else
 			ret = hns_dsaf_add_mac_mc_port(dsaf_dev, &mac_entry);
@@ -648,7 +648,7 @@ static int hns_mac_init_ex(struct hns_mac_cb *mac_cb)
 
 	hns_mac_adjust_link(mac_cb, mac_cb->speed, !mac_cb->half_duplex);
 
-	ret = hns_mac_port_config_bc_en(mac_cb, mac_cb->mac_id, 0, ENABLE);
+	ret = hns_mac_port_config_bc_en(mac_cb, mac_cb->mac_id, 0, true);
 	if (ret)
 		goto free_mac_drv;
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index 7da95a7581f9..0b052191d751 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -425,8 +425,8 @@ void mac_adjust_link(struct net_device *net_dev);
 void hns_mac_get_link_status(struct hns_mac_cb *mac_cb,	u32 *link_status);
 int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, u32 vmid, char *addr);
 int hns_mac_set_multi(struct hns_mac_cb *mac_cb,
-		      u32 port_num, char *addr, u8 en);
-int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vm, u8 en);
+		      u32 port_num, char *addr, bool enable);
+int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vm, bool enable);
 void hns_mac_start(struct hns_mac_cb *mac_cb);
 void hns_mac_stop(struct hns_mac_cb *mac_cb);
 int hns_mac_del_mac(struct hns_mac_cb *mac_cb, u32 vfn, char *mac);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 2a98eba660c0..636b205a2366 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -38,10 +38,10 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
 	const char *name, *mode_str;
 	struct device_node *np = dsaf_dev->dev->of_node;
 
-	if (of_device_is_compatible(np, "hisilicon,hns-dsaf-v2"))
-		dsaf_dev->dsaf_ver = AE_VERSION_2;
-	else
+	if (of_device_is_compatible(np, "hisilicon,hns-dsaf-v1"))
 		dsaf_dev->dsaf_ver = AE_VERSION_1;
+	else
+		dsaf_dev->dsaf_ver = AE_VERSION_2;
 
 	ret = of_property_read_string(np, "dsa_name", &name);
 	if (ret) {
@@ -274,6 +274,8 @@ static void hns_dsaf_stp_port_type_cfg(struct dsaf_device *dsaf_dev,
 	}
 }
 
+#define HNS_DSAF_SBM_NUM(dev) \
+	(AE_IS_VER1((dev)->dsaf_ver) ? DSAF_SBM_NUM : DSAFV2_SBM_NUM)
 /**
  * hns_dsaf_sbm_cfg - config sbm
  * @dsaf_id: dsa fabric id
@@ -283,7 +285,7 @@ static void hns_dsaf_sbm_cfg(struct dsaf_device *dsaf_dev)
 	u32 o_sbm_cfg;
 	u32 i;
 
-	for (i = 0; i < DSAF_SBM_NUM; i++) {
+	for (i = 0; i < HNS_DSAF_SBM_NUM(dsaf_dev); i++) {
 		o_sbm_cfg = dsaf_read_dev(dsaf_dev,
 					  DSAF_SBM_CFG_REG_0_REG + 0x80 * i);
 		dsaf_set_bit(o_sbm_cfg, DSAF_SBM_CFG_EN_S, 1);
@@ -304,13 +306,19 @@ static int hns_dsaf_sbm_cfg_mib_en(struct dsaf_device *dsaf_dev)
 	u32 reg;
 	u32 read_cnt;
 
-	for (i = 0; i < DSAF_SBM_NUM; i++) {
+	/* validate the configuration by toggling SBM_CFG_MIB_EN from 0 to 1 */
+	for (i = 0; i < HNS_DSAF_SBM_NUM(dsaf_dev); i++) {
+		reg = DSAF_SBM_CFG_REG_0_REG + 0x80 * i;
+		dsaf_set_dev_bit(dsaf_dev, reg, DSAF_SBM_CFG_MIB_EN_S, 0);
+	}
+
+	for (i = 0; i < HNS_DSAF_SBM_NUM(dsaf_dev); i++) {
 		reg = DSAF_SBM_CFG_REG_0_REG + 0x80 * i;
 		dsaf_set_dev_bit(dsaf_dev, reg, DSAF_SBM_CFG_MIB_EN_S, 1);
 	}
 
 	/* waitint for all sbm enable finished */
-	for (i = 0; i < DSAF_SBM_NUM; i++) {
+	for (i = 0; i < HNS_DSAF_SBM_NUM(dsaf_dev); i++) {
 		read_cnt = 0;
 		reg = DSAF_SBM_CFG_REG_0_REG + 0x80 * i;
 		do {
@@ -338,83 +346,156 @@ static int hns_dsaf_sbm_cfg_mib_en(struct dsaf_device *dsaf_dev)
  */
 static void hns_dsaf_sbm_bp_wl_cfg(struct dsaf_device *dsaf_dev)
 {
-	u32 o_sbm_bp_cfg0;
-	u32 o_sbm_bp_cfg1;
-	u32 o_sbm_bp_cfg2;
-	u32 o_sbm_bp_cfg3;
+	u32 o_sbm_bp_cfg;
 	u32 reg;
 	u32 i;
 
 	/* XGE */
 	for (i = 0; i < DSAF_XGE_NUM; i++) {
 		reg = DSAF_SBM_BP_CFG_0_XGE_REG_0_REG + 0x80 * i;
-		o_sbm_bp_cfg0 = dsaf_read_dev(dsaf_dev, reg);
-		dsaf_set_field(o_sbm_bp_cfg0, DSAF_SBM_CFG0_COM_MAX_BUF_NUM_M,
+		o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
+		dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG0_COM_MAX_BUF_NUM_M,
 			       DSAF_SBM_CFG0_COM_MAX_BUF_NUM_S, 512);
-		dsaf_set_field(o_sbm_bp_cfg0, DSAF_SBM_CFG0_VC0_MAX_BUF_NUM_M,
+		dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG0_VC0_MAX_BUF_NUM_M,
 			       DSAF_SBM_CFG0_VC0_MAX_BUF_NUM_S, 0);
-		dsaf_set_field(o_sbm_bp_cfg0, DSAF_SBM_CFG0_VC1_MAX_BUF_NUM_M,
+		dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG0_VC1_MAX_BUF_NUM_M,
 			       DSAF_SBM_CFG0_VC1_MAX_BUF_NUM_S, 0);
-		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg0);
+		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
 
 		reg = DSAF_SBM_BP_CFG_1_REG_0_REG + 0x80 * i;
-		o_sbm_bp_cfg1 = dsaf_read_dev(dsaf_dev, reg);
-		dsaf_set_field(o_sbm_bp_cfg1, DSAF_SBM_CFG1_TC4_MAX_BUF_NUM_M,
+		o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
+		dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG1_TC4_MAX_BUF_NUM_M,
 			       DSAF_SBM_CFG1_TC4_MAX_BUF_NUM_S, 0);
-		dsaf_set_field(o_sbm_bp_cfg1, DSAF_SBM_CFG1_TC0_MAX_BUF_NUM_M,
+		dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG1_TC0_MAX_BUF_NUM_M,
 			       DSAF_SBM_CFG1_TC0_MAX_BUF_NUM_S, 0);
-		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg1);
+		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
 
 		reg = DSAF_SBM_BP_CFG_2_XGE_REG_0_REG + 0x80 * i;
-		o_sbm_bp_cfg2 = dsaf_read_dev(dsaf_dev, reg);
-		dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_SET_BUF_NUM_M,
+		o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
+		dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_SET_BUF_NUM_M,
 			       DSAF_SBM_CFG2_SET_BUF_NUM_S, 104);
-		dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_RESET_BUF_NUM_M,
+		dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_RESET_BUF_NUM_M,
 			       DSAF_SBM_CFG2_RESET_BUF_NUM_S, 128);
-		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg2);
+		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
 
 		reg = DSAF_SBM_BP_CFG_3_REG_0_REG + 0x80 * i;
-		o_sbm_bp_cfg3 = dsaf_read_dev(dsaf_dev, reg);
-		dsaf_set_field(o_sbm_bp_cfg3,
+		o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
+		dsaf_set_field(o_sbm_bp_cfg,
 			       DSAF_SBM_CFG3_SET_BUF_NUM_NO_PFC_M,
 			       DSAF_SBM_CFG3_SET_BUF_NUM_NO_PFC_S, 110);
-		dsaf_set_field(o_sbm_bp_cfg3,
+		dsaf_set_field(o_sbm_bp_cfg,
 			       DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M,
 			       DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S, 160);
-		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg3);
+		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
 
 		/* for no enable pfc mode */
 		reg = DSAF_SBM_BP_CFG_4_REG_0_REG + 0x80 * i;
-		o_sbm_bp_cfg3 = dsaf_read_dev(dsaf_dev, reg);
-		dsaf_set_field(o_sbm_bp_cfg3,
+		o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
+		dsaf_set_field(o_sbm_bp_cfg,
 			       DSAF_SBM_CFG3_SET_BUF_NUM_NO_PFC_M,
 			       DSAF_SBM_CFG3_SET_BUF_NUM_NO_PFC_S, 128);
-		dsaf_set_field(o_sbm_bp_cfg3,
+		dsaf_set_field(o_sbm_bp_cfg,
 			       DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M,
 			       DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S, 192);
-		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg3);
+		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
 	}
 
 	/* PPE */
 	for (i = 0; i < DSAF_COMM_CHN; i++) {
 		reg = DSAF_SBM_BP_CFG_2_PPE_REG_0_REG + 0x80 * i;
-		o_sbm_bp_cfg2 = dsaf_read_dev(dsaf_dev, reg);
-		dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_SET_BUF_NUM_M,
+		o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
+		dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_SET_BUF_NUM_M,
 			       DSAF_SBM_CFG2_SET_BUF_NUM_S, 10);
-		dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_RESET_BUF_NUM_M,
+		dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_RESET_BUF_NUM_M,
 			       DSAF_SBM_CFG2_RESET_BUF_NUM_S, 12);
-		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg2);
+		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
 	}
 
 	/* RoCEE */
 	for (i = 0; i < DSAF_COMM_CHN; i++) {
 		reg = DSAF_SBM_BP_CFG_2_ROCEE_REG_0_REG + 0x80 * i;
-		o_sbm_bp_cfg2 = dsaf_read_dev(dsaf_dev, reg);
-		dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_SET_BUF_NUM_M,
+		o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
+		dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_SET_BUF_NUM_M,
 			       DSAF_SBM_CFG2_SET_BUF_NUM_S, 2);
-		dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_RESET_BUF_NUM_M,
+		dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_RESET_BUF_NUM_M,
 			       DSAF_SBM_CFG2_RESET_BUF_NUM_S, 4);
-		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg2);
+		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
+	}
+}
+
+static void hns_dsafv2_sbm_bp_wl_cfg(struct dsaf_device *dsaf_dev)
+{
+	u32 o_sbm_bp_cfg;
+	u32 reg;
+	u32 i;
+
+	/* XGE */
+	for (i = 0; i < DSAFV2_SBM_XGE_CHN; i++) {
+		reg = DSAF_SBM_BP_CFG_0_XGE_REG_0_REG + 0x80 * i;
+		o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
+		dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG0_COM_MAX_BUF_NUM_M,
+			       DSAFV2_SBM_CFG0_COM_MAX_BUF_NUM_S, 256);
+		dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG0_VC0_MAX_BUF_NUM_M,
+			       DSAFV2_SBM_CFG0_VC0_MAX_BUF_NUM_S, 0);
+		dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG0_VC1_MAX_BUF_NUM_M,
+			       DSAFV2_SBM_CFG0_VC1_MAX_BUF_NUM_S, 0);
+		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
+
+		reg = DSAF_SBM_BP_CFG_1_REG_0_REG + 0x80 * i;
+		o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
+		dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG1_TC4_MAX_BUF_NUM_M,
+			       DSAFV2_SBM_CFG1_TC4_MAX_BUF_NUM_S, 0);
+		dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG1_TC0_MAX_BUF_NUM_M,
+			       DSAFV2_SBM_CFG1_TC0_MAX_BUF_NUM_S, 0);
+		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
+
+		reg = DSAF_SBM_BP_CFG_2_XGE_REG_0_REG + 0x80 * i;
+		o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
+		dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_SET_BUF_NUM_M,
+			       DSAFV2_SBM_CFG2_SET_BUF_NUM_S, 104);
+		dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_RESET_BUF_NUM_M,
+			       DSAFV2_SBM_CFG2_RESET_BUF_NUM_S, 128);
+		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
+
+		reg = DSAF_SBM_BP_CFG_3_REG_0_REG + 0x80 * i;
+		o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
+		dsaf_set_field(o_sbm_bp_cfg,
+			       DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_M,
+			       DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_S, 110);
+		dsaf_set_field(o_sbm_bp_cfg,
+			       DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M,
+			       DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S, 160);
+		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
+
+		/* for no enable pfc mode */
+		reg = DSAF_SBM_BP_CFG_4_REG_0_REG + 0x80 * i;
+		o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
+		dsaf_set_field(o_sbm_bp_cfg,
+			       DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_M,
+			       DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_S, 128);
+		dsaf_set_field(o_sbm_bp_cfg,
+			       DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_M,
+			       DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_S, 192);
+		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
+	}
+
+	/* PPE: i == DSAFV2_SBM_XGE_CHN here, carried over from the XGE loop */
+	reg = DSAF_SBM_BP_CFG_2_PPE_REG_0_REG + 0x80 * i;
+	o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
+	dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_SET_BUF_NUM_M,
+		       DSAFV2_SBM_CFG2_SET_BUF_NUM_S, 10);
+	dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_RESET_BUF_NUM_M,
+		       DSAFV2_SBM_CFG2_RESET_BUF_NUM_S, 12);
+	dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
+	/* RoCEE */
+	for (i = 0; i < DASFV2_ROCEE_CRD_NUM; i++) {
+		reg = DSAFV2_SBM_BP_CFG_2_ROCEE_REG_0_REG + 0x80 * i;
+		o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
+		dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_SET_BUF_NUM_M,
+			       DSAFV2_SBM_CFG2_SET_BUF_NUM_S, 2);
+		dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_RESET_BUF_NUM_M,
+			       DSAFV2_SBM_CFG2_RESET_BUF_NUM_S, 4);
+		dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
 	}
 }
 
@@ -985,11 +1066,38 @@ static void hns_dsaf_inode_init(struct dsaf_device *dsaf_dev)
 	else
 		tc_cfg = HNS_DSAF_I8TC_CFG;
 
+	if (AE_IS_VER1(dsaf_dev->dsaf_ver)) {
+		for (i = 0; i < DSAF_INODE_NUM; i++) {
+			reg = DSAF_INODE_IN_PORT_NUM_0_REG + 0x80 * i;
+			dsaf_set_dev_field(dsaf_dev, reg,
+					   DSAF_INODE_IN_PORT_NUM_M,
+					   DSAF_INODE_IN_PORT_NUM_S,
+					   i % DSAF_XGE_NUM);
+		}
+	} else {
+		for (i = 0; i < DSAF_PORT_TYPE_NUM; i++) {
+			reg = DSAF_INODE_IN_PORT_NUM_0_REG + 0x80 * i;
+			dsaf_set_dev_field(dsaf_dev, reg,
+					   DSAF_INODE_IN_PORT_NUM_M,
+					   DSAF_INODE_IN_PORT_NUM_S, 0);
+			dsaf_set_dev_field(dsaf_dev, reg,
+					   DSAFV2_INODE_IN_PORT1_NUM_M,
+					   DSAFV2_INODE_IN_PORT1_NUM_S, 1);
+			dsaf_set_dev_field(dsaf_dev, reg,
+					   DSAFV2_INODE_IN_PORT2_NUM_M,
+					   DSAFV2_INODE_IN_PORT2_NUM_S, 2);
+			dsaf_set_dev_field(dsaf_dev, reg,
+					   DSAFV2_INODE_IN_PORT3_NUM_M,
+					   DSAFV2_INODE_IN_PORT3_NUM_S, 3);
+			dsaf_set_dev_field(dsaf_dev, reg,
+					   DSAFV2_INODE_IN_PORT4_NUM_M,
+					   DSAFV2_INODE_IN_PORT4_NUM_S, 4);
+			dsaf_set_dev_field(dsaf_dev, reg,
+					   DSAFV2_INODE_IN_PORT5_NUM_M,
+					   DSAFV2_INODE_IN_PORT5_NUM_S, 5);
+		}
+	}
 	for (i = 0; i < DSAF_INODE_NUM; i++) {
-		reg = DSAF_INODE_IN_PORT_NUM_0_REG + 0x80 * i;
-		dsaf_set_dev_field(dsaf_dev, reg, DSAF_INODE_IN_PORT_NUM_M,
-				   DSAF_INODE_IN_PORT_NUM_S, i % DSAF_XGE_NUM);
-
 		reg = DSAF_INODE_PRI_TC_CFG_0_REG + 0x80 * i;
 		dsaf_write_dev(dsaf_dev, reg, tc_cfg);
 	}
@@ -1002,10 +1110,17 @@ static void hns_dsaf_inode_init(struct dsaf_device *dsaf_dev)
 static int hns_dsaf_sbm_init(struct dsaf_device *dsaf_dev)
 {
 	u32 flag;
+	u32 finish_msk;
 	u32 cnt = 0;
 	int ret;
 
-	hns_dsaf_sbm_bp_wl_cfg(dsaf_dev);
+	if (AE_IS_VER1(dsaf_dev->dsaf_ver)) {
+		hns_dsaf_sbm_bp_wl_cfg(dsaf_dev);
+		finish_msk = DSAF_SRAM_INIT_OVER_M;
+	} else {
+		hns_dsafv2_sbm_bp_wl_cfg(dsaf_dev);
+		finish_msk = DSAFV2_SRAM_INIT_OVER_M;
+	}
 
 	/* enable sbm chanel, disable sbm chanel shcut function*/
 	hns_dsaf_sbm_cfg(dsaf_dev);
@@ -1024,11 +1139,13 @@ static int hns_dsaf_sbm_init(struct dsaf_device *dsaf_dev)
 
 	do {
 		usleep_range(200, 210);/*udelay(200);*/
-		flag = dsaf_read_dev(dsaf_dev, DSAF_SRAM_INIT_OVER_0_REG);
+		flag = dsaf_get_dev_field(dsaf_dev, DSAF_SRAM_INIT_OVER_0_REG,
+					  finish_msk, DSAF_SRAM_INIT_OVER_S);
 		cnt++;
-	} while (flag != DSAF_SRAM_INIT_FINISH_FLAG && cnt < DSAF_CFG_READ_CNT);
+	} while (flag != (finish_msk >> DSAF_SRAM_INIT_OVER_S) &&
+		 cnt < DSAF_CFG_READ_CNT);
 
-	if (flag != DSAF_SRAM_INIT_FINISH_FLAG) {
+	if (flag != (finish_msk >> DSAF_SRAM_INIT_OVER_S)) {
 		dev_err(dsaf_dev->dev,
 			"hns_dsaf_sbm_init fail %s, flag=%d, cnt=%d\n",
 			dsaf_dev->ae_dev.name, flag, cnt);
@@ -2032,7 +2149,7 @@ void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data)
 		DSAF_INODE_VC1_IN_PKT_NUM_0_REG + port * 4);
 
 	/* dsaf inode registers */
-	for (i = 0; i < DSAF_SBM_NUM / DSAF_COMM_CHN; i++) {
+	for (i = 0; i < HNS_DSAF_SBM_NUM(ddev) / DSAF_COMM_CHN; i++) {
 		j = i * DSAF_COMM_CHN + port;
 		p[232 + i] = dsaf_read_dev(ddev,
 				DSAF_SBM_CFG_REG_0_REG + j * 0x80);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
index b2b93484995c..31c312f9826e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
@@ -19,24 +19,20 @@ struct hns_mac_cb;
 #define DSAF_DRV_NAME "hns_dsaf"
 #define DSAF_MOD_VERSION "v1.0"
 
-#define ENABLE		(0x1)
-#define DISABLE		(0x0)
+#define HNS_DSAF_DEBUG_NW_REG_OFFSET 0x100000
 
-#define HNS_DSAF_DEBUG_NW_REG_OFFSET (0x100000)
+#define DSAF_BASE_INNER_PORT_NUM 127/* mac tbl qid*/
 
-#define DSAF_BASE_INNER_PORT_NUM (127)  /* mac tbl qid*/
+#define DSAF_MAX_CHIP_NUM 2  /*max 2 chips */
 
-#define DSAF_MAX_CHIP_NUM (2)  /*max 2 chips */
+#define DSAF_DEFAUTL_QUEUE_NUM_PER_PPE 22
 
-#define DSAF_DEFAUTL_QUEUE_NUM_PER_PPE (22)
+#define HNS_DSAF_MAX_DESC_CNT 1024
+#define HNS_DSAF_MIN_DESC_CNT 16
 
-#define HNS_DSAF_MAX_DESC_CNT (1024)
-#define HNS_DSAF_MIN_DESC_CNT (16)
+#define DSAF_INVALID_ENTRY_IDX 0xffff
 
-#define DSAF_INVALID_ENTRY_IDX (0xffff)
-
-#define DSAF_CFG_READ_CNT   (30)
-#define DSAF_SRAM_INIT_FINISH_FLAG (0xff)
+#define DSAF_CFG_READ_CNT   30
 
 #define MAC_NUM_OCTETS_PER_ADDR 6
 
@@ -274,10 +270,6 @@ struct dsaf_device {
 	struct device *dev;
 	struct hnae_ae_dev ae_dev;
 
-	void *priv;
-
-	int virq[DSAF_IRQ_NUM];
-
 	u8 __iomem *sc_base;
 	u8 __iomem *sds_base;
 	u8 __iomem *ppe_base;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 523e9b83d304..607c3be42241 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -149,7 +149,11 @@ void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, u32 val)
 
 	if (port < DSAF_SERVICE_NW_NUM) {
 		reg_val_1  = 0x1 << port;
-		reg_val_2  = 0x1041041 << port;
+		/* this register differs between V1 and V2 */
+		if (AE_IS_VER1(dsaf_dev->dsaf_ver))
+			reg_val_2  = 0x1041041 << port;
+		else
+			reg_val_2  = 0x2082082 << port;
 
 		if (val == 0) {
 			dsaf_write_reg(dsaf_dev->sc_base,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
index 67f33f185a44..f302ef9073c6 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
@@ -19,6 +19,48 @@
 
 #include "hns_dsaf_ppe.h"
 
+void hns_ppe_set_tso_enable(struct hns_ppe_cb *ppe_cb, u32 value)
+{
+	dsaf_set_dev_bit(ppe_cb, PPEV2_CFG_TSO_EN_REG, 0, value != 0);
+}
+
+/* Write each u32 of rss_key to consecutive regs from PPEV2_RSS_KEY_REG. */
+void hns_ppe_set_rss_key(struct hns_ppe_cb *ppe_cb,
+			 const u32 rss_key[HNS_PPEV2_RSS_KEY_NUM])
+{
+	int i;
+
+	for (i = 0; i < HNS_PPEV2_RSS_KEY_NUM; i++)
+		dsaf_write_dev(ppe_cb, PPEV2_RSS_KEY_REG + i * 0x4, rss_key[i]);
+}
+
+/* Pack rss_tab into the PPEv2 indirection table: four 5-bit entries/reg. */
+void hns_ppe_set_indir_table(struct hns_ppe_cb *ppe_cb,
+			     const u32 rss_tab[HNS_PPEV2_RSS_IND_TBL_SIZE])
+{
+	u32 reg, val;	/* register offset and register value */
+	int i;
+
+	for (i = 0; i < (HNS_PPEV2_RSS_IND_TBL_SIZE / 4); i++) {
+		reg = PPEV2_INDRECTION_TBL_REG + i * 0x4;
+		val = dsaf_read_dev(ppe_cb, reg);
+		/* update the four table fields within the value just read */
+		dsaf_set_field(val, PPEV2_CFG_RSS_TBL_4N0_M,
+			       PPEV2_CFG_RSS_TBL_4N0_S,
+			       rss_tab[i * 4 + 0] & 0x1F);
+		dsaf_set_field(val, PPEV2_CFG_RSS_TBL_4N1_M,
+			       PPEV2_CFG_RSS_TBL_4N1_S,
+			       rss_tab[i * 4 + 1] & 0x1F);
+		dsaf_set_field(val, PPEV2_CFG_RSS_TBL_4N2_M,
+			       PPEV2_CFG_RSS_TBL_4N2_S,
+			       rss_tab[i * 4 + 2] & 0x1F);
+		dsaf_set_field(val, PPEV2_CFG_RSS_TBL_4N3_M,
+			       PPEV2_CFG_RSS_TBL_4N3_S,
+			       rss_tab[i * 4 + 3] & 0x1F);
+		dsaf_write_dev(ppe_cb, reg, val);
+	}
+}
+
 static void __iomem *hns_ppe_common_get_ioaddr(
 	struct ppe_common_cb *ppe_common)
 {
@@ -134,6 +176,11 @@ static void hns_ppe_cnt_clr_ce(struct hns_ppe_cb *ppe_cb)
 			 PPE_CNT_CLR_CE_B, 1);
 }
 
+static void hns_ppe_set_vlan_strip(struct hns_ppe_cb *ppe_cb, int en)
+{
+	dsaf_write_dev(ppe_cb, PPEV2_VLAN_STRIP_EN_REG, en);
+}
+
 /**
  * hns_ppe_checksum_hw - set ppe checksum caculate
  * @ppe_device: ppe device
@@ -266,13 +313,17 @@ static void hns_ppe_exc_irq_en(struct hns_ppe_cb *ppe_cb, int en)
 
 /**
  * ppe_init_hw - init ppe
- * @ppe_device: ppe device
+ * @ppe_cb: ppe device
  */
 static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb)
 {
 	struct ppe_common_cb *ppe_common_cb = ppe_cb->ppe_common_cb;
 	u32 port = ppe_cb->port;
 	struct dsaf_device *dsaf_dev = ppe_common_cb->dsaf_dev;
+	int i;
+
+	/* get default RSS key */
+	netdev_rss_key_fill(ppe_cb->rss_key, HNS_PPEV2_RSS_KEY_SIZE);
 
 	hns_ppe_srst_by_port(dsaf_dev, port, 0);
 	mdelay(10);
@@ -285,8 +336,21 @@ static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb)
 		hns_ppe_set_port_mode(ppe_cb, PPE_MODE_GE);
 	else
 		hns_ppe_set_port_mode(ppe_cb, PPE_MODE_XGE);
+
 	hns_ppe_checksum_hw(ppe_cb, 0xffffffff);
 	hns_ppe_cnt_clr_ce(ppe_cb);
+
+	if (!AE_IS_VER1(dsaf_dev->dsaf_ver)) {
+		hns_ppe_set_vlan_strip(ppe_cb, 0);
+
+		/* set default RSS key in h/w */
+		hns_ppe_set_rss_key(ppe_cb, ppe_cb->rss_key);
+
+		/* Set default indirection table in h/w */
+		for (i = 0; i < HNS_PPEV2_RSS_IND_TBL_SIZE; i++)
+			ppe_cb->rss_indir_table[i] = i;
+		hns_ppe_set_indir_table(ppe_cb, ppe_cb->rss_indir_table);
+	}
 }
 
 /**
@@ -341,13 +405,13 @@ void hns_ppe_reset_common(struct dsaf_device *dsaf_dev, u8 ppe_common_index)
 	if (ret)
 		return;
 
+	for (i = 0; i < ppe_common->ppe_num; i++)
+		hns_ppe_init_hw(&ppe_common->ppe_cb[i]);
+
 	ret = hns_rcb_common_init_hw(dsaf_dev->rcb_common[ppe_common_index]);
 	if (ret)
 		return;
 
-	for (i = 0; i < ppe_common->ppe_num; i++)
-		hns_ppe_init_hw(&ppe_common->ppe_cb[i]);
-
 	hns_rcb_common_init_commit_hw(dsaf_dev->rcb_common[ppe_common_index]);
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
index 4894f9a0d39f..0f5cb6962acf 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
@@ -25,15 +25,24 @@
 
 #define ETH_PPE_DUMP_NUM 576
 #define ETH_PPE_STATIC_NUM 12
+
+#define HNS_PPEV2_RSS_IND_TBL_SIZE 256
+#define HNS_PPEV2_RSS_KEY_SIZE 40 /* in bytes or 320 bits */
+#define HNS_PPEV2_RSS_KEY_NUM (HNS_PPEV2_RSS_KEY_SIZE / sizeof(u32))
+
 enum ppe_qid_mode {
-	PPE_QID_MODE0 = 0,	/* fixed queue id mode */
-	PPE_QID_MODE1,		/* switch:128VM non switch:6Port/4VM/4TC */
-	PPE_QID_MODE2,		/* switch:32VM/4TC non switch:6Port/16VM */
-	PPE_QID_MODE3,		/* switch:4TC/8TAG non switch:2Port/64VM */
-	PPE_QID_MODE4,		/* switch:8VM/16TAG non switch:2Port/16VM/4TC */
-	PPE_QID_MODE5,		/* non switch:6Port/16TAG */
-	PPE_QID_MODE6,		/* non switch:6Port/2VM/8TC */
-	PPE_QID_MODE7,		/* non switch:2Port/8VM/8TC */
+	PPE_QID_MODE0 = 0, /* fixed queue id mode */
+	PPE_QID_MODE1,	   /* switch:128VM non switch:6Port/4VM/4TC */
+	PPE_QID_MODE2,	   /* switch:32VM/4TC non switch:6Port/16VM */
+	PPE_QID_MODE3,	   /* switch:4TC/8RSS non switch:2Port/64VM */
+	PPE_QID_MODE4,	   /* switch:8VM/16RSS non switch:2Port/16VM/4TC */
+	PPE_QID_MODE5,	   /* switch:16VM/8TC non switch:6Port/16RSS */
+	PPE_QID_MODE6,	   /* switch:32VM/4RSS non switch:6Port/2VM/8TC */
+	PPE_QID_MODE7,	   /* switch:32RSS non switch:2Port/8VM/8TC */
+	PPE_QID_MODE8,	   /* switch:6VM/4TC/4RSS non switch:2Port/16VM/4RSS */
+	PPE_QID_MODE9,	   /* non switch:2Port/32VM/2RSS */
+	PPE_QID_MODE10,	   /* non switch:2Port/32RSS */
+	PPE_QID_MODE11,	   /* non switch:2Port/4TC/16RSS */
 };
 
 enum ppe_port_mode {
@@ -72,6 +81,8 @@ struct hns_ppe_cb {
 	u8 port;			 /* port id in dsaf  */
 	void __iomem *io_base;
 	int virq;
+	u32 rss_indir_table[HNS_PPEV2_RSS_IND_TBL_SIZE]; /*shadow indir tab */
+	u32 rss_key[HNS_PPEV2_RSS_KEY_NUM]; /* rss hash key */
 };
 
 struct ppe_common_cb {
@@ -102,4 +113,9 @@ void hns_ppe_get_regs(struct hns_ppe_cb *ppe_cb, void *data);
 
 void hns_ppe_get_strings(struct hns_ppe_cb *ppe_cb, int stringset, u8 *data);
 void hns_ppe_get_stats(struct hns_ppe_cb *ppe_cb, u64 *data);
+void hns_ppe_set_tso_enable(struct hns_ppe_cb *ppe_cb, u32 value);
+void hns_ppe_set_rss_key(struct hns_ppe_cb *ppe_cb,
+			 const u32 rss_key[HNS_PPEV2_RSS_KEY_NUM]);
+void hns_ppe_set_indir_table(struct hns_ppe_cb *ppe_cb,
+			     const u32 rss_tab[HNS_PPEV2_RSS_IND_TBL_SIZE]);
 #endif /* _HNS_DSAF_PPE_H */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
index 4db32c62f062..8c30cec8850a 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
@@ -136,19 +136,37 @@ void hns_rcb_int_ctrl_hw(struct hnae_queue *q, u32 flag, u32 mask)
 
 void hns_rcb_int_clr_hw(struct hnae_queue *q, u32 flag)
 {
-	u32 clr = 1;
-
 	if (flag & RCB_INT_FLAG_TX) {
-		dsaf_write_dev(q, RCB_RING_INTSTS_TX_RING_REG, clr);
-		dsaf_write_dev(q, RCB_RING_INTSTS_TX_OVERTIME_REG, clr);
+		dsaf_write_dev(q, RCB_RING_INTSTS_TX_RING_REG, 1);
+		dsaf_write_dev(q, RCB_RING_INTSTS_TX_OVERTIME_REG, 1);
 	}
 
 	if (flag & RCB_INT_FLAG_RX) {
-		dsaf_write_dev(q, RCB_RING_INTSTS_RX_RING_REG, clr);
-		dsaf_write_dev(q, RCB_RING_INTSTS_RX_OVERTIME_REG, clr);
+		dsaf_write_dev(q, RCB_RING_INTSTS_RX_RING_REG, 1);
+		dsaf_write_dev(q, RCB_RING_INTSTS_RX_OVERTIME_REG, 1);
 	}
 }
 
+/* Write mask, normalized to 0/1, to the INTMSK_TXWL/RXWL regs per flag. */
+void hns_rcbv2_int_ctrl_hw(struct hnae_queue *q, u32 flag, u32 mask)
+{
+	u32 msk_val = mask ? 1 : 0;
+
+	if (flag & RCB_INT_FLAG_TX)
+		dsaf_write_dev(q, RCB_RING_INTMSK_TXWL_REG, msk_val);
+	if (flag & RCB_INT_FLAG_RX)
+		dsaf_write_dev(q, RCB_RING_INTMSK_RXWL_REG, msk_val);
+}
+
+/* Write 1 to the V2 TX and/or RX ring int status reg selected by flag. */
+void hns_rcbv2_int_clr_hw(struct hnae_queue *q, u32 flag)
+{
+	if (flag & RCB_INT_FLAG_TX)
+		dsaf_write_dev(q, RCBV2_TX_RING_INT_STS_REG, 1);
+	if (flag & RCB_INT_FLAG_RX)
+		dsaf_write_dev(q, RCBV2_RX_RING_INT_STS_REG, 1);
+}
+
 /**
  *hns_rcb_ring_enable_hw - enable ring
  *@ring: rcb ring
@@ -193,6 +211,7 @@ static void hns_rcb_ring_init(struct ring_pair_cb *ring_pair, int ring_type)
 			       (u32)dma);
 		dsaf_write_dev(q, RCB_RING_RX_RING_BASEADDR_H_REG,
 			       (u32)((dma >> 31) >> 1));
+
 		dsaf_write_dev(q, RCB_RING_RX_RING_BD_LEN_REG,
 			       bd_size_type);
 		dsaf_write_dev(q, RCB_RING_RX_RING_BD_NUM_REG,
@@ -204,6 +223,7 @@ static void hns_rcb_ring_init(struct ring_pair_cb *ring_pair, int ring_type)
 			       (u32)dma);
 		dsaf_write_dev(q, RCB_RING_TX_RING_BASEADDR_H_REG,
 			       (u32)((dma >> 31) >> 1));
+
 		dsaf_write_dev(q, RCB_RING_TX_RING_BD_LEN_REG,
 			       bd_size_type);
 		dsaf_write_dev(q, RCB_RING_TX_RING_BD_NUM_REG,
@@ -232,9 +252,6 @@ void hns_rcb_init_hw(struct ring_pair_cb *ring)
 static void hns_rcb_set_port_desc_cnt(struct rcb_common_cb *rcb_common,
 				      u32 port_idx, u32 desc_cnt)
 {
-	if (port_idx >= HNS_RCB_SERVICE_NW_ENGINE_NUM)
-		port_idx = 0;
-
 	dsaf_write_dev(rcb_common, RCB_CFG_BD_NUM_REG + port_idx * 4,
 		       desc_cnt);
 }
@@ -249,8 +266,6 @@ static int  hns_rcb_set_port_coalesced_frames(struct rcb_common_cb *rcb_common,
 					      u32 port_idx,
 					      u32 coalesced_frames)
 {
-	if (port_idx >= HNS_RCB_SERVICE_NW_ENGINE_NUM)
-		port_idx = 0;
 	if (coalesced_frames >= rcb_common->desc_num ||
 	    coalesced_frames > HNS_RCB_MAX_COALESCED_FRAMES)
 		return -EINVAL;
@@ -354,6 +369,9 @@ int hns_rcb_common_init_hw(struct rcb_common_cb *rcb_common)
 	dsaf_write_dev(rcb_common, RCB_COM_CFG_ENDIAN_REG,
 		       HNS_RCB_COMMON_ENDIAN);
 
+	dsaf_write_dev(rcb_common, RCB_COM_CFG_FNA_REG, 0x0);
+	dsaf_write_dev(rcb_common, RCB_COM_CFG_FA_REG, 0x1);
+
 	return 0;
 }
 
@@ -387,19 +405,23 @@ static void hns_rcb_ring_get_cfg(struct hnae_queue *q, int ring_type)
 	struct rcb_common_cb *rcb_common;
 	struct ring_pair_cb *ring_pair_cb;
 	u32 buf_size;
-	u16 desc_num;
-	int irq_idx;
+	u16 desc_num, mdnum_ppkt;
+	bool irq_idx, is_ver1;
 
 	ring_pair_cb = container_of(q, struct ring_pair_cb, q);
+	is_ver1 = AE_IS_VER1(ring_pair_cb->rcb_common->dsaf_dev->dsaf_ver);
 	if (ring_type == RX_RING) {
 		ring = &q->rx_ring;
 		ring->io_base = ring_pair_cb->q.io_base;
 		irq_idx = HNS_RCB_IRQ_IDX_RX;
+		mdnum_ppkt = HNS_RCB_RING_MAX_BD_PER_PKT;
 	} else {
 		ring = &q->tx_ring;
 		ring->io_base = (u8 __iomem *)ring_pair_cb->q.io_base +
 			HNS_RCB_TX_REG_OFFSET;
 		irq_idx = HNS_RCB_IRQ_IDX_TX;
+		mdnum_ppkt = is_ver1 ? HNS_RCB_RING_MAX_TXBD_PER_PKT :
+				 HNS_RCBV2_RING_MAX_TXBD_PER_PKT;
 	}
 
 	rcb_common = ring_pair_cb->rcb_common;
@@ -414,7 +436,7 @@ static void hns_rcb_ring_get_cfg(struct hnae_queue *q, int ring_type)
 
 	ring->buf_size = buf_size;
 	ring->desc_num = desc_num;
-	ring->max_desc_num_per_pkt = HNS_RCB_RING_MAX_BD_PER_PKT;
+	ring->max_desc_num_per_pkt = mdnum_ppkt;
 	ring->max_raw_data_sz_per_desc = HNS_RCB_MAX_PKT_SIZE;
 	ring->max_pkt_size = HNS_RCB_MAX_PKT_SIZE;
 	ring->next_to_use = 0;
@@ -445,14 +467,22 @@ static int hns_rcb_get_port(struct rcb_common_cb *rcb_common, int ring_idx)
 	return port;
 }
 
+#define SERVICE_RING_IRQ_IDX(v1) \
+	((v1) ? HNS_SERVICE_RING_IRQ_IDX : HNSV2_SERVICE_RING_IRQ_IDX)
+#define DEBUG_RING_IRQ_IDX(v1) \
+	((v1) ? HNS_DEBUG_RING_IRQ_IDX : HNSV2_DEBUG_RING_IRQ_IDX)
+#define DEBUG_RING_IRQ_OFFSET(v1) \
+	((v1) ? HNS_DEBUG_RING_IRQ_OFFSET : HNSV2_DEBUG_RING_IRQ_OFFSET)
 static int hns_rcb_get_base_irq_idx(struct rcb_common_cb *rcb_common)
 {
 	int comm_index = rcb_common->comm_index;
+	bool is_ver1 = AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver);
 
 	if (comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX)
-		return HNS_SERVICE_RING_IRQ_IDX;
+		return SERVICE_RING_IRQ_IDX(is_ver1);
 	else
-		return HNS_DEBUG_RING_IRQ_IDX + (comm_index - 1) * 2;
+		return  DEBUG_RING_IRQ_IDX(is_ver1) +
+			(comm_index - 1) * DEBUG_RING_IRQ_OFFSET(is_ver1);
 }
 
 #define RCB_COMM_BASE_TO_RING_BASE(base, ringid)\
@@ -468,6 +498,10 @@ void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common)
 	u32 ring_num = rcb_common->ring_num;
 	int base_irq_idx = hns_rcb_get_base_irq_idx(rcb_common);
 	struct device_node *np = rcb_common->dsaf_dev->dev->of_node;
+	struct platform_device *pdev =
+		container_of(rcb_common->dsaf_dev->dev,
+			     struct platform_device, dev);
+	bool is_ver1 = AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver);
 
 	for (i = 0; i < ring_num; i++) {
 		ring_pair_cb = &rcb_common->ring_pair_cb[i];
@@ -477,10 +511,12 @@ void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common)
 		ring_pair_cb->q.io_base =
 			RCB_COMM_BASE_TO_RING_BASE(rcb_common->io_base, i);
 		ring_pair_cb->port_id_in_dsa = hns_rcb_get_port(rcb_common, i);
-		ring_pair_cb->virq[HNS_RCB_IRQ_IDX_TX]
-			= irq_of_parse_and_map(np, base_irq_idx + i * 2);
-		ring_pair_cb->virq[HNS_RCB_IRQ_IDX_RX]
-			= irq_of_parse_and_map(np, base_irq_idx + i * 2 + 1);
+		ring_pair_cb->virq[HNS_RCB_IRQ_IDX_TX] =
+		is_ver1 ? irq_of_parse_and_map(np, base_irq_idx + i * 2) :
+			  platform_get_irq(pdev, base_irq_idx + i * 3 + 1);
+		ring_pair_cb->virq[HNS_RCB_IRQ_IDX_RX] =
+		is_ver1 ? irq_of_parse_and_map(np, base_irq_idx + i * 2 + 1) :
+			  platform_get_irq(pdev, base_irq_idx + i * 3);
 		ring_pair_cb->q.phy_base =
 			RCB_COMM_BASE_TO_RING_BASE(rcb_common->phy_base, i);
 		hns_rcb_ring_pair_get_cfg(ring_pair_cb);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h
index 3a2afe2dd8bb..29041b18741a 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h
@@ -26,6 +26,8 @@ struct rcb_common_cb;
 #define HNS_RCB_SERVICE_NW_ENGINE_NUM		DSAF_COMM_CHN
 #define HNS_RCB_DEBUG_NW_ENGINE_NUM		1
 #define HNS_RCB_RING_MAX_BD_PER_PKT		3
+#define HNS_RCB_RING_MAX_TXBD_PER_PKT		3
+#define HNS_RCBV2_RING_MAX_TXBD_PER_PKT		8
 #define HNS_RCB_MAX_PKT_SIZE MAC_MAX_MTU
 
 #define HNS_RCB_RING_MAX_PENDING_BD		1024
@@ -106,13 +108,17 @@ void hns_rcb_common_free_cfg(struct dsaf_device *dsaf_dev, u32 comm_index);
 int hns_rcb_common_init_hw(struct rcb_common_cb *rcb_common);
 void hns_rcb_start(struct hnae_queue *q, u32 val);
 void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common);
-void hns_rcb_common_init_commit_hw(struct rcb_common_cb *rcb_common);
 void hns_rcb_get_queue_mode(enum dsaf_mode dsaf_mode, int comm_index,
 			    u16 *max_vfn, u16 *max_q_per_vf);
 
+void hns_rcb_common_init_commit_hw(struct rcb_common_cb *rcb_common);
+
 void hns_rcb_ring_enable_hw(struct hnae_queue *q, u32 val);
 void hns_rcb_int_clr_hw(struct hnae_queue *q, u32 flag);
 void hns_rcb_int_ctrl_hw(struct hnae_queue *q, u32 flag, u32 enable);
+void hns_rcbv2_int_ctrl_hw(struct hnae_queue *q, u32 flag, u32 mask);
+void hns_rcbv2_int_clr_hw(struct hnae_queue *q, u32 flag);
+
 void hns_rcb_init_hw(struct ring_pair_cb *ring);
 void hns_rcb_reset_ring_hw(struct hnae_queue *q);
 void hns_rcb_wait_fbd_clean(struct hnae_queue **qs, int q_num, u32 flag);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index b475e1bf2e6f..c4d7c26952c4 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -10,21 +10,12 @@
 #ifndef _DSAF_REG_H_
 #define _DSAF_REG_H_
 
-#define HNS_GE_FIFO_ERR_INTNUM 8
-#define HNS_XGE_ERR_INTNUM 6
-#define HNS_RCB_COMM_ERR_INTNUM 12
-#define HNS_PPE_TNL_ERR_INTNUM 8
-#define HNS_DSAF_EVENT_INTNUM 21
-#define HNS_DEBUG_RING_INTNUM 4
-#define HNS_SERVICE_RING_INTNUM 256
-
-#define HNS_DEBUG_RING_IRQ_IDX (HNS_GE_FIFO_ERR_INTNUM + HNS_XGE_ERR_INTNUM +\
-		HNS_RCB_COMM_ERR_INTNUM + HNS_PPE_TNL_ERR_INTNUM +\
-		HNS_DSAF_EVENT_INTNUM)
-#define HNS_SERVICE_RING_IRQ_IDX (HNS_DEBUG_RING_IRQ_IDX +\
-		HNS_DEBUG_RING_INTNUM)
-
-#define DSAF_IRQ_NUM 18
+#define HNS_DEBUG_RING_IRQ_IDX 55
+#define HNS_SERVICE_RING_IRQ_IDX 59
+#define HNS_DEBUG_RING_IRQ_OFFSET 2
+#define HNSV2_DEBUG_RING_IRQ_IDX 409
+#define HNSV2_SERVICE_RING_IRQ_IDX 25
+#define HNSV2_DEBUG_RING_IRQ_OFFSET 9
 
 #define DSAF_MAX_PORT_NUM_PER_CHIP 8
 #define DSAF_SERVICE_PORT_NUM_PER_DSAF 6
@@ -39,9 +30,15 @@
 #define DSAF_GE_NUM		((DSAF_SERVICE_NW_NUM) + (DSAF_DEBUG_NW_NUM))
 #define DSAF_PORT_NUM		((DSAF_SERVICE_NW_NUM) + (DSAF_DEBUG_NW_NUM))
 #define DSAF_XGE_NUM		DSAF_SERVICE_NW_NUM
+#define DSAF_PORT_TYPE_NUM 3
 #define DSAF_NODE_NUM		18
 #define DSAF_XOD_BIG_NUM	DSAF_NODE_NUM
 #define DSAF_SBM_NUM		DSAF_NODE_NUM
+#define DSAFV2_SBM_NUM		8
+#define DSAFV2_SBM_XGE_CHN    6
+#define DSAFV2_SBM_PPE_CHN    1
+#define DASFV2_ROCEE_CRD_NUM  8
+
 #define DSAF_VOQ_NUM		DSAF_NODE_NUM
 #define DSAF_INODE_NUM		DSAF_NODE_NUM
 #define DSAF_XOD_NUM		8
@@ -52,56 +49,56 @@
 #define DSAF_TCAM_SUM		512
 #define DSAF_LINE_SUM		(2048 * 14)
 
-#define DSAF_SUB_SC_NT_SRAM_CLK_SEL_REG                0x100
-#define DSAF_SUB_SC_HILINK3_CRG_CTRL0_REG              0x180
-#define DSAF_SUB_SC_HILINK3_CRG_CTRL1_REG              0x184
-#define DSAF_SUB_SC_HILINK3_CRG_CTRL2_REG              0x188
-#define DSAF_SUB_SC_HILINK3_CRG_CTRL3_REG              0x18C
-#define DSAF_SUB_SC_HILINK4_CRG_CTRL0_REG              0x190
-#define DSAF_SUB_SC_HILINK4_CRG_CTRL1_REG              0x194
-#define DSAF_SUB_SC_DSAF_CLK_EN_REG                    0x300
-#define DSAF_SUB_SC_DSAF_CLK_DIS_REG                   0x304
-#define DSAF_SUB_SC_NT_CLK_EN_REG                      0x308
-#define DSAF_SUB_SC_NT_CLK_DIS_REG                     0x30C
-#define DSAF_SUB_SC_XGE_CLK_EN_REG                     0x310
-#define DSAF_SUB_SC_XGE_CLK_DIS_REG                    0x314
-#define DSAF_SUB_SC_GE_CLK_EN_REG                      0x318
-#define DSAF_SUB_SC_GE_CLK_DIS_REG                     0x31C
-#define DSAF_SUB_SC_PPE_CLK_EN_REG                     0x320
-#define DSAF_SUB_SC_PPE_CLK_DIS_REG                    0x324
-#define DSAF_SUB_SC_RCB_PPE_COM_CLK_EN_REG             0x350
-#define DSAF_SUB_SC_RCB_PPE_COM_CLK_DIS_REG            0x354
-#define DSAF_SUB_SC_XBAR_RESET_REQ_REG                 0xA00
-#define DSAF_SUB_SC_XBAR_RESET_DREQ_REG                0xA04
-#define DSAF_SUB_SC_NT_RESET_REQ_REG                   0xA08
-#define DSAF_SUB_SC_NT_RESET_DREQ_REG                  0xA0C
-#define DSAF_SUB_SC_XGE_RESET_REQ_REG                  0xA10
-#define DSAF_SUB_SC_XGE_RESET_DREQ_REG                 0xA14
-#define DSAF_SUB_SC_GE_RESET_REQ0_REG                  0xA18
-#define DSAF_SUB_SC_GE_RESET_DREQ0_REG                 0xA1C
-#define DSAF_SUB_SC_GE_RESET_REQ1_REG                  0xA20
-#define DSAF_SUB_SC_GE_RESET_DREQ1_REG                 0xA24
-#define DSAF_SUB_SC_PPE_RESET_REQ_REG                  0xA48
-#define DSAF_SUB_SC_PPE_RESET_DREQ_REG                 0xA4C
-#define DSAF_SUB_SC_RCB_PPE_COM_RESET_REQ_REG          0xA88
-#define DSAF_SUB_SC_RCB_PPE_COM_RESET_DREQ_REG         0xA8C
-#define DSAF_SUB_SC_LIGHT_MODULE_DETECT_EN_REG         0x2060
-#define DSAF_SUB_SC_TCAM_MBIST_EN_REG                  0x2300
-#define DSAF_SUB_SC_DSAF_CLK_ST_REG                    0x5300
-#define DSAF_SUB_SC_NT_CLK_ST_REG                      0x5304
-#define DSAF_SUB_SC_XGE_CLK_ST_REG                     0x5308
-#define DSAF_SUB_SC_GE_CLK_ST_REG                      0x530C
-#define DSAF_SUB_SC_PPE_CLK_ST_REG                     0x5310
-#define DSAF_SUB_SC_ROCEE_CLK_ST_REG                   0x5314
-#define DSAF_SUB_SC_CPU_CLK_ST_REG                     0x5318
-#define DSAF_SUB_SC_RCB_PPE_COM_CLK_ST_REG             0x5328
-#define DSAF_SUB_SC_XBAR_RESET_ST_REG                  0x5A00
-#define DSAF_SUB_SC_NT_RESET_ST_REG                    0x5A04
-#define DSAF_SUB_SC_XGE_RESET_ST_REG                   0x5A08
-#define DSAF_SUB_SC_GE_RESET_ST0_REG                   0x5A0C
-#define DSAF_SUB_SC_GE_RESET_ST1_REG                   0x5A10
-#define DSAF_SUB_SC_PPE_RESET_ST_REG                   0x5A24
-#define DSAF_SUB_SC_RCB_PPE_COM_RESET_ST_REG           0x5A44
+#define DSAF_SUB_SC_NT_SRAM_CLK_SEL_REG			0x100
+#define DSAF_SUB_SC_HILINK3_CRG_CTRL0_REG		0x180
+#define DSAF_SUB_SC_HILINK3_CRG_CTRL1_REG		0x184
+#define DSAF_SUB_SC_HILINK3_CRG_CTRL2_REG		0x188
+#define DSAF_SUB_SC_HILINK3_CRG_CTRL3_REG		0x18C
+#define DSAF_SUB_SC_HILINK4_CRG_CTRL0_REG		0x190
+#define DSAF_SUB_SC_HILINK4_CRG_CTRL1_REG		0x194
+#define DSAF_SUB_SC_DSAF_CLK_EN_REG			0x300
+#define DSAF_SUB_SC_DSAF_CLK_DIS_REG			0x304
+#define DSAF_SUB_SC_NT_CLK_EN_REG			0x308
+#define DSAF_SUB_SC_NT_CLK_DIS_REG			0x30C
+#define DSAF_SUB_SC_XGE_CLK_EN_REG			0x310
+#define DSAF_SUB_SC_XGE_CLK_DIS_REG			0x314
+#define DSAF_SUB_SC_GE_CLK_EN_REG			0x318
+#define DSAF_SUB_SC_GE_CLK_DIS_REG			0x31C
+#define DSAF_SUB_SC_PPE_CLK_EN_REG			0x320
+#define DSAF_SUB_SC_PPE_CLK_DIS_REG			0x324
+#define DSAF_SUB_SC_RCB_PPE_COM_CLK_EN_REG		0x350
+#define DSAF_SUB_SC_RCB_PPE_COM_CLK_DIS_REG		0x354
+#define DSAF_SUB_SC_XBAR_RESET_REQ_REG			0xA00
+#define DSAF_SUB_SC_XBAR_RESET_DREQ_REG			0xA04
+#define DSAF_SUB_SC_NT_RESET_REQ_REG			0xA08
+#define DSAF_SUB_SC_NT_RESET_DREQ_REG			0xA0C
+#define DSAF_SUB_SC_XGE_RESET_REQ_REG			0xA10
+#define DSAF_SUB_SC_XGE_RESET_DREQ_REG			0xA14
+#define DSAF_SUB_SC_GE_RESET_REQ0_REG			0xA18
+#define DSAF_SUB_SC_GE_RESET_DREQ0_REG			0xA1C
+#define DSAF_SUB_SC_GE_RESET_REQ1_REG			0xA20
+#define DSAF_SUB_SC_GE_RESET_DREQ1_REG			0xA24
+#define DSAF_SUB_SC_PPE_RESET_REQ_REG			0xA48
+#define DSAF_SUB_SC_PPE_RESET_DREQ_REG			0xA4C
+#define DSAF_SUB_SC_RCB_PPE_COM_RESET_REQ_REG		0xA88
+#define DSAF_SUB_SC_RCB_PPE_COM_RESET_DREQ_REG		0xA8C
+#define DSAF_SUB_SC_LIGHT_MODULE_DETECT_EN_REG		0x2060
+#define DSAF_SUB_SC_TCAM_MBIST_EN_REG			0x2300
+#define DSAF_SUB_SC_DSAF_CLK_ST_REG			0x5300
+#define DSAF_SUB_SC_NT_CLK_ST_REG			0x5304
+#define DSAF_SUB_SC_XGE_CLK_ST_REG			0x5308
+#define DSAF_SUB_SC_GE_CLK_ST_REG			0x530C
+#define DSAF_SUB_SC_PPE_CLK_ST_REG			0x5310
+#define DSAF_SUB_SC_ROCEE_CLK_ST_REG			0x5314
+#define DSAF_SUB_SC_CPU_CLK_ST_REG			0x5318
+#define DSAF_SUB_SC_RCB_PPE_COM_CLK_ST_REG		0x5328
+#define DSAF_SUB_SC_XBAR_RESET_ST_REG			0x5A00
+#define DSAF_SUB_SC_NT_RESET_ST_REG			0x5A04
+#define DSAF_SUB_SC_XGE_RESET_ST_REG			0x5A08
+#define DSAF_SUB_SC_GE_RESET_ST0_REG			0x5A0C
+#define DSAF_SUB_SC_GE_RESET_ST1_REG			0x5A10
+#define DSAF_SUB_SC_PPE_RESET_ST_REG			0x5A24
+#define DSAF_SUB_SC_RCB_PPE_COM_RESET_ST_REG		0x5A44
 
 /*serdes offset**/
 #define HNS_MAC_HILINK3_REG DSAF_SUB_SC_HILINK3_CRG_CTRL0_REG
@@ -178,6 +175,7 @@
 #define DSAF_SBM_BP_CFG_2_XGE_REG_0_REG		0x200C
 #define DSAF_SBM_BP_CFG_2_PPE_REG_0_REG		0x230C
 #define DSAF_SBM_BP_CFG_2_ROCEE_REG_0_REG	0x260C
+#define DSAFV2_SBM_BP_CFG_2_ROCEE_REG_0_REG		 0x238C
 #define DSAF_SBM_FREE_CNT_0_0_REG		0x2010
 #define DSAF_SBM_FREE_CNT_1_0_REG		0x2014
 #define DSAF_SBM_BP_CNT_0_0_REG			0x2018
@@ -319,6 +317,8 @@
 #define PPE_CFG_TAG_GEN_REG			0x90
 #define PPE_CFG_PARSE_TAG_REG			0x94
 #define PPE_CFG_PRO_CHECK_EN_REG		0x98
+#define PPEV2_CFG_TSO_EN_REG			0xA0
+#define PPEV2_VLAN_STRIP_EN_REG			0xAC
 #define PPE_INTEN_REG				0x100
 #define PPE_RINT_REG				0x104
 #define PPE_INTSTS_REG				0x108
@@ -351,6 +351,8 @@
 #define PPE_ECO0_REG				0x32C
 #define PPE_ECO1_REG				0x330
 #define PPE_ECO2_REG				0x334
+#define PPEV2_INDRECTION_TBL_REG		0x800
+#define PPEV2_RSS_KEY_REG			0x900
 
 #define RCB_COM_CFG_ENDIAN_REG			0x0
 #define RCB_COM_CFG_SYS_FSH_REG			0xC
@@ -431,8 +433,10 @@
 
 #define RCB_RING_INTMSK_RXWL_REG		0x000A0
 #define RCB_RING_INTSTS_RX_RING_REG		0x000A4
+#define RCBV2_RX_RING_INT_STS_REG		0x000A8
 #define RCB_RING_INTMSK_TXWL_REG		0x000AC
 #define RCB_RING_INTSTS_TX_RING_REG		0x000B0
+#define RCBV2_TX_RING_INT_STS_REG		0x000B4
 #define RCB_RING_INTMSK_RX_OVERTIME_REG		0x000B8
 #define RCB_RING_INTSTS_RX_OVERTIME_REG		0x000BC
 #define RCB_RING_INTMSK_TX_OVERTIME_REG		0x000C4
@@ -678,6 +682,10 @@
 
 #define XGMAC_TRX_CORE_SRST_M			0x2080
 
+#define DSAF_SRAM_INIT_OVER_M 0xff
+#define DSAFV2_SRAM_INIT_OVER_M 0x3ff
+#define DSAF_SRAM_INIT_OVER_S 0
+
 #define DSAF_CFG_EN_S 0
 #define DSAF_CFG_TC_MODE_S 1
 #define DSAF_CFG_CRC_EN_S 2
@@ -685,6 +693,7 @@
 #define DSAF_CFG_MIX_MODE_S 4
 #define DSAF_CFG_STP_MODE_S 5
 #define DSAF_CFG_LOCA_ADDR_EN_S 6
+#define DSAFV2_CFG_VLAN_TAG_MODE_S 17
 
 #define DSAF_CNT_CLR_CE_S 0
 #define DSAF_SNAP_EN_S 1
@@ -707,6 +716,16 @@
 
 #define DSAF_INODE_IN_PORT_NUM_M 7
 #define DSAF_INODE_IN_PORT_NUM_S 0
+#define DSAFV2_INODE_IN_PORT1_NUM_M (7ULL << 3)
+#define DSAFV2_INODE_IN_PORT1_NUM_S 3
+#define DSAFV2_INODE_IN_PORT2_NUM_M (7ULL << 6)
+#define DSAFV2_INODE_IN_PORT2_NUM_S 6
+#define DSAFV2_INODE_IN_PORT3_NUM_M (7ULL << 9)
+#define DSAFV2_INODE_IN_PORT3_NUM_S 9
+#define DSAFV2_INODE_IN_PORT4_NUM_M (7ULL << 12)
+#define DSAFV2_INODE_IN_PORT4_NUM_S 12
+#define DSAFV2_INODE_IN_PORT5_NUM_M (7ULL << 15)
+#define DSAFV2_INODE_IN_PORT5_NUM_S 15
 
 #define HNS_DSAF_I4TC_CFG 0x18688688
 #define HNS_DSAF_I8TC_CFG 0x18FAC688
@@ -738,6 +757,33 @@
 #define DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S 10
 #define DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M (((1ULL << 10) - 1) << 10)
 
+#define DSAFV2_SBM_CFG0_VC1_MAX_BUF_NUM_S 0
+#define DSAFV2_SBM_CFG0_VC1_MAX_BUF_NUM_M (((1ULL << 9) - 1) << 0)
+#define DSAFV2_SBM_CFG0_VC0_MAX_BUF_NUM_S 9
+#define DSAFV2_SBM_CFG0_VC0_MAX_BUF_NUM_M (((1ULL << 9) - 1) << 9)
+#define DSAFV2_SBM_CFG0_COM_MAX_BUF_NUM_S 18
+#define DSAFV2_SBM_CFG0_COM_MAX_BUF_NUM_M (((1ULL << 10) - 1) << 18)
+
+#define DSAFV2_SBM_CFG1_TC4_MAX_BUF_NUM_S 0
+#define DSAFV2_SBM_CFG1_TC4_MAX_BUF_NUM_M (((1ULL << 9) - 1) << 0)
+#define DSAFV2_SBM_CFG1_TC0_MAX_BUF_NUM_S 9
+#define DSAFV2_SBM_CFG1_TC0_MAX_BUF_NUM_M (((1ULL << 9) - 1) << 9)
+
+#define DSAFV2_SBM_CFG2_SET_BUF_NUM_S 0
+#define DSAFV2_SBM_CFG2_SET_BUF_NUM_M (((1ULL << 9) - 1) << 0)
+#define DSAFV2_SBM_CFG2_RESET_BUF_NUM_S 9
+#define DSAFV2_SBM_CFG2_RESET_BUF_NUM_M (((1ULL << 9) - 1) << 9)
+
+#define DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_S 0
+#define DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 0)
+#define DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S 9
+#define DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 9)
+
+#define DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_S 0
+#define DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 0)
+#define DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_S 9
+#define DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 9)
+
 #define DSAF_TBL_TCAM_ADDR_S 0
 #define DSAF_TBL_TCAM_ADDR_M ((1ULL << 9) - 1)
 
@@ -797,6 +843,18 @@
 #define PPE_CFG_QID_MODE_CF_QID_MODE_S	8
 #define PPE_CFG_QID_MODE_CF_QID_MODE_M	(0x7 << PPE_CFG_QID_MODE_CF_QID_MODE_S)
 
+#define PPEV2_CFG_RSS_TBL_4N0_S	0
+#define PPEV2_CFG_RSS_TBL_4N0_M	(((1UL << 5) - 1) << PPEV2_CFG_RSS_TBL_4N0_S)
+
+#define PPEV2_CFG_RSS_TBL_4N1_S	8
+#define PPEV2_CFG_RSS_TBL_4N1_M	(((1UL << 5) - 1) << PPEV2_CFG_RSS_TBL_4N1_S)
+
+#define PPEV2_CFG_RSS_TBL_4N2_S	16
+#define PPEV2_CFG_RSS_TBL_4N2_M	(((1UL << 5) - 1) << PPEV2_CFG_RSS_TBL_4N2_S)
+
+#define PPEV2_CFG_RSS_TBL_4N3_S	24
+#define PPEV2_CFG_RSS_TBL_4N3_M	(((1UL << 5) - 1) << PPEV2_CFG_RSS_TBL_4N3_S)
+
 #define PPE_CNT_CLR_CE_B	0
 #define PPE_CNT_CLR_SNAP_EN_B	1
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 08cef0dfb5db..5a81dafd725e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -33,10 +33,105 @@
 
 #define RCB_IRQ_NOT_INITED 0
 #define RCB_IRQ_INITED 1
+#define HNS_BUFFER_SIZE_2048 2048
+
+#define BD_MAX_SEND_SIZE 8191
+#define SKB_TMP_LEN(SKB) \
+	(((SKB)->transport_header - (SKB)->mac_header) + tcp_hdrlen(SKB))
+
+/* Fill one v2 TX BD (csum/TSO fields for DESC_TYPE_SKB) and advance the ring */
+static void fill_v2_desc(struct hnae_ring *ring, void *priv,
+			 int size, dma_addr_t dma, int frag_end,
+			 int buf_num, enum hns_desc_type type, int mtu)
+{
+	struct hnae_desc *desc = &ring->desc[ring->next_to_use];
+	struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
+	struct iphdr *iphdr;
+	struct ipv6hdr *ipv6hdr;
+	struct sk_buff *skb;
+	__be16 protocol;
+	u8 bn_pid = 0;
+	u8 rrcfv = 0;
+	u8 ip_offset = 0;
+	u8 tvsvsn = 0;
+	u16 mss = 0;
+	u8 l4_len = 0;
+	u16 paylen = 0;
+
+	desc_cb->priv = priv;
+	desc_cb->length = size;
+	desc_cb->dma = dma;
+	desc_cb->type = type;
+
+	desc->addr = cpu_to_le64(dma);
+	desc->tx.send_size = cpu_to_le16((u16)size);
+
+	/* mark the BD valid and record the packet's buffer count minus one */
+	hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1);
+	hnae_set_field(bn_pid, HNSV2_TXD_BUFNUM_M, 0, buf_num - 1);
+
+	if (type == DESC_TYPE_SKB) {
+		skb = (struct sk_buff *)priv;
+
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			skb_reset_mac_len(skb);
+			protocol = skb->protocol;
+			ip_offset = ETH_HLEN;
+
+			if (protocol == htons(ETH_P_8021Q)) {
+				ip_offset += VLAN_HLEN;
+				protocol = vlan_get_protocol(skb);
+				skb->protocol = protocol;
+			}
+
+			if (skb->protocol == htons(ETH_P_IP)) {
+				iphdr = ip_hdr(skb);
+				hnae_set_bit(rrcfv, HNSV2_TXD_L3CS_B, 1);
+				hnae_set_bit(rrcfv, HNSV2_TXD_L4CS_B, 1);
+
+				/* request segmentation only for TCP GSO frames */
+				if (iphdr->protocol == IPPROTO_TCP &&
+				    skb_is_gso(skb)) {
+					hnae_set_bit(tvsvsn,
+						     HNSV2_TXD_TSE_B, 1);
+					l4_len = tcp_hdrlen(skb);
+					mss = skb_shinfo(skb)->gso_size;
+					paylen = skb->len - SKB_TMP_LEN(skb);
+				}
+			} else if (skb->protocol == htons(ETH_P_IPV6)) {
+				hnae_set_bit(tvsvsn, HNSV2_TXD_IPV6_B, 1);
+				ipv6hdr = ipv6_hdr(skb);
+				hnae_set_bit(rrcfv, HNSV2_TXD_L4CS_B, 1);
+
+				/* MSS comes from the stack (gso_size), not mtu */
+				if (ipv6hdr->nexthdr == IPPROTO_TCP &&
+				    skb_is_gso(skb)) {
+					hnae_set_bit(tvsvsn,
+						     HNSV2_TXD_TSE_B, 1);
+					l4_len = tcp_hdrlen(skb);
+					mss = skb_shinfo(skb)->gso_size;
+					paylen = skb->len - SKB_TMP_LEN(skb);
+				}
+			}
+			desc->tx.ip_offset = ip_offset;
+			desc->tx.tse_vlan_snap_v6_sctp_nth = tvsvsn;
+			desc->tx.mss = cpu_to_le16(mss);
+			desc->tx.l4_len = l4_len;
+			desc->tx.paylen = cpu_to_le16(paylen);
+		}
+	}
+
+	hnae_set_bit(rrcfv, HNSV2_TXD_FE_B, frag_end);
+
+	desc->tx.bn_pid = bn_pid;
+	desc->tx.ra_ri_cs_fe_vld = rrcfv;
+
+	ring_ptr_move_fw(ring, next_to_use);
+}
 
 static void fill_desc(struct hnae_ring *ring, void *priv,
 		      int size, dma_addr_t dma, int frag_end,
-		      int buf_num, enum hns_desc_type type)
+		      int buf_num, enum hns_desc_type type, int mtu)
 {
 	struct hnae_desc *desc = &ring->desc[ring->next_to_use];
 	struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
@@ -100,47 +195,129 @@ static void unfill_desc(struct hnae_ring *ring)
 	ring_ptr_move_bw(ring, next_to_use);
 }
 
-int hns_nic_net_xmit_hw(struct net_device *ndev,
-			struct sk_buff *skb,
-			struct hns_nic_ring_data *ring_data)
+static int hns_nic_maybe_stop_tx(
+	struct sk_buff **out_skb, int *bnum, struct hnae_ring *ring)
 {
-	struct hns_nic_priv *priv = netdev_priv(ndev);
-	struct device *dev = priv->dev;
-	struct hnae_ring *ring = ring_data->ring;
-	struct netdev_queue *dev_queue;
-	struct skb_frag_struct *frag;
+	struct sk_buff *skb = *out_skb;
+	struct sk_buff *new_skb = NULL;
 	int buf_num;
-	dma_addr_t dma;
-	int size, next_to_use;
-	int i, j;
-	struct sk_buff *new_skb;
-
-	assert(ring->max_desc_num_per_pkt <= ring->desc_num);
 
 	/* no. of segments (plus a header) */
 	buf_num = skb_shinfo(skb)->nr_frags + 1;
 
 	if (unlikely(buf_num > ring->max_desc_num_per_pkt)) {
-		if (ring_space(ring) < 1) {
-			ring->stats.tx_busy++;
-			goto out_net_tx_busy;
-		}
+		if (ring_space(ring) < 1)
+			return -EBUSY;
 
 		new_skb = skb_copy(skb, GFP_ATOMIC);
-		if (!new_skb) {
-			ring->stats.sw_err_cnt++;
-			netdev_err(ndev, "no memory to xmit!\n");
-			goto out_err_tx_ok;
-		}
+		if (!new_skb)
+			return -ENOMEM;
 
 		dev_kfree_skb_any(skb);
-		skb = new_skb;
+		*out_skb = new_skb;
 		buf_num = 1;
-		assert(skb_shinfo(skb)->nr_frags == 1);
 	} else if (buf_num > ring_space(ring)) {
+		return -EBUSY;
+	}
+
+	*bnum = buf_num;
+	return 0;
+}
+
+static int hns_nic_maybe_stop_tso(
+	struct sk_buff **out_skb, int *bnum, struct hnae_ring *ring)
+{
+	int i;
+	int size;
+	int buf_num;
+	int frag_num;
+	struct sk_buff *skb = *out_skb;
+	struct sk_buff *new_skb = NULL;
+	struct skb_frag_struct *frag;
+
+	size = skb_headlen(skb);
+	buf_num = (size + BD_MAX_SEND_SIZE - 1) / BD_MAX_SEND_SIZE;
+
+	frag_num = skb_shinfo(skb)->nr_frags;
+	for (i = 0; i < frag_num; i++) {
+		frag = &skb_shinfo(skb)->frags[i];
+		size = skb_frag_size(frag);
+		buf_num += (size + BD_MAX_SEND_SIZE - 1) / BD_MAX_SEND_SIZE;
+	}
+
+	if (unlikely(buf_num > ring->max_desc_num_per_pkt)) {
+		buf_num = (skb->len + BD_MAX_SEND_SIZE - 1) / BD_MAX_SEND_SIZE;
+		if (ring_space(ring) < buf_num)
+			return -EBUSY;
+		/* too many BDs: send a linear copy, split per BD size */
+		new_skb = skb_copy(skb, GFP_ATOMIC);
+		if (!new_skb)
+			return -ENOMEM;
+		dev_kfree_skb_any(skb);
+		*out_skb = new_skb;
+
+	} else if (ring_space(ring) < buf_num) {
+		return -EBUSY;
+	}
+
+	*bnum = buf_num;
+	return 0;
+}
+
+static void fill_tso_desc(struct hnae_ring *ring, void *priv,
+			  int size, dma_addr_t dma, int frag_end,
+			  int buf_num, enum hns_desc_type type, int mtu)
+{
+	int frag_buf_num;
+	int sizeoflast;
+	int k;
+
+	frag_buf_num = (size + BD_MAX_SEND_SIZE - 1) / BD_MAX_SEND_SIZE;
+	sizeoflast = size % BD_MAX_SEND_SIZE;
+	sizeoflast = sizeoflast ? sizeoflast : BD_MAX_SEND_SIZE;
+
+	/* when the frag size is bigger than hardware, split this frag */
+	for (k = 0; k < frag_buf_num; k++)
+		fill_v2_desc(ring, priv,
+			     (k == frag_buf_num - 1) ?
+					sizeoflast : BD_MAX_SEND_SIZE,
+			     dma + BD_MAX_SEND_SIZE * k,
+			     frag_end && (k == frag_buf_num - 1) ? 1 : 0,
+			     buf_num,
+			     (type == DESC_TYPE_SKB && !k) ?
+					DESC_TYPE_SKB : DESC_TYPE_PAGE,
+			     mtu);
+}
+
+int hns_nic_net_xmit_hw(struct net_device *ndev,
+			struct sk_buff *skb,
+			struct hns_nic_ring_data *ring_data)
+{
+	struct hns_nic_priv *priv = netdev_priv(ndev);
+	struct device *dev = priv->dev;
+	struct hnae_ring *ring = ring_data->ring;
+	struct netdev_queue *dev_queue;
+	struct skb_frag_struct *frag;
+	int buf_num;
+	int seg_num;
+	dma_addr_t dma;
+	int size, next_to_use;
+	int i;
+
+	switch (priv->ops.maybe_stop_tx(&skb, &buf_num, ring)) {
+	case -EBUSY:
 		ring->stats.tx_busy++;
 		goto out_net_tx_busy;
+	case -ENOMEM:
+		ring->stats.sw_err_cnt++;
+		netdev_err(ndev, "no memory to xmit!\n");
+		goto out_err_tx_ok;
+	default:
+		break;
 	}
+
+	/* no. of segments (plus a header) */
+	seg_num = skb_shinfo(skb)->nr_frags + 1;
 	next_to_use = ring->next_to_use;
 
 	/* fill the first part */
@@ -151,11 +328,11 @@ int hns_nic_net_xmit_hw(struct net_device *ndev,
 		ring->stats.sw_err_cnt++;
 		goto out_err_tx_ok;
 	}
-	fill_desc(ring, skb, size, dma, buf_num == 1 ? 1 : 0, buf_num,
-		  DESC_TYPE_SKB);
+	priv->ops.fill_desc(ring, skb, size, dma, seg_num == 1 ? 1 : 0,
+			    buf_num, DESC_TYPE_SKB, ndev->mtu);
 
 	/* fill the fragments */
-	for (i = 1; i < buf_num; i++) {
+	for (i = 1; i < seg_num; i++) {
 		frag = &skb_shinfo(skb)->frags[i - 1];
 		size = skb_frag_size(frag);
 		dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
@@ -164,8 +341,9 @@ int hns_nic_net_xmit_hw(struct net_device *ndev,
 			ring->stats.sw_err_cnt++;
 			goto out_map_frag_fail;
 		}
-		fill_desc(ring, skb_frag_page(frag), size, dma,
-			  buf_num - 1 == i ? 1 : 0, buf_num, DESC_TYPE_PAGE);
+		priv->ops.fill_desc(ring, skb_frag_page(frag), size, dma,
+				    seg_num - 1 == i ? 1 : 0, buf_num,
+				    DESC_TYPE_PAGE, ndev->mtu);
 	}
 
 	/*complete translate all packets*/
@@ -182,19 +360,20 @@ int hns_nic_net_xmit_hw(struct net_device *ndev,
 
 out_map_frag_fail:
 
-	for (j = i - 1; j > 0; j--) {
+	while (ring->next_to_use != next_to_use) {
 		unfill_desc(ring);
-		next_to_use = ring->next_to_use;
-		dma_unmap_page(dev, ring->desc_cb[next_to_use].dma,
-			       ring->desc_cb[next_to_use].length,
-			       DMA_TO_DEVICE);
+		if (ring->next_to_use != next_to_use)
+			dma_unmap_page(dev,
+				       ring->desc_cb[ring->next_to_use].dma,
+				       ring->desc_cb[ring->next_to_use].length,
+				       DMA_TO_DEVICE);
+		else
+			dma_unmap_single(dev,
+					 ring->desc_cb[next_to_use].dma,
+					 ring->desc_cb[next_to_use].length,
+					 DMA_TO_DEVICE);
 	}
 
-	unfill_desc(ring);
-	next_to_use = ring->next_to_use;
-	dma_unmap_single(dev, ring->desc_cb[next_to_use].dma,
-			 ring->desc_cb[next_to_use].length, DMA_TO_DEVICE);
-
 out_err_tx_ok:
 
 	dev_kfree_skb_any(skb);
@@ -313,13 +492,51 @@ static unsigned int hns_nic_get_headlen(unsigned char *data, u32 flag,
 		return max_size;
 }
 
-static void
-hns_nic_reuse_page(struct hnae_desc_cb *desc_cb, int tsize, int last_offset)
+static void hns_nic_reuse_page(struct sk_buff *skb, int i,
+			       struct hnae_ring *ring, int pull_len,
+			       struct hnae_desc_cb *desc_cb)
 {
+	struct hnae_desc *desc;
+	int truesize, size;
+	int last_offset;
+
+	desc = &ring->desc[ring->next_to_clean];
+	size = le16_to_cpu(desc->rx.size);
+
+#if (PAGE_SIZE < 8192)
+	if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
+		truesize = hnae_buf_size(ring);
+	} else {
+		truesize = ALIGN(size, L1_CACHE_BYTES);
+		last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
+	}
+
+#else
+	truesize = ALIGN(size, L1_CACHE_BYTES);
+	last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
+#endif
+
+	skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
+			size - pull_len, truesize - pull_len);
+
 	 /* avoid re-using remote pages,flag default unreuse */
 	if (likely(page_to_nid(desc_cb->priv) == numa_node_id())) {
+#if (PAGE_SIZE < 8192)
+		if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
+			/* if we are only owner of page we can reuse it */
+			if (likely(page_count(desc_cb->priv) == 1)) {
+				/* flip page offset to other buffer */
+				desc_cb->page_offset ^= truesize;
+
+				desc_cb->reuse_flag = 1;
+				/* bump ref count on page before it is given*/
+				get_page(desc_cb->priv);
+			}
+			return;
+		}
+#endif
 		/* move offset up to the next cache line */
-		desc_cb->page_offset += tsize;
+		desc_cb->page_offset += truesize;
 
 		if (desc_cb->page_offset <= last_offset) {
 			desc_cb->reuse_flag = 1;
@@ -329,35 +546,59 @@ hns_nic_reuse_page(struct hnae_desc_cb *desc_cb, int tsize, int last_offset)
 	}
 }
 
+static void get_v2rx_desc_bnum(u32 bnum_flag, int *out_bnum)
+{
+	*out_bnum = hnae_get_field(bnum_flag,
+				   HNS_RXD_BUFNUM_M, HNS_RXD_BUFNUM_S) + 1;
+}
+
+static void get_rx_desc_bnum(u32 bnum_flag, int *out_bnum)
+{
+	*out_bnum = hnae_get_field(bnum_flag,
+				   HNS_RXD_BUFNUM_M, HNS_RXD_BUFNUM_S);
+}
+
 static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
 			       struct sk_buff **out_skb, int *out_bnum)
 {
 	struct hnae_ring *ring = ring_data->ring;
 	struct net_device *ndev = ring_data->napi.dev;
+	struct hns_nic_priv *priv = netdev_priv(ndev);
 	struct sk_buff *skb;
 	struct hnae_desc *desc;
 	struct hnae_desc_cb *desc_cb;
 	unsigned char *va;
-	int bnum, length, size, i, truesize, last_offset;
+	int bnum, length, i;
 	int pull_len;
 	u32 bnum_flag;
 
-	last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
 	desc = &ring->desc[ring->next_to_clean];
 	desc_cb = &ring->desc_cb[ring->next_to_clean];
-	length = le16_to_cpu(desc->rx.pkt_len);
-	bnum_flag = le32_to_cpu(desc->rx.ipoff_bnum_pid_flag);
-	bnum = hnae_get_field(bnum_flag, HNS_RXD_BUFNUM_M, HNS_RXD_BUFNUM_S);
-	*out_bnum = bnum;
+
+	prefetch(desc);
+
 	va = (unsigned char *)desc_cb->buf + desc_cb->page_offset;
 
-	skb = *out_skb = napi_alloc_skb(&ring_data->napi, HNS_RX_HEAD_SIZE);
+	/* prefetch first cache line of first page */
+	prefetch(va);
+#if L1_CACHE_BYTES < 128
+	prefetch(va + L1_CACHE_BYTES);
+#endif
+
+	skb = *out_skb = napi_alloc_skb(&ring_data->napi,
+					HNS_RX_HEAD_SIZE);
 	if (unlikely(!skb)) {
 		netdev_err(ndev, "alloc rx skb fail\n");
 		ring->stats.sw_err_cnt++;
 		return -ENOMEM;
 	}
 
+	prefetchw(skb->data);
+	length = le16_to_cpu(desc->rx.pkt_len);
+	bnum_flag = le32_to_cpu(desc->rx.ipoff_bnum_pid_flag);
+	priv->ops.get_rxd_bnum(bnum_flag, &bnum);
+	*out_bnum = bnum;
+
 	if (length <= HNS_RX_HEAD_SIZE) {
 		memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long)));
 
@@ -380,13 +621,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
 		memcpy(__skb_put(skb, pull_len), va,
 		       ALIGN(pull_len, sizeof(long)));
 
-		size = le16_to_cpu(desc->rx.size);
-		truesize = ALIGN(size, L1_CACHE_BYTES);
-		skb_add_rx_frag(skb, 0, desc_cb->priv,
-				desc_cb->page_offset + pull_len,
-				size - pull_len, truesize - pull_len);
-
-		hns_nic_reuse_page(desc_cb, truesize, last_offset);
+		hns_nic_reuse_page(skb, 0, ring, pull_len, desc_cb);
 		ring_ptr_move_fw(ring, next_to_clean);
 
 		if (unlikely(bnum >= (int)MAX_SKB_FRAGS)) { /* check err*/
@@ -396,13 +631,8 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
 		for (i = 1; i < bnum; i++) {
 			desc = &ring->desc[ring->next_to_clean];
 			desc_cb = &ring->desc_cb[ring->next_to_clean];
-			size = le16_to_cpu(desc->rx.size);
-			truesize = ALIGN(size, L1_CACHE_BYTES);
-			skb_add_rx_frag(skb, i, desc_cb->priv,
-					desc_cb->page_offset,
-					size, truesize);
 
-			hns_nic_reuse_page(desc_cb, truesize, last_offset);
+			hns_nic_reuse_page(skb, i, ring, 0, desc_cb);
 			ring_ptr_move_fw(ring, next_to_clean);
 		}
 	}
@@ -540,20 +770,20 @@ recv:
 	}
 
 	/* make all data has been write before submit */
-	if (clean_count > 0) {
-		hns_nic_alloc_rx_buffers(ring_data, clean_count);
-		clean_count = 0;
-	}
-
 	if (recv_pkts < budget) {
 		ex_num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM);
-		rmb(); /*complete read rx ring bd number*/
-		if (ex_num > 0) {
-			num += ex_num;
+
+		if (ex_num > clean_count) {
+			num += ex_num - clean_count;
+			rmb(); /*complete read rx ring bd number*/
 			goto recv;
 		}
 	}
 
+	/* make all data has been write before submit */
+	if (clean_count > 0)
+		hns_nic_alloc_rx_buffers(ring_data, clean_count);
+
 	return recv_pkts;
 }
 
@@ -642,14 +872,20 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
 
 	bytes = 0;
 	pkts = 0;
-	while (head != ring->next_to_clean)
+	while (head != ring->next_to_clean) {
 		hns_nic_reclaim_one_desc(ring, &bytes, &pkts);
+		/* issue prefetch for next Tx descriptor */
+		prefetch(&ring->desc_cb[ring->next_to_clean]);
+	}
 
 	NETIF_TX_UNLOCK(ndev);
 
 	dev_queue = netdev_get_tx_queue(ndev, ring_data->queue_index);
 	netdev_tx_completed_queue(dev_queue, pkts, bytes);
 
+	if (unlikely(priv->link && !netif_carrier_ok(ndev)))
+		netif_carrier_on(ndev);
+
 	if (unlikely(pkts && netif_carrier_ok(ndev) &&
 		     (ring_space(ring) >= ring->max_desc_num_per_pkt * 2))) {
 		/* Make sure that anybody stopping the queue after this
@@ -716,6 +952,7 @@ static int hns_nic_common_poll(struct napi_struct *napi, int budget)
 			ring_data->ring, 0);
 
 		ring_data->fini_process(ring_data);
+		return 0;
 	}
 
 	return clean_complete;
@@ -848,15 +1085,58 @@ static void hns_nic_ring_close(struct net_device *netdev, int idx)
 	napi_disable(&priv->ring_data[idx].napi);
 }
 
-static int hns_nic_init_irq(struct hns_nic_priv *priv)
+/**
+ * hns_set_irq_affinity - publish a one-CPU affinity hint for each ring irq
+ * @priv: nic private data that owns the rings
+ *
+ * Rings whose target CPU is not online are left without a hint.
+ */
+static void hns_set_irq_affinity(struct hns_nic_priv *priv)
 {
 	struct hnae_handle *h = priv->ae_handle;
 	struct hns_nic_ring_data *rd;
 	int i;
-	int ret;
 	int cpu;
-	cpumask_t mask;
 
+	/* the irq core stores the hint pointer rather than copying the mask,
+	 * so pass the static cpumask_of() masks instead of a stack variable
+	 */
+	/* different irq balance for 16core and 32core */
+	if (h->q_num == num_possible_cpus()) {
+		for (i = 0; i < h->q_num * 2; i++) {
+			rd = &priv->ring_data[i];
+			cpu = rd->queue_index;
+			if (cpu_online(cpu))
+				(void)irq_set_affinity_hint(rd->ring->irq,
+							    cpumask_of(cpu));
+		}
+	} else {
+		/* first half of ring_data -> even-numbered CPUs */
+		for (i = 0; i < h->q_num; i++) {
+			rd = &priv->ring_data[i];
+			cpu = rd->queue_index * 2;
+			if (cpu_online(cpu))
+				(void)irq_set_affinity_hint(rd->ring->irq,
+							    cpumask_of(cpu));
+		}
+		/* second half of ring_data -> odd-numbered CPUs */
+		for (i = h->q_num; i < h->q_num * 2; i++) {
+			rd = &priv->ring_data[i];
+			cpu = rd->queue_index * 2 + 1;
+			if (cpu_online(cpu))
+				(void)irq_set_affinity_hint(rd->ring->irq,
+							    cpumask_of(cpu));
+		}
+	}
+}
+
+static int hns_nic_init_irq(struct hns_nic_priv *priv)
+{
+	struct hnae_handle *h = priv->ae_handle;
+	struct hns_nic_ring_data *rd;
+	int i;
+	int ret;
+
 	for (i = 0; i < h->q_num * 2; i++) {
 		rd = &priv->ring_data[i];
 
@@ -878,16 +1158,11 @@ static int hns_nic_init_irq(struct hns_nic_priv *priv)
 		}
 		disable_irq(rd->ring->irq);
 		rd->ring->irq_init_flag = RCB_IRQ_INITED;
-
-		/*set cpu affinity*/
-		if (cpu_online(rd->queue_index)) {
-			cpumask_clear(&mask);
-			cpu = rd->queue_index;
-			cpumask_set_cpu(cpu, &mask);
-			irq_set_affinity_hint(rd->ring->irq, &mask);
-		}
 	}
 
+	/*set cpu affinity*/
+	hns_set_irq_affinity(priv);
+
 	return 0;
 }
 
@@ -1136,6 +1411,51 @@ static int hns_nic_change_mtu(struct net_device *ndev, int new_mtu)
 	return ret;
 }
 
+static int hns_nic_set_features(struct net_device *netdev,
+				netdev_features_t features)
+{
+	struct hns_nic_priv *priv = netdev_priv(netdev);
+	struct hnae_handle *h = priv->ae_handle;
+
+	switch (priv->enet_ver) {
+	case AE_VERSION_1:
+		if (features & (NETIF_F_TSO | NETIF_F_TSO6))
+			netdev_info(netdev, "enet v1 do not support tso!\n");
+		break;
+	default:
+		if (features & (NETIF_F_TSO | NETIF_F_TSO6)) {
+			priv->ops.fill_desc = fill_tso_desc;
+			priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tso;
+			/* The chip only supports GSO sizes up to 7 * 4096 */
+			netif_set_gso_max_size(netdev, 7 * 4096);
+			h->dev->ops->set_tso_stats(h, 1);
+		} else {
+			priv->ops.fill_desc = fill_v2_desc;
+			priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx;
+			h->dev->ops->set_tso_stats(h, 0);
+		}
+		break;
+	}
+	netdev->features = features;
+	return 0;
+}
+
+static netdev_features_t hns_nic_fix_features(
+		struct net_device *netdev, netdev_features_t features)
+{
+	struct hns_nic_priv *priv = netdev_priv(netdev);
+
+	switch (priv->enet_ver) {
+	case AE_VERSION_1:
+		features &= ~(NETIF_F_TSO | NETIF_F_TSO6 |
+				NETIF_F_HW_VLAN_CTAG_FILTER);
+		break;
+	default:
+		break;
+	}
+	return features;
+}
+
 /**
  * nic_set_multicast_list - set mutl mac address
  * @netdev: net device
@@ -1231,6 +1551,8 @@ static const struct net_device_ops hns_nic_netdev_ops = {
 	.ndo_set_mac_address = hns_nic_net_set_mac_address,
 	.ndo_change_mtu = hns_nic_change_mtu,
 	.ndo_do_ioctl = hns_nic_do_ioctl,
+	.ndo_set_features = hns_nic_set_features,
+	.ndo_fix_features = hns_nic_fix_features,
 	.ndo_get_stats64 = hns_nic_get_stats64,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = hns_nic_poll_controller,
@@ -1315,22 +1637,26 @@ static void hns_nic_reset_subtask(struct hns_nic_priv *priv)
 		return;
 
 	hns_nic_dump(priv);
-	netdev_info(priv->netdev, "Reset %s port\n",
-		    (type == HNAE_PORT_DEBUG ? "debug" : "business"));
+	netdev_info(priv->netdev, "try to reset %s port!\n",
+		    (type == HNAE_PORT_DEBUG ? "debug" : "service"));
 
 	rtnl_lock();
 	/* put off any impending NetWatchDogTimeout */
 	priv->netdev->trans_start = jiffies;
 
-	if (type == HNAE_PORT_DEBUG)
+	if (type == HNAE_PORT_DEBUG) {
 		hns_nic_net_reinit(priv->netdev);
+	} else {
+		netif_carrier_off(priv->netdev);
+		netif_tx_disable(priv->netdev);
+	}
 	rtnl_unlock();
 }
 
 /* for doing service complete*/
 static void hns_nic_service_event_complete(struct hns_nic_priv *priv)
 {
-	assert(!test_bit(NIC_STATE_SERVICE_SCHED, &priv->state));
+	WARN_ON(!test_bit(NIC_STATE_SERVICE_SCHED, &priv->state));
 
 	smp_mb__before_atomic();
 	clear_bit(NIC_STATE_SERVICE_SCHED, &priv->state);
@@ -1435,8 +1761,9 @@ static void hns_nic_uninit_ring_data(struct hns_nic_priv *priv)
 	for (i = 0; i < h->q_num * 2; i++) {
 		netif_napi_del(&priv->ring_data[i].napi);
 		if (priv->ring_data[i].ring->irq_init_flag == RCB_IRQ_INITED) {
-			irq_set_affinity_hint(priv->ring_data[i].ring->irq,
-					      NULL);
+			(void)irq_set_affinity_hint(
+				priv->ring_data[i].ring->irq,
+				NULL);
 			free_irq(priv->ring_data[i].ring->irq,
 				 &priv->ring_data[i]);
 		}
@@ -1446,6 +1773,31 @@ static void hns_nic_uninit_ring_data(struct hns_nic_priv *priv)
 	kfree(priv->ring_data);
 }
 
+static void hns_nic_set_priv_ops(struct net_device *netdev)
+{
+	struct hns_nic_priv *priv = netdev_priv(netdev);
+	struct hnae_handle *h = priv->ae_handle;
+
+	if (AE_IS_VER1(priv->enet_ver)) {
+		priv->ops.fill_desc = fill_desc;
+		priv->ops.get_rxd_bnum = get_rx_desc_bnum;
+		priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx;
+	} else {
+		priv->ops.get_rxd_bnum = get_v2rx_desc_bnum;
+		if ((netdev->features & NETIF_F_TSO) ||
+		    (netdev->features & NETIF_F_TSO6)) {
+			priv->ops.fill_desc = fill_tso_desc;
+			priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tso;
+			/* This chip only supports GSO sizes up to 7 * 4096 */
+			netif_set_gso_max_size(netdev, 7 * 4096);
+			h->dev->ops->set_tso_stats(h, 1);
+		} else {
+			priv->ops.fill_desc = fill_v2_desc;
+			priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx;
+		}
+	}
+}
+
 static int hns_nic_try_get_ae(struct net_device *ndev)
 {
 	struct hns_nic_priv *priv = netdev_priv(ndev);
@@ -1473,6 +1825,8 @@ static int hns_nic_try_get_ae(struct net_device *ndev)
 		goto out_init_ring_data;
 	}
 
+	hns_nic_set_priv_ops(ndev);
+
 	ret = register_netdev(ndev);
 	if (ret) {
 		dev_err(priv->dev, "probe register netdev fail!\n");
@@ -1524,10 +1878,10 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
 	priv->dev = dev;
 	priv->netdev = ndev;
 
-	if (of_device_is_compatible(node, "hisilicon,hns-nic-v2"))
-		priv->enet_ver = AE_VERSION_2;
-	else
+	if (of_device_is_compatible(node, "hisilicon,hns-nic-v1"))
 		priv->enet_ver = AE_VERSION_1;
+	else
+		priv->enet_ver = AE_VERSION_2;
 
 	ret = of_property_read_string(node, "ae-name", &priv->ae_name);
 	if (ret)
@@ -1543,6 +1897,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
 	ndev->priv_flags |= IFF_UNICAST_FLT;
 	ndev->netdev_ops = &hns_nic_netdev_ops;
 	hns_ethtool_set_ops(ndev);
+
 	ndev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 		NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
 		NETIF_F_GRO;
@@ -1550,6 +1905,17 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM;
 	ndev->vlan_features |= NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO;
 
+	switch (priv->enet_ver) {
+	case AE_VERSION_2:
+		ndev->features |= NETIF_F_TSO | NETIF_F_TSO6;
+		ndev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+			NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
+			NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6;
+		break;
+	default:
+		break;
+	}
+
 	SET_NETDEV_DEV(ndev, dev);
 
 	if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
index dae0ed19ac6d..4b75270f014e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
@@ -40,6 +40,16 @@ struct hns_nic_ring_data {
 	void (*fini_process)(struct hns_nic_ring_data *);
 };
 
+/* ops that hide the differences between the two hardware versions */
+struct hns_nic_ops {
+	void (*fill_desc)(struct hnae_ring *ring, void *priv,
+			  int size, dma_addr_t dma, int frag_end,
+			  int buf_num, enum hns_desc_type type, int mtu);
+	int (*maybe_stop_tx)(struct sk_buff **out_skb,
+			     int *bnum, struct hnae_ring *ring);
+	void (*get_rxd_bnum)(u32 bnum_flag, int *out_bnum);
+};
+
 struct hns_nic_priv {
 	const char *ae_name;
 	u32 enet_ver;
@@ -51,6 +61,8 @@ struct hns_nic_priv {
 	struct device *dev;
 	struct hnae_handle *ae_handle;
 
+	struct hns_nic_ops ops;
+
 	/* the cb for nic to manage the ring buffer, the first half of the
 	 * array is for tx_ring and vice versa for the second half
 	 */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index a0332129970b..3b234176dd36 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -11,7 +11,6 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-
 #include "hns_enet.h"
 
 #define HNS_PHY_PAGE_MDIX	0
@@ -667,6 +666,7 @@ static void hns_nic_get_drvinfo(struct net_device *net_dev,
 	drvinfo->bus_info[ETHTOOL_BUSINFO_LEN - 1] = '\0';
 
 	strncpy(drvinfo->fw_version, "N/A", ETHTOOL_FWVERS_LEN);
+	drvinfo->eedump_len = 0;
 }
 
 /**
@@ -1187,6 +1187,95 @@ static int hns_nic_nway_reset(struct net_device *netdev)
 	return ret;
 }
 
+/* Report the RSS hash key size, or 0 on v1 hardware (no RSS support).
+ * The return type is unsigned, so an errno cannot be returned here.
+ */
+static u32
+hns_get_rss_key_size(struct net_device *netdev)
+{
+	struct hns_nic_priv *priv = netdev_priv(netdev);
+	struct hnae_ae_ops *ops;
+
+	if (AE_IS_VER1(priv->enet_ver)) {
+		netdev_err(netdev,
+			   "RSS feature is not supported on this hardware\n");
+		return 0;
+	}
+
+	ops = priv->ae_handle->dev->ops;
+	return ops->get_rss_key_size(priv->ae_handle);
+}
+
+/* Report the RSS indirection table size, or 0 on v1 hardware (no RSS).
+ * The return type is unsigned, so an errno cannot be returned here.
+ */
+static u32
+hns_get_rss_indir_size(struct net_device *netdev)
+{
+	struct hns_nic_priv *priv = netdev_priv(netdev);
+	struct hnae_ae_ops *ops;
+
+	if (AE_IS_VER1(priv->enet_ver)) {
+		netdev_err(netdev,
+			   "RSS feature is not supported on this hardware\n");
+		return 0;
+	}
+
+	ops = priv->ae_handle->dev->ops;
+	return ops->get_rss_indir_size(priv->ae_handle);
+}
+
+/* ethtool get_rxfh: fetch the RSS configuration from the AE layer and
+ * propagate its status; nothing is read when @indir is NULL.
+ */
+static int
+hns_get_rss(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc)
+{
+	struct hns_nic_priv *priv = netdev_priv(netdev);
+	struct hnae_ae_ops *ops;
+
+	if (AE_IS_VER1(priv->enet_ver)) {
+		netdev_err(netdev,
+			   "RSS feature is not supported on this hardware\n");
+		return -EOPNOTSUPP;
+	}
+
+	ops = priv->ae_handle->dev->ops;
+
+	if (!indir)
+		return 0;
+
+	return ops->get_rss(priv->ae_handle, indir, key, hfunc);
+}
+
+/* ethtool set_rxfh: program the RSS indirection table through the AE
+ * layer and propagate its status; a NULL @indir is a successful no-op.
+ */
+static int
+hns_set_rss(struct net_device *netdev, const u32 *indir, const u8 *key,
+	    const u8 hfunc)
+{
+	struct hns_nic_priv *priv = netdev_priv(netdev);
+	struct hnae_ae_ops *ops;
+
+	if (AE_IS_VER1(priv->enet_ver)) {
+		netdev_err(netdev,
+			   "RSS feature is not supported on this hardware\n");
+		return -EOPNOTSUPP;
+	}
+
+	ops = priv->ae_handle->dev->ops;
+
+	/* a new hash key is rejected; hfunc can only be Toeplitz hash */
+	if (key ||
+	    (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP))
+		return -EOPNOTSUPP;
+	if (!indir)
+		return 0;
+
+	return ops->set_rss(priv->ae_handle, indir, key, hfunc);
+}
+
 static struct ethtool_ops hns_ethtool_ops = {
 	.get_drvinfo = hns_nic_get_drvinfo,
 	.get_link  = hns_nic_get_link,
@@ -1206,6 +1295,10 @@ static struct ethtool_ops hns_ethtool_ops = {
 	.get_regs_len = hns_get_regs_len,
 	.get_regs = hns_get_regs,
 	.nway_reset = hns_nic_nway_reset,
+	.get_rxfh_key_size = hns_get_rss_key_size,
+	.get_rxfh_indir_size = hns_get_rss_indir_size,
+	.get_rxfh = hns_get_rss,
+	.set_rxfh = hns_set_rss,
 };
 
 void hns_ethtool_set_ops(struct net_device *ndev)
diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c
index ae6e30d39f0f..1d5c3e16d8f4 100644
--- a/drivers/net/ethernet/hp/hp100.c
+++ b/drivers/net/ethernet/hp/hp100.c
@@ -2843,7 +2843,7 @@ static void cleanup_dev(struct net_device *d)
 }
 
 #ifdef CONFIG_EISA
-static int __init hp100_eisa_probe (struct device *gendev)
+static int hp100_eisa_probe(struct device *gendev)
 {
 	struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private));
 	struct eisa_device *edev = to_eisa_device(gendev);
diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h
index 69707108d23c..98fe5a2cd6e3 100644
--- a/drivers/net/ethernet/intel/e1000/e1000.h
+++ b/drivers/net/ethernet/intel/e1000/e1000.h
@@ -213,8 +213,11 @@ struct e1000_rx_ring {
 };
 
 #define E1000_DESC_UNUSED(R)						\
-	((((R)->next_to_clean > (R)->next_to_use)			\
-	  ? 0 : (R)->count) + (R)->next_to_clean - (R)->next_to_use - 1)
+({									\
+	unsigned int clean = smp_load_acquire(&(R)->next_to_clean);	\
+	unsigned int use = READ_ONCE((R)->next_to_use);			\
+	(clean > use ? 0 : (R)->count) + clean - use - 1;		\
+})
 
 #define E1000_RX_DESC_EXT(R, i)						\
 	(&(((union e1000_rx_desc_extended *)((R).desc))[i]))
diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c
index b1af0d613caa..8172cf08cc33 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_hw.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c
@@ -1,5 +1,5 @@
 /*******************************************************************************
-
+*
   Intel PRO/1000 Linux driver
   Copyright(c) 1999 - 2006 Intel Corporation.
 
@@ -106,7 +106,7 @@ u16 e1000_igp_cable_length_table[IGP01E1000_AGC_LENGTH_TABLE_SIZE] = {
 	    120, 120
 };
 
-static DEFINE_SPINLOCK(e1000_eeprom_lock);
+static DEFINE_MUTEX(e1000_eeprom_lock);
 static DEFINE_SPINLOCK(e1000_phy_lock);
 
 /**
@@ -624,8 +624,8 @@ s32 e1000_init_hw(struct e1000_hw *hw)
 		/* Workaround for PCI-X problem when BIOS sets MMRBC
 		 * incorrectly.
 		 */
-		if (hw->bus_type == e1000_bus_type_pcix
-		    && e1000_pcix_get_mmrbc(hw) > 2048)
+		if (hw->bus_type == e1000_bus_type_pcix &&
+		    e1000_pcix_get_mmrbc(hw) > 2048)
 			e1000_pcix_set_mmrbc(hw, 2048);
 		break;
 	}
@@ -683,10 +683,9 @@ static s32 e1000_adjust_serdes_amplitude(struct e1000_hw *hw)
 	}
 
 	ret_val = e1000_read_eeprom(hw, EEPROM_SERDES_AMPLITUDE, 1,
-	                            &eeprom_data);
-	if (ret_val) {
+				    &eeprom_data);
+	if (ret_val)
 		return ret_val;
-	}
 
 	if (eeprom_data != EEPROM_RESERVED_WORD) {
 		/* Adjust SERDES output amplitude only. */
@@ -1074,8 +1073,8 @@ static s32 e1000_copper_link_preconfig(struct e1000_hw *hw)
 
 	if (hw->mac_type <= e1000_82543 ||
 	    hw->mac_type == e1000_82541 || hw->mac_type == e1000_82547 ||
-	    hw->mac_type == e1000_82541_rev_2
-	    || hw->mac_type == e1000_82547_rev_2)
+	    hw->mac_type == e1000_82541_rev_2 ||
+	    hw->mac_type == e1000_82547_rev_2)
 		hw->phy_reset_disable = false;
 
 	return E1000_SUCCESS;
@@ -1652,7 +1651,7 @@ s32 e1000_phy_setup_autoneg(struct e1000_hw *hw)
 		mii_1000t_ctrl_reg = 0;
 	} else {
 		ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL,
-		                              mii_1000t_ctrl_reg);
+					      mii_1000t_ctrl_reg);
 		if (ret_val)
 			return ret_val;
 	}
@@ -1881,10 +1880,11 @@ static s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw)
 		if (ret_val)
 			return ret_val;
 
-		if ((hw->mac_type == e1000_82544 || hw->mac_type == e1000_82543)
-		    && (!hw->autoneg)
-		    && (hw->forced_speed_duplex == e1000_10_full
-			|| hw->forced_speed_duplex == e1000_10_half)) {
+		if ((hw->mac_type == e1000_82544 ||
+		     hw->mac_type == e1000_82543) &&
+		    (!hw->autoneg) &&
+		    (hw->forced_speed_duplex == e1000_10_full ||
+		     hw->forced_speed_duplex == e1000_10_half)) {
 			ret_val = e1000_polarity_reversal_workaround(hw);
 			if (ret_val)
 				return ret_val;
@@ -2084,11 +2084,12 @@ static s32 e1000_config_fc_after_link_up(struct e1000_hw *hw)
 	 * so we had to force link.  In this case, we need to force the
 	 * configuration of the MAC to match the "fc" parameter.
 	 */
-	if (((hw->media_type == e1000_media_type_fiber) && (hw->autoneg_failed))
-	    || ((hw->media_type == e1000_media_type_internal_serdes)
-		&& (hw->autoneg_failed))
-	    || ((hw->media_type == e1000_media_type_copper)
-		&& (!hw->autoneg))) {
+	if (((hw->media_type == e1000_media_type_fiber) &&
+	     (hw->autoneg_failed)) ||
+	    ((hw->media_type == e1000_media_type_internal_serdes) &&
+	     (hw->autoneg_failed)) ||
+	    ((hw->media_type == e1000_media_type_copper) &&
+	     (!hw->autoneg))) {
 		ret_val = e1000_force_mac_fc(hw);
 		if (ret_val) {
 			e_dbg("Error forcing flow control settings\n");
@@ -2193,8 +2194,7 @@ static s32 e1000_config_fc_after_link_up(struct e1000_hw *hw)
 			else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) &&
 				 (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
 				 (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
-				 (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR))
-			{
+				 (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
 				hw->fc = E1000_FC_TX_PAUSE;
 				e_dbg
 				    ("Flow Control = TX PAUSE frames only.\n");
@@ -2210,8 +2210,7 @@ static s32 e1000_config_fc_after_link_up(struct e1000_hw *hw)
 			else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
 				 (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
 				 !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
-				 (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR))
-			{
+				 (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
 				hw->fc = E1000_FC_RX_PAUSE;
 				e_dbg
 				    ("Flow Control = RX PAUSE frames only.\n");
@@ -2460,10 +2459,11 @@ s32 e1000_check_for_link(struct e1000_hw *hw)
 			 * happen due to the execution of this workaround.
 			 */
 
-			if ((hw->mac_type == e1000_82544
-			     || hw->mac_type == e1000_82543) && (!hw->autoneg)
-			    && (hw->forced_speed_duplex == e1000_10_full
-				|| hw->forced_speed_duplex == e1000_10_half)) {
+			if ((hw->mac_type == e1000_82544 ||
+			     hw->mac_type == e1000_82543) &&
+			    (!hw->autoneg) &&
+			    (hw->forced_speed_duplex == e1000_10_full ||
+			     hw->forced_speed_duplex == e1000_10_half)) {
 				ew32(IMC, 0xffffffff);
 				ret_val =
 				    e1000_polarity_reversal_workaround(hw);
@@ -2528,8 +2528,10 @@ s32 e1000_check_for_link(struct e1000_hw *hw)
 		 */
 		if (hw->tbi_compatibility_en) {
 			u16 speed, duplex;
+
 			ret_val =
 			    e1000_get_speed_and_duplex(hw, &speed, &duplex);
+
 			if (ret_val) {
 				e_dbg
 				    ("Error getting link speed and duplex\n");
@@ -2628,10 +2630,10 @@ s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex)
 			    e1000_read_phy_reg(hw, PHY_LP_ABILITY, &phy_data);
 			if (ret_val)
 				return ret_val;
-			if ((*speed == SPEED_100
-			     && !(phy_data & NWAY_LPAR_100TX_FD_CAPS))
-			    || (*speed == SPEED_10
-				&& !(phy_data & NWAY_LPAR_10T_FD_CAPS)))
+			if ((*speed == SPEED_100 &&
+			     !(phy_data & NWAY_LPAR_100TX_FD_CAPS)) ||
+			    (*speed == SPEED_10 &&
+			     !(phy_data & NWAY_LPAR_10T_FD_CAPS)))
 				*duplex = HALF_DUPLEX;
 		}
 	}
@@ -2664,9 +2666,9 @@ static s32 e1000_wait_autoneg(struct e1000_hw *hw)
 		ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data);
 		if (ret_val)
 			return ret_val;
-		if (phy_data & MII_SR_AUTONEG_COMPLETE) {
+		if (phy_data & MII_SR_AUTONEG_COMPLETE)
 			return E1000_SUCCESS;
-		}
+
 		msleep(100);
 	}
 	return E1000_SUCCESS;
@@ -2803,11 +2805,11 @@ static u16 e1000_shift_in_mdi_bits(struct e1000_hw *hw)
 	return data;
 }
 
-
 /**
  * e1000_read_phy_reg - read a phy register
  * @hw: Struct containing variables accessed by shared code
  * @reg_addr: address of the PHY register to read
+ * @phy_data: pointer to where the value read from the PHY register is stored
  *
  * Reads the value from a PHY register, if the value is on a specific non zero
  * page, sets the page first.
@@ -2823,14 +2825,13 @@ s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 reg_addr, u16 *phy_data)
 	    (reg_addr > MAX_PHY_MULTI_PAGE_REG)) {
 		ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT,
 						 (u16) reg_addr);
-		if (ret_val) {
-			spin_unlock_irqrestore(&e1000_phy_lock, flags);
-			return ret_val;
-		}
+		if (ret_val)
+			goto out;
 	}
 
 	ret_val = e1000_read_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr,
 					phy_data);
+out:
 	spin_unlock_irqrestore(&e1000_phy_lock, flags);
 
 	return ret_val;
@@ -2881,7 +2882,7 @@ static s32 e1000_read_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr,
 				e_dbg("MDI Read Error\n");
 				return -E1000_ERR_PHY;
 			}
-			*phy_data = (u16) mdic;
+			*phy_data = (u16)mdic;
 		} else {
 			mdic = ((reg_addr << E1000_MDIC_REG_SHIFT) |
 				(phy_addr << E1000_MDIC_PHY_SHIFT) |
@@ -2906,7 +2907,7 @@ static s32 e1000_read_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr,
 				e_dbg("MDI Error\n");
 				return -E1000_ERR_PHY;
 			}
-			*phy_data = (u16) mdic;
+			*phy_data = (u16)mdic;
 		}
 	} else {
 		/* We must first send a preamble through the MDIO pin to signal
@@ -2960,7 +2961,7 @@ s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 reg_addr, u16 phy_data)
 	if ((hw->phy_type == e1000_phy_igp) &&
 	    (reg_addr > MAX_PHY_MULTI_PAGE_REG)) {
 		ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT,
-						 (u16) reg_addr);
+						 (u16)reg_addr);
 		if (ret_val) {
 			spin_unlock_irqrestore(&e1000_phy_lock, flags);
 			return ret_val;
@@ -2993,7 +2994,7 @@ static s32 e1000_write_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr,
 		 * the desired data.
 		 */
 		if (hw->mac_type == e1000_ce4100) {
-			mdic = (((u32) phy_data) |
+			mdic = (((u32)phy_data) |
 				(reg_addr << E1000_MDIC_REG_SHIFT) |
 				(phy_addr << E1000_MDIC_PHY_SHIFT) |
 				(INTEL_CE_GBE_MDIC_OP_WRITE) |
@@ -3015,7 +3016,7 @@ static s32 e1000_write_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr,
 				return -E1000_ERR_PHY;
 			}
 		} else {
-			mdic = (((u32) phy_data) |
+			mdic = (((u32)phy_data) |
 				(reg_addr << E1000_MDIC_REG_SHIFT) |
 				(phy_addr << E1000_MDIC_PHY_SHIFT) |
 				(E1000_MDIC_OP_WRITE));
@@ -3053,7 +3054,7 @@ static s32 e1000_write_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr,
 		mdic = ((PHY_TURNAROUND) | (reg_addr << 2) | (phy_addr << 7) |
 			(PHY_OP_WRITE << 12) | (PHY_SOF << 14));
 		mdic <<= 16;
-		mdic |= (u32) phy_data;
+		mdic |= (u32)phy_data;
 
 		e1000_shift_out_mdi_bits(hw, mdic, 32);
 	}
@@ -3176,14 +3177,14 @@ static s32 e1000_detect_gig_phy(struct e1000_hw *hw)
 	if (ret_val)
 		return ret_val;
 
-	hw->phy_id = (u32) (phy_id_high << 16);
+	hw->phy_id = (u32)(phy_id_high << 16);
 	udelay(20);
 	ret_val = e1000_read_phy_reg(hw, PHY_ID2, &phy_id_low);
 	if (ret_val)
 		return ret_val;
 
-	hw->phy_id |= (u32) (phy_id_low & PHY_REVISION_MASK);
-	hw->phy_revision = (u32) phy_id_low & ~PHY_REVISION_MASK;
+	hw->phy_id |= (u32)(phy_id_low & PHY_REVISION_MASK);
+	hw->phy_revision = (u32)phy_id_low & ~PHY_REVISION_MASK;
 
 	switch (hw->mac_type) {
 	case e1000_82543:
@@ -3401,7 +3402,6 @@ static s32 e1000_phy_m88_get_info(struct e1000_hw *hw,
 		phy_info->remote_rx = ((phy_data & SR_1000T_REMOTE_RX_STATUS) >>
 				       SR_1000T_REMOTE_RX_STATUS_SHIFT) ?
 		    e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
-
 	}
 
 	return E1000_SUCCESS;
@@ -3449,7 +3449,7 @@ s32 e1000_phy_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info)
 	if (hw->phy_type == e1000_phy_igp)
 		return e1000_phy_igp_get_info(hw, phy_info);
 	else if ((hw->phy_type == e1000_phy_8211) ||
-	         (hw->phy_type == e1000_phy_8201))
+		 (hw->phy_type == e1000_phy_8201))
 		return E1000_SUCCESS;
 	else
 		return e1000_phy_m88_get_info(hw, phy_info);
@@ -3611,11 +3611,11 @@ static void e1000_shift_out_ee_bits(struct e1000_hw *hw, u16 data, u16 count)
 	 */
 	mask = 0x01 << (count - 1);
 	eecd = er32(EECD);
-	if (eeprom->type == e1000_eeprom_microwire) {
+	if (eeprom->type == e1000_eeprom_microwire)
 		eecd &= ~E1000_EECD_DO;
-	} else if (eeprom->type == e1000_eeprom_spi) {
+	else if (eeprom->type == e1000_eeprom_spi)
 		eecd |= E1000_EECD_DO;
-	}
+
 	do {
 		/* A "1" is shifted out to the EEPROM by setting bit "DI" to a
 		 * "1", and then raising and then lowering the clock (the SK bit
@@ -3851,7 +3851,7 @@ static s32 e1000_spi_eeprom_ready(struct e1000_hw *hw)
 	do {
 		e1000_shift_out_ee_bits(hw, EEPROM_RDSR_OPCODE_SPI,
 					hw->eeprom.opcode_bits);
-		spi_stat_reg = (u8) e1000_shift_in_ee_bits(hw, 8);
+		spi_stat_reg = (u8)e1000_shift_in_ee_bits(hw, 8);
 		if (!(spi_stat_reg & EEPROM_STATUS_RDY_SPI))
 			break;
 
@@ -3882,9 +3882,10 @@ static s32 e1000_spi_eeprom_ready(struct e1000_hw *hw)
 s32 e1000_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
 {
 	s32 ret;
-	spin_lock(&e1000_eeprom_lock);
+
+	mutex_lock(&e1000_eeprom_lock);
 	ret = e1000_do_read_eeprom(hw, offset, words, data);
-	spin_unlock(&e1000_eeprom_lock);
+	mutex_unlock(&e1000_eeprom_lock);
 	return ret;
 }
 
@@ -3896,15 +3897,16 @@ static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words,
 
 	if (hw->mac_type == e1000_ce4100) {
 		GBE_CONFIG_FLASH_READ(GBE_CONFIG_BASE_VIRT, offset, words,
-		                      data);
+				      data);
 		return E1000_SUCCESS;
 	}
 
 	/* A check for invalid values:  offset too large, too many words, and
 	 * not enough words.
 	 */
-	if ((offset >= eeprom->word_size)
-	    || (words > eeprom->word_size - offset) || (words == 0)) {
+	if ((offset >= eeprom->word_size) ||
+	    (words > eeprom->word_size - offset) ||
+	    (words == 0)) {
 		e_dbg("\"words\" parameter out of bounds. Words = %d,"
 		      "size = %d\n", offset, eeprom->word_size);
 		return -E1000_ERR_EEPROM;
@@ -3940,7 +3942,7 @@ static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words,
 
 		/* Send the READ command (opcode + addr)  */
 		e1000_shift_out_ee_bits(hw, read_opcode, eeprom->opcode_bits);
-		e1000_shift_out_ee_bits(hw, (u16) (offset * 2),
+		e1000_shift_out_ee_bits(hw, (u16)(offset * 2),
 					eeprom->address_bits);
 
 		/* Read the data.  The address of the eeprom internally
@@ -3960,7 +3962,7 @@ static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words,
 			e1000_shift_out_ee_bits(hw,
 						EEPROM_READ_OPCODE_MICROWIRE,
 						eeprom->opcode_bits);
-			e1000_shift_out_ee_bits(hw, (u16) (offset + i),
+			e1000_shift_out_ee_bits(hw, (u16)(offset + i),
 						eeprom->address_bits);
 
 			/* Read the data.  For microwire, each word requires the
@@ -3968,6 +3970,7 @@ static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words,
 			 */
 			data[i] = e1000_shift_in_ee_bits(hw, 16);
 			e1000_standby_eeprom(hw);
+			cond_resched();
 		}
 	}
 
@@ -4004,7 +4007,7 @@ s32 e1000_validate_eeprom_checksum(struct e1000_hw *hw)
 		return E1000_SUCCESS;
 
 #endif
-	if (checksum == (u16) EEPROM_SUM)
+	if (checksum == (u16)EEPROM_SUM)
 		return E1000_SUCCESS;
 	else {
 		e_dbg("EEPROM Checksum Invalid\n");
@@ -4031,7 +4034,7 @@ s32 e1000_update_eeprom_checksum(struct e1000_hw *hw)
 		}
 		checksum += eeprom_data;
 	}
-	checksum = (u16) EEPROM_SUM - checksum;
+	checksum = (u16)EEPROM_SUM - checksum;
 	if (e1000_write_eeprom(hw, EEPROM_CHECKSUM_REG, 1, &checksum) < 0) {
 		e_dbg("EEPROM Write Error\n");
 		return -E1000_ERR_EEPROM;
@@ -4052,9 +4055,10 @@ s32 e1000_update_eeprom_checksum(struct e1000_hw *hw)
 s32 e1000_write_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
 {
 	s32 ret;
-	spin_lock(&e1000_eeprom_lock);
+
+	mutex_lock(&e1000_eeprom_lock);
 	ret = e1000_do_write_eeprom(hw, offset, words, data);
-	spin_unlock(&e1000_eeprom_lock);
+	mutex_unlock(&e1000_eeprom_lock);
 	return ret;
 }
 
@@ -4066,15 +4070,16 @@ static s32 e1000_do_write_eeprom(struct e1000_hw *hw, u16 offset, u16 words,
 
 	if (hw->mac_type == e1000_ce4100) {
 		GBE_CONFIG_FLASH_WRITE(GBE_CONFIG_BASE_VIRT, offset, words,
-		                       data);
+				       data);
 		return E1000_SUCCESS;
 	}
 
 	/* A check for invalid values:  offset too large, too many words, and
 	 * not enough words.
 	 */
-	if ((offset >= eeprom->word_size)
-	    || (words > eeprom->word_size - offset) || (words == 0)) {
+	if ((offset >= eeprom->word_size) ||
+	    (words > eeprom->word_size - offset) ||
+	    (words == 0)) {
 		e_dbg("\"words\" parameter out of bounds\n");
 		return -E1000_ERR_EEPROM;
 	}
@@ -4116,6 +4121,7 @@ static s32 e1000_write_eeprom_spi(struct e1000_hw *hw, u16 offset, u16 words,
 			return -E1000_ERR_EEPROM;
 
 		e1000_standby_eeprom(hw);
+		cond_resched();
 
 		/*  Send the WRITE ENABLE command (8 bit opcode )  */
 		e1000_shift_out_ee_bits(hw, EEPROM_WREN_OPCODE_SPI,
@@ -4132,7 +4138,7 @@ static s32 e1000_write_eeprom_spi(struct e1000_hw *hw, u16 offset, u16 words,
 		/* Send the Write command (8-bit opcode + addr) */
 		e1000_shift_out_ee_bits(hw, write_opcode, eeprom->opcode_bits);
 
-		e1000_shift_out_ee_bits(hw, (u16) ((offset + widx) * 2),
+		e1000_shift_out_ee_bits(hw, (u16)((offset + widx) * 2),
 					eeprom->address_bits);
 
 		/* Send the data */
@@ -4142,6 +4148,7 @@ static s32 e1000_write_eeprom_spi(struct e1000_hw *hw, u16 offset, u16 words,
 		 */
 		while (widx < words) {
 			u16 word_out = data[widx];
+
 			word_out = (word_out >> 8) | (word_out << 8);
 			e1000_shift_out_ee_bits(hw, word_out, 16);
 			widx++;
@@ -4183,9 +4190,9 @@ static s32 e1000_write_eeprom_microwire(struct e1000_hw *hw, u16 offset,
 	 * EEPROM into write/erase mode.
 	 */
 	e1000_shift_out_ee_bits(hw, EEPROM_EWEN_OPCODE_MICROWIRE,
-				(u16) (eeprom->opcode_bits + 2));
+				(u16)(eeprom->opcode_bits + 2));
 
-	e1000_shift_out_ee_bits(hw, 0, (u16) (eeprom->address_bits - 2));
+	e1000_shift_out_ee_bits(hw, 0, (u16)(eeprom->address_bits - 2));
 
 	/* Prepare the EEPROM */
 	e1000_standby_eeprom(hw);
@@ -4195,7 +4202,7 @@ static s32 e1000_write_eeprom_microwire(struct e1000_hw *hw, u16 offset,
 		e1000_shift_out_ee_bits(hw, EEPROM_WRITE_OPCODE_MICROWIRE,
 					eeprom->opcode_bits);
 
-		e1000_shift_out_ee_bits(hw, (u16) (offset + words_written),
+		e1000_shift_out_ee_bits(hw, (u16)(offset + words_written),
 					eeprom->address_bits);
 
 		/* Send the data */
@@ -4224,6 +4231,7 @@ static s32 e1000_write_eeprom_microwire(struct e1000_hw *hw, u16 offset,
 
 		/* Recover from write */
 		e1000_standby_eeprom(hw);
+		cond_resched();
 
 		words_written++;
 	}
@@ -4235,9 +4243,9 @@ static s32 e1000_write_eeprom_microwire(struct e1000_hw *hw, u16 offset,
 	 * EEPROM out of write/erase mode.
 	 */
 	e1000_shift_out_ee_bits(hw, EEPROM_EWDS_OPCODE_MICROWIRE,
-				(u16) (eeprom->opcode_bits + 2));
+				(u16)(eeprom->opcode_bits + 2));
 
-	e1000_shift_out_ee_bits(hw, 0, (u16) (eeprom->address_bits - 2));
+	e1000_shift_out_ee_bits(hw, 0, (u16)(eeprom->address_bits - 2));
 
 	return E1000_SUCCESS;
 }
@@ -4260,8 +4268,8 @@ s32 e1000_read_mac_addr(struct e1000_hw *hw)
 			e_dbg("EEPROM Read Error\n");
 			return -E1000_ERR_EEPROM;
 		}
-		hw->perm_mac_addr[i] = (u8) (eeprom_data & 0x00FF);
-		hw->perm_mac_addr[i + 1] = (u8) (eeprom_data >> 8);
+		hw->perm_mac_addr[i] = (u8)(eeprom_data & 0x00FF);
+		hw->perm_mac_addr[i + 1] = (u8)(eeprom_data >> 8);
 	}
 
 	switch (hw->mac_type) {
@@ -4328,19 +4336,19 @@ u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
 		 */
 	case 0:
 		/* [47:36] i.e. 0x563 for above example address */
-		hash_value = ((mc_addr[4] >> 4) | (((u16) mc_addr[5]) << 4));
+		hash_value = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4));
 		break;
 	case 1:
 		/* [46:35] i.e. 0xAC6 for above example address */
-		hash_value = ((mc_addr[4] >> 3) | (((u16) mc_addr[5]) << 5));
+		hash_value = ((mc_addr[4] >> 3) | (((u16)mc_addr[5]) << 5));
 		break;
 	case 2:
 		/* [45:34] i.e. 0x5D8 for above example address */
-		hash_value = ((mc_addr[4] >> 2) | (((u16) mc_addr[5]) << 6));
+		hash_value = ((mc_addr[4] >> 2) | (((u16)mc_addr[5]) << 6));
 		break;
 	case 3:
 		/* [43:32] i.e. 0x634 for above example address */
-		hash_value = ((mc_addr[4]) | (((u16) mc_addr[5]) << 8));
+		hash_value = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8));
 		break;
 	}
 
@@ -4361,9 +4369,9 @@ void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index)
 	/* HW expects these in little endian so we reverse the byte order
 	 * from network order (big endian) to little endian
 	 */
-	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
-		   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
-	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
+	rar_low = ((u32)addr[0] | ((u32)addr[1] << 8) |
+		   ((u32)addr[2] << 16) | ((u32)addr[3] << 24));
+	rar_high = ((u32)addr[4] | ((u32)addr[5] << 8));
 
 	/* Disable Rx and flush all Rx frames before enabling RSS to avoid Rx
 	 * unit hang.
@@ -4537,7 +4545,7 @@ s32 e1000_setup_led(struct e1000_hw *hw)
 		if (ret_val)
 			return ret_val;
 		ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO,
-					      (u16) (hw->phy_spd_default &
+					      (u16)(hw->phy_spd_default &
 						     ~IGP01E1000_GMII_SPD));
 		if (ret_val)
 			return ret_val;
@@ -4802,7 +4810,7 @@ void e1000_reset_adaptive(struct e1000_hw *hw)
 void e1000_update_adaptive(struct e1000_hw *hw)
 {
 	if (hw->adaptive_ifs) {
-		if ((hw->collision_delta *hw->ifs_ratio) > hw->tx_packet_delta) {
+		if ((hw->collision_delta * hw->ifs_ratio) > hw->tx_packet_delta) {
 			if (hw->tx_packet_delta > MIN_NUM_XMITS) {
 				hw->in_ifs_mode = true;
 				if (hw->current_ifs_val < hw->ifs_max_val) {
@@ -4816,8 +4824,8 @@ void e1000_update_adaptive(struct e1000_hw *hw)
 				}
 			}
 		} else {
-			if (hw->in_ifs_mode
-			    && (hw->tx_packet_delta <= MIN_NUM_XMITS)) {
+			if (hw->in_ifs_mode &&
+			    (hw->tx_packet_delta <= MIN_NUM_XMITS)) {
 				hw->current_ifs_val = 0;
 				hw->in_ifs_mode = false;
 				ew32(AIT, 0);
@@ -4922,7 +4930,6 @@ static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length,
 
 	/* Use old method for Phy older than IGP */
 	if (hw->phy_type == e1000_phy_m88) {
-
 		ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS,
 					     &phy_data);
 		if (ret_val)
@@ -4966,7 +4973,6 @@ static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length,
 		};
 		/* Read the AGC registers for all channels */
 		for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) {
-
 			ret_val =
 			    e1000_read_phy_reg(hw, agc_reg_array[i], &phy_data);
 			if (ret_val)
@@ -4976,8 +4982,8 @@ static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length,
 
 			/* Value bound check. */
 			if ((cur_agc_value >=
-			     IGP01E1000_AGC_LENGTH_TABLE_SIZE - 1)
-			    || (cur_agc_value == 0))
+			     IGP01E1000_AGC_LENGTH_TABLE_SIZE - 1) ||
+			    (cur_agc_value == 0))
 				return -E1000_ERR_PHY;
 
 			agc_value += cur_agc_value;
@@ -5054,7 +5060,6 @@ static s32 e1000_check_polarity(struct e1000_hw *hw,
 		 */
 		if ((phy_data & IGP01E1000_PSSR_SPEED_MASK) ==
 		    IGP01E1000_PSSR_SPEED_1000MBPS) {
-
 			/* Read the GIG initialization PCS register (0x00B4) */
 			ret_val =
 			    e1000_read_phy_reg(hw, IGP01E1000_PHY_PCS_INIT_REG,
@@ -5175,8 +5180,8 @@ static s32 e1000_1000Mb_check_cable_length(struct e1000_hw *hw)
 				hw->ffe_config_state = e1000_ffe_config_active;
 
 				ret_val = e1000_write_phy_reg(hw,
-					      IGP01E1000_PHY_DSP_FFE,
-					      IGP01E1000_PHY_DSP_FFE_CM_CP);
+							      IGP01E1000_PHY_DSP_FFE,
+							      IGP01E1000_PHY_DSP_FFE_CM_CP);
 				if (ret_val)
 					return ret_val;
 				break;
@@ -5243,7 +5248,7 @@ static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw, bool link_up)
 			msleep(20);
 
 			ret_val = e1000_write_phy_reg(hw, 0x0000,
-						    IGP01E1000_IEEE_FORCE_GIGA);
+						      IGP01E1000_IEEE_FORCE_GIGA);
 			if (ret_val)
 				return ret_val;
 			for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) {
@@ -5264,7 +5269,7 @@ static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw, bool link_up)
 			}
 
 			ret_val = e1000_write_phy_reg(hw, 0x0000,
-					IGP01E1000_IEEE_RESTART_AUTONEG);
+						      IGP01E1000_IEEE_RESTART_AUTONEG);
 			if (ret_val)
 				return ret_val;
 
@@ -5299,7 +5304,7 @@ static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw, bool link_up)
 			msleep(20);
 
 			ret_val = e1000_write_phy_reg(hw, 0x0000,
-						    IGP01E1000_IEEE_FORCE_GIGA);
+						      IGP01E1000_IEEE_FORCE_GIGA);
 			if (ret_val)
 				return ret_val;
 			ret_val =
@@ -5309,7 +5314,7 @@ static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw, bool link_up)
 				return ret_val;
 
 			ret_val = e1000_write_phy_reg(hw, 0x0000,
-					IGP01E1000_IEEE_RESTART_AUTONEG);
+						      IGP01E1000_IEEE_RESTART_AUTONEG);
 			if (ret_val)
 				return ret_val;
 
@@ -5346,9 +5351,8 @@ static s32 e1000_set_phy_mode(struct e1000_hw *hw)
 		ret_val =
 		    e1000_read_eeprom(hw, EEPROM_PHY_CLASS_WORD, 1,
 				      &eeprom_data);
-		if (ret_val) {
+		if (ret_val)
 			return ret_val;
-		}
 
 		if ((eeprom_data != EEPROM_RESERVED_WORD) &&
 		    (eeprom_data & EEPROM_PHY_CLASS_A)) {
@@ -5395,8 +5399,8 @@ static s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active)
 	 * from the lowest speeds starting from 10Mbps. The capability is used
 	 * for Dx transitions and states
 	 */
-	if (hw->mac_type == e1000_82541_rev_2
-	    || hw->mac_type == e1000_82547_rev_2) {
+	if (hw->mac_type == e1000_82541_rev_2 ||
+	    hw->mac_type == e1000_82547_rev_2) {
 		ret_val =
 		    e1000_read_phy_reg(hw, IGP01E1000_GMII_FIFO, &phy_data);
 		if (ret_val)
@@ -5446,11 +5450,9 @@ static s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active)
 			if (ret_val)
 				return ret_val;
 		}
-	} else if ((hw->autoneg_advertised == AUTONEG_ADVERTISE_SPEED_DEFAULT)
-		   || (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_ALL)
-		   || (hw->autoneg_advertised ==
-		       AUTONEG_ADVERTISE_10_100_ALL)) {
-
+	} else if ((hw->autoneg_advertised == AUTONEG_ADVERTISE_SPEED_DEFAULT) ||
+		   (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_ALL) ||
+		   (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_100_ALL)) {
 		if (hw->mac_type == e1000_82541_rev_2 ||
 		    hw->mac_type == e1000_82547_rev_2) {
 			phy_data |= IGP01E1000_GMII_FLEX_SPD;
@@ -5474,7 +5476,6 @@ static s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active)
 					phy_data);
 		if (ret_val)
 			return ret_val;
-
 	}
 	return E1000_SUCCESS;
 }
@@ -5542,7 +5543,6 @@ static s32 e1000_set_vco_speed(struct e1000_hw *hw)
 	return E1000_SUCCESS;
 }
 
-
 /**
  * e1000_enable_mng_pass_thru - check for bmc pass through
  * @hw: Struct containing variables accessed by shared code
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index fd7be860c201..3fc7bde699ba 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -99,13 +99,13 @@ int e1000_setup_all_rx_resources(struct e1000_adapter *adapter);
 void e1000_free_all_tx_resources(struct e1000_adapter *adapter);
 void e1000_free_all_rx_resources(struct e1000_adapter *adapter);
 static int e1000_setup_tx_resources(struct e1000_adapter *adapter,
-                             struct e1000_tx_ring *txdr);
+				    struct e1000_tx_ring *txdr);
 static int e1000_setup_rx_resources(struct e1000_adapter *adapter,
-                             struct e1000_rx_ring *rxdr);
+				    struct e1000_rx_ring *rxdr);
 static void e1000_free_tx_resources(struct e1000_adapter *adapter,
-                             struct e1000_tx_ring *tx_ring);
+				    struct e1000_tx_ring *tx_ring);
 static void e1000_free_rx_resources(struct e1000_adapter *adapter,
-                             struct e1000_rx_ring *rx_ring);
+				    struct e1000_rx_ring *rx_ring);
 void e1000_update_stats(struct e1000_adapter *adapter);
 
 static int e1000_init_module(void);
@@ -122,16 +122,16 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter);
 static void e1000_clean_all_tx_rings(struct e1000_adapter *adapter);
 static void e1000_clean_all_rx_rings(struct e1000_adapter *adapter);
 static void e1000_clean_tx_ring(struct e1000_adapter *adapter,
-                                struct e1000_tx_ring *tx_ring);
+				struct e1000_tx_ring *tx_ring);
 static void e1000_clean_rx_ring(struct e1000_adapter *adapter,
-                                struct e1000_rx_ring *rx_ring);
+				struct e1000_rx_ring *rx_ring);
 static void e1000_set_rx_mode(struct net_device *netdev);
 static void e1000_update_phy_info_task(struct work_struct *work);
 static void e1000_watchdog(struct work_struct *work);
 static void e1000_82547_tx_fifo_stall_task(struct work_struct *work);
 static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
 				    struct net_device *netdev);
-static struct net_device_stats * e1000_get_stats(struct net_device *netdev);
+static struct net_device_stats *e1000_get_stats(struct net_device *netdev);
 static int e1000_change_mtu(struct net_device *netdev, int new_mtu);
 static int e1000_set_mac(struct net_device *netdev, void *p);
 static irqreturn_t e1000_intr(int irq, void *data);
@@ -164,7 +164,7 @@ static void e1000_tx_timeout(struct net_device *dev);
 static void e1000_reset_task(struct work_struct *work);
 static void e1000_smartspeed(struct e1000_adapter *adapter);
 static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter,
-                                       struct sk_buff *skb);
+				       struct sk_buff *skb);
 
 static bool e1000_vlan_used(struct e1000_adapter *adapter);
 static void e1000_vlan_mode(struct net_device *netdev,
@@ -195,7 +195,7 @@ MODULE_PARM_DESC(copybreak,
 	"Maximum size of packet that is copied to a new buffer on receive");
 
 static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
-                     pci_channel_state_t state);
+						pci_channel_state_t state);
 static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev);
 static void e1000_io_resume(struct pci_dev *pdev);
 
@@ -287,7 +287,7 @@ static int e1000_request_irq(struct e1000_adapter *adapter)
 	int err;
 
 	err = request_irq(adapter->pdev->irq, handler, irq_flags, netdev->name,
-	                  netdev);
+			  netdev);
 	if (err) {
 		e_err(probe, "Unable to allocate interrupt Error: %d\n", err);
 	}
@@ -636,8 +636,8 @@ void e1000_reset(struct e1000_adapter *adapter)
 		 * but don't include ethernet FCS because hardware appends it
 		 */
 		min_tx_space = (hw->max_frame_size +
-		                sizeof(struct e1000_tx_desc) -
-		                ETH_FCS_LEN) * 2;
+				sizeof(struct e1000_tx_desc) -
+				ETH_FCS_LEN) * 2;
 		min_tx_space = ALIGN(min_tx_space, 1024);
 		min_tx_space >>= 10;
 		/* software strips receive CRC, so leave room for it */
@@ -943,8 +943,8 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct e1000_adapter *adapter;
 	struct e1000_hw *hw;
 
-	static int cards_found = 0;
-	static int global_quad_port_a = 0; /* global ksp3 port a indication */
+	static int cards_found;
+	static int global_quad_port_a; /* global ksp3 port a indication */
 	int i, err, pci_using_dac;
 	u16 eeprom_data = 0;
 	u16 tmp = 0;
@@ -1046,7 +1046,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (hw->mac_type == e1000_ce4100) {
 		hw->ce4100_gbe_mdio_base_virt =
 					ioremap(pci_resource_start(pdev, BAR_1),
-		                                pci_resource_len(pdev, BAR_1));
+						pci_resource_len(pdev, BAR_1));
 
 		if (!hw->ce4100_gbe_mdio_base_virt)
 			goto err_mdio_ioremap;
@@ -1148,7 +1148,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		break;
 	case e1000_82546:
 	case e1000_82546_rev_3:
-		if (er32(STATUS) & E1000_STATUS_FUNC_1){
+		if (er32(STATUS) & E1000_STATUS_FUNC_1) {
 			e1000_read_eeprom(hw,
 				EEPROM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
 			break;
@@ -1199,13 +1199,13 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		for (i = 0; i < 32; i++) {
 			hw->phy_addr = i;
 			e1000_read_phy_reg(hw, PHY_ID2, &tmp);
-			if (tmp == 0 || tmp == 0xFF) {
-				if (i == 31)
-					goto err_eeprom;
-				continue;
-			} else
+
+			if (tmp != 0 && tmp != 0xFF)
 				break;
 		}
+
+		if (i >= 32)
+			goto err_eeprom;
 	}
 
 	/* reset the hardware with the new settings */
@@ -1263,7 +1263,7 @@ err_pci_reg:
  * @pdev: PCI device information struct
  *
  * e1000_remove is called by the PCI subsystem to alert the driver
- * that it should release a PCI device.  The could be caused by a
+ * that it should release a PCI device. That could be caused by a
  * Hot-Plug event, or because the driver is going to be removed from
  * memory.
  **/
@@ -1334,12 +1334,12 @@ static int e1000_sw_init(struct e1000_adapter *adapter)
 static int e1000_alloc_queues(struct e1000_adapter *adapter)
 {
 	adapter->tx_ring = kcalloc(adapter->num_tx_queues,
-	                           sizeof(struct e1000_tx_ring), GFP_KERNEL);
+				   sizeof(struct e1000_tx_ring), GFP_KERNEL);
 	if (!adapter->tx_ring)
 		return -ENOMEM;
 
 	adapter->rx_ring = kcalloc(adapter->num_rx_queues,
-	                           sizeof(struct e1000_rx_ring), GFP_KERNEL);
+				   sizeof(struct e1000_rx_ring), GFP_KERNEL);
 	if (!adapter->rx_ring) {
 		kfree(adapter->tx_ring);
 		return -ENOMEM;
@@ -1811,20 +1811,20 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter)
 	rctl &= ~E1000_RCTL_SZ_4096;
 	rctl |= E1000_RCTL_BSEX;
 	switch (adapter->rx_buffer_len) {
-		case E1000_RXBUFFER_2048:
-		default:
-			rctl |= E1000_RCTL_SZ_2048;
-			rctl &= ~E1000_RCTL_BSEX;
-			break;
-		case E1000_RXBUFFER_4096:
-			rctl |= E1000_RCTL_SZ_4096;
-			break;
-		case E1000_RXBUFFER_8192:
-			rctl |= E1000_RCTL_SZ_8192;
-			break;
-		case E1000_RXBUFFER_16384:
-			rctl |= E1000_RCTL_SZ_16384;
-			break;
+	case E1000_RXBUFFER_2048:
+	default:
+		rctl |= E1000_RCTL_SZ_2048;
+		rctl &= ~E1000_RCTL_BSEX;
+		break;
+	case E1000_RXBUFFER_4096:
+		rctl |= E1000_RCTL_SZ_4096;
+		break;
+	case E1000_RXBUFFER_8192:
+		rctl |= E1000_RCTL_SZ_8192;
+		break;
+	case E1000_RXBUFFER_16384:
+		rctl |= E1000_RCTL_SZ_16384;
+		break;
 	}
 
 	/* This is useful for sniffing bad packets. */
@@ -1861,12 +1861,12 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
 
 	if (adapter->netdev->mtu > ETH_DATA_LEN) {
 		rdlen = adapter->rx_ring[0].count *
-		        sizeof(struct e1000_rx_desc);
+			sizeof(struct e1000_rx_desc);
 		adapter->clean_rx = e1000_clean_jumbo_rx_irq;
 		adapter->alloc_rx_buf = e1000_alloc_jumbo_rx_buffers;
 	} else {
 		rdlen = adapter->rx_ring[0].count *
-		        sizeof(struct e1000_rx_desc);
+			sizeof(struct e1000_rx_desc);
 		adapter->clean_rx = e1000_clean_rx_irq;
 		adapter->alloc_rx_buf = e1000_alloc_rx_buffers;
 	}
@@ -2761,7 +2761,9 @@ static int e1000_tso(struct e1000_adapter *adapter,
 		buffer_info->time_stamp = jiffies;
 		buffer_info->next_to_watch = i;
 
-		if (++i == tx_ring->count) i = 0;
+		if (++i == tx_ring->count)
+			i = 0;
+
 		tx_ring->next_to_use = i;
 
 		return true;
@@ -2816,7 +2818,9 @@ static bool e1000_tx_csum(struct e1000_adapter *adapter,
 	buffer_info->time_stamp = jiffies;
 	buffer_info->next_to_watch = i;
 
-	if (unlikely(++i == tx_ring->count)) i = 0;
+	if (unlikely(++i == tx_ring->count))
+		i = 0;
+
 	tx_ring->next_to_use = i;
 
 	return true;
@@ -2865,8 +2869,8 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
 		 * packet is smaller than 2048 - 16 - 16 (or 2016) bytes
 		 */
 		if (unlikely((hw->bus_type == e1000_bus_type_pcix) &&
-		                (size > 2015) && count == 0))
-		        size = 2015;
+			     (size > 2015) && count == 0))
+			size = 2015;
 
 		/* Workaround for potential 82544 hang in PCI-X.  Avoid
 		 * terminating buffers within evenly-aligned dwords.
@@ -2963,7 +2967,7 @@ dma_error:
 		count--;
 
 	while (count--) {
-		if (i==0)
+		if (i == 0)
 			i += tx_ring->count;
 		i--;
 		buffer_info = &tx_ring->buffer_info[i];
@@ -3013,7 +3017,8 @@ static void e1000_tx_queue(struct e1000_adapter *adapter,
 		tx_desc->lower.data =
 			cpu_to_le32(txd_lower | buffer_info->length);
 		tx_desc->upper.data = cpu_to_le32(txd_upper);
-		if (unlikely(++i == tx_ring->count)) i = 0;
+		if (unlikely(++i == tx_ring->count))
+			i = 0;
 	}
 
 	tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd);
@@ -3101,7 +3106,7 @@ static int e1000_maybe_stop_tx(struct net_device *netdev,
 	return __e1000_maybe_stop_tx(netdev, size);
 }
 
-#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 )
+#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1)
 static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
 				    struct net_device *netdev)
 {
@@ -3841,7 +3846,7 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter,
 	struct e1000_tx_buffer *buffer_info;
 	unsigned int i, eop;
 	unsigned int count = 0;
-	unsigned int total_tx_bytes=0, total_tx_packets=0;
+	unsigned int total_tx_bytes = 0, total_tx_packets = 0;
 	unsigned int bytes_compl = 0, pkts_compl = 0;
 
 	i = tx_ring->next_to_clean;
@@ -3869,14 +3874,18 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter,
 			e1000_unmap_and_free_tx_resource(adapter, buffer_info);
 			tx_desc->upper.data = 0;
 
-			if (unlikely(++i == tx_ring->count)) i = 0;
+			if (unlikely(++i == tx_ring->count))
+				i = 0;
 		}
 
 		eop = tx_ring->buffer_info[i].next_to_watch;
 		eop_desc = E1000_TX_DESC(*tx_ring, eop);
 	}
 
-	tx_ring->next_to_clean = i;
+	/* Synchronize with E1000_DESC_UNUSED called from e1000_xmit_frame,
+	 * which will reuse the cleaned buffers.
+	 */
+	smp_store_release(&tx_ring->next_to_clean, i);
 
 	netdev_completed_queue(netdev, pkts_compl, bytes_compl);
 
@@ -3954,9 +3963,11 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err,
 	skb_checksum_none_assert(skb);
 
 	/* 82543 or newer only */
-	if (unlikely(hw->mac_type < e1000_82543)) return;
+	if (unlikely(hw->mac_type < e1000_82543))
+		return;
 	/* Ignore Checksum bit is set */
-	if (unlikely(status & E1000_RXD_STAT_IXSM)) return;
+	if (unlikely(status & E1000_RXD_STAT_IXSM))
+		return;
 	/* TCP/UDP checksum error bit is set */
 	if (unlikely(errors & E1000_RXD_ERR_TCPE)) {
 		/* let the stack verify checksum errors */
@@ -4136,7 +4147,7 @@ static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter,
 	unsigned int i;
 	int cleaned_count = 0;
 	bool cleaned = false;
-	unsigned int total_rx_bytes=0, total_rx_packets=0;
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
 
 	i = rx_ring->next_to_clean;
 	rx_desc = E1000_RX_DESC(*rx_ring, i);
@@ -4153,7 +4164,9 @@ static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter,
 
 		status = rx_desc->status;
 
-		if (++i == rx_ring->count) i = 0;
+		if (++i == rx_ring->count)
+			i = 0;
+
 		next_rxd = E1000_RX_DESC(*rx_ring, i);
 		prefetch(next_rxd);
 
@@ -4356,7 +4369,7 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
 	unsigned int i;
 	int cleaned_count = 0;
 	bool cleaned = false;
-	unsigned int total_rx_bytes=0, total_rx_packets=0;
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
 
 	i = rx_ring->next_to_clean;
 	rx_desc = E1000_RX_DESC(*rx_ring, i);
@@ -4395,7 +4408,9 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
 			buffer_info->rxbuf.data = NULL;
 		}
 
-		if (++i == rx_ring->count) i = 0;
+		if (++i == rx_ring->count)
+			i = 0;
+
 		next_rxd = E1000_RX_DESC(*rx_ring, i);
 		prefetch(next_rxd);
 
@@ -4683,9 +4698,11 @@ static void e1000_smartspeed(struct e1000_adapter *adapter)
 		 * we assume back-to-back
 		 */
 		e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_status);
-		if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return;
+		if (!(phy_status & SR_1000T_MS_CONFIG_FAULT))
+			return;
 		e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_status);
-		if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return;
+		if (!(phy_status & SR_1000T_MS_CONFIG_FAULT))
+			return;
 		e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_ctrl);
 		if (phy_ctrl & CR_1000T_MS_ENABLE) {
 			phy_ctrl &= ~CR_1000T_MS_ENABLE;
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index c9da4654e9ca..b3949d5bef5c 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -91,6 +91,7 @@ struct e1000_hw;
 #define E1000_DEV_ID_PCH_SPT_I219_V		0x1570	/* SPT PCH */
 #define E1000_DEV_ID_PCH_SPT_I219_LM2		0x15B7	/* SPT-H PCH */
 #define E1000_DEV_ID_PCH_SPT_I219_V2		0x15B8	/* SPT-H PCH */
+#define E1000_DEV_ID_PCH_LBG_I219_LM3		0x15B9	/* LBG PCH */
 
 #define E1000_REVISION_4	4
 
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 91a5a0ae9cd7..a049e30639a1 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1984,7 +1984,7 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw)
 	int i = 0;
 
 	while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) &&
-	       (i++ < 10))
+	       (i++ < 30))
 		usleep_range(10000, 20000);
 	return blocked ? E1000_BLK_PHY_RESET : 0;
 }
@@ -3093,24 +3093,45 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank)
 	struct e1000_nvm_info *nvm = &hw->nvm;
 	u32 bank1_offset = nvm->flash_bank_size * sizeof(u16);
 	u32 act_offset = E1000_ICH_NVM_SIG_WORD * 2 + 1;
+	u32 nvm_dword = 0;
 	u8 sig_byte = 0;
 	s32 ret_val;
 
 	switch (hw->mac.type) {
-		/* In SPT, read from the CTRL_EXT reg instead of
-		 * accessing the sector valid bits from the nvm
-		 */
 	case e1000_pch_spt:
-		*bank = er32(CTRL_EXT)
-		    & E1000_CTRL_EXT_NVMVS;
-		if ((*bank == 0) || (*bank == 1)) {
-			e_dbg("ERROR: No valid NVM bank present\n");
-			return -E1000_ERR_NVM;
-		} else {
-			*bank = *bank - 2;
+		bank1_offset = nvm->flash_bank_size;
+		act_offset = E1000_ICH_NVM_SIG_WORD;
+
+		/* set bank to 0 in case flash read fails */
+		*bank = 0;
+
+		/* Check bank 0 */
+		ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset,
+							 &nvm_dword);
+		if (ret_val)
+			return ret_val;
+		sig_byte = (u8)((nvm_dword & 0xFF00) >> 8);
+		if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) ==
+		    E1000_ICH_NVM_SIG_VALUE) {
+			*bank = 0;
 			return 0;
 		}
-		break;
+
+		/* Check bank 1 */
+		ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset +
+							 bank1_offset,
+							 &nvm_dword);
+		if (ret_val)
+			return ret_val;
+		sig_byte = (u8)((nvm_dword & 0xFF00) >> 8);
+		if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) ==
+		    E1000_ICH_NVM_SIG_VALUE) {
+			*bank = 1;
+			return 0;
+		}
+
+		e_dbg("ERROR: No valid NVM bank present\n");
+		return -E1000_ERR_NVM;
 	case e1000_ich8lan:
 	case e1000_ich9lan:
 		eecd = er32(EECD);
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 0a854a47d31a..775e38910681 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1959,8 +1959,10 @@ static irqreturn_t e1000_intr_msix_rx(int __always_unused irq, void *data)
 	 * previous interrupt.
 	 */
 	if (rx_ring->set_itr) {
-		writel(1000000000 / (rx_ring->itr_val * 256),
-		       rx_ring->itr_register);
+		u32 itr = rx_ring->itr_val ?
+			  1000000000 / (rx_ring->itr_val * 256) : 0;
+
+		writel(itr, rx_ring->itr_register);
 		rx_ring->set_itr = 0;
 	}
 
@@ -7465,6 +7467,7 @@ static const struct pci_device_id e1000_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V), board_pch_spt },
 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_LM2), board_pch_spt },
 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V2), board_pch_spt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LBG_I219_LM3), board_pch_spt },
 
 	{ 0, 0, 0, 0, 0, 0, 0 }	/* terminate list */
 };
@@ -7504,14 +7507,11 @@ static struct pci_driver e1000_driver = {
  **/
 static int __init e1000_init_module(void)
 {
-	int ret;
-
 	pr_info("Intel(R) PRO/1000 Network Driver - %s\n",
 		e1000e_driver_version);
 	pr_info("Copyright(c) 1999 - 2015 Intel Corporation.\n");
-	ret = pci_register_driver(&e1000_driver);
 
-	return ret;
+	return pci_register_driver(&e1000_driver);
 }
 module_init(e1000_init_module);
 
diff --git a/drivers/net/ethernet/intel/fm10k/Makefile b/drivers/net/ethernet/intel/fm10k/Makefile
index 08859dd220a8..b006ff66d028 100644
--- a/drivers/net/ethernet/intel/fm10k/Makefile
+++ b/drivers/net/ethernet/intel/fm10k/Makefile
@@ -1,7 +1,7 @@
 ################################################################################
 #
 # Intel Ethernet Switch Host Interface Driver
-# Copyright(c) 2013 - 2014 Intel Corporation.
+# Copyright(c) 2013 - 2015 Intel Corporation.
 #
 # This program is free software; you can redistribute it and/or modify it
 # under the terms and conditions of the GNU General Public License,
@@ -27,7 +27,17 @@
 
 obj-$(CONFIG_FM10K) += fm10k.o
 
-fm10k-objs := fm10k_main.o fm10k_common.o fm10k_pci.o \
-	      fm10k_netdev.o fm10k_ethtool.o fm10k_pf.o fm10k_vf.o \
-	      fm10k_mbx.o fm10k_iov.o fm10k_tlv.o \
-	      fm10k_debugfs.o fm10k_ptp.o fm10k_dcbnl.o
+fm10k-y := fm10k_main.o \
+	   fm10k_common.o \
+	   fm10k_pci.o \
+	   fm10k_ptp.o \
+	   fm10k_netdev.o \
+	   fm10k_ethtool.o \
+	   fm10k_pf.o \
+	   fm10k_vf.o \
+	   fm10k_mbx.o \
+	   fm10k_iov.o \
+	   fm10k_tlv.o
+
+fm10k-$(CONFIG_DEBUG_FS) += fm10k_debugfs.o
+fm10k-$(CONFIG_DCB) += fm10k_dcbnl.o
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
index 14440200499b..b34bb008b104 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -23,6 +23,7 @@
 
 #include <linux/types.h>
 #include <linux/etherdevice.h>
+#include <linux/cpumask.h>
 #include <linux/rtnetlink.h>
 #include <linux/if_vlan.h>
 #include <linux/pci.h>
@@ -33,7 +34,7 @@
 #include "fm10k_pf.h"
 #include "fm10k_vf.h"
 
-#define FM10K_MAX_JUMBO_FRAME_SIZE	15358	/* Maximum supported size 15K */
+#define FM10K_MAX_JUMBO_FRAME_SIZE	15342	/* Maximum supported size 15K */
 
 #define MAX_QUEUES	FM10K_MAX_QUEUES_PF
 
@@ -66,6 +67,7 @@ struct fm10k_l2_accel {
 enum fm10k_ring_state_t {
 	__FM10K_TX_DETECT_HANG,
 	__FM10K_HANG_CHECK_ARMED,
+	__FM10K_TX_XPS_INIT_DONE,
 };
 
 #define check_for_tx_hang(ring) \
@@ -138,7 +140,7 @@ struct fm10k_ring {
 					 * different for DCB and RSS modes
 					 */
 	u8 qos_pc;			/* priority class of queue */
-	u16 vid;			/* default vlan ID of queue */
+	u16 vid;			/* default VLAN ID of queue */
 	u16 count;			/* amount of descriptors */
 
 	u16 next_to_alloc;
@@ -164,14 +166,20 @@ struct fm10k_ring_container {
 	unsigned int total_packets;	/* total packets processed this int */
 	u16 work_limit;			/* total work allowed per interrupt */
 	u16 itr;			/* interrupt throttle rate value */
+	u8 itr_scale;			/* ITR adjustment based on PCI speed */
 	u8 count;			/* total number of rings in vector */
 };
 
 #define FM10K_ITR_MAX		0x0FFF	/* maximum value for ITR */
 #define FM10K_ITR_10K		100	/* 100us */
 #define FM10K_ITR_20K		50	/* 50us */
+#define FM10K_ITR_40K		25	/* 25us */
 #define FM10K_ITR_ADAPTIVE	0x8000	/* adaptive interrupt moderation flag */
 
+#define ITR_IS_ADAPTIVE(itr) (!!((itr) & FM10K_ITR_ADAPTIVE))
+
+#define FM10K_TX_ITR_DEFAULT	FM10K_ITR_40K
+#define FM10K_RX_ITR_DEFAULT	FM10K_ITR_20K
 #define FM10K_ITR_ENABLE	(FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR)
 
 static inline struct netdev_queue *txring_txq(const struct fm10k_ring *ring)
@@ -203,6 +211,7 @@ struct fm10k_q_vector {
 	struct fm10k_ring_container rx, tx;
 
 	struct napi_struct napi;
+	cpumask_t affinity_mask;
 	char name[IFNAMSIZ + 9];
 
 #ifdef CONFIG_DEBUG_FS
@@ -413,7 +422,7 @@ static inline u16 fm10k_desc_unused(struct fm10k_ring *ring)
 	 (&(((union fm10k_rx_desc *)((R)->desc))[i]))
 
 #define FM10K_MAX_TXD_PWR	14
-#define FM10K_MAX_DATA_PER_TXD	(1 << FM10K_MAX_TXD_PWR)
+#define FM10K_MAX_DATA_PER_TXD	BIT(FM10K_MAX_TXD_PWR)
 
 /* Tx Descriptors needed, worst case */
 #define TXD_USE_COUNT(S)	DIV_ROUND_UP((S), FM10K_MAX_DATA_PER_TXD)
@@ -434,7 +443,7 @@ union fm10k_ftag_info {
 	struct {
 		/* dglort and sglort combined into a single 32bit desc read */
 		__le32 glort;
-		/* upper 16 bits of vlan are reserved 0 for swpri_type_user */
+		/* upper 16 bits of VLAN are reserved 0 for swpri_type_user */
 		__le32 vlan;
 	} d;
 	struct {
@@ -484,7 +493,7 @@ void fm10k_netpoll(struct net_device *netdev);
 #endif
 
 /* Netdev */
-struct net_device *fm10k_alloc_netdev(void);
+struct net_device *fm10k_alloc_netdev(const struct fm10k_info *info);
 int fm10k_setup_rx_resources(struct fm10k_ring *);
 int fm10k_setup_tx_resources(struct fm10k_ring *);
 void fm10k_free_rx_resources(struct fm10k_ring *);
@@ -551,5 +560,9 @@ int fm10k_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
 int fm10k_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
 
 /* DCB */
+#ifdef CONFIG_DCB
 void fm10k_dcbnl_set_ops(struct net_device *dev);
+#else
+static inline void fm10k_dcbnl_set_ops(struct net_device *dev) {}
+#endif
 #endif /* _FM10K_H_ */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
index 5c7a4d7662d8..2be4361839db 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
@@ -20,7 +20,6 @@
 
 #include "fm10k.h"
 
-#ifdef CONFIG_DCB
 /**
  * fm10k_dcbnl_ieee_getets - get the ETS configuration for the device
  * @dev: netdev interface for the device
@@ -155,7 +154,6 @@ static const struct dcbnl_rtnl_ops fm10k_dcbnl_ops = {
 	.setdcbx	= fm10k_dcbnl_setdcbx,
 };
 
-#endif /* CONFIG_DCB */
 /**
  * fm10k_dcbnl_set_ops - Configures dcbnl ops pointer for netdev
  * @dev: netdev interface for the device
@@ -164,11 +162,9 @@ static const struct dcbnl_rtnl_ops fm10k_dcbnl_ops = {
  **/
 void fm10k_dcbnl_set_ops(struct net_device *dev)
 {
-#ifdef CONFIG_DCB
 	struct fm10k_intfc *interface = netdev_priv(dev);
 	struct fm10k_hw *hw = &interface->hw;
 
 	if (hw->mac.type == fm10k_mac_pf)
 		dev->dcbnl_ops = &fm10k_dcbnl_ops;
-#endif /* CONFIG_DCB */
 }
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
index 5304bc1fbecd..5d6137faf7d1 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
@@ -18,8 +18,6 @@
  * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
  */
 
-#ifdef CONFIG_DEBUG_FS
-
 #include "fm10k.h"
 
 #include <linux/debugfs.h>
@@ -258,5 +256,3 @@ void fm10k_dbg_exit(void)
 	debugfs_remove_recursive(dbg_root);
 	dbg_root = NULL;
 }
-
-#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
index 2ce0eba5e040..2f6a05b57228 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
@@ -111,12 +111,14 @@ static const struct fm10k_stats fm10k_gstrings_pf_stats[] = {
 
 static const struct fm10k_stats fm10k_gstrings_mbx_stats[] = {
 	FM10K_MBX_STAT("mbx_tx_busy", tx_busy),
-	FM10K_MBX_STAT("mbx_tx_oversized", tx_dropped),
+	FM10K_MBX_STAT("mbx_tx_dropped", tx_dropped),
 	FM10K_MBX_STAT("mbx_tx_messages", tx_messages),
 	FM10K_MBX_STAT("mbx_tx_dwords", tx_dwords),
+	FM10K_MBX_STAT("mbx_tx_mbmem_pulled", tx_mbmem_pulled),
 	FM10K_MBX_STAT("mbx_rx_messages", rx_messages),
 	FM10K_MBX_STAT("mbx_rx_dwords", rx_dwords),
 	FM10K_MBX_STAT("mbx_rx_parse_err", rx_parse_err),
+	FM10K_MBX_STAT("mbx_rx_mbmem_pushed", rx_mbmem_pushed),
 };
 
 #define FM10K_GLOBAL_STATS_LEN ARRAY_SIZE(fm10k_gstrings_global_stats)
@@ -125,7 +127,7 @@ static const struct fm10k_stats fm10k_gstrings_mbx_stats[] = {
 #define FM10K_MBX_STATS_LEN ARRAY_SIZE(fm10k_gstrings_mbx_stats)
 
 #define FM10K_QUEUE_STATS_LEN(_n) \
-	( (_n) * 2 * (sizeof(struct fm10k_queue_stats) / sizeof(u64)))
+	((_n) * 2 * (sizeof(struct fm10k_queue_stats) / sizeof(u64)))
 
 #define FM10K_STATIC_STATS_LEN (FM10K_GLOBAL_STATS_LEN + \
 				FM10K_NETDEV_STATS_LEN + \
@@ -257,7 +259,8 @@ static int fm10k_get_sset_count(struct net_device *dev, int sset)
 			stats_len += FM10K_DEBUG_STATS_LEN;
 
 			if (iov_data)
-				stats_len += FM10K_MBX_STATS_LEN * iov_data->num_vfs;
+				stats_len += FM10K_MBX_STATS_LEN *
+					iov_data->num_vfs;
 		}
 
 		return stats_len;
@@ -296,14 +299,16 @@ static void fm10k_get_ethtool_stats(struct net_device *netdev,
 
 	if (interface->flags & FM10K_FLAG_DEBUG_STATS) {
 		for (i = 0; i < FM10K_DEBUG_STATS_LEN; i++) {
-			p = (char *)interface + fm10k_gstrings_debug_stats[i].stat_offset;
+			p = (char *)interface +
+				fm10k_gstrings_debug_stats[i].stat_offset;
 			*(data++) = (fm10k_gstrings_debug_stats[i].sizeof_stat ==
 				     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 		}
 	}
 
 	for (i = 0; i < FM10K_MBX_STATS_LEN; i++) {
-		p = (char *)&interface->hw.mbx + fm10k_gstrings_mbx_stats[i].stat_offset;
+		p = (char *)&interface->hw.mbx +
+			fm10k_gstrings_mbx_stats[i].stat_offset;
 		*(data++) = (fm10k_gstrings_mbx_stats[i].sizeof_stat ==
 			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
@@ -320,6 +325,7 @@ static void fm10k_get_ethtool_stats(struct net_device *netdev,
 	if ((interface->flags & FM10K_FLAG_DEBUG_STATS) && iov_data) {
 		for (i = 0; i < iov_data->num_vfs; i++) {
 			struct fm10k_vf_info *vf_info;
+
 			vf_info = &iov_data->vf_info[i];
 
 			/* skip stats if we don't have a vf info */
@@ -329,7 +335,8 @@ static void fm10k_get_ethtool_stats(struct net_device *netdev,
 			}
 
 			for (j = 0; j < FM10K_MBX_STATS_LEN; j++) {
-				p = (char *)&vf_info->mbx + fm10k_gstrings_mbx_stats[j].stat_offset;
+				p = (char *)&vf_info->mbx +
+					fm10k_gstrings_mbx_stats[j].stat_offset;
 				*(data++) = (fm10k_gstrings_mbx_stats[j].sizeof_stat ==
 					     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 			}
@@ -699,12 +706,10 @@ static int fm10k_get_coalesce(struct net_device *dev,
 {
 	struct fm10k_intfc *interface = netdev_priv(dev);
 
-	ec->use_adaptive_tx_coalesce =
-		!!(interface->tx_itr & FM10K_ITR_ADAPTIVE);
+	ec->use_adaptive_tx_coalesce = ITR_IS_ADAPTIVE(interface->tx_itr);
 	ec->tx_coalesce_usecs = interface->tx_itr & ~FM10K_ITR_ADAPTIVE;
 
-	ec->use_adaptive_rx_coalesce =
-		!!(interface->rx_itr & FM10K_ITR_ADAPTIVE);
+	ec->use_adaptive_rx_coalesce = ITR_IS_ADAPTIVE(interface->rx_itr);
 	ec->rx_coalesce_usecs = interface->rx_itr & ~FM10K_ITR_ADAPTIVE;
 
 	return 0;
@@ -729,10 +734,10 @@ static int fm10k_set_coalesce(struct net_device *dev,
 
 	/* set initial values for adaptive ITR */
 	if (ec->use_adaptive_tx_coalesce)
-		tx_itr = FM10K_ITR_ADAPTIVE | FM10K_ITR_10K;
+		tx_itr = FM10K_ITR_ADAPTIVE | FM10K_TX_ITR_DEFAULT;
 
 	if (ec->use_adaptive_rx_coalesce)
-		rx_itr = FM10K_ITR_ADAPTIVE | FM10K_ITR_20K;
+		rx_itr = FM10K_ITR_ADAPTIVE | FM10K_RX_ITR_DEFAULT;
 
 	/* update interface */
 	interface->tx_itr = tx_itr;
@@ -1020,7 +1025,6 @@ static int fm10k_set_priv_flags(struct net_device *netdev, u32 priv_flags)
 	return 0;
 }
 
-
 static u32 fm10k_get_reta_size(struct net_device __always_unused *netdev)
 {
 	return FM10K_RETA_SIZE * FM10K_RETA_ENTRIES_PER_REG;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index e76a44cf330c..75ff1092b7ee 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -28,7 +28,7 @@
 
 #include "fm10k.h"
 
-#define DRV_VERSION	"0.15.2-k"
+#define DRV_VERSION	"0.19.3-k"
 const char fm10k_driver_version[] = DRV_VERSION;
 char fm10k_driver_name[] = "fm10k";
 static const char fm10k_driver_string[] =
@@ -917,7 +917,7 @@ static u8 fm10k_tx_desc_flags(struct sk_buff *skb, u32 tx_flags)
 	/* set timestamping bits */
 	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
 	    likely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
-			desc_flags |= FM10K_TXD_FLAG_TIME;
+		desc_flags |= FM10K_TXD_FLAG_TIME;
 
 	/* set checksum offload bits */
 	desc_flags |= FM10K_SET_FLAG(tx_flags, FM10K_TX_FLAGS_CSUM,
@@ -1094,11 +1094,11 @@ dma_error:
 netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
 				  struct fm10k_ring *tx_ring)
 {
+	u16 count = TXD_USE_COUNT(skb_headlen(skb));
 	struct fm10k_tx_buffer *first;
-	int tso;
-	u32 tx_flags = 0;
 	unsigned short f;
-	u16 count = TXD_USE_COUNT(skb_headlen(skb));
+	u32 tx_flags = 0;
+	int tso;
 
 	/* need: 1 descriptor per page * PAGE_SIZE/FM10K_MAX_DATA_PER_TXD,
 	 *       + 1 desc for skb_headlen/FM10K_MAX_DATA_PER_TXD,
@@ -1363,10 +1363,10 @@ static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector,
  **/
 static void fm10k_update_itr(struct fm10k_ring_container *ring_container)
 {
-	unsigned int avg_wire_size, packets;
+	unsigned int avg_wire_size, packets, itr_round;
 
 	/* Only update ITR if we are using adaptive setting */
-	if (!(ring_container->itr & FM10K_ITR_ADAPTIVE))
+	if (!ITR_IS_ADAPTIVE(ring_container->itr))
 		goto clear_counts;
 
 	packets = ring_container->total_packets;
@@ -1375,18 +1375,44 @@ static void fm10k_update_itr(struct fm10k_ring_container *ring_container)
 
 	avg_wire_size = ring_container->total_bytes / packets;
 
-	/* Add 24 bytes to size to account for CRC, preamble, and gap */
-	avg_wire_size += 24;
-
-	/* Don't starve jumbo frames */
-	if (avg_wire_size > 3000)
-		avg_wire_size = 3000;
+	/* The following is a crude approximation of:
+	 *  wmem_default / (size + overhead) = desired_pkts_per_int
+	 *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+	 *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+	 *
+	 * Assuming wmem_default is 212992 and overhead is 640 bytes per
+	 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+	 * formula down to
+	 *
+	 *  (34 * (size + 24)) / (size + 640) = ITR
+	 *
+	 * We first do some math on the packet size and then finally bitshift
+	 * by 8 after rounding up. We also have to account for PCIe link speed
+	 * difference as ITR scales based on this.
+	 */
+	if (avg_wire_size <= 360) {
+		/* Start at 250K ints/sec and gradually drop to 77K ints/sec */
+		avg_wire_size *= 8;
+		avg_wire_size += 376;
+	} else if (avg_wire_size <= 1152) {
+		/* 77K ints/sec to 45K ints/sec */
+		avg_wire_size *= 3;
+		avg_wire_size += 2176;
+	} else if (avg_wire_size <= 1920) {
+		/* 45K ints/sec to 38K ints/sec */
+		avg_wire_size += 4480;
+	} else {
+		/* plateau at a limit of 38K ints/sec */
+		avg_wire_size = 6656;
+	}
 
-	/* Give a little boost to mid-size frames */
-	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
-		avg_wire_size /= 3;
-	else
-		avg_wire_size /= 2;
+	/* Perform final bitshift for division after rounding up to ensure
+	 * that the calculation will never get below a 1. The bit shift
+	 * accounts for changes in the ITR due to PCIe link speed.
+	 */
+	itr_round = ACCESS_ONCE(ring_container->itr_scale) + 8;
+	avg_wire_size += (1 << itr_round) - 1;
+	avg_wire_size >>= itr_round;
 
 	/* write back value and retain adaptive flag */
 	ring_container->itr = avg_wire_size | FM10K_ITR_ADAPTIVE;
@@ -1428,11 +1454,15 @@ static int fm10k_poll(struct napi_struct *napi, int budget)
 	fm10k_for_each_ring(ring, q_vector->tx)
 		clean_complete &= fm10k_clean_tx_irq(q_vector, ring);
 
+	/* Handle case where we are called by netpoll with a budget of 0 */
+	if (budget <= 0)
+		return budget;
+
 	/* attempt to distribute budget to each queue fairly, but don't
 	 * allow the budget to go below 1 because we'll exit polling
 	 */
 	if (q_vector->rx.count > 1)
-		per_ring_budget = max(budget/q_vector->rx.count, 1);
+		per_ring_budget = max(budget / q_vector->rx.count, 1);
 	else
 		per_ring_budget = budget;
 
@@ -1600,6 +1630,7 @@ static int fm10k_alloc_q_vector(struct fm10k_intfc *interface,
 	q_vector->tx.ring = ring;
 	q_vector->tx.work_limit = FM10K_DEFAULT_TX_WORK;
 	q_vector->tx.itr = interface->tx_itr;
+	q_vector->tx.itr_scale = interface->hw.mac.itr_scale;
 	q_vector->tx.count = txr_count;
 
 	while (txr_count) {
@@ -1628,6 +1659,7 @@ static int fm10k_alloc_q_vector(struct fm10k_intfc *interface,
 	/* save Rx ring container info */
 	q_vector->rx.ring = ring;
 	q_vector->rx.itr = interface->rx_itr;
+	q_vector->rx.itr_scale = interface->hw.mac.itr_scale;
 	q_vector->rx.count = rxr_count;
 
 	while (rxr_count) {
@@ -1966,8 +1998,10 @@ int fm10k_init_queueing_scheme(struct fm10k_intfc *interface)
 
 	/* Allocate memory for queues */
 	err = fm10k_alloc_q_vectors(interface);
-	if (err)
+	if (err) {
+		fm10k_reset_msix_capability(interface);
 		return err;
+	}
 
 	/* Map rings to devices, and map devices to physical queues */
 	fm10k_assign_rings(interface);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
index af09a1b272e6..c7fea47b8909 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
@@ -375,6 +375,8 @@ static void fm10k_mbx_write_copy(struct fm10k_hw *hw,
 			if (!tail)
 				tail++;
 
+			mbx->tx_mbmem_pulled++;
+
 			/* write message to hardware FIFO */
 			fm10k_write_reg(hw, mbmem + tail++, *(head++));
 		} while (--len && --end);
@@ -459,6 +461,8 @@ static void fm10k_mbx_read_copy(struct fm10k_hw *hw,
 			if (!head)
 				head++;
 
+			mbx->rx_mbmem_pushed++;
+
 			/* read message from hardware FIFO */
 			*(tail++) = fm10k_read_reg(hw, mbmem + head++);
 		} while (--len && --end);
@@ -899,7 +903,7 @@ static void fm10k_mbx_create_disconnect_hdr(struct fm10k_mbx_info *mbx)
 }
 
 /**
- *  fm10k_mbx_create_fake_disconnect_hdr - Generate a false disconnect mailbox header
+ *  fm10k_mbx_create_fake_disconnect_hdr - Generate a false disconnect mbox hdr
  *  @mbx: pointer to mailbox
  *
  *  This function creates a fake disconnect header for loading into remote
@@ -2136,6 +2140,7 @@ s32 fm10k_sm_mbx_init(struct fm10k_hw *hw, struct fm10k_mbx_info *mbx,
 {
 	mbx->mbx_reg = FM10K_GMBX;
 	mbx->mbmem_reg = FM10K_MBMEM_PF(0);
+
 	/* start out in closed state */
 	mbx->state = FM10K_STATE_CLOSED;
 
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
index 0419a7f0035e..c4f18a8f176c 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
@@ -1,5 +1,5 @@
 /* Intel Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
+ * Copyright(c) 2013 - 2015 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -291,8 +291,10 @@ struct fm10k_mbx_info {
 	u64 tx_dropped;
 	u64 tx_messages;
 	u64 tx_dwords;
+	u64 tx_mbmem_pulled;
 	u64 rx_messages;
 	u64 rx_dwords;
+	u64 rx_mbmem_pushed;
 	u64 rx_parse_err;
 
 	/* Buffer to store messages */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 639263d5e833..d9854d39576d 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -608,7 +608,7 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 	unsigned int r_idx = skb->queue_mapping;
 	int err;
 
-	if ((skb->protocol ==  htons(ETH_P_8021Q)) &&
+	if ((skb->protocol == htons(ETH_P_8021Q)) &&
 	    !skb_vlan_tag_present(skb)) {
 		/* FM10K only supports hardware tagging, any tags in frame
 		 * are considered 2nd level or "outer" tags
@@ -627,10 +627,12 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 
 		/* verify the skb head is not shared */
 		err = skb_cow_head(skb, 0);
-		if (err)
+		if (err) {
+			dev_kfree_skb(skb);
 			return NETDEV_TX_OK;
+		}
 
-		/* locate vlan header */
+		/* locate VLAN header */
 		vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
 
 		/* pull the 2 key pieces of data out of it */
@@ -703,7 +705,7 @@ static void fm10k_tx_timeout(struct net_device *netdev)
 	} else {
 		netif_info(interface, drv, netdev,
 			   "Fake Tx hang detected with timeout of %d seconds\n",
-			   netdev->watchdog_timeo/HZ);
+			   netdev->watchdog_timeo / HZ);
 
 		/* fake Tx hang - increase the kernel timeout */
 		if (netdev->watchdog_timeo < TX_TIMEO_LIMIT)
@@ -776,7 +778,7 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set)
 	if (!set)
 		clear_bit(vid, interface->active_vlans);
 
-	/* disable the default VID on ring if we have an active VLAN */
+	/* disable the default VLAN ID on ring if we have an active VLAN */
 	for (i = 0; i < interface->num_rx_queues; i++) {
 		struct fm10k_ring *rx_ring = interface->rx_ring[i];
 		u16 rx_vid = rx_ring->vid & (VLAN_N_VID - 1);
@@ -787,7 +789,9 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set)
 			rx_ring->vid &= ~FM10K_VLAN_CLEAR;
 	}
 
-	/* Do not remove default VID related entries from VLAN and MAC tables */
+	/* Do not remove default VLAN ID related entries from VLAN and MAC
+	 * tables
+	 */
 	if (!set && vid == hw->mac.default_vid)
 		return 0;
 
@@ -812,7 +816,7 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set)
 	if (err)
 		goto err_out;
 
-	/* set vid prior to syncing/unsyncing the VLAN */
+	/* set VLAN ID prior to syncing/unsyncing the VLAN */
 	interface->vid = vid + (set ? VLAN_N_VID : 0);
 
 	/* Update the unicast and multicast address list to add/drop VLAN */
@@ -1386,8 +1390,9 @@ static const struct net_device_ops fm10k_netdev_ops = {
 
 #define DEFAULT_DEBUG_LEVEL_SHIFT 3
 
-struct net_device *fm10k_alloc_netdev(void)
+struct net_device *fm10k_alloc_netdev(const struct fm10k_info *info)
 {
+	netdev_features_t hw_features;
 	struct fm10k_intfc *interface;
 	struct net_device *dev;
 
@@ -1410,27 +1415,31 @@ struct net_device *fm10k_alloc_netdev(void)
 			 NETIF_F_TSO |
 			 NETIF_F_TSO6 |
 			 NETIF_F_TSO_ECN |
-			 NETIF_F_GSO_UDP_TUNNEL |
 			 NETIF_F_RXHASH |
 			 NETIF_F_RXCSUM;
 
+	/* Only the PF can support VXLAN and NVGRE tunnel offloads */
+	if (info->mac == fm10k_mac_pf) {
+		dev->hw_enc_features = NETIF_F_IP_CSUM |
+				       NETIF_F_TSO |
+				       NETIF_F_TSO6 |
+				       NETIF_F_TSO_ECN |
+				       NETIF_F_GSO_UDP_TUNNEL |
+				       NETIF_F_IPV6_CSUM |
+				       NETIF_F_SG;
+
+		dev->features |= NETIF_F_GSO_UDP_TUNNEL;
+	}
+
 	/* all features defined to this point should be changeable */
-	dev->hw_features |= dev->features;
+	hw_features = dev->features;
 
 	/* allow user to enable L2 forwarding acceleration */
-	dev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD;
+	hw_features |= NETIF_F_HW_L2FW_DOFFLOAD;
 
 	/* configure VLAN features */
 	dev->vlan_features |= dev->features;
 
-	/* configure tunnel offloads */
-	dev->hw_enc_features |= NETIF_F_IP_CSUM |
-				NETIF_F_TSO |
-				NETIF_F_TSO6 |
-				NETIF_F_TSO_ECN |
-				NETIF_F_GSO_UDP_TUNNEL |
-				NETIF_F_IPV6_CSUM;
-
 	/* we want to leave these both on as we cannot disable VLAN tag
 	 * insertion or stripping on the hardware since it is contained
 	 * in the FTAG and not in the frame itself.
@@ -1441,5 +1450,7 @@ struct net_device *fm10k_alloc_netdev(void)
 
 	dev->priv_flags |= IFF_UNICAST_FLT;
 
+	dev->hw_features |= hw_features;
+
 	return dev;
 }
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index 74be792f3f1b..020f6dce4154 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -159,13 +159,31 @@ static void fm10k_reinit(struct fm10k_intfc *interface)
 
 	fm10k_mbx_free_irq(interface);
 
+	/* free interrupts */
+	fm10k_clear_queueing_scheme(interface);
+
 	/* delay any future reset requests */
 	interface->last_reset = jiffies + (10 * HZ);
 
 	/* reset and initialize the hardware so it is in a known state */
-	err = hw->mac.ops.reset_hw(hw) ? : hw->mac.ops.init_hw(hw);
-	if (err)
+	err = hw->mac.ops.reset_hw(hw);
+	if (err) {
+		dev_err(&interface->pdev->dev, "reset_hw failed: %d\n", err);
+		goto reinit_err;
+	}
+
+	err = hw->mac.ops.init_hw(hw);
+	if (err) {
 		dev_err(&interface->pdev->dev, "init_hw failed: %d\n", err);
+		goto reinit_err;
+	}
+
+	err = fm10k_init_queueing_scheme(interface);
+	if (err) {
+		dev_err(&interface->pdev->dev,
+			"init_queueing_scheme failed: %d\n", err);
+		goto reinit_err;
+	}
 
 	/* reassociate interrupts */
 	fm10k_mbx_request_irq(interface);
@@ -193,6 +211,10 @@ static void fm10k_reinit(struct fm10k_intfc *interface)
 
 	fm10k_iov_resume(interface->pdev);
 
+reinit_err:
+	if (err)
+		netif_device_detach(netdev);
+
 	rtnl_unlock();
 
 	clear_bit(__FM10K_RESETTING, &interface->state);
@@ -563,7 +585,7 @@ static void fm10k_configure_tx_ring(struct fm10k_intfc *interface,
 	/* store tail pointer */
 	ring->tail = &interface->uc_addr[FM10K_TDT(reg_idx)];
 
-	/* reset ntu and ntc to place SW in sync with hardwdare */
+	/* reset ntu and ntc to place SW in sync with hardware */
 	ring->next_to_clean = 0;
 	ring->next_to_use = 0;
 
@@ -579,6 +601,13 @@ static void fm10k_configure_tx_ring(struct fm10k_intfc *interface,
 	fm10k_write_reg(hw, FM10K_PFVTCTL(reg_idx),
 			FM10K_PFVTCTL_FTAG_DESC_ENABLE);
 
+	/* Initialize XPS */
+	if (!test_and_set_bit(__FM10K_TX_XPS_INIT_DONE, &ring->state) &&
+	    ring->q_vector)
+		netif_set_xps_queue(ring->netdev,
+				    &ring->q_vector->affinity_mask,
+				    ring->queue_index);
+
 	/* enable queue */
 	fm10k_write_reg(hw, FM10K_TXDCTL(reg_idx), txdctl);
 }
@@ -669,7 +698,7 @@ static void fm10k_configure_rx_ring(struct fm10k_intfc *interface,
 	/* store tail pointer */
 	ring->tail = &interface->uc_addr[FM10K_RDT(reg_idx)];
 
-	/* reset ntu and ntc to place SW in sync with hardwdare */
+	/* reset ntu and ntc to place SW in sync with hardware */
 	ring->next_to_clean = 0;
 	ring->next_to_use = 0;
 	ring->next_to_alloc = 0;
@@ -694,7 +723,7 @@ static void fm10k_configure_rx_ring(struct fm10k_intfc *interface,
 	/* assign default VLAN to queue */
 	ring->vid = hw->mac.default_vid;
 
-	/* if we have an active VLAN, disable default VID */
+	/* if we have an active VLAN, disable default VLAN ID */
 	if (test_bit(hw->mac.default_vid, interface->active_vlans))
 		ring->vid |= FM10K_VLAN_CLEAR;
 
@@ -846,7 +875,7 @@ static irqreturn_t fm10k_msix_clean_rings(int __always_unused irq, void *data)
 	struct fm10k_q_vector *q_vector = data;
 
 	if (q_vector->rx.count || q_vector->tx.count)
-		napi_schedule(&q_vector->napi);
+		napi_schedule_irqoff(&q_vector->napi);
 
 	return IRQ_HANDLED;
 }
@@ -859,7 +888,8 @@ static irqreturn_t fm10k_msix_mbx_vf(int __always_unused irq, void *data)
 
 	/* re-enable mailbox interrupt and indicate 20us delay */
 	fm10k_write_reg(hw, FM10K_VFITR(FM10K_MBX_VECTOR),
-			FM10K_ITR_ENABLE | FM10K_MBX_INT_DELAY);
+			FM10K_ITR_ENABLE | (FM10K_MBX_INT_DELAY >>
+					    hw->mac.itr_scale));
 
 	/* service upstream mailbox */
 	if (fm10k_mbx_trylock(interface)) {
@@ -897,7 +927,7 @@ void fm10k_netpoll(struct net_device *netdev)
 #endif
 #define FM10K_ERR_MSG(type) case (type): error = #type; break
 static void fm10k_handle_fault(struct fm10k_intfc *interface, int type,
-			      struct fm10k_fault *fault)
+			       struct fm10k_fault *fault)
 {
 	struct pci_dev *pdev = interface->pdev;
 	struct fm10k_hw *hw = &interface->hw;
@@ -1090,7 +1120,8 @@ static irqreturn_t fm10k_msix_mbx_pf(int __always_unused irq, void *data)
 
 	/* re-enable mailbox interrupt and indicate 20us delay */
 	fm10k_write_reg(hw, FM10K_ITR(FM10K_MBX_VECTOR),
-			FM10K_ITR_ENABLE | FM10K_MBX_INT_DELAY);
+			FM10K_ITR_ENABLE | (FM10K_MBX_INT_DELAY >>
+					    hw->mac.itr_scale));
 
 	return IRQ_HANDLED;
 }
@@ -1101,6 +1132,10 @@ void fm10k_mbx_free_irq(struct fm10k_intfc *interface)
 	struct fm10k_hw *hw = &interface->hw;
 	int itr_reg;
 
+	/* no mailbox IRQ to free if MSI-X is not enabled */
+	if (!interface->msix_entries)
+		return;
+
 	/* disconnect the mailbox */
 	hw->mbx.ops.disconnect(hw, &hw->mbx);
 
@@ -1269,7 +1304,7 @@ static s32 fm10k_update_pvid(struct fm10k_hw *hw, u32 **results,
 	if (!fm10k_glort_valid_pf(hw, glort))
 		return FM10K_ERR_PARAM;
 
-	/* verify VID is valid */
+	/* verify VLAN ID is valid */
 	if (pvid >= FM10K_VLAN_TABLE_VID_MAX)
 		return FM10K_ERR_PARAM;
 
@@ -1388,14 +1423,14 @@ static int fm10k_mbx_request_irq_pf(struct fm10k_intfc *interface)
 	}
 
 	/* Enable interrupts w/ no moderation for "other" interrupts */
-	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_PCIeFault), other_itr);
-	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_SwitchUpDown), other_itr);
-	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_SRAM), other_itr);
-	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_MaxHoldTime), other_itr);
-	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_VFLR), other_itr);
+	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_pcie_fault), other_itr);
+	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_switch_up_down), other_itr);
+	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_sram), other_itr);
+	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_max_hold_time), other_itr);
+	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_vflr), other_itr);
 
 	/* Enable interrupts w/ moderation for mailbox */
-	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_Mailbox), mbx_itr);
+	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_mailbox), mbx_itr);
 
 	/* Enable individual interrupt causes */
 	fm10k_write_reg(hw, FM10K_EIMR, FM10K_EIMR_ENABLE(PCA_FAULT) |
@@ -1423,10 +1458,15 @@ int fm10k_mbx_request_irq(struct fm10k_intfc *interface)
 		err = fm10k_mbx_request_irq_pf(interface);
 	else
 		err = fm10k_mbx_request_irq_vf(interface);
+	if (err)
+		return err;
 
 	/* connect mailbox */
-	if (!err)
-		err = hw->mbx.ops.connect(hw, &hw->mbx);
+	err = hw->mbx.ops.connect(hw, &hw->mbx);
+
+	/* if the mailbox failed to connect, then free IRQ */
+	if (err)
+		fm10k_mbx_free_irq(interface);
 
 	return err;
 }
@@ -1455,8 +1495,10 @@ void fm10k_qv_free_irq(struct fm10k_intfc *interface)
 		if (!q_vector->tx.count && !q_vector->rx.count)
 			continue;
 
-		/* disable interrupts */
+		/* clear the affinity_mask in the IRQ descriptor */
+		irq_set_affinity_hint(entry->vector, NULL);
 
+		/* disable interrupts */
 		writel(FM10K_ITR_MASK_SET, q_vector->itr);
 
 		free_irq(entry->vector, q_vector);
@@ -1514,6 +1556,9 @@ int fm10k_qv_request_irq(struct fm10k_intfc *interface)
 			goto err_out;
 		}
 
+		/* assign the mask for this irq */
+		irq_set_affinity_hint(entry->vector, &q_vector->affinity_mask);
+
 		/* Enable q_vector */
 		writel(FM10K_ITR_ENABLE, q_vector->itr);
 
@@ -1534,8 +1579,10 @@ err_out:
 		if (!q_vector->tx.count && !q_vector->rx.count)
 			continue;
 
-		/* disable interrupts */
+		/* clear the affinity_mask in the IRQ descriptor */
+		irq_set_affinity_hint(entry->vector, NULL);
 
+		/* disable interrupts */
 		writel(FM10K_ITR_MASK_SET, q_vector->itr);
 
 		free_irq(entry->vector, q_vector);
@@ -1684,7 +1731,13 @@ static int fm10k_sw_init(struct fm10k_intfc *interface,
 	interface->last_reset = jiffies + (10 * HZ);
 
 	/* reset and initialize the hardware so it is in a known state */
-	err = hw->mac.ops.reset_hw(hw) ? : hw->mac.ops.init_hw(hw);
+	err = hw->mac.ops.reset_hw(hw);
+	if (err) {
+		dev_err(&pdev->dev, "reset_hw failed: %d\n", err);
+		return err;
+	}
+
+	err = hw->mac.ops.init_hw(hw);
 	if (err) {
 		dev_err(&pdev->dev, "init_hw failed: %d\n", err);
 		return err;
@@ -1722,13 +1775,6 @@ static int fm10k_sw_init(struct fm10k_intfc *interface,
 					     pci_resource_len(pdev, 4));
 	hw->sw_addr = interface->sw_addr;
 
-	/* Only the PF can support VXLAN and NVGRE offloads */
-	if (hw->mac.type != fm10k_mac_pf) {
-		netdev->hw_enc_features = 0;
-		netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL;
-		netdev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL;
-	}
-
 	/* initialize DCBNL interface */
 	fm10k_dcbnl_set_ops(netdev);
 
@@ -1749,8 +1795,8 @@ static int fm10k_sw_init(struct fm10k_intfc *interface,
 	interface->rx_ring_count = FM10K_DEFAULT_RXD;
 
 	/* set default interrupt moderation */
-	interface->tx_itr = FM10K_ITR_10K;
-	interface->rx_itr = FM10K_ITR_ADAPTIVE | FM10K_ITR_20K;
+	interface->tx_itr = FM10K_TX_ITR_DEFAULT;
+	interface->rx_itr = FM10K_ITR_ADAPTIVE | FM10K_RX_ITR_DEFAULT;
 
 	/* initialize vxlan_port list */
 	INIT_LIST_HEAD(&interface->vxlan_port);
@@ -1835,17 +1881,18 @@ static void fm10k_slot_warn(struct fm10k_intfc *interface)
 		return;
 	}
 
-	if (max_gts < expected_gts) {
-		dev_warn(&interface->pdev->dev,
-			 "This device requires %dGT/s of bandwidth for optimal performance.\n",
-			 expected_gts);
-		dev_warn(&interface->pdev->dev,
-			 "A %sslot with x%d lanes is suggested.\n",
-			 (hw->bus_caps.speed == fm10k_bus_speed_2500 ? "2.5GT/s " :
-			  hw->bus_caps.speed == fm10k_bus_speed_5000 ? "5.0GT/s " :
-			  hw->bus_caps.speed == fm10k_bus_speed_8000 ? "8.0GT/s " : ""),
-			 hw->bus_caps.width);
-	}
+	if (max_gts >= expected_gts)
+		return;
+
+	dev_warn(&interface->pdev->dev,
+		 "This device requires %dGT/s of bandwidth for optimal performance.\n",
+		 expected_gts);
+	dev_warn(&interface->pdev->dev,
+		 "A %sslot with x%d lanes is suggested.\n",
+		 (hw->bus_caps.speed == fm10k_bus_speed_2500 ? "2.5GT/s " :
+		  hw->bus_caps.speed == fm10k_bus_speed_5000 ? "5.0GT/s " :
+		  hw->bus_caps.speed == fm10k_bus_speed_8000 ? "8.0GT/s " : ""),
+		 hw->bus_caps.width);
 }
 
 /**
@@ -1859,8 +1906,7 @@ static void fm10k_slot_warn(struct fm10k_intfc *interface)
  * The OS initialization, configuring of the interface private structure,
  * and a hardware reset occur.
  **/
-static int fm10k_probe(struct pci_dev *pdev,
-		       const struct pci_device_id *ent)
+static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	struct net_device *netdev;
 	struct fm10k_intfc *interface;
@@ -1894,7 +1940,7 @@ static int fm10k_probe(struct pci_dev *pdev,
 	pci_set_master(pdev);
 	pci_save_state(pdev);
 
-	netdev = fm10k_alloc_netdev();
+	netdev = fm10k_alloc_netdev(fm10k_info_tbl[ent->driver_data]);
 	if (!netdev) {
 		err = -ENOMEM;
 		goto err_alloc_netdev;
@@ -2071,8 +2117,10 @@ static int fm10k_resume(struct pci_dev *pdev)
 
 	/* reset hardware to known state */
 	err = hw->mac.ops.init_hw(&interface->hw);
-	if (err)
+	if (err) {
+		dev_err(&pdev->dev, "init_hw failed: %d\n", err);
 		return err;
+	}
 
 	/* reset statistics starting values */
 	hw->mac.ops.rebind_hw_stats(hw, &interface->stats);
@@ -2185,6 +2233,9 @@ static pci_ers_result_t fm10k_io_error_detected(struct pci_dev *pdev,
 	if (netif_running(netdev))
 		fm10k_close(netdev);
 
+	/* free interrupts */
+	fm10k_clear_queueing_scheme(interface);
+
 	fm10k_mbx_free_irq(interface);
 
 	pci_disable_device(pdev);
@@ -2248,11 +2299,22 @@ static void fm10k_io_resume(struct pci_dev *pdev)
 	int err = 0;
 
 	/* reset hardware to known state */
-	hw->mac.ops.init_hw(&interface->hw);
+	err = hw->mac.ops.init_hw(&interface->hw);
+	if (err) {
+		dev_err(&pdev->dev, "init_hw failed: %d\n", err);
+		return;
+	}
 
 	/* reset statistics starting values */
 	hw->mac.ops.rebind_hw_stats(hw, &interface->stats);
 
+	err = fm10k_init_queueing_scheme(interface);
+	if (err) {
+		dev_err(&interface->pdev->dev,
+			"init_queueing_scheme failed: %d\n", err);
+		return;
+	}
+
 	/* reassociate interrupts */
 	fm10k_mbx_request_irq(interface);
 
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index 8c0bdc4e4edd..808307e67718 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1,5 +1,5 @@
 /* Intel Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
+ * Copyright(c) 2013 - 2015 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -150,19 +150,26 @@ static s32 fm10k_init_hw_pf(struct fm10k_hw *hw)
 				FM10K_TPH_RXCTRL_HDR_WROEN);
 	}
 
-	/* set max hold interval to align with 1.024 usec in all modes */
+	/* set max hold interval to align with 1.024 usec in all modes and
+	 * store ITR scale
+	 */
 	switch (hw->bus.speed) {
 	case fm10k_bus_speed_2500:
 		dma_ctrl = FM10K_DMA_CTRL_MAX_HOLD_1US_GEN1;
+		hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN1;
 		break;
 	case fm10k_bus_speed_5000:
 		dma_ctrl = FM10K_DMA_CTRL_MAX_HOLD_1US_GEN2;
+		hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN2;
 		break;
 	case fm10k_bus_speed_8000:
 		dma_ctrl = FM10K_DMA_CTRL_MAX_HOLD_1US_GEN3;
+		hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN3;
 		break;
 	default:
 		dma_ctrl = 0;
+		/* just in case, assume Gen3 ITR scale */
+		hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN3;
 		break;
 	}
 
@@ -259,7 +266,6 @@ static s32 fm10k_read_mac_addr_pf(struct fm10k_hw *hw)
 {
 	u8 perm_addr[ETH_ALEN];
 	u32 serial_num;
-	int i;
 
 	serial_num = fm10k_read_reg(hw, FM10K_SM_AREA(1));
 
@@ -281,10 +287,8 @@ static s32 fm10k_read_mac_addr_pf(struct fm10k_hw *hw)
 	perm_addr[4] = (u8)(serial_num >> 8);
 	perm_addr[5] = (u8)(serial_num);
 
-	for (i = 0; i < ETH_ALEN; i++) {
-		hw->mac.perm_addr[i] = perm_addr[i];
-		hw->mac.addr[i] = perm_addr[i];
-	}
+	ether_addr_copy(hw->mac.perm_addr, perm_addr);
+	ether_addr_copy(hw->mac.addr, perm_addr);
 
 	return 0;
 }
@@ -325,7 +329,7 @@ static s32 fm10k_update_xc_addr_pf(struct fm10k_hw *hw, u16 glort,
 	/* clear set bit from VLAN ID */
 	vid &= ~FM10K_VLAN_CLEAR;
 
-	/* if glort or vlan are not valid return error */
+	/* if glort or VLAN are not valid return error */
 	if (!fm10k_glort_valid_pf(hw, glort) || vid >= FM10K_VLAN_TABLE_VID_MAX)
 		return FM10K_ERR_PARAM;
 
@@ -334,8 +338,8 @@ static s32 fm10k_update_xc_addr_pf(struct fm10k_hw *hw, u16 glort,
 						 ((u32)mac[3] << 16) |
 						 ((u32)mac[4] << 8) |
 						 ((u32)mac[5]));
-	mac_update.mac_upper = cpu_to_le16(((u32)mac[0] << 8) |
-						 ((u32)mac[1]));
+	mac_update.mac_upper = cpu_to_le16(((u16)mac[0] << 8) |
+					   ((u16)mac[1]));
 	mac_update.vlan = cpu_to_le16(vid);
 	mac_update.glort = cpu_to_le16(glort);
 	mac_update.action = add ? 0 : 1;
@@ -410,6 +414,7 @@ static s32 fm10k_update_xcast_mode_pf(struct fm10k_hw *hw, u16 glort, u8 mode)
 
 	if (mode > FM10K_XCAST_MODE_NONE)
 		return FM10K_ERR_PARAM;
+
 	/* if glort is not valid return error */
 	if (!fm10k_glort_valid_pf(hw, glort))
 		return FM10K_ERR_PARAM;
@@ -903,6 +908,13 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw,
 	fm10k_write_reg(hw, FM10K_TDBAL(vf_q_idx), tdbal);
 	fm10k_write_reg(hw, FM10K_TDBAH(vf_q_idx), tdbah);
 
+	/* Provide the VF the ITR scale, using software-defined fields in TDLEN
+	 * to pass the information during VF initialization. See definition of
+	 * FM10K_TDLEN_ITR_SCALE_SHIFT for more details.
+	 */
+	fm10k_write_reg(hw, FM10K_TDLEN(vf_q_idx), hw->mac.itr_scale <<
+						   FM10K_TDLEN_ITR_SCALE_SHIFT);
+
 err_out:
 	/* configure Queue control register */
 	txqctl = ((u32)vf_vid << FM10K_TXQCTL_VID_SHIFT) &
@@ -910,7 +922,7 @@ err_out:
 	txqctl |= (vf_idx << FM10K_TXQCTL_TC_SHIFT) |
 		  FM10K_TXQCTL_VF | vf_idx;
 
-	/* assign VID */
+	/* assign VLAN ID */
 	for (i = 0; i < queues_per_pool; i++)
 		fm10k_write_reg(hw, FM10K_TXQCTL(vf_q_idx + i), txqctl);
 
@@ -1035,6 +1047,12 @@ static s32 fm10k_iov_reset_resources_pf(struct fm10k_hw *hw,
 	for (i = queues_per_pool; i--;) {
 		fm10k_write_reg(hw, FM10K_TDBAL(vf_q_idx + i), tdbal);
 		fm10k_write_reg(hw, FM10K_TDBAH(vf_q_idx + i), tdbah);
+		/* See definition of FM10K_TDLEN_ITR_SCALE_SHIFT for an
+		 * explanation of how TDLEN is used.
+		 */
+		fm10k_write_reg(hw, FM10K_TDLEN(vf_q_idx + i),
+				hw->mac.itr_scale <<
+				FM10K_TDLEN_ITR_SCALE_SHIFT);
 		fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx + i), vf_q_idx + i);
 		fm10k_write_reg(hw, FM10K_RQMAP(qmap_idx + i), vf_q_idx + i);
 	}
@@ -1155,14 +1173,14 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results,
 }
 
 /**
- * fm10k_iov_select_vid - Select correct default VID
+ * fm10k_iov_select_vid - Select correct default VLAN ID
  * @hw: Pointer to hardware structure
- * @vid: VID to correct
+ * @vid: VLAN ID to correct
  *
- * Will report an error if VID is out of range. For VID = 0, it will return
- * either the pf_vid or sw_vid depending on which one is set.
+ * Will report an error if the VLAN ID is out of range. For VID = 0, it will
+ * return either the pf_vid or sw_vid depending on which one is set.
  */
-static inline s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid)
+static s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid)
 {
 	if (!vid)
 		return vf_info->pf_vid ? vf_info->pf_vid : vf_info->sw_vid;
@@ -1212,11 +1230,11 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 **results,
 		set = !(vid & FM10K_VLAN_CLEAR);
 		vid &= ~FM10K_VLAN_CLEAR;
 
-		err = fm10k_iov_select_vid(vf_info, vid);
+		err = fm10k_iov_select_vid(vf_info, (u16)vid);
 		if (err < 0)
 			return err;
-		else
-			vid = err;
+
+		vid = err;
 
 		/* update VSI info for VF in regards to VLAN table */
 		err = hw->mac.ops.update_vlan(hw, vid, vf_info->vsi, set);
@@ -1241,8 +1259,8 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 **results,
 		err = fm10k_iov_select_vid(vf_info, vlan);
 		if (err < 0)
 			return err;
-		else
-			vlan = err;
+
+		vlan = (u16)err;
 
 		/* notify switch of request for new unicast address */
 		err = hw->mac.ops.update_uc_addr(hw, vf_info->glort,
@@ -1267,8 +1285,8 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 **results,
 		err = fm10k_iov_select_vid(vf_info, vlan);
 		if (err < 0)
 			return err;
-		else
-			vlan = err;
+
+		vlan = (u16)err;
 
 		/* notify switch of request for new multicast address */
 		err = hw->mac.ops.update_mc_addr(hw, vf_info->glort,
@@ -1396,14 +1414,6 @@ s32 fm10k_iov_msg_lport_state_pf(struct fm10k_hw *hw, u32 **results,
 	return err;
 }
 
-const struct fm10k_msg_data fm10k_iov_msg_data_pf[] = {
-	FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test),
-	FM10K_VF_MSG_MSIX_HANDLER(fm10k_iov_msg_msix_pf),
-	FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_iov_msg_mac_vlan_pf),
-	FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_iov_msg_lport_state_pf),
-	FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
-};
-
 /**
  *  fm10k_update_stats_hw_pf - Updates hardware related statistics of PF
  *  @hw: pointer to hardware structure
@@ -1431,9 +1441,10 @@ static void fm10k_update_hw_stats_pf(struct fm10k_hw *hw,
 		xec = fm10k_read_hw_stats_32b(hw, FM10K_STATS_XEC, &stats->xec);
 		vlan_drop = fm10k_read_hw_stats_32b(hw, FM10K_STATS_VLAN_DROP,
 						    &stats->vlan_drop);
-		loopback_drop = fm10k_read_hw_stats_32b(hw,
-							FM10K_STATS_LOOPBACK_DROP,
-							&stats->loopback_drop);
+		loopback_drop =
+			fm10k_read_hw_stats_32b(hw,
+						FM10K_STATS_LOOPBACK_DROP,
+						&stats->loopback_drop);
 		nodesc_drop = fm10k_read_hw_stats_32b(hw,
 						      FM10K_STATS_NODESC_DROP,
 						      &stats->nodesc_drop);
@@ -1678,8 +1689,8 @@ const struct fm10k_tlv_attr fm10k_update_pvid_msg_attr[] = {
  *
  *  This handler configures the default VLAN for the PF
  **/
-s32 fm10k_msg_update_pvid_pf(struct fm10k_hw *hw, u32 **results,
-			     struct fm10k_mbx_info *mbx)
+static s32 fm10k_msg_update_pvid_pf(struct fm10k_hw *hw, u32 **results,
+				    struct fm10k_mbx_info *mbx)
 {
 	u16 glort, pvid;
 	u32 pvid_update;
@@ -1698,7 +1709,7 @@ s32 fm10k_msg_update_pvid_pf(struct fm10k_hw *hw, u32 **results,
 	if (!fm10k_glort_valid_pf(hw, glort))
 		return FM10K_ERR_PARAM;
 
-	/* verify VID is valid */
+	/* verify VLAN ID is valid */
 	if (pvid >= FM10K_VLAN_TABLE_VID_MAX)
 		return FM10K_ERR_PARAM;
 
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
index 40a0dbc62a04..a8fc512a2416 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
@@ -1,5 +1,5 @@
 /* Intel Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
+ * Copyright(c) 2013 - 2015 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -107,8 +107,6 @@ extern const struct fm10k_tlv_attr fm10k_lport_map_msg_attr[];
 #define FM10K_PF_MSG_LPORT_MAP_HANDLER(func) \
 	FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_LPORT_MAP, \
 			  fm10k_lport_map_msg_attr, func)
-s32 fm10k_msg_update_pvid_pf(struct fm10k_hw *, u32 **,
-			     struct fm10k_mbx_info *);
 extern const struct fm10k_tlv_attr fm10k_update_pvid_msg_attr[];
 #define FM10K_PF_MSG_UPDATE_PVID_HANDLER(func) \
 	FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_UPDATE_PVID, \
@@ -129,7 +127,6 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *, u32 **,
 			      struct fm10k_mbx_info *);
 s32 fm10k_iov_msg_lport_state_pf(struct fm10k_hw *, u32 **,
 				 struct fm10k_mbx_info *);
-extern const struct fm10k_msg_data fm10k_iov_msg_data_pf[];
 
 extern struct fm10k_info fm10k_pf_info;
 #endif /* _FM10K_PF_H */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
index 9b29d7b0377a..95afb5c0c9c4 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
@@ -1,5 +1,5 @@
 /* Intel Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
+ * Copyright(c) 2013 - 2015 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -48,8 +48,8 @@ s32 fm10k_tlv_msg_init(u32 *msg, u16 msg_id)
  *  the attribute buffer.  It will return success if provided with a valid
  *  pointers.
  **/
-s32 fm10k_tlv_attr_put_null_string(u32 *msg, u16 attr_id,
-				   const unsigned char *string)
+static s32 fm10k_tlv_attr_put_null_string(u32 *msg, u16 attr_id,
+					  const unsigned char *string)
 {
 	u32 attr_data = 0, len = 0;
 	u32 *attr;
@@ -98,7 +98,7 @@ s32 fm10k_tlv_attr_put_null_string(u32 *msg, u16 attr_id,
  *  it in the array pointed by by string.  It will return success if provided
  *  with a valid pointers.
  **/
-s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string)
+static s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string)
 {
 	u32 len;
 
@@ -353,7 +353,7 @@ s32 fm10k_tlv_attr_get_le_struct(u32 *attr, void *le_struct, u32 len)
  *  function will return NULL on failure, and a pointer to the start
  *  of the nested attributes on success.
  **/
-u32 *fm10k_tlv_attr_nest_start(u32 *msg, u16 attr_id)
+static u32 *fm10k_tlv_attr_nest_start(u32 *msg, u16 attr_id)
 {
 	u32 *attr;
 
@@ -370,7 +370,7 @@ u32 *fm10k_tlv_attr_nest_start(u32 *msg, u16 attr_id)
 }
 
 /**
- *  fm10k_tlv_attr_nest_start - Start a set of nested attributes
+ *  fm10k_tlv_attr_nest_stop - Stop a set of nested attributes
  *  @msg: Pointer to message block
  *
  *  This function closes off an existing set of nested attributes.  The
@@ -378,7 +378,7 @@ u32 *fm10k_tlv_attr_nest_start(u32 *msg, u16 attr_id)
  *  the case of a nest within the nest this would be the outer nest pointer.
  *  This function will return success provided all pointers are valid.
  **/
-s32 fm10k_tlv_attr_nest_stop(u32 *msg)
+static s32 fm10k_tlv_attr_nest_stop(u32 *msg)
 {
 	u32 *attr;
 	u32 len;
@@ -483,8 +483,8 @@ static s32 fm10k_tlv_attr_validate(u32 *attr,
  *  FM10K_NOT_IMPLEMENTED for any attribute that is outside of the array
  *  and 0 on success.
  **/
-s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results,
-			 const struct fm10k_tlv_attr *tlv_attr)
+static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results,
+				const struct fm10k_tlv_attr *tlv_attr)
 {
 	u32 i, attr_id, offset = 0;
 	s32 err = 0;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
index 7e045e8bf1eb..d5ad359c1d54 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
@@ -1,5 +1,5 @@
 /* Intel Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
+ * Copyright(c) 2013 - 2015 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -106,8 +106,6 @@ struct fm10k_msg_data {
 #define FM10K_MSG_HANDLER(id, attr, func) { id, attr, func }
 
 s32 fm10k_tlv_msg_init(u32 *, u16);
-s32 fm10k_tlv_attr_put_null_string(u32 *, u16, const unsigned char *);
-s32 fm10k_tlv_attr_get_null_string(u32 *, unsigned char *);
 s32 fm10k_tlv_attr_put_mac_vlan(u32 *, u16, const u8 *, u16);
 s32 fm10k_tlv_attr_get_mac_vlan(u32 *, u8 *, u16 *);
 s32 fm10k_tlv_attr_put_bool(u32 *, u16);
@@ -147,9 +145,6 @@ s32 fm10k_tlv_attr_get_value(u32 *, void *, u32);
 		fm10k_tlv_attr_get_value(attr, ptr, sizeof(s64))
 s32 fm10k_tlv_attr_put_le_struct(u32 *, u16, const void *, u32);
 s32 fm10k_tlv_attr_get_le_struct(u32 *, void *, u32);
-u32 *fm10k_tlv_attr_nest_start(u32 *, u16);
-s32 fm10k_tlv_attr_nest_stop(u32 *);
-s32 fm10k_tlv_attr_parse(u32 *, u32 **, const struct fm10k_tlv_attr *);
 s32 fm10k_tlv_msg_parse(struct fm10k_hw *, u32 *, struct fm10k_mbx_info *,
 			const struct fm10k_msg_data *);
 s32 fm10k_tlv_msg_error(struct fm10k_hw *hw, u32 **results,
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
index 318a212f0a78..098883d2875f 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
@@ -1,5 +1,5 @@
 /* Intel Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
+ * Copyright(c) 2013 - 2015 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -77,6 +77,7 @@ struct fm10k_hw;
 #define FM10K_PCIE_SRIOV_CTRL_VFARI		0x10
 
 #define FM10K_ERR_PARAM				-2
+#define FM10K_ERR_NO_RESOURCES			-3
 #define FM10K_ERR_REQUESTS_PENDING		-4
 #define FM10K_ERR_RESET_REQUESTED		-5
 #define FM10K_ERR_DMA_PENDING			-6
@@ -271,6 +272,20 @@ struct fm10k_hw;
 #define FM10K_TDBAL(_n)		((0x40 * (_n)) + 0x8000)
 #define FM10K_TDBAH(_n)		((0x40 * (_n)) + 0x8001)
 #define FM10K_TDLEN(_n)		((0x40 * (_n)) + 0x8002)
+/* When first initialized, VFs need to know the Interrupt Throttle Rate (ITR)
+ * scale which is based on the PCIe speed but the speed information in the PCI
+ * configuration space may not be accurate. The PF already knows the ITR scale
+ * but there is no defined method to pass that information from the PF to the
+ * VF. This is accomplished during VF initialization by temporarily co-opting
+ * the yet-to-be-used TDLEN register to have the PF store the ITR shift for
+ * the VF to retrieve before the VF needs to use the TDLEN register for its
+ * intended purpose, i.e. before the Tx resources are allocated.
+ */
+#define FM10K_TDLEN_ITR_SCALE_SHIFT		9
+#define FM10K_TDLEN_ITR_SCALE_MASK		0x00000E00
+#define FM10K_TDLEN_ITR_SCALE_GEN1		2
+#define FM10K_TDLEN_ITR_SCALE_GEN2		1
+#define FM10K_TDLEN_ITR_SCALE_GEN3		0
 #define FM10K_TPH_TXCTRL(_n)	((0x40 * (_n)) + 0x8003)
 #define FM10K_TPH_TXCTRL_DESC_TPHEN		0x00000020
 #define FM10K_TPH_TXCTRL_DESC_RROEN		0x00000200
@@ -339,7 +354,7 @@ struct fm10k_hw;
 #define FM10K_VLAN_TABLE_VID_MAX		4096
 #define FM10K_VLAN_TABLE_VSI_MAX		64
 #define FM10K_VLAN_LENGTH_SHIFT			16
-#define FM10K_VLAN_CLEAR			(1 << 15)
+#define FM10K_VLAN_CLEAR			BIT(15)
 #define FM10K_VLAN_ALL \
 	((FM10K_VLAN_TABLE_VID_MAX - 1) << FM10K_VLAN_LENGTH_SHIFT)
 
@@ -373,13 +388,13 @@ struct fm10k_hw;
 #define FM10K_SW_SYSTIME_PULSE(_n)	((_n) + 0x02252)
 
 enum fm10k_int_source {
-	fm10k_int_Mailbox	= 0,
-	fm10k_int_PCIeFault	= 1,
-	fm10k_int_SwitchUpDown	= 2,
-	fm10k_int_SwitchEvent	= 3,
-	fm10k_int_SRAM		= 4,
-	fm10k_int_VFLR		= 5,
-	fm10k_int_MaxHoldTime	= 6,
+	fm10k_int_mailbox		= 0,
+	fm10k_int_pcie_fault		= 1,
+	fm10k_int_switch_up_down	= 2,
+	fm10k_int_switch_event		= 3,
+	fm10k_int_sram			= 4,
+	fm10k_int_vflr			= 5,
+	fm10k_int_max_hold_time		= 6,
 	fm10k_int_sources_max_pf
 };
 
@@ -559,6 +574,7 @@ struct fm10k_mac_info {
 	bool get_host_state;
 	bool tx_ready;
 	u32 dglort_map;
+	u8 itr_scale;
 };
 
 struct fm10k_swapi_table_info {
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
index 36c8b0aa08fd..5445c0fab49f 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
@@ -28,7 +28,7 @@
 static s32 fm10k_stop_hw_vf(struct fm10k_hw *hw)
 {
 	u8 *perm_addr = hw->mac.perm_addr;
-	u32 bal = 0, bah = 0;
+	u32 bal = 0, bah = 0, tdlen;
 	s32 err;
 	u16 i;
 
@@ -48,6 +48,9 @@ static s32 fm10k_stop_hw_vf(struct fm10k_hw *hw)
 		       ((u32)perm_addr[2]);
 	}
 
+	/* restore default itr_scale for next VF initialization */
+	tdlen = hw->mac.itr_scale << FM10K_TDLEN_ITR_SCALE_SHIFT;
+
 	/* The queues have already been disabled so we just need to
 	 * update their base address registers
 	 */
@@ -56,6 +59,12 @@ static s32 fm10k_stop_hw_vf(struct fm10k_hw *hw)
 		fm10k_write_reg(hw, FM10K_TDBAH(i), bah);
 		fm10k_write_reg(hw, FM10K_RDBAL(i), bal);
 		fm10k_write_reg(hw, FM10K_RDBAH(i), bah);
+		/* Restore ITR scale in software-defined mechanism in TDLEN
+		 * for next VF initialization. See definition of
+		 * FM10K_TDLEN_ITR_SCALE_SHIFT for more details on the use of
+		 * TDLEN here.
+		 */
+		fm10k_write_reg(hw, FM10K_TDLEN(i), tdlen);
 	}
 
 	return 0;
@@ -103,7 +112,14 @@ static s32 fm10k_init_hw_vf(struct fm10k_hw *hw)
 	s32 err;
 	u16 i;
 
-	/* assume we always have at least 1 queue */
+	/* verify we have at least 1 queue */
+	if (!~fm10k_read_reg(hw, FM10K_TXQCTL(0)) ||
+	    !~fm10k_read_reg(hw, FM10K_RXQCTL(0))) {
+		err = FM10K_ERR_NO_RESOURCES;
+		goto reset_max_queues;
+	}
+
+	/* determine how many queues we have */
 	for (i = 1; tqdloc0 && (i < FM10K_MAX_QUEUES_POOL); i++) {
 		/* verify the Descriptor cache offsets are increasing */
 		tqdloc = ~fm10k_read_reg(hw, FM10K_TQDLOC(i));
@@ -119,16 +135,28 @@ static s32 fm10k_init_hw_vf(struct fm10k_hw *hw)
 	/* shut down queues we own and reset DMA configuration */
 	err = fm10k_disable_queues_generic(hw, i);
 	if (err)
-		return err;
+		goto reset_max_queues;
 
 	/* record maximum queue count */
 	hw->mac.max_queues = i;
 
-	/* fetch default VLAN */
+	/* fetch default VLAN and ITR scale */
 	hw->mac.default_vid = (fm10k_read_reg(hw, FM10K_TXQCTL(0)) &
 			       FM10K_TXQCTL_VID_MASK) >> FM10K_TXQCTL_VID_SHIFT;
+	/* Read the ITR scale from TDLEN. See the definition of
+	 * FM10K_TDLEN_ITR_SCALE_SHIFT for more information about how TDLEN is
+	 * used here.
+	 */
+	hw->mac.itr_scale = (fm10k_read_reg(hw, FM10K_TDLEN(0)) &
+			     FM10K_TDLEN_ITR_SCALE_MASK) >>
+			    FM10K_TDLEN_ITR_SCALE_SHIFT;
 
 	return 0;
+
+reset_max_queues:
+	hw->mac.max_queues = 0;
+
+	return err;
 }
 
 /* This structure defines the attibutes to be parsed below */
@@ -414,6 +442,7 @@ static s32 fm10k_update_xcast_mode_vf(struct fm10k_hw *hw, u16 glort, u8 mode)
 
 	if (mode > FM10K_XCAST_MODE_NONE)
 		return FM10K_ERR_PARAM;
+
 	/* generate message requesting to change xcast mode */
 	fm10k_tlv_msg_init(msg, FM10K_VF_MSG_ID_LPORT_STATE);
 	fm10k_tlv_attr_put_u8(msg, FM10K_LPORT_STATE_MSG_XCAST_MODE, mode);
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 4dd3e26129b4..b7bc014ae00b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -42,7 +42,6 @@
 #include <linux/string.h>
 #include <linux/in.h>
 #include <linux/ip.h>
-#include <linux/tcp.h>
 #include <linux/sctp.h>
 #include <linux/pkt_sched.h>
 #include <linux/ipv6.h>
@@ -104,6 +103,7 @@
 #define I40E_PRIV_FLAGS_LINKPOLL_FLAG	BIT(1)
 #define I40E_PRIV_FLAGS_FD_ATR		BIT(2)
 #define I40E_PRIV_FLAGS_VEB_STATS	BIT(3)
+#define I40E_PRIV_FLAGS_PS		BIT(4)
 
 #define I40E_NVM_VERSION_LO_SHIFT  0
 #define I40E_NVM_VERSION_LO_MASK   (0xff << I40E_NVM_VERSION_LO_SHIFT)
@@ -187,6 +187,7 @@ struct i40e_lump_tracking {
 #define I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR (I40E_FDIR_BUFFER_HEAD_ROOM * 4)
 
 #define I40E_HKEY_ARRAY_SIZE ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4)
+#define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4)
 
 enum i40e_fd_stat_idx {
 	I40E_FD_STAT_ATR,
@@ -265,7 +266,7 @@ struct i40e_pf {
 	u16 num_lan_qps;           /* num lan queues this PF has set up */
 	u16 num_lan_msix;          /* num queue vectors for the base PF vsi */
 	int queues_left;           /* queues left unclaimed */
-	u16 rss_size;              /* num queues in the RSS array */
+	u16 alloc_rss_size;        /* allocated RSS queues */
 	u16 rss_size_max;          /* HW defined max RSS queues */
 	u16 fdir_pf_filter_count;  /* num of guaranteed filters for this PF */
 	u16 num_alloc_vsi;         /* num VSIs this driver supports */
@@ -412,7 +413,7 @@ struct i40e_pf {
 	u32 rx_hwtstamp_cleared;
 	bool ptp_tx;
 	bool ptp_rx;
-	u16 rss_table_size;
+	u16 rss_table_size; /* HW RSS table size */
 	/* These are only valid in NPAR modes */
 	u32 npar_max_bw;
 	u32 npar_min_bw;
@@ -487,6 +488,7 @@ struct i40e_vsi {
 	u32 tx_restart;
 	u32 tx_busy;
 	u64 tx_linearize;
+	u64 tx_force_wb;
 	u32 rx_buf_failed;
 	u32 rx_page_failed;
 
@@ -504,8 +506,10 @@ struct i40e_vsi {
 	u16 tx_itr_setting;
 	u16 int_rate_limit;  /* value in usecs */
 
-	u16 rss_table_size;
-	u16 rss_size;
+	u16 rss_table_size; /* HW RSS table size */
+	u16 rss_size;       /* Allocated RSS queues */
+	u8  *rss_hkey_user; /* User configured hash keys */
+	u8  *rss_lut_user;  /* User configured lookup table entries */
 
 	u16 max_frame;
 	u16 rx_hdr_len;
@@ -575,6 +579,9 @@ struct i40e_q_vector {
 
 	u8 num_ringpairs;	/* total number of ring pairs in vector */
 
+#define I40E_Q_VECTOR_HUNG_DETECT 0 /* Bit Index for hung detection logic */
+	unsigned long hung_detected; /* Set/Reset for hung_detection logic */
+
 	cpumask_t affinity_mask;
 	struct rcu_head rcu;	/* to avoid race with update stats on free */
 	char name[I40E_INT_NAME_STR_LEN];
@@ -602,8 +609,8 @@ static inline char *i40e_nvm_version_str(struct i40e_hw *hw)
 
 	full_ver = hw->nvm.oem_ver;
 	ver = (u8)(full_ver >> I40E_OEM_VER_SHIFT);
-	build = (u16)((full_ver >> I40E_OEM_VER_BUILD_SHIFT)
-		 & I40E_OEM_VER_BUILD_MASK);
+	build = (u16)((full_ver >> I40E_OEM_VER_BUILD_SHIFT) &
+		 I40E_OEM_VER_BUILD_MASK);
 	patch = (u8)(full_ver & I40E_OEM_VER_PATCH_MASK);
 
 	snprintf(buf, sizeof(buf),
@@ -668,6 +675,8 @@ extern const char i40e_driver_name[];
 extern const char i40e_driver_version_str[];
 void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags);
 void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags);
+int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
+int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id);
 void i40e_update_stats(struct i40e_vsi *vsi);
 void i40e_update_eth_stats(struct i40e_vsi *vsi);
@@ -691,7 +700,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
 					bool is_vf, bool is_netdev);
 void i40e_del_filter(struct i40e_vsi *vsi, u8 *macaddr, s16 vlan,
 		     bool is_vf, bool is_netdev);
-int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl);
+int i40e_sync_vsi_filters(struct i40e_vsi *vsi);
 struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
 				u16 uplink, u32 param1);
 int i40e_vsi_release(struct i40e_vsi *vsi);
@@ -709,7 +718,7 @@ struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, u16 uplink_seid,
 void i40e_veb_release(struct i40e_veb *veb);
 
 int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc);
-i40e_status i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid);
+int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid);
 void i40e_vsi_remove_pvid(struct i40e_vsi *vsi);
 void i40e_vsi_reset_stats(struct i40e_vsi *vsi);
 void i40e_pf_reset_stats(struct i40e_pf *pf);
@@ -767,6 +776,8 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid);
 int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid);
 struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, u8 *macaddr,
 					     bool is_vf, bool is_netdev);
+int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, u8 *macaddr,
+			  bool is_vf, bool is_netdev);
 bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi);
 struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr,
 				      bool is_vf, bool is_netdev);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 6584b6cd73fd..b22012a446a6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -227,6 +227,7 @@ enum i40e_admin_queue_opc {
 	i40e_aqc_opc_nvm_update			= 0x0703,
 	i40e_aqc_opc_nvm_config_read		= 0x0704,
 	i40e_aqc_opc_nvm_config_write		= 0x0705,
+	i40e_aqc_opc_oem_post_update		= 0x0720,
 
 	/* virtualization commands */
 	i40e_aqc_opc_send_msg_to_pf		= 0x0801,
@@ -1891,6 +1892,26 @@ struct i40e_aqc_nvm_config_data_immediate_field {
 
 I40E_CHECK_STRUCT_LEN(0xc, i40e_aqc_nvm_config_data_immediate_field);
 
+/* OEM Post Update (indirect 0x0720)
+ * no command data struct used
+ */
+struct i40e_aqc_nvm_oem_post_update {
+#define I40E_AQ_NVM_OEM_POST_UPDATE_EXTERNAL_DATA	0x01
+	u8 sel_data;	/* presumably the flag above - TODO confirm */
+	u8 reserved[7];	/* pads to the 0x8 length checked below */
+};
+
+I40E_CHECK_STRUCT_LEN(0x8, i40e_aqc_nvm_oem_post_update);
+
+struct i40e_aqc_nvm_oem_post_update_buffer {
+	u8 str_len;	/* presumably length of data[] - TODO confirm */
+	u8 dev_addr;
+	__le16 eeprom_addr;
+	u8 data[36];	/* 4 bytes of fields above + 36 = 0x28 checked below */
+};
+
+I40E_CHECK_STRUCT_LEN(0x28, i40e_aqc_nvm_oem_post_update_buffer);
+
 /* Send to PF command (indirect 0x0801) id is only used by PF
  * Send to VF command (indirect 0x0802) id is only used by PF
  * Send to Peer PF command (indirect 0x0803)
@@ -2403,4 +2424,4 @@ struct i40e_aqc_debug_modify_internals {
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_internals);
 
-#endif
+#endif /* _I40E_ADMINQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index d4b7af9a2fc8..10744a698d6f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -103,8 +103,8 @@ static ssize_t i40e_dbg_dump_read(struct file *filp, char __user *buffer,
 	len = min_t(int, count, (i40e_dbg_dump_data_len - *ppos));
 
 	bytes_not_copied = copy_to_user(buffer, &i40e_dbg_dump_buf[*ppos], len);
-	if (bytes_not_copied < 0)
-		return bytes_not_copied;
+	if (bytes_not_copied)
+		return -EFAULT;
 
 	*ppos += len;
 	return len;
@@ -353,8 +353,8 @@ static ssize_t i40e_dbg_command_read(struct file *filp, char __user *buffer,
 	bytes_not_copied = copy_to_user(buffer, buf, len);
 	kfree(buf);
 
-	if (bytes_not_copied < 0)
-		return bytes_not_copied;
+	if (bytes_not_copied)
+		return -EFAULT;
 
 	*ppos = len;
 	return len;
@@ -981,12 +981,10 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 	if (!cmd_buf)
 		return count;
 	bytes_not_copied = copy_from_user(cmd_buf, buffer, count);
-	if (bytes_not_copied < 0) {
+	if (bytes_not_copied) {
 		kfree(cmd_buf);
-		return bytes_not_copied;
+		return -EFAULT;
 	}
-	if (bytes_not_copied > 0)
-		count -= bytes_not_copied;
 	cmd_buf[count] = '\0';
 
 	cmd_buf_tmp = strchr(cmd_buf, '\n');
@@ -1140,7 +1138,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 		spin_lock_bh(&vsi->mac_filter_list_lock);
 		f = i40e_add_filter(vsi, ma, vlan, false, false);
 		spin_unlock_bh(&vsi->mac_filter_list_lock);
-		ret = i40e_sync_vsi_filters(vsi, true);
+		ret = i40e_sync_vsi_filters(vsi);
 		if (f && !ret)
 			dev_info(&pf->pdev->dev,
 				 "add macaddr: %pM vlan=%d added to VSI %d\n",
@@ -1179,7 +1177,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 		spin_lock_bh(&vsi->mac_filter_list_lock);
 		i40e_del_filter(vsi, ma, vlan, false, false);
 		spin_unlock_bh(&vsi->mac_filter_list_lock);
-		ret = i40e_sync_vsi_filters(vsi, true);
+		ret = i40e_sync_vsi_filters(vsi);
 		if (!ret)
 			dev_info(&pf->pdev->dev,
 				 "del macaddr: %pM vlan=%d removed from VSI %d\n",
@@ -2034,8 +2032,8 @@ static ssize_t i40e_dbg_netdev_ops_read(struct file *filp, char __user *buffer,
 	bytes_not_copied = copy_to_user(buffer, buf, len);
 	kfree(buf);
 
-	if (bytes_not_copied < 0)
-		return bytes_not_copied;
+	if (bytes_not_copied)
+		return -EFAULT;
 
 	*ppos = len;
 	return len;
@@ -2068,10 +2066,8 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp,
 	memset(i40e_dbg_netdev_ops_buf, 0, sizeof(i40e_dbg_netdev_ops_buf));
 	bytes_not_copied = copy_from_user(i40e_dbg_netdev_ops_buf,
 					  buffer, count);
-	if (bytes_not_copied < 0)
-		return bytes_not_copied;
-	else if (bytes_not_copied > 0)
-		count -= bytes_not_copied;
+	if (bytes_not_copied)
+		return -EFAULT;
 	i40e_dbg_netdev_ops_buf[count] = '\0';
 
 	buf_tmp = strchr(i40e_dbg_netdev_ops_buf, '\n');
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 3f385ffe420f..29d5833e24a3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -88,6 +88,7 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = {
 	I40E_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
 	I40E_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
 	I40E_VSI_STAT("tx_linearize", tx_linearize),
+	I40E_VSI_STAT("tx_force_wb", tx_force_wb),
 };
 
 /* These PF_STATs might look like duplicates of some NETDEV_STATs,
@@ -230,6 +231,7 @@ static const char i40e_priv_flags_strings[][ETH_GSTRING_LEN] = {
 	"LinkPolling",
 	"flow-director-atr",
 	"veb-stats",
+	"packet-split",
 };
 
 #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_priv_flags_strings)
@@ -2110,7 +2112,7 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 
 	switch (cmd->cmd) {
 	case ETHTOOL_GRXRINGS:
-		cmd->data = vsi->alloc_queue_pairs;
+		cmd->data = vsi->num_queue_pairs;
 		ret = 0;
 		break;
 	case ETHTOOL_GRXFH:
@@ -2583,7 +2585,6 @@ static int i40e_set_channels(struct net_device *dev,
 		return -EINVAL;
 }
 
-#define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4)
 /**
  * i40e_get_rxfh_key_size - get the RSS hash key size
  * @netdev: network interface device structure
@@ -2611,10 +2612,9 @@ static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
-	struct i40e_pf *pf = vsi->back;
-	struct i40e_hw *hw = &pf->hw;
-	u32 reg_val;
-	int i, j;
+	u8 *lut, *seed = NULL;
+	int ret;
+	u16 i;
 
 	if (hfunc)
 		*hfunc = ETH_RSS_HASH_TOP;
@@ -2622,24 +2622,20 @@ static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
 	if (!indir)
 		return 0;
 
-	for (i = 0, j = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) {
-		reg_val = rd32(hw, I40E_PFQF_HLUT(i));
-		indir[j++] = reg_val & 0xff;
-		indir[j++] = (reg_val >> 8) & 0xff;
-		indir[j++] = (reg_val >> 16) & 0xff;
-		indir[j++] = (reg_val >> 24) & 0xff;
-	}
+	seed = key;
+	lut = kzalloc(I40E_HLUT_ARRAY_SIZE, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+	ret = i40e_get_rss(vsi, seed, lut, I40E_HLUT_ARRAY_SIZE);
+	if (ret)
+		goto out;
+	for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++)
+		indir[i] = (u32)(lut[i]);
 
-	if (key) {
-		for (i = 0, j = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) {
-			reg_val = rd32(hw, I40E_PFQF_HKEY(i));
-			key[j++] = (u8)(reg_val & 0xff);
-			key[j++] = (u8)((reg_val >> 8) & 0xff);
-			key[j++] = (u8)((reg_val >> 16) & 0xff);
-			key[j++] = (u8)((reg_val >> 24) & 0xff);
-		}
-	}
-	return 0;
+out:
+	kfree(lut);
+
+	return ret;
 }
 
 /**
@@ -2656,10 +2652,8 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir,
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
-	struct i40e_pf *pf = vsi->back;
-	struct i40e_hw *hw = &pf->hw;
-	u32 reg_val;
-	int i, j;
+	u8 *seed = NULL;
+	u16 i;
 
 	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
 		return -EOPNOTSUPP;
@@ -2667,24 +2661,28 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir,
 	if (!indir)
 		return 0;
 
-	for (i = 0, j = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) {
-		reg_val = indir[j++];
-		reg_val |= indir[j++] << 8;
-		reg_val |= indir[j++] << 16;
-		reg_val |= indir[j++] << 24;
-		wr32(hw, I40E_PFQF_HLUT(i), reg_val);
-	}
-
 	if (key) {
-		for (i = 0, j = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) {
-			reg_val = key[j++];
-			reg_val |= key[j++] << 8;
-			reg_val |= key[j++] << 16;
-			reg_val |= key[j++] << 24;
-			wr32(hw, I40E_PFQF_HKEY(i), reg_val);
+		if (!vsi->rss_hkey_user) {
+			vsi->rss_hkey_user = kzalloc(I40E_HKEY_ARRAY_SIZE,
+						     GFP_KERNEL);
+			if (!vsi->rss_hkey_user)
+				return -ENOMEM;
 		}
+		memcpy(vsi->rss_hkey_user, key, I40E_HKEY_ARRAY_SIZE);
+		seed = vsi->rss_hkey_user;
 	}
-	return 0;
+	if (!vsi->rss_lut_user) {
+		vsi->rss_lut_user = kzalloc(I40E_HLUT_ARRAY_SIZE, GFP_KERNEL);
+		if (!vsi->rss_lut_user)
+			return -ENOMEM;
+	}
+
+	/* Each 32-bit 'indir' entry is truncated to one 8-bit LUT entry */
+	for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++)
+		vsi->rss_lut_user[i] = (u8)(indir[i]);
+
+	return i40e_config_rss(vsi, seed, vsi->rss_lut_user,
+			       I40E_HLUT_ARRAY_SIZE);
 }
 
 /**
@@ -2712,6 +2710,8 @@ static u32 i40e_get_priv_flags(struct net_device *dev)
 		I40E_PRIV_FLAGS_FD_ATR : 0;
 	ret_flags |= pf->flags & I40E_FLAG_VEB_STATS_ENABLED ?
 		I40E_PRIV_FLAGS_VEB_STATS : 0;
+	ret_flags |= pf->flags & I40E_FLAG_RX_PS_ENABLED ?
+		I40E_PRIV_FLAGS_PS : 0;
 
 	return ret_flags;
 }
@@ -2726,6 +2726,26 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
+	bool reset_required = false;
+
+	/* NOTE: MFP is not settable */
+
+	/* allow the user to control the method of receive
+	 * buffer DMA, whether the packet is split at header
+	 * boundaries into two separate buffers.  In some cases
+	 * one routine or the other will perform better.
+	 */
+	if ((flags & I40E_PRIV_FLAGS_PS) &&
+	    !(pf->flags & I40E_FLAG_RX_PS_ENABLED)) {
+		pf->flags |= I40E_FLAG_RX_PS_ENABLED;
+		pf->flags &= ~I40E_FLAG_RX_1BUF_ENABLED;
+		reset_required = true;
+	} else if (!(flags & I40E_PRIV_FLAGS_PS) &&
+		   (pf->flags & I40E_FLAG_RX_PS_ENABLED)) {
+		pf->flags &= ~I40E_FLAG_RX_PS_ENABLED;
+		pf->flags |= I40E_FLAG_RX_1BUF_ENABLED;
+		reset_required = true;
+	}
 
 	if (flags & I40E_PRIV_FLAGS_LINKPOLL_FLAG)
 		pf->flags |= I40E_FLAG_LINK_POLLING_ENABLED;
@@ -2748,6 +2768,10 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
 	else
 		pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED;
 
+	/* if needed, issue reset to cause things to take effect */
+	if (reset_required)
+		i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED));
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
index fe5d9bf3ed6d..579a46ca82df 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
@@ -1544,8 +1544,6 @@ void i40e_fcoe_vsi_setup(struct i40e_pf *pf)
 	if (!(pf->flags & I40E_FLAG_FCOE_ENABLED))
 		return;
 
-	BUG_ON(!pf->vsi[pf->lan_vsi]);
-
 	for (i = 0; i < pf->num_alloc_vsi; i++) {
 		vsi = pf->vsi[i];
 		if (vsi && vsi->type == I40E_VSI_FCOE) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
index 79ae7beeafe5..daa9204426d4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
@@ -762,7 +762,7 @@ static void i40e_write_byte(u8 *hmc_bits,
 
 	/* prepare the bits and mask */
 	shift_width = ce_info->lsb % 8;
-	mask = BIT(ce_info->width) - 1;
+	mask = (u8)(BIT(ce_info->width) - 1);
 
 	src_byte = *from;
 	src_byte &= mask;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index b825f978d441..b118deb08ce6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -38,8 +38,8 @@ static const char i40e_driver_string[] =
 #define DRV_KERN "-k"
 
 #define DRV_VERSION_MAJOR 1
-#define DRV_VERSION_MINOR 3
-#define DRV_VERSION_BUILD 46
+#define DRV_VERSION_MINOR 4
+#define DRV_VERSION_BUILD 8
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
 	     __stringify(DRV_VERSION_MINOR) "." \
 	     __stringify(DRV_VERSION_BUILD)    DRV_KERN
@@ -55,6 +55,8 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit);
 static int i40e_setup_misc_vector(struct i40e_pf *pf);
 static void i40e_determine_queue_usage(struct i40e_pf *pf);
 static int i40e_setup_pf_filter_control(struct i40e_pf *pf);
+static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
+			      u16 rss_table_size, u16 rss_size);
 static void i40e_fdir_sb_setup(struct i40e_pf *pf);
 static int i40e_veb_get_bw_info(struct i40e_veb *veb);
 
@@ -790,75 +792,6 @@ static void i40e_update_fcoe_stats(struct i40e_vsi *vsi)
 
 #endif
 /**
- * i40e_update_link_xoff_rx - Update XOFF received in link flow control mode
- * @pf: the corresponding PF
- *
- * Update the Rx XOFF counter (PAUSE frames) in link flow control mode
- **/
-static void i40e_update_link_xoff_rx(struct i40e_pf *pf)
-{
-	struct i40e_hw_port_stats *osd = &pf->stats_offsets;
-	struct i40e_hw_port_stats *nsd = &pf->stats;
-	struct i40e_hw *hw = &pf->hw;
-	u64 xoff = 0;
-
-	if ((hw->fc.current_mode != I40E_FC_FULL) &&
-	    (hw->fc.current_mode != I40E_FC_RX_PAUSE))
-		return;
-
-	xoff = nsd->link_xoff_rx;
-	i40e_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port),
-			   pf->stat_offsets_loaded,
-			   &osd->link_xoff_rx, &nsd->link_xoff_rx);
-
-	/* No new LFC xoff rx */
-	if (!(nsd->link_xoff_rx - xoff))
-		return;
-
-}
-
-/**
- * i40e_update_prio_xoff_rx - Update XOFF received in PFC mode
- * @pf: the corresponding PF
- *
- * Update the Rx XOFF counter (PAUSE frames) in PFC mode
- **/
-static void i40e_update_prio_xoff_rx(struct i40e_pf *pf)
-{
-	struct i40e_hw_port_stats *osd = &pf->stats_offsets;
-	struct i40e_hw_port_stats *nsd = &pf->stats;
-	bool xoff[I40E_MAX_TRAFFIC_CLASS] = {false};
-	struct i40e_dcbx_config *dcb_cfg;
-	struct i40e_hw *hw = &pf->hw;
-	u16 i;
-	u8 tc;
-
-	dcb_cfg = &hw->local_dcbx_config;
-
-	/* Collect Link XOFF stats when PFC is disabled */
-	if (!dcb_cfg->pfc.pfcenable) {
-		i40e_update_link_xoff_rx(pf);
-		return;
-	}
-
-	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
-		u64 prio_xoff = nsd->priority_xoff_rx[i];
-
-		i40e_stat_update32(hw, I40E_GLPRT_PXOFFRXC(hw->port, i),
-				   pf->stat_offsets_loaded,
-				   &osd->priority_xoff_rx[i],
-				   &nsd->priority_xoff_rx[i]);
-
-		/* No new PFC xoff rx */
-		if (!(nsd->priority_xoff_rx[i] - prio_xoff))
-			continue;
-		/* Get the TC for given priority */
-		tc = dcb_cfg->etscfg.prioritytable[i];
-		xoff[tc] = true;
-	}
-}
-
-/**
  * i40e_update_vsi_stats - Update the vsi statistics counters.
  * @vsi: the VSI to be updated
  *
@@ -881,6 +814,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
 	u64 bytes, packets;
 	unsigned int start;
 	u64 tx_linearize;
+	u64 tx_force_wb;
 	u64 rx_p, rx_b;
 	u64 tx_p, tx_b;
 	u16 q;
@@ -899,7 +833,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
 	 */
 	rx_b = rx_p = 0;
 	tx_b = tx_p = 0;
-	tx_restart = tx_busy = tx_linearize = 0;
+	tx_restart = tx_busy = tx_linearize = tx_force_wb = 0;
 	rx_page = 0;
 	rx_buf = 0;
 	rcu_read_lock();
@@ -917,6 +851,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
 		tx_restart += p->tx_stats.restart_queue;
 		tx_busy += p->tx_stats.tx_busy;
 		tx_linearize += p->tx_stats.tx_linearize;
+		tx_force_wb += p->tx_stats.tx_force_wb;
 
 		/* Rx queue is part of the same block as Tx queue */
 		p = &p[1];
@@ -934,6 +869,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
 	vsi->tx_restart = tx_restart;
 	vsi->tx_busy = tx_busy;
 	vsi->tx_linearize = tx_linearize;
+	vsi->tx_force_wb = tx_force_wb;
 	vsi->rx_page_failed = rx_page;
 	vsi->rx_buf_failed = rx_buf;
 
@@ -1049,12 +985,18 @@ static void i40e_update_pf_stats(struct i40e_pf *pf)
 	i40e_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port),
 			   pf->stat_offsets_loaded,
 			   &osd->link_xon_tx, &nsd->link_xon_tx);
-	i40e_update_prio_xoff_rx(pf);  /* handles I40E_GLPRT_LXOFFRXC */
+	i40e_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->link_xoff_rx, &nsd->link_xoff_rx);
 	i40e_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port),
 			   pf->stat_offsets_loaded,
 			   &osd->link_xoff_tx, &nsd->link_xoff_tx);
 
 	for (i = 0; i < 8; i++) {
+		i40e_stat_update32(hw, I40E_GLPRT_PXOFFRXC(hw->port, i),
+				   pf->stat_offsets_loaded,
+				   &osd->priority_xoff_rx[i],
+				   &nsd->priority_xoff_rx[i]);
 		i40e_stat_update32(hw, I40E_GLPRT_PXONRXC(hw->port, i),
 				   pf->stat_offsets_loaded,
 				   &osd->priority_xon_rx[i],
@@ -1317,6 +1259,42 @@ struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, u8 *macaddr,
 }
 
 /**
+ * i40e_del_mac_all_vlan - Remove a MAC filter from all VLANs
+ * @vsi: the VSI to be searched
+ * @macaddr: the MAC address to be removed
+ * @is_vf: true if it is a VF
+ * @is_netdev: true if it is a netdev
+ *
+ * Decrements the counter of every filter matching @macaddr, @is_vf and
+ * @is_netdev regardless of VLAN. Caller must hold mac_filter_list_lock.
+ *
+ * Returns 0 if any filter matched, -ENOENT otherwise
+ **/
+int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, u8 *macaddr,
+			  bool is_vf, bool is_netdev)
+{
+	struct i40e_mac_filter *f;
+	bool changed = false;
+
+	/* lockdep, unlike WARN(!spin_is_locked()), is also correct on UP */
+	lockdep_assert_held(&vsi->mac_filter_list_lock);
+	list_for_each_entry(f, &vsi->mac_filter_list, list) {
+		if (!ether_addr_equal(macaddr, f->macaddr) ||
+		    is_vf != f->is_vf || is_netdev != f->is_netdev)
+			continue;
+		f->counter--;
+		f->changed = true;
+		changed = true;
+	}
+	if (!changed)
+		return -ENOENT;
+
+	vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+	vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
+	return 0;
+}
+
+/**
  * i40e_rm_default_mac_filter - Remove the default MAC filter set by NVM
  * @vsi: the PF Main VSI - inappropriate for any other VSI
  * @macaddr: the MAC address
@@ -1547,10 +1525,9 @@ static int i40e_set_mac(struct net_device *netdev, void *p)
 		spin_unlock_bh(&vsi->mac_filter_list_lock);
 	}
 
-	i40e_sync_vsi_filters(vsi, false);
 	ether_addr_copy(netdev->dev_addr, addr->sa_data);
 
-	return 0;
+	return i40e_sync_vsi_filters(vsi);
 }
 
 /**
@@ -1590,7 +1567,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
 	if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) {
 		/* Find numtc from enabled TC bitmap */
 		for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
-			if (enabled_tc & BIT_ULL(i)) /* TC is enabled */
+			if (enabled_tc & BIT(i)) /* TC is enabled */
 				numtc++;
 		}
 		if (!numtc) {
@@ -1619,13 +1596,14 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
 	/* Setup queue offset/count for all TCs for given VSI */
 	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
 		/* See if the given TC is enabled for the given VSI */
-		if (vsi->tc_config.enabled_tc & BIT_ULL(i)) {
+		if (vsi->tc_config.enabled_tc & BIT(i)) {
 			/* TC is enabled */
 			int pow, num_qps;
 
 			switch (vsi->type) {
 			case I40E_VSI_MAIN:
-				qcount = min_t(int, pf->rss_size, num_tc_qps);
+				qcount = min_t(int, pf->alloc_rss_size,
+					       num_tc_qps);
 				break;
 #ifdef I40E_FCOE
 			case I40E_VSI_FCOE:
@@ -1851,13 +1829,12 @@ static void i40e_cleanup_add_list(struct list_head *add_list)
 /**
  * i40e_sync_vsi_filters - Update the VSI filter list to the HW
  * @vsi: ptr to the VSI
- * @grab_rtnl: whether RTNL needs to be grabbed
  *
  * Push any outstanding VSI filter changes through the AdminQ.
  *
  * Returns 0 or error value
  **/
-int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
+int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 {
 	struct list_head tmp_del_list, tmp_add_list;
 	struct i40e_mac_filter *f, *ftmp, *fclone;
@@ -1865,8 +1842,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
 	bool add_happened = false;
 	int filter_list_len = 0;
 	u32 changed_flags = 0;
+	i40e_status aq_ret = 0;
 	bool err_cond = false;
-	i40e_status ret = 0;
+	int retval = 0;
 	struct i40e_pf *pf;
 	int num_add = 0;
 	int num_del = 0;
@@ -1929,17 +1907,22 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
 		}
 		spin_unlock_bh(&vsi->mac_filter_list_lock);
 
-		if (err_cond)
+		if (err_cond) {
 			i40e_cleanup_add_list(&tmp_add_list);
+			retval = -ENOMEM;
+			goto out;
+		}
 	}
 
 	/* Now process 'del_list' outside the lock */
 	if (!list_empty(&tmp_del_list)) {
+		int del_list_size;
+
 		filter_list_len = pf->hw.aq.asq_buf_size /
 			    sizeof(struct i40e_aqc_remove_macvlan_element_data);
-		del_list = kcalloc(filter_list_len,
-			    sizeof(struct i40e_aqc_remove_macvlan_element_data),
-			    GFP_KERNEL);
+		del_list_size = filter_list_len *
+			    sizeof(struct i40e_aqc_remove_macvlan_element_data);
+		del_list = kzalloc(del_list_size, GFP_KERNEL);
 		if (!del_list) {
 			i40e_cleanup_add_list(&tmp_add_list);
 
@@ -1948,7 +1931,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
 			i40e_undo_del_filter_entries(vsi, &tmp_del_list);
 			i40e_undo_add_filter_entries(vsi);
 			spin_unlock_bh(&vsi->mac_filter_list_lock);
-			return -ENOMEM;
+			retval = -ENOMEM;
+			goto out;
 		}
 
 		list_for_each_entry_safe(f, ftmp, &tmp_del_list, list) {
@@ -1966,18 +1950,22 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
 
 			/* flush a full buffer */
 			if (num_del == filter_list_len) {
-				ret = i40e_aq_remove_macvlan(&pf->hw,
-						  vsi->seid, del_list, num_del,
-						  NULL);
+				aq_ret = i40e_aq_remove_macvlan(&pf->hw,
+								vsi->seid,
+								del_list,
+								num_del,
+								NULL);
 				aq_err = pf->hw.aq.asq_last_status;
 				num_del = 0;
-				memset(del_list, 0, sizeof(*del_list));
+				memset(del_list, 0, del_list_size);
 
-				if (ret && aq_err != I40E_AQ_RC_ENOENT)
+				if (aq_ret && aq_err != I40E_AQ_RC_ENOENT) {
+					retval = -EIO;
 					dev_err(&pf->pdev->dev,
 						"ignoring delete macvlan error, err %s, aq_err %s while flushing a full buffer\n",
-						i40e_stat_str(&pf->hw, ret),
+						i40e_stat_str(&pf->hw, aq_ret),
 						i40e_aq_str(&pf->hw, aq_err));
+				}
 			}
 			/* Release memory for MAC filter entries which were
 			 * synced up with HW.
@@ -1987,15 +1975,16 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
 		}
 
 		if (num_del) {
-			ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid,
-						     del_list, num_del, NULL);
+			aq_ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid,
+							del_list, num_del,
+							NULL);
 			aq_err = pf->hw.aq.asq_last_status;
 			num_del = 0;
 
-			if (ret && aq_err != I40E_AQ_RC_ENOENT)
+			if (aq_ret && aq_err != I40E_AQ_RC_ENOENT)
 				dev_info(&pf->pdev->dev,
 					 "ignoring delete macvlan error, err %s aq_err %s\n",
-					 i40e_stat_str(&pf->hw, ret),
+					 i40e_stat_str(&pf->hw, aq_ret),
 					 i40e_aq_str(&pf->hw, aq_err));
 		}
 
@@ -2004,13 +1993,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
 	}
 
 	if (!list_empty(&tmp_add_list)) {
+		int add_list_size;
 
 		/* do all the adds now */
 		filter_list_len = pf->hw.aq.asq_buf_size /
 			       sizeof(struct i40e_aqc_add_macvlan_element_data),
-		add_list = kcalloc(filter_list_len,
-			       sizeof(struct i40e_aqc_add_macvlan_element_data),
-			       GFP_KERNEL);
+		add_list_size = filter_list_len *
+			       sizeof(struct i40e_aqc_add_macvlan_element_data);
+		add_list = kzalloc(add_list_size, GFP_KERNEL);
 		if (!add_list) {
 			/* Purge element from temporary lists */
 			i40e_cleanup_add_list(&tmp_add_list);
@@ -2019,7 +2009,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
 			spin_lock_bh(&vsi->mac_filter_list_lock);
 			i40e_undo_add_filter_entries(vsi);
 			spin_unlock_bh(&vsi->mac_filter_list_lock);
-			return -ENOMEM;
+			retval = -ENOMEM;
+			goto out;
 		}
 
 		list_for_each_entry_safe(f, ftmp, &tmp_add_list, list) {
@@ -2040,15 +2031,15 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
 
 			/* flush a full buffer */
 			if (num_add == filter_list_len) {
-				ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid,
-							  add_list, num_add,
-							  NULL);
+				aq_ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid,
+							     add_list, num_add,
+							     NULL);
 				aq_err = pf->hw.aq.asq_last_status;
 				num_add = 0;
 
-				if (ret)
+				if (aq_ret)
 					break;
-				memset(add_list, 0, sizeof(*add_list));
+				memset(add_list, 0, add_list_size);
 			}
 			/* Entries from tmp_add_list were cloned from MAC
 			 * filter list, hence clean those cloned entries
@@ -2058,18 +2049,19 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
 		}
 
 		if (num_add) {
-			ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid,
-						  add_list, num_add, NULL);
+			aq_ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid,
+						     add_list, num_add, NULL);
 			aq_err = pf->hw.aq.asq_last_status;
 			num_add = 0;
 		}
 		kfree(add_list);
 		add_list = NULL;
 
-		if (add_happened && ret && aq_err != I40E_AQ_RC_EINVAL) {
+		if (add_happened && aq_ret && aq_err != I40E_AQ_RC_EINVAL) {
+			retval = i40e_aq_rc_to_posix(aq_ret, aq_err);
 			dev_info(&pf->pdev->dev,
 				 "add filter failed, err %s aq_err %s\n",
-				 i40e_stat_str(&pf->hw, ret),
+				 i40e_stat_str(&pf->hw, aq_ret),
 				 i40e_aq_str(&pf->hw, aq_err));
 			if ((pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOSPC) &&
 			    !test_bit(__I40E_FILTER_OVERFLOW_PROMISC,
@@ -2087,16 +2079,19 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
 		bool cur_multipromisc;
 
 		cur_multipromisc = !!(vsi->current_netdev_flags & IFF_ALLMULTI);
-		ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw,
-							    vsi->seid,
-							    cur_multipromisc,
-							    NULL);
-		if (ret)
+		aq_ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw,
+							       vsi->seid,
+							       cur_multipromisc,
+							       NULL);
+		if (aq_ret) {
+			retval = i40e_aq_rc_to_posix(aq_ret,
+						     pf->hw.aq.asq_last_status);
 			dev_info(&pf->pdev->dev,
 				 "set multi promisc failed, err %s aq_err %s\n",
-				 i40e_stat_str(&pf->hw, ret),
+				 i40e_stat_str(&pf->hw, aq_ret),
 				 i40e_aq_str(&pf->hw,
 					     pf->hw.aq.asq_last_status));
+		}
 	}
 	if ((changed_flags & IFF_PROMISC) || promisc_forced_on) {
 		bool cur_promisc;
@@ -2112,44 +2107,50 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl)
 			 */
 			if (pf->cur_promisc != cur_promisc) {
 				pf->cur_promisc = cur_promisc;
-				if (grab_rtnl)
-					i40e_do_reset_safe(pf,
-						BIT(__I40E_PF_RESET_REQUESTED));
-				else
-					i40e_do_reset(pf,
-						BIT(__I40E_PF_RESET_REQUESTED));
+				set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
 			}
 		} else {
-			ret = i40e_aq_set_vsi_unicast_promiscuous(
+			aq_ret = i40e_aq_set_vsi_unicast_promiscuous(
 							  &vsi->back->hw,
 							  vsi->seid,
 							  cur_promisc, NULL);
-			if (ret)
+			if (aq_ret) {
+				retval =
+				i40e_aq_rc_to_posix(aq_ret,
+						    pf->hw.aq.asq_last_status);
 				dev_info(&pf->pdev->dev,
 					 "set unicast promisc failed, err %d, aq_err %d\n",
-					 ret, pf->hw.aq.asq_last_status);
-			ret = i40e_aq_set_vsi_multicast_promiscuous(
+					 aq_ret, pf->hw.aq.asq_last_status);
+			}
+			aq_ret = i40e_aq_set_vsi_multicast_promiscuous(
 							  &vsi->back->hw,
 							  vsi->seid,
 							  cur_promisc, NULL);
-			if (ret)
+			if (aq_ret) {
+				retval =
+				i40e_aq_rc_to_posix(aq_ret,
+						    pf->hw.aq.asq_last_status);
 				dev_info(&pf->pdev->dev,
 					 "set multicast promisc failed, err %d, aq_err %d\n",
-					 ret, pf->hw.aq.asq_last_status);
+					 aq_ret, pf->hw.aq.asq_last_status);
+			}
 		}
-		ret = i40e_aq_set_vsi_broadcast(&vsi->back->hw,
-						vsi->seid,
-						cur_promisc, NULL);
-		if (ret)
+		aq_ret = i40e_aq_set_vsi_broadcast(&vsi->back->hw,
+						   vsi->seid,
+						   cur_promisc, NULL);
+		if (aq_ret) {
+			retval = i40e_aq_rc_to_posix(aq_ret,
+						     pf->hw.aq.asq_last_status);
 			dev_info(&pf->pdev->dev,
 				 "set brdcast promisc failed, err %s, aq_err %s\n",
-				 i40e_stat_str(&pf->hw, ret),
+				 i40e_stat_str(&pf->hw, aq_ret),
 				 i40e_aq_str(&pf->hw,
 					     pf->hw.aq.asq_last_status));
+		}
 	}
-
+out:
 	clear_bit(__I40E_CONFIG_BUSY, &vsi->state);
-	return 0;
+	return retval;
 }
 
 /**
@@ -2166,8 +2167,15 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
 
 	for (v = 0; v < pf->num_alloc_vsi; v++) {
 		if (pf->vsi[v] &&
-		    (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED))
-			i40e_sync_vsi_filters(pf->vsi[v], true);
+		    (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) {
+			int ret = i40e_sync_vsi_filters(pf->vsi[v]);
+
+			if (ret) {
+				/* come back and try again later */
+				pf->flags |= I40E_FLAG_FILTER_SYNC;
+				break;
+			}
+		}
 	}
 }
 
@@ -2377,16 +2385,13 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid)
 		}
 	}
 
-	/* Make sure to release before sync_vsi_filter because that
-	 * function will lock/unlock as necessary
-	 */
 	spin_unlock_bh(&vsi->mac_filter_list_lock);
 
-	if (test_bit(__I40E_DOWN, &vsi->back->state) ||
-	    test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
-		return 0;
-
-	return i40e_sync_vsi_filters(vsi, false);
+	/* schedule our worker thread which will take care of
+	 * applying the new filter changes
+	 */
+	i40e_service_event_schedule(vsi->back);
+	return 0;
 }
 
 /**
@@ -2459,16 +2464,13 @@ int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid)
 		}
 	}
 
-	/* Make sure to release before sync_vsi_filter because that
-	 * function with lock/unlock as necessary
-	 */
 	spin_unlock_bh(&vsi->mac_filter_list_lock);
 
-	if (test_bit(__I40E_DOWN, &vsi->back->state) ||
-	    test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
-		return 0;
-
-	return i40e_sync_vsi_filters(vsi, false);
+	/* schedule our worker thread which will take care of
+	 * applying the new filter changes
+	 */
+	i40e_service_event_schedule(vsi->back);
+	return 0;
 }
 
 /**
@@ -2711,6 +2713,11 @@ static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
 		netif_set_xps_queue(ring->netdev, mask, ring->queue_index);
 		free_cpumask_var(mask);
 	}
+
+	/* schedule our worker thread which will take care of
+	 * applying the new filter changes
+	 */
+	i40e_service_event_schedule(vsi->back);
 }
 
 /**
@@ -4360,17 +4367,41 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi)
 	else
 		val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0);
 
+	/* Bail out if interrupts are disabled, because napi_poll is
+	 * either already executing or will be scheduled soon.
+	 * napi_poll cleans TX and RX queues and updates 'next_to_clean'.
+	 */
+	if (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))
+		return;
+
 	head = i40e_get_head(tx_ring);
 
 	tx_pending = i40e_get_tx_pending(tx_ring);
 
-	/* Interrupts are disabled and TX pending is non-zero,
-	 * trigger the SW interrupt (don't wait). Worst case
-	 * there will be one extra interrupt which may result
-	 * into not cleaning any queues because queues are cleaned.
+	/* HW is done executing descriptors and has updated the HEAD
+	 * write-back, but SW hasn't processed those descriptors. If no
+	 * interrupt is generated from this point on, dev_watchdog could
+	 * detect a timeout on those netdev queues, hence proactively
+	 * trigger a SW interrupt.
 	 */
-	if (tx_pending && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK)))
-		i40e_force_wb(vsi, tx_ring->q_vector);
+	if (tx_pending) {
+		/* NAPI poll didn't run and clear the bit since it was set */
+		if (test_and_clear_bit(I40E_Q_VECTOR_HUNG_DETECT,
+				       &tx_ring->q_vector->hung_detected)) {
+			netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n",
+				    vsi->seid, q_idx, tx_pending,
+				    tx_ring->next_to_clean, head,
+				    tx_ring->next_to_use,
+				    readl(tx_ring->tail));
+			netdev_info(vsi->netdev, "VSI_seid %d, Issuing force_wb for TX queue %d, Interrupt Reg: 0x%x\n",
+				    vsi->seid, q_idx, val);
+			i40e_force_wb(vsi, tx_ring->q_vector);
+		} else {
+			/* First chance - detected a possible hang */
+			set_bit(I40E_Q_VECTOR_HUNG_DETECT,
+				&tx_ring->q_vector->hung_detected);
+		}
+	}
 }
 
 /**
@@ -4441,7 +4472,7 @@ static u8 i40e_get_iscsi_tc_map(struct i40e_pf *pf)
 		if (app.selector == I40E_APP_SEL_TCPIP &&
 		    app.protocolid == I40E_APP_PROTOID_ISCSI) {
 			tc = dcbcfg->etscfg.prioritytable[app.priority];
-			enabled_tc |= BIT_ULL(tc);
+			enabled_tc |= BIT(tc);
 			break;
 		}
 	}
@@ -4525,7 +4556,7 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
 	/* At least have TC0 */
 	enabled_tc = (enabled_tc ? enabled_tc : 0x1);
 	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
-		if (enabled_tc & BIT_ULL(i))
+		if (enabled_tc & BIT(i))
 			num_tc++;
 	}
 	return num_tc;
@@ -4547,7 +4578,7 @@ static u8 i40e_pf_get_default_tc(struct i40e_pf *pf)
 
 	/* Find the first enabled TC */
 	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
-		if (enabled_tc & BIT_ULL(i))
+		if (enabled_tc & BIT(i))
 			break;
 	}
 
@@ -4707,7 +4738,7 @@ static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc)
 		 * will set the numtc for netdev as 2 that will be
 		 * referenced by the netdev layer as TC 0 and 1.
 		 */
-		if (vsi->tc_config.enabled_tc & BIT_ULL(i))
+		if (vsi->tc_config.enabled_tc & BIT(i))
 			netdev_set_tc_queue(netdev,
 					vsi->tc_config.tc_info[i].netdev_tc,
 					vsi->tc_config.tc_info[i].qcount,
@@ -4769,7 +4800,7 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
 
 	/* Enable ETS TCs with equal BW Share for now across all VSIs */
 	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
-		if (enabled_tc & BIT_ULL(i))
+		if (enabled_tc & BIT(i))
 			bw_share[i] = 1;
 	}
 
@@ -4843,7 +4874,7 @@ int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc)
 
 	/* Enable ETS TCs with equal BW Share for now */
 	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
-		if (enabled_tc & BIT_ULL(i))
+		if (enabled_tc & BIT(i))
 			bw_data.tc_bw_share_credits[i] = 1;
 	}
 
@@ -5240,7 +5271,7 @@ static int i40e_setup_tc(struct net_device *netdev, u8 tc)
 
 	/* Generate TC map for number of tc requested */
 	for (i = 0; i < tc; i++)
-		enabled_tc |= BIT_ULL(i);
+		enabled_tc |= BIT(i);
 
 	/* Requesting same TC configuration as already enabled */
 	if (enabled_tc == vsi->tc_config.enabled_tc)
@@ -5738,7 +5769,7 @@ static void i40e_handle_lan_overflow_event(struct i40e_pf *pf,
  **/
 static void i40e_service_event_complete(struct i40e_pf *pf)
 {
-	BUG_ON(!test_bit(__I40E_SERVICE_SCHED, &pf->state));
+	WARN_ON(!test_bit(__I40E_SERVICE_SCHED, &pf->state));
 
 	/* flush memory to make sure state is correct before next watchog */
 	smp_mb__before_atomic();
@@ -6013,6 +6044,9 @@ static void i40e_link_event(struct i40e_pf *pf)
 	i40e_status status;
 	bool new_link, old_link;
 
+	/* save off old link status information */
+	pf->hw.phy.link_info_old = pf->hw.phy.link_info;
+
 	/* set this to force the get_link_status call to refresh state */
 	pf->hw.phy.get_link_info = true;
 
@@ -6101,23 +6135,23 @@ static void i40e_reset_subtask(struct i40e_pf *pf)
 
 	rtnl_lock();
 	if (test_bit(__I40E_REINIT_REQUESTED, &pf->state)) {
-		reset_flags |= BIT_ULL(__I40E_REINIT_REQUESTED);
+		reset_flags |= BIT(__I40E_REINIT_REQUESTED);
 		clear_bit(__I40E_REINIT_REQUESTED, &pf->state);
 	}
 	if (test_bit(__I40E_PF_RESET_REQUESTED, &pf->state)) {
-		reset_flags |= BIT_ULL(__I40E_PF_RESET_REQUESTED);
+		reset_flags |= BIT(__I40E_PF_RESET_REQUESTED);
 		clear_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
 	}
 	if (test_bit(__I40E_CORE_RESET_REQUESTED, &pf->state)) {
-		reset_flags |= BIT_ULL(__I40E_CORE_RESET_REQUESTED);
+		reset_flags |= BIT(__I40E_CORE_RESET_REQUESTED);
 		clear_bit(__I40E_CORE_RESET_REQUESTED, &pf->state);
 	}
 	if (test_bit(__I40E_GLOBAL_RESET_REQUESTED, &pf->state)) {
-		reset_flags |= BIT_ULL(__I40E_GLOBAL_RESET_REQUESTED);
+		reset_flags |= BIT(__I40E_GLOBAL_RESET_REQUESTED);
 		clear_bit(__I40E_GLOBAL_RESET_REQUESTED, &pf->state);
 	}
 	if (test_bit(__I40E_DOWN_REQUESTED, &pf->state)) {
-		reset_flags |= BIT_ULL(__I40E_DOWN_REQUESTED);
+		reset_flags |= BIT(__I40E_DOWN_REQUESTED);
 		clear_bit(__I40E_DOWN_REQUESTED, &pf->state);
 	}
 
@@ -6147,13 +6181,9 @@ unlock:
 static void i40e_handle_link_event(struct i40e_pf *pf,
 				   struct i40e_arq_event_info *e)
 {
-	struct i40e_hw *hw = &pf->hw;
 	struct i40e_aqc_get_link_status *status =
 		(struct i40e_aqc_get_link_status *)&e->desc.params.raw;
 
-	/* save off old link status information */
-	hw->phy.link_info_old = hw->phy.link_info;
-
 	/* Do a new status request to re-enable LSE reporting
 	 * and load new status information into the hw struct
 	 * This completely ignores any state information
@@ -6192,15 +6222,18 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf)
 	val = rd32(&pf->hw, pf->hw.aq.arq.len);
 	oldval = val;
 	if (val & I40E_PF_ARQLEN_ARQVFE_MASK) {
-		dev_info(&pf->pdev->dev, "ARQ VF Error detected\n");
+		if (hw->debug_mask & I40E_DEBUG_AQ)
+			dev_info(&pf->pdev->dev, "ARQ VF Error detected\n");
 		val &= ~I40E_PF_ARQLEN_ARQVFE_MASK;
 	}
 	if (val & I40E_PF_ARQLEN_ARQOVFL_MASK) {
-		dev_info(&pf->pdev->dev, "ARQ Overflow Error detected\n");
+		if (hw->debug_mask & I40E_DEBUG_AQ)
+			dev_info(&pf->pdev->dev, "ARQ Overflow Error detected\n");
 		val &= ~I40E_PF_ARQLEN_ARQOVFL_MASK;
 	}
 	if (val & I40E_PF_ARQLEN_ARQCRIT_MASK) {
-		dev_info(&pf->pdev->dev, "ARQ Critical Error detected\n");
+		if (hw->debug_mask & I40E_DEBUG_AQ)
+			dev_info(&pf->pdev->dev, "ARQ Critical Error detected\n");
 		val &= ~I40E_PF_ARQLEN_ARQCRIT_MASK;
 	}
 	if (oldval != val)
@@ -6209,15 +6242,18 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf)
 	val = rd32(&pf->hw, pf->hw.aq.asq.len);
 	oldval = val;
 	if (val & I40E_PF_ATQLEN_ATQVFE_MASK) {
-		dev_info(&pf->pdev->dev, "ASQ VF Error detected\n");
+		if (pf->hw.debug_mask & I40E_DEBUG_AQ)
+			dev_info(&pf->pdev->dev, "ASQ VF Error detected\n");
 		val &= ~I40E_PF_ATQLEN_ATQVFE_MASK;
 	}
 	if (val & I40E_PF_ATQLEN_ATQOVFL_MASK) {
-		dev_info(&pf->pdev->dev, "ASQ Overflow Error detected\n");
+		if (pf->hw.debug_mask & I40E_DEBUG_AQ)
+			dev_info(&pf->pdev->dev, "ASQ Overflow Error detected\n");
 		val &= ~I40E_PF_ATQLEN_ATQOVFL_MASK;
 	}
 	if (val & I40E_PF_ATQLEN_ATQCRIT_MASK) {
-		dev_info(&pf->pdev->dev, "ASQ Critical Error detected\n");
+		if (pf->hw.debug_mask & I40E_DEBUG_AQ)
+			dev_info(&pf->pdev->dev, "ASQ Critical Error detected\n");
 		val &= ~I40E_PF_ATQLEN_ATQCRIT_MASK;
 	}
 	if (oldval != val)
@@ -6268,6 +6304,7 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf)
 			break;
 		case i40e_aqc_opc_nvm_erase:
 		case i40e_aqc_opc_nvm_update:
+		case i40e_aqc_opc_oem_post_update:
 			i40e_debug(&pf->hw, I40E_DEBUG_NVM, "ARQ NVM operation completed\n");
 			break;
 		default:
@@ -6685,6 +6722,7 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit)
 	struct i40e_hw *hw = &pf->hw;
 	u8 set_fc_aq_fail = 0;
 	i40e_status ret;
+	u32 val;
 	u32 v;
 
 	/* Now we wait for GRST to settle out.
@@ -6823,6 +6861,20 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit)
 		}
 	}
 
+	/* Reconfigure hardware for allowing smaller MSS in the case
+	 * of TSO, so that we avoid the MDD being fired and causing
+	 * a reset in the case of small MSS+TSO.
+	 */
+#define I40E_REG_MSS          0x000E64DC
+#define I40E_REG_MSS_MIN_MASK 0x3FF0000
+#define I40E_64BYTE_MSS       0x400000
+	val = rd32(hw, I40E_REG_MSS);
+	if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) {
+		val &= ~I40E_REG_MSS_MIN_MASK;
+		val |= I40E_64BYTE_MSS;
+		wr32(hw, I40E_REG_MSS, val);
+	}
+
 	if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) ||
 	    (pf->hw.aq.fw_maj_ver < 4)) {
 		msleep(75);
@@ -7282,6 +7334,23 @@ static void i40e_vsi_free_arrays(struct i40e_vsi *vsi, bool free_qvectors)
 }
 
 /**
+ * i40e_clear_rss_config_user - free the user configured RSS hash key
+ * and lookup table, and reset both pointers to NULL
+ * @vsi: Pointer to VSI structure; a NULL @vsi is accepted and ignored
+ */
+static void i40e_clear_rss_config_user(struct i40e_vsi *vsi)
+{
+	if (!vsi)
+		return;
+
+	kfree(vsi->rss_hkey_user);
+	vsi->rss_hkey_user = NULL;
+
+	kfree(vsi->rss_lut_user);
+	vsi->rss_lut_user = NULL;
+}
+
+/**
  * i40e_vsi_clear - Deallocate the VSI provided
  * @vsi: the VSI being un-configured
  **/
@@ -7318,6 +7387,7 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi)
 	i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx);
 
 	i40e_vsi_free_arrays(vsi, true);
+	i40e_clear_rss_config_user(vsi);
 
 	pf->vsi[vsi->idx] = NULL;
 	if (vsi->idx < pf->next_vsi)
@@ -7780,7 +7850,8 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf)
  * @vsi: vsi structure
  * @seed: RSS hash seed
  **/
-static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed)
+static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
+			      u8 *lut, u16 lut_size)
 {
 	struct i40e_aqc_get_set_rss_key_data rss_key;
 	struct i40e_pf *pf = vsi->back;
@@ -7833,43 +7904,57 @@ static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
 {
 	u8 seed[I40E_HKEY_ARRAY_SIZE];
 	struct i40e_pf *pf = vsi->back;
+	u8 *lut;
+	int ret;
 
-	netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
-	vsi->rss_size = min_t(int, pf->rss_size, vsi->num_queue_pairs);
+	if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE))
+		return 0;
 
-	if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE)
-		return i40e_config_rss_aq(vsi, seed);
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
 
-	return 0;
+	i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
+	netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+	vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs);
+	ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
+	kfree(lut);
+
+	return ret;
 }
 
 /**
- * i40e_config_rss_reg - Prepare for RSS if used
- * @pf: board private structure
+ * i40e_config_rss_reg - Configure RSS keys and lut by writing registers
+ * @vsi: Pointer to vsi structure
  * @seed: RSS hash seed
+ * @lut: Lookup table
+ * @lut_size: Lookup table size
+ *
+ * Returns 0 on success, negative on failure
  **/
-static int i40e_config_rss_reg(struct i40e_pf *pf, const u8 *seed)
+static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed,
+			       const u8 *lut, u16 lut_size)
 {
-	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	struct i40e_pf *pf = vsi->back;
 	struct i40e_hw *hw = &pf->hw;
-	u32 *seed_dw = (u32 *)seed;
-	u32 current_queue = 0;
-	u32 lut = 0;
-	int i, j;
+	u8 i;
 
 	/* Fill out hash function seed */
-	for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
-		wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]);
+	if (seed) {
+		u32 *seed_dw = (u32 *)seed;
 
-	for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) {
-		lut = 0;
-		for (j = 0; j < 4; j++) {
-			if (current_queue == vsi->rss_size)
-				current_queue = 0;
-			lut |= ((current_queue) << (8 * j));
-			current_queue++;
-		}
-		wr32(&pf->hw, I40E_PFQF_HLUT(i), lut);
+		for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
+			wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]);
+	}
+
+	if (lut) {
+		u32 *lut_dw = (u32 *)lut;
+
+		if (lut_size != I40E_HLUT_ARRAY_SIZE)
+			return -EINVAL;
+
+		for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
+			wr32(hw, I40E_PFQF_HLUT(i), lut_dw[i]);
 	}
 	i40e_flush(hw);
 
@@ -7877,18 +7962,101 @@ static int i40e_config_rss_reg(struct i40e_pf *pf, const u8 *seed)
 }
 
 /**
- * i40e_config_rss - Prepare for RSS if used
+ * i40e_get_rss_reg - Get the RSS keys and lut by reading registers
+ * @vsi: Pointer to VSI structure
+ * @seed: Buffer to store the keys, or NULL to skip reading the keys
+ * @lut: Buffer to store the lookup table entries, or NULL to skip them
+ * @lut_size: Size of @lut; must be I40E_HLUT_ARRAY_SIZE when @lut is set
+ *
+ * Returns 0 on success, -EINVAL if @lut is set and @lut_size is wrong
+ */
+static int i40e_get_rss_reg(struct i40e_vsi *vsi, u8 *seed,
+			    u8 *lut, u16 lut_size)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	u16 i;
+
+	if (seed) {
+		u32 *seed_dw = (u32 *)seed;
+
+		for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
+			seed_dw[i] = rd32(hw, I40E_PFQF_HKEY(i));
+	}
+	if (lut) {
+		u32 *lut_dw = (u32 *)lut;
+
+		if (lut_size != I40E_HLUT_ARRAY_SIZE)
+			return -EINVAL;
+		for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
+			lut_dw[i] = rd32(hw, I40E_PFQF_HLUT(i));
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_config_rss - Configure RSS keys and lut through the AQ or registers
+ * @vsi: Pointer to VSI structure
+ * @seed: RSS hash seed
+ * @lut: Lookup table
+ * @lut_size: Lookup table size
+ *
+ * Returns 0 on success, negative on failure
+ */
+int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
+{
+	struct i40e_pf *pf = vsi->back;
+
+	if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE)
+		return i40e_config_rss_aq(vsi, seed, lut, lut_size);
+	else
+		return i40e_config_rss_reg(vsi, seed, lut, lut_size);
+}
+
+/**
+ * i40e_get_rss - Get RSS keys and lut (always read from registers here)
+ * @vsi: Pointer to VSI structure
+ * @seed: Buffer to store the keys
+ * @lut: Buffer to store the lookup table entries
+ * @lut_size: Size of buffer to store the lookup table entries
+ *
+ * Returns 0 on success, negative on failure
+ */
+int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
+{
+	return i40e_get_rss_reg(vsi, seed, lut, lut_size);
+}
+
+/**
+ * i40e_fill_rss_lut - Fill the RSS lookup table with default values
+ * @pf: Pointer to board private structure (not used by this function)
+ * @lut: Lookup table to fill; entry i is set to i % @rss_size
+ * @rss_table_size: Number of entries to write into @lut
+ * @rss_size: Range of queue number for hashing; the modulus, so non-zero
+ */
+static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
+			      u16 rss_table_size, u16 rss_size)
+{
+	u16 i;
+
+	for (i = 0; i < rss_table_size; i++)
+		lut[i] = i % rss_size;
+}
+
+/**
+ * i40e_pf_config_rss - Prepare for RSS if used
  * @pf: board private structure
  **/
-static int i40e_config_rss(struct i40e_pf *pf)
+static int i40e_pf_config_rss(struct i40e_pf *pf)
 {
 	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
 	u8 seed[I40E_HKEY_ARRAY_SIZE];
+	u8 *lut;
 	struct i40e_hw *hw = &pf->hw;
 	u32 reg_val;
 	u64 hena;
-
-	netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+	int ret;
 
 	/* By default we enable TCP/UDP with IPv4/IPv6 ptypes */
 	hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) |
@@ -7898,8 +8066,6 @@ static int i40e_config_rss(struct i40e_pf *pf)
 	wr32(hw, I40E_PFQF_HENA(0), (u32)hena);
 	wr32(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32));
 
-	vsi->rss_size = min_t(int, pf->rss_size, vsi->num_queue_pairs);
-
 	/* Determine the RSS table size based on the hardware capabilities */
 	reg_val = rd32(hw, I40E_PFQF_CTL_0);
 	reg_val = (pf->rss_table_size == 512) ?
@@ -7907,10 +8073,32 @@ static int i40e_config_rss(struct i40e_pf *pf)
 			(reg_val & ~I40E_PFQF_CTL_0_HASHLUTSIZE_512);
 	wr32(hw, I40E_PFQF_CTL_0, reg_val);
 
-	if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE)
-		return i40e_config_rss_aq(pf->vsi[pf->lan_vsi], seed);
+	/* Determine the RSS size of the VSI */
+	if (!vsi->rss_size)
+		vsi->rss_size = min_t(int, pf->alloc_rss_size,
+				      vsi->num_queue_pairs);
+
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	/* Use user configured lut if there is one, otherwise use default */
+	if (vsi->rss_lut_user)
+		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
 	else
-		return i40e_config_rss_reg(pf, seed);
+		i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
+
+	/* Use user configured hash key if there is one, otherwise
+	 * use default.
+	 */
+	if (vsi->rss_hkey_user)
+		memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+	else
+		netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+	ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size);
+	kfree(lut);
+
+	return ret;
 }
 
 /**
@@ -7935,13 +8123,28 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
 		vsi->req_queue_pairs = queue_count;
 		i40e_prep_for_reset(pf);
 
-		pf->rss_size = new_rss_size;
+		pf->alloc_rss_size = new_rss_size;
 
 		i40e_reset_and_rebuild(pf, true);
-		i40e_config_rss(pf);
+
+		/* Discard the user configured hash keys and lut, if fewer
+		 * queues are enabled.
+		 */
+		if (queue_count < vsi->rss_size) {
+			i40e_clear_rss_config_user(vsi);
+			dev_dbg(&pf->pdev->dev,
+				"discard user configured hash keys and lut\n");
+		}
+
+		/* Reset vsi->rss_size, as number of enabled queues changed */
+		vsi->rss_size = min_t(int, pf->alloc_rss_size,
+				      vsi->num_queue_pairs);
+
+		i40e_pf_config_rss(pf);
 	}
-	dev_info(&pf->pdev->dev, "RSS count:  %d\n", pf->rss_size);
-	return pf->rss_size;
+	dev_info(&pf->pdev->dev, "RSS count/HW max RSS count:  %d/%d\n",
+		 pf->alloc_rss_size, pf->rss_size_max);
+	return pf->alloc_rss_size;
 }
 
 /**
@@ -8112,13 +8315,14 @@ static int i40e_sw_init(struct i40e_pf *pf)
 	 * maximum might end up larger than the available queues
 	 */
 	pf->rss_size_max = BIT(pf->hw.func_caps.rss_table_entry_width);
-	pf->rss_size = 1;
+	pf->alloc_rss_size = 1;
 	pf->rss_table_size = pf->hw.func_caps.rss_table_size;
 	pf->rss_size_max = min_t(int, pf->rss_size_max,
 				 pf->hw.func_caps.num_tx_qp);
 	if (pf->hw.func_caps.rss) {
 		pf->flags |= I40E_FLAG_RSS_ENABLED;
-		pf->rss_size = min_t(int, pf->rss_size_max, num_online_cpus());
+		pf->alloc_rss_size = min_t(int, pf->rss_size_max,
+					   num_online_cpus());
 	}
 
 	/* MFP mode enabled */
@@ -9051,7 +9255,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi)
 				f->is_vf, f->is_netdev);
 	spin_unlock_bh(&vsi->mac_filter_list_lock);
 
-	i40e_sync_vsi_filters(vsi, false);
+	i40e_sync_vsi_filters(vsi);
 
 	i40e_vsi_delete(vsi);
 	i40e_vsi_free_q_vectors(vsi);
@@ -9947,7 +10151,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
 	 * the hash
 	 */
 	if ((pf->flags & I40E_FLAG_RSS_ENABLED))
-		i40e_config_rss(pf);
+		i40e_pf_config_rss(pf);
 
 	/* fill in link information and enable LSE reporting */
 	i40e_update_link_info(&pf->hw);
@@ -9985,7 +10189,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
 	    !(pf->flags & I40E_FLAG_MSIX_ENABLED)) {
 		/* one qp for PF, no queues for anything else */
 		queues_left = 0;
-		pf->rss_size = pf->num_lan_qps = 1;
+		pf->alloc_rss_size = pf->num_lan_qps = 1;
 
 		/* make sure all the fancies are disabled */
 		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
@@ -10002,7 +10206,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
 				  I40E_FLAG_FD_ATR_ENABLED |
 				  I40E_FLAG_DCB_CAPABLE))) {
 		/* one qp for PF */
-		pf->rss_size = pf->num_lan_qps = 1;
+		pf->alloc_rss_size = pf->num_lan_qps = 1;
 		queues_left -= pf->num_lan_qps;
 
 		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
@@ -10072,8 +10276,9 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
 		"qs_avail=%d FD SB=%d lan_qs=%d lan_tc0=%d vf=%d*%d vmdq=%d*%d, remaining=%d\n",
 		pf->hw.func_caps.num_tx_qp,
 		!!(pf->flags & I40E_FLAG_FD_SB_ENABLED),
-		pf->num_lan_qps, pf->rss_size, pf->num_req_vfs, pf->num_vf_qps,
-		pf->num_vmdq_vsis, pf->num_vmdq_qps, queues_left);
+		pf->num_lan_qps, pf->alloc_rss_size, pf->num_req_vfs,
+		pf->num_vf_qps, pf->num_vmdq_vsis, pf->num_vmdq_qps,
+		queues_left);
 #ifdef I40E_FCOE
 	dev_dbg(&pf->pdev->dev, "fcoe queues = %d\n", pf->num_fcoe_qps);
 #endif
@@ -10111,55 +10316,53 @@ static int i40e_setup_pf_filter_control(struct i40e_pf *pf)
 }
 
 #define INFO_STRING_LEN 255
+#define REMAIN(__x) (INFO_STRING_LEN - (__x))
 static void i40e_print_features(struct i40e_pf *pf)
 {
 	struct i40e_hw *hw = &pf->hw;
-	char *buf, *string;
+	char *buf;
+	int i;
 
-	string = kzalloc(INFO_STRING_LEN, GFP_KERNEL);
-	if (!string) {
-		dev_err(&pf->pdev->dev, "Features string allocation failed\n");
+	buf = kmalloc(INFO_STRING_LEN, GFP_KERNEL);
+	if (!buf)
 		return;
-	}
-
-	buf = string;
 
-	buf += sprintf(string, "Features: PF-id[%d] ", hw->pf_id);
+	i = snprintf(buf, INFO_STRING_LEN, "Features: PF-id[%d]", hw->pf_id);
 #ifdef CONFIG_PCI_IOV
-	buf += sprintf(buf, "VFs: %d ", pf->num_req_vfs);
+	i += snprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs);
 #endif
-	buf += sprintf(buf, "VSIs: %d QP: %d RX: %s ",
-		       pf->hw.func_caps.num_vsis,
-		       pf->vsi[pf->lan_vsi]->num_queue_pairs,
-		       pf->flags & I40E_FLAG_RX_PS_ENABLED ? "PS" : "1BUF");
+	i += snprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d RX: %s",
+		      pf->hw.func_caps.num_vsis,
+		      pf->vsi[pf->lan_vsi]->num_queue_pairs,
+		      pf->flags & I40E_FLAG_RX_PS_ENABLED ? "PS" : "1BUF");
 
 	if (pf->flags & I40E_FLAG_RSS_ENABLED)
-		buf += sprintf(buf, "RSS ");
+		i += snprintf(&buf[i], REMAIN(i), " RSS");
 	if (pf->flags & I40E_FLAG_FD_ATR_ENABLED)
-		buf += sprintf(buf, "FD_ATR ");
+		i += snprintf(&buf[i], REMAIN(i), " FD_ATR");
 	if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
-		buf += sprintf(buf, "FD_SB ");
-		buf += sprintf(buf, "NTUPLE ");
+		i += snprintf(&buf[i], REMAIN(i), " FD_SB");
+		i += snprintf(&buf[i], REMAIN(i), " NTUPLE");
 	}
 	if (pf->flags & I40E_FLAG_DCB_CAPABLE)
-		buf += sprintf(buf, "DCB ");
+		i += snprintf(&buf[i], REMAIN(i), " DCB");
 #if IS_ENABLED(CONFIG_VXLAN)
-	buf += sprintf(buf, "VxLAN ");
+	i += snprintf(&buf[i], REMAIN(i), " VxLAN");
 #endif
 	if (pf->flags & I40E_FLAG_PTP)
-		buf += sprintf(buf, "PTP ");
+		i += snprintf(&buf[i], REMAIN(i), " PTP");
 #ifdef I40E_FCOE
 	if (pf->flags & I40E_FLAG_FCOE_ENABLED)
-		buf += sprintf(buf, "FCOE ");
+		i += snprintf(&buf[i], REMAIN(i), " FCOE");
 #endif
 	if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
-		buf += sprintf(buf, "VEB ");
+		i += snprintf(&buf[i], REMAIN(i), " VEB");
 	else
-		buf += sprintf(buf, "VEPA ");
+		i += snprintf(&buf[i], REMAIN(i), " VEPA");
 
-	BUG_ON(buf > (string + INFO_STRING_LEN));
-	dev_info(&pf->pdev->dev, "%s\n", string);
-	kfree(string);
+	dev_info(&pf->pdev->dev, "%s\n", buf);
+	kfree(buf);
+	WARN_ON(i > INFO_STRING_LEN);
 }
 
 /**
@@ -10183,6 +10386,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	u16 link_status;
 	int err;
 	u32 len;
+	u32 val;
 	u32 i;
 	u8 set_fc_aq_fail;
 
@@ -10296,6 +10500,16 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pf->hw.fc.requested_mode = I40E_FC_NONE;
 
 	err = i40e_init_adminq(hw);
+	if (err) {
+		if (err == I40E_ERR_FIRMWARE_API_VERSION)
+			dev_info(&pdev->dev,
+				 "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n");
+		else
+			dev_info(&pdev->dev,
+				 "The driver for the device stopped because the device firmware failed to init. Try updating your NVM image.\n");
+
+		goto err_pf_reset;
+	}
 
 	/* provide nvm, fw, api versions */
 	dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s\n",
@@ -10303,12 +10517,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		 hw->aq.api_maj_ver, hw->aq.api_min_ver,
 		 i40e_nvm_version_str(hw));
 
-	if (err) {
-		dev_info(&pdev->dev,
-			 "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n");
-		goto err_pf_reset;
-	}
-
 	if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
 	    hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR)
 		dev_info(&pdev->dev,
@@ -10405,7 +10613,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	/* NVM bit on means WoL disabled for the port */
 	i40e_read_nvm_word(hw, I40E_SR_NVM_WAKE_ON_LAN, &wol_nvm_bits);
-	if ((1 << hw->port) & wol_nvm_bits || hw->partition_id != 1)
+	if (BIT (hw->port) & wol_nvm_bits || hw->partition_id != 1)
 		pf->wol_en = false;
 	else
 		pf->wol_en = true;
@@ -10487,6 +10695,17 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			 i40e_stat_str(&pf->hw, err),
 			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
 
+	/* Reconfigure hardware for allowing smaller MSS in the case
+	 * of TSO, so that we avoid the MDD being fired and causing
+	 * a reset in the case of small MSS+TSO.
+	 */
+	val = rd32(hw, I40E_REG_MSS);
+	if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) {
+		val &= ~I40E_REG_MSS_MIN_MASK;
+		val |= I40E_64BYTE_MSS;
+		wr32(hw, I40E_REG_MSS, val);
+	}
+
 	if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) ||
 	    (pf->hw.aq.fw_maj_ver < 4)) {
 		msleep(75);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 6649ce4ba2de..b0ae3e695783 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -235,6 +235,9 @@ static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
 				 "Filter deleted for PCTYPE %d loc = %d\n",
 				 fd_data->pctype, fd_data->fd_id);
 	}
+	if (err)
+		kfree(raw_packet);
+
 	return err ? -EOPNOTSUPP : 0;
 }
 
@@ -312,6 +315,9 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
 				 fd_data->pctype, fd_data->fd_id);
 	}
 
+	if (err)
+		kfree(raw_packet);
+
 	return err ? -EOPNOTSUPP : 0;
 }
 
@@ -322,7 +328,7 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
  * @fd_data: the flow director data required for the FDir descriptor
  * @add: true adds a filter, false removes it
  *
- * Always returns -EOPNOTSUPP
+ * Returns 0 if the filters were successfully added or removed
  **/
 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
 				    struct i40e_fdir_filter *fd_data,
@@ -387,6 +393,9 @@ static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
 		}
 	}
 
+	if (err)
+		kfree(raw_packet);
+
 	return err ? -EOPNOTSUPP : 0;
 }
 
@@ -506,9 +515,6 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
 				pf->auto_disable_flags |=
 							I40E_FLAG_FD_SB_ENABLED;
 			}
-		} else {
-			dev_info(&pdev->dev,
-				"FD filter programming failed due to incorrect filter parameters\n");
 		}
 	} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
 		if (I40E_DEBUG_FD & pf->hw.debug_mask)
@@ -526,11 +532,7 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
 					    struct i40e_tx_buffer *tx_buffer)
 {
 	if (tx_buffer->skb) {
-		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
-			kfree(tx_buffer->raw_buf);
-		else
-			dev_kfree_skb_any(tx_buffer->skb);
-
+		dev_kfree_skb_any(tx_buffer->skb);
 		if (dma_unmap_len(tx_buffer, len))
 			dma_unmap_single(ring->dev,
 					 dma_unmap_addr(tx_buffer, dma),
@@ -542,6 +544,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
 			       dma_unmap_len(tx_buffer, len),
 			       DMA_TO_DEVICE);
 	}
+
+	if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
+		kfree(tx_buffer->raw_buf);
+
 	tx_buffer->next_to_watch = NULL;
 	tx_buffer->skb = NULL;
 	dma_unmap_len_set(tx_buffer, len, 0);
@@ -1863,7 +1869,6 @@ enable_int:
 		q_vector->itr_countdown--;
 	else
 		q_vector->itr_countdown = ITR_COUNTDOWN_START;
-
 }
 
 /**
@@ -1891,12 +1896,14 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 		return 0;
 	}
 
+	/* Clear hung_detected bit */
+	clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected);
 	/* Since the actual Tx work is minimal, we can give the Tx a larger
 	 * budget and be more aggressive about cleaning up the Tx descriptors.
 	 */
 	i40e_for_each_ring(ring, q_vector->tx) {
 		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
-		arm_wb |= ring->arm_wb;
+		arm_wb = arm_wb || ring->arm_wb;
 		ring->arm_wb = false;
 	}
 
@@ -1925,8 +1932,10 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 	/* If work not completed, return budget and polling will return */
 	if (!clean_complete) {
 tx_only:
-		if (arm_wb)
+		if (arm_wb) {
+			q_vector->tx.ring[0].tx_stats.tx_force_wb++;
 			i40e_force_wb(vsi, q_vector);
+		}
 		return budget;
 	}
 
@@ -2186,14 +2195,12 @@ out:
  * @tx_ring:  ptr to the ring to send
  * @skb:      ptr to the skb we're sending
  * @hdr_len:  ptr to the size of the packet header
- * @cd_type_cmd_tso_mss: ptr to u64 object
- * @cd_tunneling: ptr to context descriptor bits
+ * @cd_type_cmd_tso_mss: Quad Word 1
  *
  * Returns 0 if no TSO can happen, 1 if tso is going, or error
  **/
 static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
-		    u8 *hdr_len, u64 *cd_type_cmd_tso_mss,
-		    u32 *cd_tunneling)
+		    u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
 {
 	u32 cd_cmd, cd_tso_len, cd_mss;
 	struct ipv6hdr *ipv6h;
@@ -2246,7 +2253,7 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
  * @tx_ring:  ptr to the ring to send
  * @skb:      ptr to the skb we're sending
  * @tx_flags: the collected send information
- * @cd_type_cmd_tso_mss: ptr to u64 object
+ * @cd_type_cmd_tso_mss: Quad Word 1
  *
  * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
  **/
@@ -2806,6 +2813,9 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
 	int tsyn;
 	int tso;
 
+	/* prefetch the data, we'll need it later */
+	prefetch(skb->data);
+
 	if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
 		return NETDEV_TX_BUSY;
 
@@ -2825,8 +2835,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
 	else if (protocol == htons(ETH_P_IPV6))
 		tx_flags |= I40E_TX_FLAGS_IPV6;
 
-	tso = i40e_tso(tx_ring, skb, &hdr_len,
-		       &cd_type_cmd_tso_mss, &cd_tunneling);
+	tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss);
 
 	if (tso < 0)
 		goto out_drop;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 6779fb771d6a..dccc1eb576f2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -202,6 +202,7 @@ struct i40e_tx_queue_stats {
 	u64 tx_busy;
 	u64 tx_done_old;
 	u64 tx_linearize;
+	u64 tx_force_wb;
 };
 
 struct i40e_rx_queue_stats {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
index ae879826084b..3226946bf3d4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
@@ -153,6 +153,7 @@ struct i40e_virtchnl_vsi_resource {
 #define I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR	0x00000020
 #define I40E_VIRTCHNL_VF_OFFLOAD_VLAN		0x00010000
 #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING	0x00020000
+#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2	0x00040000
 
 struct i40e_virtchnl_vf_resource {
 	u16 num_vsis;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 44462b40f2d7..aa58a498c239 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -290,8 +290,8 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
 	next_q = find_first_bit(&linklistmap,
 				(I40E_MAX_VSI_QP *
 				 I40E_VIRTCHNL_SUPPORTED_QTYPES));
-	vsi_queue_id = next_q/I40E_VIRTCHNL_SUPPORTED_QTYPES;
-	qtype = next_q%I40E_VIRTCHNL_SUPPORTED_QTYPES;
+	vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
+	qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
 	pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
 	reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id);
 
@@ -549,12 +549,15 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
 			i40e_vsi_add_pvid(vsi, vf->port_vlan_id);
 
 		spin_lock_bh(&vsi->mac_filter_list_lock);
-		f = i40e_add_filter(vsi, vf->default_lan_addr.addr,
-				    vf->port_vlan_id ? vf->port_vlan_id : -1,
-				    true, false);
-		if (!f)
-			dev_info(&pf->pdev->dev,
-				 "Could not allocate VF MAC addr\n");
+		if (is_valid_ether_addr(vf->default_lan_addr.addr)) {
+			f = i40e_add_filter(vsi, vf->default_lan_addr.addr,
+				       vf->port_vlan_id ? vf->port_vlan_id : -1,
+				       true, false);
+			if (!f)
+				dev_info(&pf->pdev->dev,
+					 "Could not add MAC filter %pM for VF %d\n",
+					vf->default_lan_addr.addr, vf->vf_id);
+		}
 		f = i40e_add_filter(vsi, brdcast,
 				    vf->port_vlan_id ? vf->port_vlan_id : -1,
 				    true, false);
@@ -565,7 +568,7 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
 	}
 
 	/* program mac filter */
-	ret = i40e_sync_vsi_filters(vsi, false);
+	ret = i40e_sync_vsi_filters(vsi);
 	if (ret)
 		dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
 
@@ -1094,8 +1097,8 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode,
 	/* single place to detect unsuccessful return values */
 	if (v_retval) {
 		vf->num_invalid_msgs++;
-		dev_err(&pf->pdev->dev, "Failed opcode %d Error: %d\n",
-			v_opcode, v_retval);
+		dev_err(&pf->pdev->dev, "VF %d failed opcode %d, error: %d\n",
+			vf->vf_id, v_opcode, v_retval);
 		if (vf->num_invalid_msgs >
 		    I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED) {
 			dev_err(&pf->pdev->dev,
@@ -1623,7 +1626,8 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 		if (!f) {
 			dev_err(&pf->pdev->dev,
-				"Unable to add VF MAC filter\n");
+				"Unable to add MAC filter %pM for VF %d\n",
+				 al->list[i].addr, vf->vf_id);
 			ret = I40E_ERR_PARAM;
 			spin_unlock_bh(&vsi->mac_filter_list_lock);
 			goto error_param;
@@ -1632,8 +1636,10 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 	spin_unlock_bh(&vsi->mac_filter_list_lock);
 
 	/* program the updated filter list */
-	if (i40e_sync_vsi_filters(vsi, false))
-		dev_err(&pf->pdev->dev, "Unable to program VF MAC filters\n");
+	ret = i40e_sync_vsi_filters(vsi);
+	if (ret)
+		dev_err(&pf->pdev->dev, "Unable to program VF %d MAC filters, error %d\n",
+			vf->vf_id, ret);
 
 error_param:
 	/* send the response to the VF */
@@ -1669,8 +1675,8 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 	for (i = 0; i < al->num_elements; i++) {
 		if (is_broadcast_ether_addr(al->list[i].addr) ||
 		    is_zero_ether_addr(al->list[i].addr)) {
-			dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n",
-				al->list[i].addr);
+			dev_err(&pf->pdev->dev, "Invalid MAC addr %pM for VF %d\n",
+				al->list[i].addr, vf->vf_id);
 			ret = I40E_ERR_INVALID_MAC_ADDR;
 			goto error_param;
 		}
@@ -1680,13 +1686,19 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 	spin_lock_bh(&vsi->mac_filter_list_lock);
 	/* delete addresses from the list */
 	for (i = 0; i < al->num_elements; i++)
-		i40e_del_filter(vsi, al->list[i].addr,
-				I40E_VLAN_ANY, true, false);
+		if (i40e_del_mac_all_vlan(vsi, al->list[i].addr, true, false)) {
+			ret = I40E_ERR_INVALID_MAC_ADDR;
+			spin_unlock_bh(&vsi->mac_filter_list_lock);
+			goto error_param;
+		}
+
 	spin_unlock_bh(&vsi->mac_filter_list_lock);
 
 	/* program the updated filter list */
-	if (i40e_sync_vsi_filters(vsi, false))
-		dev_err(&pf->pdev->dev, "Unable to program VF MAC filters\n");
+	ret = i40e_sync_vsi_filters(vsi);
+	if (ret)
+		dev_err(&pf->pdev->dev, "Unable to program VF %d MAC filters, error %d\n",
+			vf->vf_id, ret);
 
 error_param:
 	/* send the response to the VF */
@@ -1740,8 +1752,8 @@ static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 		if (ret)
 			dev_err(&pf->pdev->dev,
-				"Unable to add VF vlan filter %d, error %d\n",
-				vfl->vlan_id[i], ret);
+				"Unable to add VLAN filter %d for VF %d, error %d\n",
+				vfl->vlan_id[i], vf->vf_id, ret);
 	}
 
 error_param:
@@ -1792,8 +1804,8 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 		if (ret)
 			dev_err(&pf->pdev->dev,
-				"Unable to delete VF vlan filter %d, error %d\n",
-				vfl->vlan_id[i], ret);
+				"Unable to delete VLAN filter %d for VF %d, error %d\n",
+				vfl->vlan_id[i], vf->vf_id, ret);
 	}
 
 error_param:
@@ -2099,7 +2111,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 
 	dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n", mac, vf_id);
 	/* program mac filter */
-	if (i40e_sync_vsi_filters(vsi, false)) {
+	if (i40e_sync_vsi_filters(vsi)) {
 		dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
 		ret = -EIO;
 		goto error_param;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index fcb9ef34cc7a..f5b2b369dc7c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
@@ -227,6 +227,7 @@ enum i40e_admin_queue_opc {
 	i40e_aqc_opc_nvm_update			= 0x0703,
 	i40e_aqc_opc_nvm_config_read		= 0x0704,
 	i40e_aqc_opc_nvm_config_write		= 0x0705,
+	i40e_aqc_opc_oem_post_update		= 0x0720,
 
 	/* virtualization commands */
 	i40e_aqc_opc_send_msg_to_pf		= 0x0801,
@@ -1888,6 +1889,26 @@ struct i40e_aqc_nvm_config_data_immediate_field {
 
 I40E_CHECK_STRUCT_LEN(0xc, i40e_aqc_nvm_config_data_immediate_field);
 
+/* OEM Post Update (indirect 0x0720)
+ * no command data struct used
+ */
+ struct i40e_aqc_nvm_oem_post_update {
+#define I40E_AQ_NVM_OEM_POST_UPDATE_EXTERNAL_DATA	0x01
+	u8 sel_data;
+	u8 reserved[7];
+};
+
+I40E_CHECK_STRUCT_LEN(0x8, i40e_aqc_nvm_oem_post_update);
+
+struct i40e_aqc_nvm_oem_post_update_buffer {
+	u8 str_len;
+	u8 dev_addr;
+	__le16 eeprom_addr;
+	u8 data[36];
+};
+
+I40E_CHECK_STRUCT_LEN(0x28, i40e_aqc_nvm_oem_post_update_buffer);
+
 /* Send to PF command (indirect 0x0801) id is only used by PF
  * Send to VF command (indirect 0x0802) id is only used by PF
  * Send to Peer PF command (indirect 0x0803)
@@ -2311,4 +2332,4 @@ struct i40e_aqc_debug_modify_internals {
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_internals);
 
-#endif
+#endif /* _I40E_ADMINQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 77968b184b1f..4ca40651a228 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -51,11 +51,7 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
 					    struct i40e_tx_buffer *tx_buffer)
 {
 	if (tx_buffer->skb) {
-		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
-			kfree(tx_buffer->raw_buf);
-		else
-			dev_kfree_skb_any(tx_buffer->skb);
-
+		dev_kfree_skb_any(tx_buffer->skb);
 		if (dma_unmap_len(tx_buffer, len))
 			dma_unmap_single(ring->dev,
 					 dma_unmap_addr(tx_buffer, dma),
@@ -67,6 +63,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
 			       dma_unmap_len(tx_buffer, len),
 			       DMA_TO_DEVICE);
 	}
+
+	if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
+		kfree(tx_buffer->raw_buf);
+
 	tx_buffer->next_to_watch = NULL;
 	tx_buffer->skb = NULL;
 	dma_unmap_len_set(tx_buffer, len, 0);
@@ -127,17 +127,24 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring)
 }
 
 /**
- * i40e_get_head - Retrieve head from head writeback
- * @tx_ring:  tx ring to fetch head of
+ * i40evf_get_tx_pending - how many Tx descriptors not processed
+ * @ring: the ring of descriptors
  *
- * Returns value of Tx ring head based on value stored
- * in head write-back location
+ * Since there is no access to the ring head register
+ * in XL710, we need to use our local copies
  **/
-static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+u32 i40evf_get_tx_pending(struct i40e_ring *ring)
 {
-	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+	u32 head, tail;
 
-	return le32_to_cpu(*(volatile __le32 *)head);
+	head = i40e_get_head(ring);
+	tail = readl(ring->tail);
+
+	if (head != tail)
+		return (head < tail) ?
+			tail - head : (tail + ring->count - head);
+
+	return 0;
 }
 
 #define WB_STRIDE 0x3
@@ -245,16 +252,6 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 	tx_ring->q_vector->tx.total_bytes += total_bytes;
 	tx_ring->q_vector->tx.total_packets += total_packets;
 
-	/* check to see if there are any non-cache aligned descriptors
-	 * waiting to be written back, and kick the hardware to force
-	 * them to be written back in case of napi polling
-	 */
-	if (budget &&
-	    !((i & WB_STRIDE) == WB_STRIDE) &&
-	    !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
-	    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
-		tx_ring->arm_wb = true;
-
 	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
 						      tx_ring->queue_index),
 				  total_packets, total_bytes);
@@ -414,7 +411,7 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
 	return false;
 }
 
-/*
+/**
  * i40evf_setup_tx_descriptors - Allocate the Tx descriptors
  * @tx_ring: the tx ring to set up
  *
@@ -1262,10 +1259,12 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 		rx = i40e_set_new_dynamic_itr(&q_vector->rx);
 		rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
 	}
+
 	if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
 		tx = i40e_set_new_dynamic_itr(&q_vector->tx);
 		txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
 	}
+
 	if (rx || tx) {
 		/* get the higher of the two ITR adjustments and
 		 * use the same value for both ITR registers
@@ -1301,7 +1300,6 @@ enable_int:
 		q_vector->itr_countdown--;
 	else
 		q_vector->itr_countdown = ITR_COUNTDOWN_START;
-
 }
 
 /**
@@ -1334,7 +1332,7 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget)
 	 */
 	i40e_for_each_ring(ring, q_vector->tx) {
 		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
-		arm_wb |= ring->arm_wb;
+		arm_wb = arm_wb || ring->arm_wb;
 		ring->arm_wb = false;
 	}
 
@@ -1363,8 +1361,10 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget)
 	/* If work not completed, return budget and polling will return */
 	if (!clean_complete) {
 tx_only:
-		if (arm_wb)
+		if (arm_wb) {
+			q_vector->tx.ring[0].tx_stats.tx_force_wb++;
 			i40evf_force_wb(vsi, q_vector);
+		}
 		return budget;
 	}
 
@@ -1436,13 +1436,12 @@ out:
  * @tx_ring:  ptr to the ring to send
  * @skb:      ptr to the skb we're sending
  * @hdr_len:  ptr to the size of the packet header
- * @cd_tunneling: ptr to context descriptor bits
+ * @cd_type_cmd_tso_mss: Quad Word 1
  *
  * Returns 0 if no TSO can happen, 1 if tso is going, or error
  **/
 static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
-		    u8 *hdr_len, u64 *cd_type_cmd_tso_mss,
-		    u32 *cd_tunneling)
+		    u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
 {
 	u32 cd_cmd, cd_tso_len, cd_mss;
 	struct ipv6hdr *ipv6h;
@@ -1554,7 +1553,6 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
 			*tx_flags |= I40E_TX_FLAGS_IPV6;
 		}
 
-
 		if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) &&
 		    (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING)        &&
 		    (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) {
@@ -1653,7 +1651,7 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
 	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
 }
 
- /**
+/**
  * i40e_chk_linearize - Check if there are more than 8 fragments per packet
  * @skb:      send buffer
  * @tx_flags: collected send information
@@ -1769,6 +1767,9 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 	u32 td_tag = 0;
 	dma_addr_t dma;
 	u16 gso_segs;
+	u16 desc_count = 0;
+	bool tail_bump = true;
+	bool do_rs = false;
 
 	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
 		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
@@ -1809,6 +1810,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
 			tx_desc++;
 			i++;
+			desc_count++;
+
 			if (i == tx_ring->count) {
 				tx_desc = I40E_TX_DESC(tx_ring, 0);
 				i = 0;
@@ -1828,6 +1831,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
 		tx_desc++;
 		i++;
+		desc_count++;
+
 		if (i == tx_ring->count) {
 			tx_desc = I40E_TX_DESC(tx_ring, 0);
 			i = 0;
@@ -1842,35 +1847,6 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 		tx_bi = &tx_ring->tx_bi[i];
 	}
 
-	/* Place RS bit on last descriptor of any packet that spans across the
-	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
-	 */
-#define WB_STRIDE 0x3
-	if (((i & WB_STRIDE) != WB_STRIDE) &&
-	    (first <= &tx_ring->tx_bi[i]) &&
-	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
-		tx_desc->cmd_type_offset_bsz =
-			build_ctob(td_cmd, td_offset, size, td_tag) |
-			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
-					 I40E_TXD_QW1_CMD_SHIFT);
-	} else {
-		tx_desc->cmd_type_offset_bsz =
-			build_ctob(td_cmd, td_offset, size, td_tag) |
-			cpu_to_le64((u64)I40E_TXD_CMD <<
-					 I40E_TXD_QW1_CMD_SHIFT);
-	}
-
-	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
-						 tx_ring->queue_index),
-			     first->bytecount);
-
-	/* Force memory writes to complete before letting h/w
-	 * know there are new descriptors to fetch.  (Only
-	 * applicable for weak-ordered memory model archs,
-	 * such as IA-64).
-	 */
-	wmb();
-
 	/* set next_to_watch value indicating a packet is present */
 	first->next_to_watch = tx_desc;
 
@@ -1880,15 +1856,72 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
 	tx_ring->next_to_use = i;
 
+	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
+						 tx_ring->queue_index),
+						 first->bytecount);
 	i40evf_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	/* Algorithm to optimize tail and RS bit setting:
+	 * if xmit_more is supported
+	 *	if xmit_more is true
+	 *		do not update tail and do not mark RS bit.
+	 *	if xmit_more is false and last xmit_more was false
+	 *		if every packet spanned less than 4 desc
+	 *			then set RS bit on 4th packet and update tail
+	 *			on every packet
+	 *		else
+	 *			update tail and set RS bit on every packet.
+	 *	if xmit_more is false and last_xmit_more was true
+	 *		update tail and set RS bit.
+	 *
+	 * Optimization: wmb to be issued only in case of tail update.
+	 * Also optimize the Descriptor WB path for RS bit with the same
+	 * algorithm.
+	 *
+	 * Note: If there are fewer than 4 packets
+	 * pending and interrupts were disabled the service task will
+	 * trigger a force WB.
+	 */
+	if (skb->xmit_more  &&
+	    !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
+						    tx_ring->queue_index))) {
+		tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
+		tail_bump = false;
+	} else if (!skb->xmit_more &&
+		   !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
+						       tx_ring->queue_index)) &&
+		   (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
+		   (tx_ring->packet_stride < WB_STRIDE) &&
+		   (desc_count < WB_STRIDE)) {
+		tx_ring->packet_stride++;
+	} else {
+		tx_ring->packet_stride = 0;
+		tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
+		do_rs = true;
+	}
+	if (do_rs)
+		tx_ring->packet_stride = 0;
+
+	tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
+						  I40E_TX_DESC_CMD_EOP) <<
+						  I40E_TXD_QW1_CMD_SHIFT);
+
 	/* notify HW of packet */
-	if (!skb->xmit_more ||
-	    netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
-						   tx_ring->queue_index)))
-		writel(i, tx_ring->tail);
-	else
+	if (!tail_bump)
 		prefetchw(tx_desc + 1);
 
+	if (tail_bump) {
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(i, tx_ring->tail);
+	}
+
 	return;
 
 dma_error:
@@ -1960,6 +1993,9 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
 	u8 hdr_len = 0;
 	int tso;
 
+	/* prefetch the data, we'll need it later */
+	prefetch(skb->data);
+
 	if (0 == i40evf_xmit_descriptor_count(skb, tx_ring))
 		return NETDEV_TX_BUSY;
 
@@ -1979,8 +2015,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
 	else if (protocol == htons(ETH_P_IPV6))
 		tx_flags |= I40E_TX_FLAGS_IPV6;
 
-	tso = i40e_tso(tx_ring, skb, &hdr_len,
-		       &cd_type_cmd_tso_mss, &cd_tunneling);
+	tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss);
 
 	if (tso < 0)
 		goto out_drop;
@@ -2028,7 +2063,7 @@ out_drop:
 netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	struct i40e_ring *tx_ring = adapter->tx_rings[skb->queue_mapping];
+	struct i40e_ring *tx_ring = &adapter->tx_rings[skb->queue_mapping];
 
 	/* hardware can't handle really short frames, hardware padding works
 	 * beyond this point
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index ebc1bf77f036..e29bb3e86cfd 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -201,6 +201,7 @@ struct i40e_tx_queue_stats {
 	u64 tx_busy;
 	u64 tx_done_old;
 	u64 tx_linearize;
+	u64 tx_force_wb;
 };
 
 struct i40e_rx_queue_stats {
@@ -267,6 +268,8 @@ struct i40e_ring {
 
 	bool ring_active;		/* is ring online or not */
 	bool arm_wb;		/* do something to arm write back */
+	u8 packet_stride;
+#define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2)
 
 	u16 flags;
 #define I40E_TXR_FLAGS_WB_ON_ITR	BIT(0)
@@ -321,4 +324,19 @@ int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring);
 void i40evf_free_tx_resources(struct i40e_ring *tx_ring);
 void i40evf_free_rx_resources(struct i40e_ring *rx_ring);
 int i40evf_napi_poll(struct napi_struct *napi, int budget);
+u32 i40evf_get_tx_pending(struct i40e_ring *ring);
+
+/**
+ * i40e_get_head - Retrieve head from head writeback
+ * @tx_ring: Tx ring to fetch head of
+ *
+ * Returns value of Tx ring head based on value stored
+ * in head write-back location
+ **/
+static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+{
+	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+
+	return le32_to_cpu(*(volatile __le32 *)head);
+}
 #endif /* _I40E_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
index 9f7b279b9d9c..3b9d2037456c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
@@ -153,6 +153,7 @@ struct i40e_virtchnl_vsi_resource {
 #define I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR	0x00000020
 #define I40E_VIRTCHNL_VF_OFFLOAD_VLAN		0x00010000
 #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING	0x00020000
+#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2	0x00040000
 
 struct i40e_virtchnl_vf_resource {
 	u16 num_vsis;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 22fc3d49c4b9..be1b72b93888 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -67,6 +67,8 @@ struct i40e_vsi {
 	u16 rx_itr_setting;
 	u16 tx_itr_setting;
 	u16 qs_handle;
+	u8 *rss_hkey_user; /* User configured hash keys */
+	u8 *rss_lut_user;  /* User configured lookup table entries */
 };
 
 /* How many Rx Buffers do we bundle into one write to the hardware ? */
@@ -95,10 +97,10 @@ struct i40e_vsi {
 #define I40E_TX_DESC(R, i) (&(((struct i40e_tx_desc *)((R)->desc))[i]))
 #define I40E_TX_CTXTDESC(R, i) \
 	(&(((struct i40e_tx_context_desc *)((R)->desc))[i]))
-#define MAX_RX_QUEUES 8
-#define MAX_TX_QUEUES MAX_RX_QUEUES
+#define MAX_QUEUES 16
 
 #define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4)
+#define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4)
 
 /* MAX_MSIX_Q_VECTORS of these are allocated,
  * but we only use one per queue-specific vector.
@@ -142,9 +144,6 @@ struct i40e_q_vector {
 #define OTHER_VECTOR 1
 #define NONQ_VECS (OTHER_VECTOR)
 
-#define MAX_MSIX_Q_VECTORS 4
-#define MAX_MSIX_COUNT 5
-
 #define MIN_MSIX_Q_VECTORS 1
 #define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NONQ_VECS)
 
@@ -190,19 +189,19 @@ struct i40evf_adapter {
 	struct work_struct reset_task;
 	struct work_struct adminq_task;
 	struct delayed_work init_task;
-	struct i40e_q_vector *q_vector[MAX_MSIX_Q_VECTORS];
+	struct i40e_q_vector *q_vectors;
 	struct list_head vlan_filter_list;
 	char misc_vector_name[IFNAMSIZ + 9];
 	int num_active_queues;
 
 	/* TX */
-	struct i40e_ring *tx_rings[I40E_MAX_VSI_QP];
+	struct i40e_ring *tx_rings;
 	u32 tx_timeout_count;
 	struct list_head mac_filter_list;
 	u32 tx_desc_count;
 
 	/* RX */
-	struct i40e_ring *rx_rings[I40E_MAX_VSI_QP];
+	struct i40e_ring *rx_rings;
 	u64 hw_csum_rx_error;
 	u32 rx_desc_count;
 	int num_msix_vectors;
@@ -313,4 +312,8 @@ void i40evf_request_reset(struct i40evf_adapter *adapter);
 void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 				enum i40e_virtchnl_ops v_opcode,
 				i40e_status v_retval, u8 *msg, u16 msglen);
+int i40evf_config_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut,
+		      u16 lut_size);
+int i40evf_get_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut,
+		   u16 lut_size);
 #endif /* _I40EVF_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index 4790437a50ac..a4c9feb589e7 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -121,12 +121,12 @@ static void i40evf_get_ethtool_stats(struct net_device *netdev,
 		data[i] =  *(u64 *)p;
 	}
 	for (j = 0; j < adapter->num_active_queues; j++) {
-		data[i++] = adapter->tx_rings[j]->stats.packets;
-		data[i++] = adapter->tx_rings[j]->stats.bytes;
+		data[i++] = adapter->tx_rings[j].stats.packets;
+		data[i++] = adapter->tx_rings[j].stats.bytes;
 	}
 	for (j = 0; j < adapter->num_active_queues; j++) {
-		data[i++] = adapter->rx_rings[j]->stats.packets;
-		data[i++] = adapter->rx_rings[j]->stats.bytes;
+		data[i++] = adapter->rx_rings[j].stats.packets;
+		data[i++] = adapter->rx_rings[j].stats.bytes;
 	}
 }
 
@@ -351,7 +351,7 @@ static int i40evf_set_coalesce(struct net_device *netdev,
 		vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
 
 	for (i = 0; i < adapter->num_msix_vectors - NONQ_VECS; i++) {
-		q_vector = adapter->q_vector[i];
+		q_vector = &adapter->q_vectors[i];
 		q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
 		wr32(hw, I40E_VFINT_ITRN1(0, i), q_vector->rx.itr);
 		q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting);
@@ -634,25 +634,34 @@ static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
 			   u8 *hfunc)
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	struct i40e_hw *hw = &adapter->hw;
-	u32 hlut_val;
-	int i, j;
+	struct i40e_vsi *vsi = &adapter->vsi;
+	u8 *seed = NULL, *lut;
+	int ret;
+	u16 i;
 
 	if (hfunc)
 		*hfunc = ETH_RSS_HASH_TOP;
 	if (!indir)
 		return 0;
 
-	if (indir) {
-		for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) {
-			hlut_val = rd32(hw, I40E_VFQF_HLUT(i));
-			indir[j++] = hlut_val & 0xff;
-			indir[j++] = (hlut_val >> 8) & 0xff;
-			indir[j++] = (hlut_val >> 16) & 0xff;
-			indir[j++] = (hlut_val >> 24) & 0xff;
-		}
-	}
-	return 0;
+	seed = key;
+
+	lut = kzalloc(I40EVF_HLUT_ARRAY_SIZE, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	ret = i40evf_get_rss(vsi, seed, lut, I40EVF_HLUT_ARRAY_SIZE);
+	if (ret)
+		goto out;
+
+	/* Each lut entry is a byte; widen it into one 32-bit 'indir' word */
+	for (i = 0; i < I40EVF_HLUT_ARRAY_SIZE; i++)
+		indir[i] = (u32)lut[i];
+
+out:
+	kfree(lut);
+
+	return ret;
 }
 
 /**
@@ -668,9 +677,9 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir,
 			   const u8 *key, const u8 hfunc)
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	struct i40e_hw *hw = &adapter->hw;
-	u32 hlut_val;
-	int i, j;
+	struct i40e_vsi *vsi = &adapter->vsi;
+	u8 *seed = NULL;
+	u16 i;
 
 	/* We do not allow change in unsupported parameters */
 	if (key ||
@@ -679,15 +688,29 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir,
 	if (!indir)
 		return 0;
 
-	for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) {
-		hlut_val = indir[j++];
-		hlut_val |= indir[j++] << 8;
-		hlut_val |= indir[j++] << 16;
-		hlut_val |= indir[j++] << 24;
-		wr32(hw, I40E_VFQF_HLUT(i), hlut_val);
+	if (key) {
+		if (!vsi->rss_hkey_user) {
+			vsi->rss_hkey_user = kzalloc(I40EVF_HKEY_ARRAY_SIZE,
+						     GFP_KERNEL);
+			if (!vsi->rss_hkey_user)
+				return -ENOMEM;
+		}
+		memcpy(vsi->rss_hkey_user, key, I40EVF_HKEY_ARRAY_SIZE);
+		seed = vsi->rss_hkey_user;
+	}
+	if (!vsi->rss_lut_user) {
+		vsi->rss_lut_user = kzalloc(I40EVF_HLUT_ARRAY_SIZE,
+					    GFP_KERNEL);
+		if (!vsi->rss_lut_user)
+			return -ENOMEM;
 	}
 
-	return 0;
+	/* Each 32-bit 'indir' word holds one lut entry; keep its low byte */
+	for (i = 0; i < I40EVF_HLUT_ARRAY_SIZE; i++)
+		vsi->rss_lut_user[i] = (u8)(indir[i]);
+
+	return i40evf_config_rss(vsi, seed, vsi->rss_lut_user,
+				 I40EVF_HLUT_ARRAY_SIZE);
 }
 
 static const struct ethtool_ops i40evf_ethtool_ops = {
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index d962164dfb0f..455394cf7f80 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -34,7 +34,15 @@ char i40evf_driver_name[] = "i40evf";
 static const char i40evf_driver_string[] =
 	"Intel(R) XL710/X710 Virtual Function Network Driver";
 
-#define DRV_VERSION "1.3.33"
+#define DRV_KERN "-k"
+
+#define DRV_VERSION_MAJOR 1
+#define DRV_VERSION_MINOR 4
+#define DRV_VERSION_BUILD 4
+#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
+	     __stringify(DRV_VERSION_MINOR) "." \
+	     __stringify(DRV_VERSION_BUILD) \
+	     DRV_KERN
 const char i40evf_driver_version[] = DRV_VERSION;
 static const char i40evf_copyright[] =
 	"Copyright (c) 2013 - 2015 Intel Corporation.";
@@ -259,7 +267,7 @@ static void i40evf_fire_sw_int(struct i40evf_adapter *adapter, u32 mask)
 {
 	struct i40e_hw *hw = &adapter->hw;
 	int i;
-	uint32_t dyn_ctl;
+	u32 dyn_ctl;
 
 	if (mask & 1) {
 		dyn_ctl = rd32(hw, I40E_VFINT_DYN_CTL01);
@@ -307,10 +315,9 @@ static irqreturn_t i40evf_msix_aq(int irq, void *data)
 	struct i40e_hw *hw = &adapter->hw;
 	u32 val;
 
-	/* handle non-queue interrupts */
-	rd32(hw, I40E_VFINT_ICR01);
-	rd32(hw, I40E_VFINT_ICR0_ENA1);
-
+	/* handle non-queue interrupts, these reads clear the registers */
+	val = rd32(hw, I40E_VFINT_ICR01);
+	val = rd32(hw, I40E_VFINT_ICR0_ENA1);
 
 	val = rd32(hw, I40E_VFINT_DYN_CTL01) |
 	      I40E_VFINT_DYN_CTL01_CLEARPBA_MASK;
@@ -348,8 +355,8 @@ static irqreturn_t i40evf_msix_clean_rings(int irq, void *data)
 static void
 i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)
 {
-	struct i40e_q_vector *q_vector = adapter->q_vector[v_idx];
-	struct i40e_ring *rx_ring = adapter->rx_rings[r_idx];
+	struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx];
+	struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx];
 
 	rx_ring->q_vector = q_vector;
 	rx_ring->next = q_vector->rx.ring;
@@ -369,8 +376,8 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)
 static void
 i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
 {
-	struct i40e_q_vector *q_vector = adapter->q_vector[v_idx];
-	struct i40e_ring *tx_ring = adapter->tx_rings[t_idx];
+	struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx];
+	struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx];
 
 	tx_ring->q_vector = q_vector;
 	tx_ring->next = q_vector->tx.ring;
@@ -465,7 +472,7 @@ static void i40evf_netpoll(struct net_device *netdev)
 		return;
 
 	for (i = 0; i < q_vectors; i++)
-		i40evf_msix_clean_rings(0, adapter->q_vector[i]);
+		i40evf_msix_clean_rings(0, &adapter->q_vectors[i]);
 }
 
 #endif
@@ -487,7 +494,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
 	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
 
 	for (vector = 0; vector < q_vectors; vector++) {
-		struct i40e_q_vector *q_vector = adapter->q_vector[vector];
+		struct i40e_q_vector *q_vector = &adapter->q_vectors[vector];
 
 		if (q_vector->tx.ring && q_vector->rx.ring) {
 			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
@@ -532,7 +539,7 @@ free_queue_irqs:
 			adapter->msix_entries[vector + NONQ_VECS].vector,
 			NULL);
 		free_irq(adapter->msix_entries[vector + NONQ_VECS].vector,
-			 adapter->q_vector[vector]);
+			 &adapter->q_vectors[vector]);
 	}
 	return err;
 }
@@ -582,7 +589,7 @@ static void i40evf_free_traffic_irqs(struct i40evf_adapter *adapter)
 		irq_set_affinity_hint(adapter->msix_entries[i+1].vector,
 				      NULL);
 		free_irq(adapter->msix_entries[i+1].vector,
-			 adapter->q_vector[i]);
+			 &adapter->q_vectors[i]);
 	}
 }
 
@@ -611,7 +618,7 @@ static void i40evf_configure_tx(struct i40evf_adapter *adapter)
 	int i;
 
 	for (i = 0; i < adapter->num_active_queues; i++)
-		adapter->tx_rings[i]->tail = hw->hw_addr + I40E_QTX_TAIL1(i);
+		adapter->tx_rings[i].tail = hw->hw_addr + I40E_QTX_TAIL1(i);
 }
 
 /**
@@ -656,8 +663,8 @@ static void i40evf_configure_rx(struct i40evf_adapter *adapter)
 	}
 
 	for (i = 0; i < adapter->num_active_queues; i++) {
-		adapter->rx_rings[i]->tail = hw->hw_addr + I40E_QRX_TAIL1(i);
-		adapter->rx_rings[i]->rx_buf_len = rx_buf_len;
+		adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i);
+		adapter->rx_rings[i].rx_buf_len = rx_buf_len;
 	}
 }
 
@@ -954,7 +961,7 @@ static void i40evf_napi_enable_all(struct i40evf_adapter *adapter)
 	for (q_idx = 0; q_idx < q_vectors; q_idx++) {
 		struct napi_struct *napi;
 
-		q_vector = adapter->q_vector[q_idx];
+		q_vector = &adapter->q_vectors[q_idx];
 		napi = &q_vector->napi;
 		napi_enable(napi);
 	}
@@ -971,7 +978,7 @@ static void i40evf_napi_disable_all(struct i40evf_adapter *adapter)
 	int q_vectors = adapter->num_msix_vectors - NONQ_VECS;
 
 	for (q_idx = 0; q_idx < q_vectors; q_idx++) {
-		q_vector = adapter->q_vector[q_idx];
+		q_vector = &adapter->q_vectors[q_idx];
 		napi_disable(&q_vector->napi);
 	}
 }
@@ -992,7 +999,7 @@ static void i40evf_configure(struct i40evf_adapter *adapter)
 	adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_QUEUES;
 
 	for (i = 0; i < adapter->num_active_queues; i++) {
-		struct i40e_ring *ring = adapter->rx_rings[i];
+		struct i40e_ring *ring = &adapter->rx_rings[i];
 
 		i40evf_alloc_rx_buffers_1buf(ring, ring->count);
 		ring->next_to_use = ring->count - 1;
@@ -1112,16 +1119,10 @@ i40evf_acquire_msix_vectors(struct i40evf_adapter *adapter, int vectors)
  **/
 static void i40evf_free_queues(struct i40evf_adapter *adapter)
 {
-	int i;
-
 	if (!adapter->vsi_res)
 		return;
-	for (i = 0; i < adapter->num_active_queues; i++) {
-		if (adapter->tx_rings[i])
-			kfree_rcu(adapter->tx_rings[i], rcu);
-		adapter->tx_rings[i] = NULL;
-		adapter->rx_rings[i] = NULL;
-	}
+	kfree(adapter->tx_rings);
+	kfree(adapter->rx_rings);
 }
 
 /**
@@ -1136,13 +1137,20 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
 {
 	int i;
 
+	adapter->tx_rings = kcalloc(adapter->num_active_queues,
+				    sizeof(struct i40e_ring), GFP_KERNEL);
+	if (!adapter->tx_rings)
+		goto err_out;
+	adapter->rx_rings = kcalloc(adapter->num_active_queues,
+				    sizeof(struct i40e_ring), GFP_KERNEL);
+	if (!adapter->rx_rings)
+		goto err_out;
+
 	for (i = 0; i < adapter->num_active_queues; i++) {
 		struct i40e_ring *tx_ring;
 		struct i40e_ring *rx_ring;
 
-		tx_ring = kzalloc(sizeof(*tx_ring) * 2, GFP_KERNEL);
-		if (!tx_ring)
-			goto err_out;
+		tx_ring = &adapter->tx_rings[i];
 
 		tx_ring->queue_index = i;
 		tx_ring->netdev = adapter->netdev;
@@ -1150,14 +1158,12 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
 		tx_ring->count = adapter->tx_desc_count;
 		if (adapter->flags & I40E_FLAG_WB_ON_ITR_CAPABLE)
 			tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR;
-		adapter->tx_rings[i] = tx_ring;
 
-		rx_ring = &tx_ring[1];
+		rx_ring = &adapter->rx_rings[i];
 		rx_ring->queue_index = i;
 		rx_ring->netdev = adapter->netdev;
 		rx_ring->dev = &adapter->pdev->dev;
 		rx_ring->count = adapter->rx_desc_count;
-		adapter->rx_rings[i] = rx_ring;
 	}
 
 	return 0;
@@ -1207,115 +1213,273 @@ static int i40evf_set_interrupt_capability(struct i40evf_adapter *adapter)
 	err = i40evf_acquire_msix_vectors(adapter, v_budget);
 
 out:
-	adapter->netdev->real_num_tx_queues = pairs;
+	netif_set_real_num_rx_queues(adapter->netdev, pairs);
+	netif_set_real_num_tx_queues(adapter->netdev, pairs);
 	return err;
 }
 
 /**
- * i40e_configure_rss_aq - Prepare for RSS using AQ commands
+ * i40evf_config_rss_aq - Prepare for RSS using AQ commands
  * @vsi: vsi structure
  * @seed: RSS hash seed
+ * @lut: Lookup table
+ * @lut_size: Lookup table size
+ *
+ * Return 0 on success, negative on failure
  **/
-static void i40evf_configure_rss_aq(struct i40e_vsi *vsi, const u8 *seed)
+static int i40evf_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
+				u8 *lut, u16 lut_size)
 {
-	struct i40e_aqc_get_set_rss_key_data rss_key;
 	struct i40evf_adapter *adapter = vsi->back;
 	struct i40e_hw *hw = &adapter->hw;
-	int ret = 0, i;
-	u8 *rss_lut;
+	int ret = 0;
 
 	if (!vsi->id)
-		return;
+		return -EINVAL;
 
 	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
 		dev_err(&adapter->pdev->dev, "Cannot confiure RSS, command %d pending\n",
 			adapter->current_op);
-		return;
+		return -EBUSY;
 	}
 
-	memset(&rss_key, 0, sizeof(rss_key));
-	memcpy(&rss_key, seed, sizeof(rss_key));
+	if (seed) {
+		struct i40e_aqc_get_set_rss_key_data *rss_key =
+			(struct i40e_aqc_get_set_rss_key_data *)seed;
+		ret = i40evf_aq_set_rss_key(hw, vsi->id, rss_key);
+		if (ret) {
+			dev_err(&adapter->pdev->dev, "Cannot set RSS key, err %s aq_err %s\n",
+				i40evf_stat_str(hw, ret),
+				i40evf_aq_str(hw, hw->aq.asq_last_status));
+			return ret;
+		}
+	}
 
-	rss_lut = kzalloc(((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4), GFP_KERNEL);
-	if (!rss_lut)
-		return;
+	if (lut) {
+		ret = i40evf_aq_set_rss_lut(hw, vsi->id, false, lut, lut_size);
+		if (ret) {
+			dev_err(&adapter->pdev->dev,
+				"Cannot set RSS lut, err %s aq_err %s\n",
+				i40evf_stat_str(hw, ret),
+				i40evf_aq_str(hw, hw->aq.asq_last_status));
+			return ret;
+		}
+	}
 
-	/* Populate the LUT with max no. PF queues in round robin fashion */
-	for (i = 0; i <= (I40E_VFQF_HLUT_MAX_INDEX * 4); i++)
-		rss_lut[i] = i % adapter->num_active_queues;
+	return ret;
+}
 
-	ret = i40evf_aq_set_rss_key(hw, vsi->id, &rss_key);
-	if (ret) {
-		dev_err(&adapter->pdev->dev,
-			"Cannot set RSS key, err %s aq_err %s\n",
-			i40evf_stat_str(hw, ret),
-			i40evf_aq_str(hw, hw->aq.asq_last_status));
-		return;
+/**
+ * i40evf_config_rss_reg - Configure RSS keys and lut by writing registers
+ * @vsi: Pointer to vsi structure
+ * @seed: RSS hash seed
+ * @lut: Lookup table
+ * @lut_size: Lookup table size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40evf_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed,
+				 const u8 *lut, u16 lut_size)
+{
+	struct i40evf_adapter *adapter = vsi->back;
+	struct i40e_hw *hw = &adapter->hw;
+	u16 i;
+
+	if (seed) {
+		u32 *seed_dw = (u32 *)seed;
+
+		for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++)
+			wr32(hw, I40E_VFQF_HKEY(i), seed_dw[i]);
 	}
 
-	ret = i40evf_aq_set_rss_lut(hw, vsi->id, false, rss_lut,
-				    (I40E_VFQF_HLUT_MAX_INDEX + 1) * 4);
-	if (ret)
-		dev_err(&adapter->pdev->dev,
-			"Cannot set RSS lut, err %s aq_err %s\n",
-			i40evf_stat_str(hw, ret),
-			i40evf_aq_str(hw, hw->aq.asq_last_status));
+	if (lut) {
+		u32 *lut_dw = (u32 *)lut;
+
+		if (lut_size != I40EVF_HLUT_ARRAY_SIZE)
+			return -EINVAL;
+
+		for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
+			wr32(hw, I40E_VFQF_HLUT(i), lut_dw[i]);
+	}
+	i40e_flush(hw);
+
+	return 0;
 }
 
 /**
- * i40e_configure_rss_reg - Prepare for RSS if used
- * @adapter: board private structure
- * @seed: RSS hash seed
+ * i40evf_get_rss_aq - Get RSS keys and lut by using AQ commands
+ * @vsi: Pointer to vsi structure
+ * @seed: RSS hash seed
+ * @lut: Lookup table
+ * @lut_size: Lookup table size
+ *
+ * Return 0 on success, negative on failure
  **/
-static void i40evf_configure_rss_reg(struct i40evf_adapter *adapter,
-				     const u8 *seed)
+static int i40evf_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
+			     u8 *lut, u16 lut_size)
 {
+	struct i40evf_adapter *adapter = vsi->back;
 	struct i40e_hw *hw = &adapter->hw;
-	u32 *seed_dw = (u32 *)seed;
-	u32 cqueue = 0;
-	u32 lut = 0;
-	int i, j;
+	int ret = 0;
 
-	/* Fill out hash function seed */
-	for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++)
-		wr32(hw, I40E_VFQF_HKEY(i), seed_dw[i]);
-
-	/* Populate the LUT with max no. PF queues in round robin fashion */
-	for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) {
-		lut = 0;
-		for (j = 0; j < 4; j++) {
-			if (cqueue == adapter->num_active_queues)
-				cqueue = 0;
-			lut |= ((cqueue) << (8 * j));
-			cqueue++;
+	if (seed) {
+		ret = i40evf_aq_get_rss_key(hw, vsi->id,
+			(struct i40e_aqc_get_set_rss_key_data *)seed);
+		if (ret) {
+			dev_err(&adapter->pdev->dev,
+				"Cannot get RSS key, err %s aq_err %s\n",
+				i40evf_stat_str(hw, ret),
+				i40evf_aq_str(hw, hw->aq.asq_last_status));
+			return ret;
 		}
-		wr32(hw, I40E_VFQF_HLUT(i), lut);
 	}
-	i40e_flush(hw);
+
+	if (lut) {
+		/* Same table selector as the set path; seed is not a flag */
+		ret = i40evf_aq_get_rss_lut(hw, vsi->id, false, lut, lut_size);
+		if (ret) {
+			dev_err(&adapter->pdev->dev, "Cannot get RSS lut, err %s aq_err %s\n",
+				i40evf_stat_str(hw, ret),
+				i40evf_aq_str(hw, hw->aq.asq_last_status));
+			return ret;
+		}
+	}
+
+	return ret;
 }
 
 /**
- * i40evf_configure_rss - Prepare for RSS
+ * i40evf_get_rss_reg - Get RSS keys and lut by reading registers
+ * @vsi: Pointer to vsi structure
+ * @seed: RSS hash seed
+ * @lut: Lookup table
+ * @lut_size: Lookup table size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40evf_get_rss_reg(struct i40e_vsi *vsi, const u8 *seed,
+			      const u8 *lut, u16 lut_size)
+{
+	struct i40evf_adapter *adapter = vsi->back;
+	struct i40e_hw *hw = &adapter->hw;
+	u16 i;
+
+	if (seed) {
+		u32 *seed_dw = (u32 *)seed;
+
+		for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++)
+			seed_dw[i] = rd32(hw, I40E_VFQF_HKEY(i));
+	}
+
+	if (lut) {
+		u32 *lut_dw = (u32 *)lut;
+
+		if (lut_size != I40EVF_HLUT_ARRAY_SIZE)
+			return -EINVAL;
+
+		for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
+			lut_dw[i] = rd32(hw, I40E_VFQF_HLUT(i));
+	}
+
+	return 0;
+}
+
+/**
+ * i40evf_config_rss - Configure RSS keys and lut
+ * @vsi: Pointer to vsi structure
+ * @seed: RSS hash seed
+ * @lut: Lookup table
+ * @lut_size: Lookup table size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int i40evf_config_rss(struct i40e_vsi *vsi, const u8 *seed,
+		      u8 *lut, u16 lut_size)
+{
+	struct i40evf_adapter *adapter = vsi->back;
+
+	if (RSS_AQ(adapter))
+		return i40evf_config_rss_aq(vsi, seed, lut, lut_size);
+	else
+		return i40evf_config_rss_reg(vsi, seed, lut, lut_size);
+}
+
+/**
+ * i40evf_get_rss - Get RSS keys and lut
+ * @vsi: Pointer to vsi structure
+ * @seed: RSS hash seed
+ * @lut: Lookup table
+ * @lut_size: Lookup table size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int i40evf_get_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, u16 lut_size)
+{
+	struct i40evf_adapter *adapter = vsi->back;
+
+	if (RSS_AQ(adapter))
+		return i40evf_get_rss_aq(vsi, seed, lut, lut_size);
+	else
+		return i40evf_get_rss_reg(vsi, seed, lut, lut_size);
+}
+
+/**
+ * i40evf_fill_rss_lut - Fill the lut with default values
+ * @lut: Lookup table to be filled
+ * @rss_table_size: Lookup table size
+ * @rss_size: Range of queue number for hashing
+ **/
+static void i40evf_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
+{
+	u16 i;
+
+	for (i = 0; i < rss_table_size; i++)
+		lut[i] = i % rss_size;
+}
+
+/**
+ * i40evf_init_rss - Prepare for RSS
  * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
  **/
-static void i40evf_configure_rss(struct i40evf_adapter *adapter)
+static int i40evf_init_rss(struct i40evf_adapter *adapter)
 {
+	struct i40e_vsi *vsi = &adapter->vsi;
 	struct i40e_hw *hw = &adapter->hw;
 	u8 seed[I40EVF_HKEY_ARRAY_SIZE];
 	u64 hena;
-
-	netdev_rss_key_fill((void *)seed, I40EVF_HKEY_ARRAY_SIZE);
+	u8 *lut;
+	int ret;
 
 	/* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */
 	hena = I40E_DEFAULT_RSS_HENA;
 	wr32(hw, I40E_VFQF_HENA(0), (u32)hena);
 	wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32));
 
-	if (RSS_AQ(adapter))
-		i40evf_configure_rss_aq(&adapter->vsi, seed);
+	lut = kzalloc(I40EVF_HLUT_ARRAY_SIZE, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	/* Use user configured lut if there is one, otherwise use default */
+	if (vsi->rss_lut_user)
+		memcpy(lut, vsi->rss_lut_user, I40EVF_HLUT_ARRAY_SIZE);
+	else
+		i40evf_fill_rss_lut(lut, I40EVF_HLUT_ARRAY_SIZE,
+				    adapter->num_active_queues);
+
+	/* Use user configured hash key if there is one, otherwise
+	 * use default.
+	 */
+	if (vsi->rss_hkey_user)
+		memcpy(seed, vsi->rss_hkey_user, I40EVF_HKEY_ARRAY_SIZE);
 	else
-		i40evf_configure_rss_reg(adapter, seed);
+		netdev_rss_key_fill((void *)seed, I40EVF_HKEY_ARRAY_SIZE);
+	ret = i40evf_config_rss(vsi, seed, lut, I40EVF_HLUT_ARRAY_SIZE);
+	kfree(lut);
+
+	return ret;
 }
 
 /**
@@ -1327,21 +1491,22 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter)
  **/
 static int i40evf_alloc_q_vectors(struct i40evf_adapter *adapter)
 {
-	int q_idx, num_q_vectors;
+	int q_idx = 0, num_q_vectors;
 	struct i40e_q_vector *q_vector;
 
 	num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+	adapter->q_vectors = kcalloc(num_q_vectors, sizeof(*q_vector),
+				     GFP_KERNEL);
+	if (!adapter->q_vectors)
+		goto err_out;
 
 	for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
-		q_vector = kzalloc(sizeof(*q_vector), GFP_KERNEL);
-		if (!q_vector)
-			goto err_out;
+		q_vector = &adapter->q_vectors[q_idx];
 		q_vector->adapter = adapter;
 		q_vector->vsi = &adapter->vsi;
 		q_vector->v_idx = q_idx;
 		netif_napi_add(adapter->netdev, &q_vector->napi,
 			       i40evf_napi_poll, NAPI_POLL_WEIGHT);
-		adapter->q_vector[q_idx] = q_vector;
 	}
 
 	return 0;
@@ -1349,11 +1514,10 @@ static int i40evf_alloc_q_vectors(struct i40evf_adapter *adapter)
 err_out:
 	while (q_idx) {
 		q_idx--;
-		q_vector = adapter->q_vector[q_idx];
+		q_vector = &adapter->q_vectors[q_idx];
 		netif_napi_del(&q_vector->napi);
-		kfree(q_vector);
-		adapter->q_vector[q_idx] = NULL;
 	}
+	kfree(adapter->q_vectors);
 	return -ENOMEM;
 }
 
@@ -1374,13 +1538,11 @@ static void i40evf_free_q_vectors(struct i40evf_adapter *adapter)
 	napi_vectors = adapter->num_active_queues;
 
 	for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
-		struct i40e_q_vector *q_vector = adapter->q_vector[q_idx];
-
-		adapter->q_vector[q_idx] = NULL;
+		struct i40e_q_vector *q_vector = &adapter->q_vectors[q_idx];
 		if (q_idx < napi_vectors)
 			netif_napi_del(&q_vector->napi);
-		kfree(q_vector);
 	}
+	kfree(adapter->q_vectors);
 }
 
 /**
@@ -1439,6 +1601,22 @@ err_set_interrupt:
 }
 
 /**
+ * i40evf_clear_rss_config_user - Clear user configurations of RSS
+ * @vsi: Pointer to VSI structure
+ **/
+static void i40evf_clear_rss_config_user(struct i40e_vsi *vsi)
+{
+	if (!vsi)
+		return;
+
+	kfree(vsi->rss_hkey_user);
+	vsi->rss_hkey_user = NULL;
+
+	kfree(vsi->rss_lut_user);
+	vsi->rss_lut_user = NULL;
+}
+
+/**
  * i40evf_watchdog_timer - Periodic call-back timer
  * @data: pointer to adapter disguised as unsigned long
  **/
@@ -1565,7 +1743,7 @@ static void i40evf_watchdog_task(struct work_struct *work)
 		 * PF, so we don't have to set current_op as we will
 		 * not get a response through the ARQ.
 		 */
-		i40evf_configure_rss(adapter);
+		i40evf_init_rss(adapter);
 		adapter->aq_required &= ~I40EVF_FLAG_AQ_CONFIGURE_RSS;
 		goto watchdog_done;
 	}
@@ -1864,9 +2042,12 @@ void i40evf_free_all_tx_resources(struct i40evf_adapter *adapter)
 {
 	int i;
 
+	if (!adapter->tx_rings)
+		return;
+
 	for (i = 0; i < adapter->num_active_queues; i++)
-		if (adapter->tx_rings[i]->desc)
-			i40evf_free_tx_resources(adapter->tx_rings[i]);
+		if (adapter->tx_rings[i].desc)
+			i40evf_free_tx_resources(&adapter->tx_rings[i]);
 }
 
 /**
@@ -1884,8 +2065,8 @@ static int i40evf_setup_all_tx_resources(struct i40evf_adapter *adapter)
 	int i, err = 0;
 
 	for (i = 0; i < adapter->num_active_queues; i++) {
-		adapter->tx_rings[i]->count = adapter->tx_desc_count;
-		err = i40evf_setup_tx_descriptors(adapter->tx_rings[i]);
+		adapter->tx_rings[i].count = adapter->tx_desc_count;
+		err = i40evf_setup_tx_descriptors(&adapter->tx_rings[i]);
 		if (!err)
 			continue;
 		dev_err(&adapter->pdev->dev,
@@ -1911,8 +2092,8 @@ static int i40evf_setup_all_rx_resources(struct i40evf_adapter *adapter)
 	int i, err = 0;
 
 	for (i = 0; i < adapter->num_active_queues; i++) {
-		adapter->rx_rings[i]->count = adapter->rx_desc_count;
-		err = i40evf_setup_rx_descriptors(adapter->rx_rings[i]);
+		adapter->rx_rings[i].count = adapter->rx_desc_count;
+		err = i40evf_setup_rx_descriptors(&adapter->rx_rings[i]);
 		if (!err)
 			continue;
 		dev_err(&adapter->pdev->dev,
@@ -1932,9 +2113,12 @@ void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter)
 {
 	int i;
 
+	if (!adapter->rx_rings)
+		return;
+
 	for (i = 0; i < adapter->num_active_queues; i++)
-		if (adapter->rx_rings[i]->desc)
-			i40evf_free_rx_resources(adapter->rx_rings[i]);
+		if (adapter->rx_rings[i].desc)
+			i40evf_free_rx_resources(&adapter->rx_rings[i]);
 }
 
 /**
@@ -2263,6 +2447,14 @@ static void i40evf_init_task(struct work_struct *work)
 		if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK) {
 			err = i40evf_send_vf_config_msg(adapter);
 			goto err;
+		} else if (err == I40E_ERR_PARAM) {
+			/* We only get ERR_PARAM if the device is in a very bad
+			 * state or if we've been disabled for previous bad
+			 * behavior. Either way, we're done now.
+			 */
+			i40evf_shutdown_adminq(hw);
+			dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n");
+			return;
 		}
 		if (err) {
 			dev_err(&pdev->dev, "Unable to get VF config (%d)\n",
@@ -2313,7 +2505,7 @@ static void i40evf_init_task(struct work_struct *work)
 		    I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
 		adapter->flags |= I40EVF_FLAG_WB_ON_ITR_CAPABLE;
 	if (!RSS_AQ(adapter))
-		i40evf_configure_rss(adapter);
+		i40evf_init_rss(adapter);
 	err = i40evf_request_misc_irq(adapter);
 	if (err)
 		goto err_sw_init;
@@ -2334,7 +2526,6 @@ static void i40evf_init_task(struct work_struct *work)
 	if (netdev->features & NETIF_F_GRO)
 		dev_info(&pdev->dev, "GRO is enabled\n");
 
-	dev_info(&pdev->dev, "%s\n", i40evf_driver_string);
 	adapter->state = __I40EVF_DOWN;
 	set_bit(__I40E_DOWN, &adapter->vsi.state);
 	i40evf_misc_irq_enable(adapter);
@@ -2343,7 +2534,7 @@ static void i40evf_init_task(struct work_struct *work)
 		adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_RSS;
 		mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
 	} else {
-		i40evf_configure_rss(adapter);
+		i40evf_init_rss(adapter);
 	}
 	return;
 restart:
@@ -2438,8 +2629,7 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	pci_set_master(pdev);
 
-	netdev = alloc_etherdev_mq(sizeof(struct i40evf_adapter),
-				   MAX_TX_QUEUES);
+	netdev = alloc_etherdev_mq(sizeof(struct i40evf_adapter), MAX_QUEUES);
 	if (!netdev) {
 		err = -ENOMEM;
 		goto err_alloc_etherdev;
@@ -2626,6 +2816,9 @@ static void i40evf_remove(struct pci_dev *pdev)
 
 	flush_scheduled_work();
 
+	/* Clear user configurations for RSS */
+	i40evf_clear_rss_config_user(&adapter->vsi);
+
 	if (hw->aq.asq.count)
 		i40evf_shutdown_adminq(hw);
 
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 32e620e1eb5c..c1c526283757 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -157,7 +157,9 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter)
 	       I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ |
 	       I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG |
 	       I40E_VIRTCHNL_VF_OFFLOAD_VLAN |
-	       I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR;
+	       I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR |
+	       I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2;
+
 	adapter->current_op = I40E_VIRTCHNL_OP_GET_VF_RESOURCES;
 	adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG;
 	if (PF_IS_V11(adapter))
@@ -242,7 +244,7 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter)
 	adapter->current_op = I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES;
 	len = sizeof(struct i40e_virtchnl_vsi_queue_config_info) +
 		       (sizeof(struct i40e_virtchnl_queue_pair_info) * pairs);
-	vqci = kzalloc(len, GFP_ATOMIC);
+	vqci = kzalloc(len, GFP_KERNEL);
 	if (!vqci)
 		return;
 
@@ -255,19 +257,19 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter)
 	for (i = 0; i < pairs; i++) {
 		vqpi->txq.vsi_id = vqci->vsi_id;
 		vqpi->txq.queue_id = i;
-		vqpi->txq.ring_len = adapter->tx_rings[i]->count;
-		vqpi->txq.dma_ring_addr = adapter->tx_rings[i]->dma;
+		vqpi->txq.ring_len = adapter->tx_rings[i].count;
+		vqpi->txq.dma_ring_addr = adapter->tx_rings[i].dma;
 		vqpi->txq.headwb_enabled = 1;
 		vqpi->txq.dma_headwb_addr = vqpi->txq.dma_ring_addr +
 		    (vqpi->txq.ring_len * sizeof(struct i40e_tx_desc));
 
 		vqpi->rxq.vsi_id = vqci->vsi_id;
 		vqpi->rxq.queue_id = i;
-		vqpi->rxq.ring_len = adapter->rx_rings[i]->count;
-		vqpi->rxq.dma_ring_addr = adapter->rx_rings[i]->dma;
+		vqpi->rxq.ring_len = adapter->rx_rings[i].count;
+		vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma;
 		vqpi->rxq.max_pkt_size = adapter->netdev->mtu
 					+ ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN;
-		vqpi->rxq.databuffer_size = adapter->rx_rings[i]->rx_buf_len;
+		vqpi->rxq.databuffer_size = adapter->rx_rings[i].rx_buf_len;
 		vqpi++;
 	}
 
@@ -353,14 +355,14 @@ void i40evf_map_queues(struct i40evf_adapter *adapter)
 	len = sizeof(struct i40e_virtchnl_irq_map_info) +
 	      (adapter->num_msix_vectors *
 		sizeof(struct i40e_virtchnl_vector_map));
-	vimi = kzalloc(len, GFP_ATOMIC);
+	vimi = kzalloc(len, GFP_KERNEL);
 	if (!vimi)
 		return;
 
 	vimi->num_vectors = adapter->num_msix_vectors;
 	/* Queue vectors first */
 	for (v_idx = 0; v_idx < q_vectors; v_idx++) {
-		q_vector = adapter->q_vector[v_idx];
+		q_vector = adapter->q_vectors + v_idx;
 		vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id;
 		vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS;
 		vimi->vecmap[v_idx].txq_map = q_vector->ring_mask;
@@ -391,6 +393,7 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
 	struct i40e_virtchnl_ether_addr_list *veal;
 	int len, i = 0, count = 0;
 	struct i40evf_mac_filter *f;
+	bool more = false;
 
 	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
@@ -415,10 +418,12 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
 		count = (I40EVF_MAX_AQ_BUF_SIZE -
 			 sizeof(struct i40e_virtchnl_ether_addr_list)) /
 			sizeof(struct i40e_virtchnl_ether_addr);
-		len = I40EVF_MAX_AQ_BUF_SIZE;
+		len = sizeof(struct i40e_virtchnl_ether_addr_list) +
+		      (count * sizeof(struct i40e_virtchnl_ether_addr));
+		more = true;
 	}
 
-	veal = kzalloc(len, GFP_ATOMIC);
+	veal = kzalloc(len, GFP_KERNEL);
 	if (!veal)
 		return;
 
@@ -431,7 +436,8 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
 			f->add = false;
 		}
 	}
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_MAC_FILTER;
+	if (!more)
+		adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_MAC_FILTER;
 	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS,
 			   (u8 *)veal, len);
 	kfree(veal);
@@ -450,6 +456,7 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
 	struct i40e_virtchnl_ether_addr_list *veal;
 	struct i40evf_mac_filter *f, *ftmp;
 	int len, i = 0, count = 0;
+	bool more = false;
 
 	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
@@ -474,9 +481,11 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
 		count = (I40EVF_MAX_AQ_BUF_SIZE -
 			 sizeof(struct i40e_virtchnl_ether_addr_list)) /
 			sizeof(struct i40e_virtchnl_ether_addr);
-		len = I40EVF_MAX_AQ_BUF_SIZE;
+		len = sizeof(struct i40e_virtchnl_ether_addr_list) +
+		      (count * sizeof(struct i40e_virtchnl_ether_addr));
+		more = true;
 	}
-	veal = kzalloc(len, GFP_ATOMIC);
+	veal = kzalloc(len, GFP_KERNEL);
 	if (!veal)
 		return;
 
@@ -490,7 +499,8 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
 			kfree(f);
 		}
 	}
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_MAC_FILTER;
+	if (!more)
+		adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_MAC_FILTER;
 	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS,
 			   (u8 *)veal, len);
 	kfree(veal);
@@ -509,6 +519,7 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter)
 	struct i40e_virtchnl_vlan_filter_list *vvfl;
 	int len, i = 0, count = 0;
 	struct i40evf_vlan_filter *f;
+	bool more = false;
 
 	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
@@ -534,9 +545,11 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter)
 		count = (I40EVF_MAX_AQ_BUF_SIZE -
 			 sizeof(struct i40e_virtchnl_vlan_filter_list)) /
 			sizeof(u16);
-		len = I40EVF_MAX_AQ_BUF_SIZE;
+		len = sizeof(struct i40e_virtchnl_vlan_filter_list) +
+		      (count * sizeof(u16));
+		more = true;
 	}
-	vvfl = kzalloc(len, GFP_ATOMIC);
+	vvfl = kzalloc(len, GFP_KERNEL);
 	if (!vvfl)
 		return;
 
@@ -549,7 +562,8 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter)
 			f->add = false;
 		}
 	}
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
+	if (!more)
+		adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
 	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len);
 	kfree(vvfl);
 }
@@ -567,6 +581,7 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter)
 	struct i40e_virtchnl_vlan_filter_list *vvfl;
 	struct i40evf_vlan_filter *f, *ftmp;
 	int len, i = 0, count = 0;
+	bool more = false;
 
 	if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
@@ -592,9 +607,11 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter)
 		count = (I40EVF_MAX_AQ_BUF_SIZE -
 			 sizeof(struct i40e_virtchnl_vlan_filter_list)) /
 			sizeof(u16);
-		len = I40EVF_MAX_AQ_BUF_SIZE;
+		len = sizeof(struct i40e_virtchnl_vlan_filter_list) +
+		      (count * sizeof(u16));
+		more = true;
 	}
-	vvfl = kzalloc(len, GFP_ATOMIC);
+	vvfl = kzalloc(len, GFP_KERNEL);
 	if (!vvfl)
 		return;
 
@@ -608,7 +625,8 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter)
 			kfree(f);
 		}
 	}
-	adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_VLAN_FILTER;
+	if (!more)
+		adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_VLAN_FILTER;
 	i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_DEL_VLAN, (u8 *)vvfl, len);
 	kfree(vvfl);
 }
@@ -724,9 +742,29 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 		return;
 	}
 	if (v_retval) {
-		dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
-			v_retval, i40evf_stat_str(&adapter->hw, v_retval),
-			v_opcode);
+		switch (v_opcode) {
+		case I40E_VIRTCHNL_OP_ADD_VLAN:
+			dev_err(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n",
+				i40evf_stat_str(&adapter->hw, v_retval));
+			break;
+		case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS:
+			dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n",
+				i40evf_stat_str(&adapter->hw, v_retval));
+			break;
+		case I40E_VIRTCHNL_OP_DEL_VLAN:
+			dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n",
+				i40evf_stat_str(&adapter->hw, v_retval));
+			break;
+		case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS:
+			dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n",
+				i40evf_stat_str(&adapter->hw, v_retval));
+			break;
+		default:
+			dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
+				v_retval,
+				i40evf_stat_str(&adapter->hw, v_retval),
+				v_opcode);
+		}
 	}
 	switch (v_opcode) {
 	case I40E_VIRTCHNL_OP_GET_STATS: {
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index 7a73510e547c..362911d024b5 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -272,6 +272,11 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw)
 			if (ret_val)
 				goto out;
 		}
+		if (phy->id == M88E1543_E_PHY_ID) {
+			ret_val = igb_initialize_M88E1543_phy(hw);
+			if (ret_val)
+				goto out;
+		}
 		break;
 	case IGP03E1000_E_PHY_ID:
 		phy->type = e1000_phy_igp_3;
@@ -294,6 +299,7 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw)
 	case I210_I_PHY_ID:
 		phy->type		= e1000_phy_i210;
 		phy->ops.check_polarity	= igb_check_polarity_m88;
+		phy->ops.get_cfg_done	= igb_get_cfg_done_i210;
 		phy->ops.get_phy_info	= igb_get_phy_info_m88;
 		phy->ops.get_cable_length = igb_get_cable_length_m88_gen2;
 		phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82580;
@@ -925,6 +931,8 @@ static s32 igb_phy_hw_reset_sgmii_82575(struct e1000_hw *hw)
 
 	if (phy->id == M88E1512_E_PHY_ID)
 		ret_val = igb_initialize_M88E1512_phy(hw);
+	if (phy->id == M88E1543_E_PHY_ID)
+		ret_val = igb_initialize_M88E1543_phy(hw);
 out:
 	return ret_val;
 }
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index b1915043bc0c..a61ee9462dd4 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -990,6 +990,7 @@
 #define E1000_M88E1543_PAGE_ADDR	0x16       /* Page Offset Register */
 #define E1000_M88E1543_EEE_CTRL_1	0x0
 #define E1000_M88E1543_EEE_CTRL_1_MS	0x0001     /* EEE Master/Slave */
+#define E1000_M88E1543_FIBER_CTRL	0x0
 #define E1000_EEE_ADV_DEV_I354		7
 #define E1000_EEE_ADV_ADDR_I354		60
 #define E1000_EEE_ADV_100_SUPPORTED	(1 << 1)   /* 100BaseTx EEE Supported */
diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c
index 65d931669f81..29f59c76878a 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.c
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.c
@@ -900,3 +900,30 @@ s32 igb_pll_workaround_i210(struct e1000_hw *hw)
 	wr32(E1000_MDICNFG, mdicnfg);
 	return ret_val;
 }
+
+/**
+ *  igb_get_cfg_done_i210 - Read config done bit
+ *  @hw: pointer to the HW structure
+ *
+ *  Read the management control register for the config done bit for
+ *  completion status.  NOTE: silicon which is EEPROM-less will fail trying
+ *  to read the config done bit, so an error is *ONLY* logged and returns
+ *  0.  If we were to return with error, EEPROM-less silicon
+ *  would not be able to be reset or change link.
+ **/
+s32 igb_get_cfg_done_i210(struct e1000_hw *hw)
+{
+	s32 timeout = PHY_CFG_TIMEOUT;
+	u32 mask = E1000_NVM_CFG_DONE_PORT_0;
+
+	while (timeout) {
+		if (rd32(E1000_EEMNGCTL_I210) & mask)
+			break;
+		usleep_range(1000, 2000);
+		timeout--;
+	}
+	if (!timeout)
+		hw_dbg("MNG configuration cycle has not completed.\n");
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h
index 3442b6357d01..eaa68a50cb3b 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.h
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.h
@@ -34,6 +34,7 @@ s32 igb_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data);
 s32 igb_init_nvm_params_i210(struct e1000_hw *hw);
 bool igb_get_flash_presence_i210(struct e1000_hw *hw);
 s32 igb_pll_workaround_i210(struct e1000_hw *hw);
+s32 igb_get_cfg_done_i210(struct e1000_hw *hw);
 
 #define E1000_STM_OPCODE		0xDB00
 #define E1000_EEPROM_FLASH_SIZE_WORD	0x11
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c
index 23ec28f43f6d..c0df40f2b295 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.c
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.c
@@ -2278,6 +2278,100 @@ out:
 }
 
 /**
+ *  igb_initialize_M88E1543_phy - Initialize M88E1543 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Initialize Marvell 1543 to work correctly with Avoton.
+ **/
+s32 igb_initialize_M88E1543_phy(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+
+	/* Switch to PHY page 0xFF. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FF);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x214B);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2144);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x0C28);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2146);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xB233);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x214D);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xDC0C);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2159);
+	if (ret_val)
+		goto out;
+
+	/* Switch to PHY page 0xFB. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FB);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_3, 0x0C0D);
+	if (ret_val)
+		goto out;
+
+	/* Switch to PHY page 0x12. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x12);
+	if (ret_val)
+		goto out;
+
+	/* Change mode to SGMII-to-Copper */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_MODE, 0x8001);
+	if (ret_val)
+		goto out;
+
+	/* Switch to PHY page 1. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x1);
+	if (ret_val)
+		goto out;
+
+	/* Change mode to 1000BASE-X/SGMII and autoneg enable */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_FIBER_CTRL, 0x9140);
+	if (ret_val)
+		goto out;
+
+	/* Return the PHY to page 0. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0);
+	if (ret_val)
+		goto out;
+
+	ret_val = igb_phy_sw_reset(hw);
+	if (ret_val) {
+		hw_dbg("Error committing the PHY changes\n");
+		return ret_val;
+	}
+
+	/* NOTE(review): comment said msec_delay(1000) but sleep is 1-2 ms */
+	usleep_range(1000, 2000);
+out:
+	return ret_val;
+}
+
+/**
  * igb_power_up_phy_copper - Restore copper link in case of PHY power down
  * @hw: pointer to the HW structure
  *
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h
index 24d55edbb0e3..aa1ae61a61d8 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.h
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.h
@@ -62,6 +62,7 @@ void igb_power_up_phy_copper(struct e1000_hw *hw);
 void igb_power_down_phy_copper(struct e1000_hw *hw);
 s32  igb_phy_init_script_igp3(struct e1000_hw *hw);
 s32  igb_initialize_M88E1512_phy(struct e1000_hw *hw);
+s32  igb_initialize_M88E1543_phy(struct e1000_hw *hw);
 s32  igb_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data);
 s32  igb_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data);
 s32  igb_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data);
diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h
index 4af2870e49f8..21d9d02885cb 100644
--- a/drivers/net/ethernet/intel/igb/e1000_regs.h
+++ b/drivers/net/ethernet/intel/igb/e1000_regs.h
@@ -66,6 +66,7 @@
 #define E1000_PBA      0x01000  /* Packet Buffer Allocation - RW */
 #define E1000_PBS      0x01008  /* Packet Buffer Size */
 #define E1000_EEMNGCTL 0x01010  /* MNG EEprom Control */
+#define E1000_EEMNGCTL_I210 0x12030  /* MNG EEprom Control */
 #define E1000_EEARBC_I210 0x12024  /* EEPROM Auto Read Bus Control */
 #define E1000_EEWR     0x0102C  /* EEPROM Write Register - RW */
 #define E1000_I2CCMD   0x01028  /* SFPI2C Command Register - RW */
@@ -385,8 +386,7 @@ do { \
 #define array_wr32(reg, offset, value) \
 	wr32((reg) + ((offset) << 2), (value))
 
-#define array_rd32(reg, offset) \
-	(readl(hw->hw_addr + reg + ((offset) << 2)))
+#define array_rd32(reg, offset) (igb_rd32(hw, reg + ((offset) << 2)))
 
 /* DMA Coalescing registers */
 #define E1000_PCIEMISC	0x05BB8 /* PCIE misc config register */
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 1a2f1cc44b28..e3cb93bdb21a 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -389,6 +389,8 @@ struct igb_adapter {
 	u16 link_speed;
 	u16 link_duplex;
 
+	u8 __iomem *io_addr; /* Mainly for iounmap use */
+
 	struct work_struct reset_task;
 	struct work_struct watchdog_task;
 	bool fc_autoneg;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index ea7b09887245..7afde455326d 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -946,7 +946,6 @@ static void igb_configure_msix(struct igb_adapter *adapter)
 static int igb_request_msix(struct igb_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
-	struct e1000_hw *hw = &adapter->hw;
 	int i, err = 0, vector = 0, free_vector = 0;
 
 	err = request_irq(adapter->msix_entries[vector].vector,
@@ -959,7 +958,7 @@ static int igb_request_msix(struct igb_adapter *adapter)
 
 		vector++;
 
-		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
+		q_vector->itr_register = adapter->io_addr + E1000_EITR(vector);
 
 		if (q_vector->rx.ring && q_vector->tx.ring)
 			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
@@ -1230,7 +1229,7 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter,
 	q_vector->tx.work_limit = adapter->tx_work_limit;
 
 	/* initialize ITR configuration */
-	q_vector->itr_register = adapter->hw.hw_addr + E1000_EITR(0);
+	q_vector->itr_register = adapter->io_addr + E1000_EITR(0);
 	q_vector->itr_val = IGB_START_ITR;
 
 	/* initialize pointer to rings */
@@ -2294,9 +2293,11 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
 
 	err = -EIO;
-	hw->hw_addr = pci_iomap(pdev, 0, 0);
-	if (!hw->hw_addr)
+	adapter->io_addr = pci_iomap(pdev, 0, 0);
+	if (!adapter->io_addr)
 		goto err_ioremap;
+	/* hw->hw_addr can be altered, we'll use adapter->io_addr for unmap */
+	hw->hw_addr = adapter->io_addr;
 
 	netdev->netdev_ops = &igb_netdev_ops;
 	igb_set_ethtool_ops(netdev);
@@ -2656,7 +2657,7 @@ err_sw_init:
 #ifdef CONFIG_PCI_IOV
 	igb_disable_sriov(pdev);
 #endif
-	pci_iounmap(pdev, hw->hw_addr);
+	pci_iounmap(pdev, adapter->io_addr);
 err_ioremap:
 	free_netdev(netdev);
 err_alloc_etherdev:
@@ -2823,7 +2824,7 @@ static void igb_remove(struct pci_dev *pdev)
 
 	igb_clear_interrupt_scheme(adapter);
 
-	pci_iounmap(pdev, hw->hw_addr);
+	pci_iounmap(pdev, adapter->io_addr);
 	if (hw->flash_address)
 		iounmap(hw->flash_address);
 	pci_release_selected_regions(pdev,
@@ -2856,6 +2857,13 @@ static void igb_probe_vfs(struct igb_adapter *adapter)
 	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
 		return;
 
+	/* Of the below we really only want the effect of getting
+	 * IGB_FLAG_HAS_MSIX set (if available), without which
+	 * igb_enable_sriov() has no effect.
+	 */
+	igb_set_interrupt_capability(adapter, true);
+	igb_reset_interrupt_capability(adapter);
+
 	pci_sriov_set_totalvfs(pdev, 7);
 	igb_enable_sriov(pdev, max_vfs);
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 1d2174526a4c..f4c9a42dafcf 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -139,6 +139,7 @@ enum ixgbe_tx_flags {
 #define IXGBE_X540_VF_DEVICE_ID         0x1515
 
 struct vf_data_storage {
+	struct pci_dev *vfdev;
 	unsigned char vf_mac_addresses[ETH_ALEN];
 	u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES];
 	u16 num_vf_mc_hashes;
@@ -224,6 +225,8 @@ struct ixgbe_rx_queue_stats {
 	u64 csum_err;
 };
 
+#define IXGBE_TS_HDR_LEN 8
+
 enum ixgbe_ring_state_t {
 	__IXGBE_TX_FDIR_INIT_DONE,
 	__IXGBE_TX_XPS_INIT_DONE,
@@ -282,6 +285,8 @@ struct ixgbe_ring {
 	u16 next_to_use;
 	u16 next_to_clean;
 
+	unsigned long last_rx_timestamp;
+
 	union {
 		u16 next_to_alloc;
 		struct {
@@ -587,9 +592,10 @@ static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring)
 
 struct ixgbe_mac_addr {
 	u8 addr[ETH_ALEN];
-	u16 queue;
+	u16 pool;
 	u16 state; /* bitmask */
 };
+
 #define IXGBE_MAC_STATE_DEFAULT		0x1
 #define IXGBE_MAC_STATE_MODIFIED	0x2
 #define IXGBE_MAC_STATE_IN_USE		0x4
@@ -639,6 +645,8 @@ struct ixgbe_adapter {
 #define IXGBE_FLAG_SRIOV_CAPABLE                (u32)(1 << 22)
 #define IXGBE_FLAG_SRIOV_ENABLED                (u32)(1 << 23)
 #define IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE	BIT(24)
+#define IXGBE_FLAG_RX_HWTSTAMP_ENABLED		BIT(25)
+#define IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER	BIT(26)
 
 	u32 flags2;
 #define IXGBE_FLAG2_RSC_CAPABLE                 (u32)(1 << 0)
@@ -656,6 +664,7 @@ struct ixgbe_adapter {
 #ifdef CONFIG_IXGBE_VXLAN
 #define IXGBE_FLAG2_VXLAN_REREG_NEEDED		BIT(12)
 #endif
+#define IXGBE_FLAG2_VLAN_PROMISC		BIT(13)
 
 	/* Tx fast path data */
 	int num_tx_queues;
@@ -755,9 +764,12 @@ struct ixgbe_adapter {
 	unsigned long last_rx_ptp_check;
 	unsigned long last_rx_timestamp;
 	spinlock_t tmreg_lock;
-	struct cyclecounter cc;
-	struct timecounter tc;
+	struct cyclecounter hw_cc;
+	struct timecounter hw_tc;
 	u32 base_incval;
+	u32 tx_hwtstamp_timeouts;
+	u32 rx_hwtstamp_cleared;
+	void (*ptp_setup_sdp)(struct ixgbe_adapter *);
 
 	/* SR-IOV */
 	DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS);
@@ -883,9 +895,10 @@ int ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id,
 void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter);
 #endif
 int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter,
-			 u8 *addr, u16 queue);
+			 const u8 *addr, u16 queue);
 int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter,
-			 u8 *addr, u16 queue);
+			 const u8 *addr, u16 queue);
+void ixgbe_update_pf_promisc_vlvf(struct ixgbe_adapter *adapter, u32 vid);
 void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter);
 netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *, struct ixgbe_adapter *,
 				  struct ixgbe_ring *);
@@ -968,12 +981,33 @@ void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter);
 void ixgbe_ptp_stop(struct ixgbe_adapter *adapter);
 void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter);
 void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter);
-void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb);
+void ixgbe_ptp_rx_pktstamp(struct ixgbe_q_vector *, struct sk_buff *);
+void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *, struct sk_buff *skb);
+static inline void ixgbe_ptp_rx_hwtstamp(struct ixgbe_ring *rx_ring,
+					 union ixgbe_adv_rx_desc *rx_desc,
+					 struct sk_buff *skb)
+{
+	if (unlikely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_TSIP))) {
+		ixgbe_ptp_rx_pktstamp(rx_ring->q_vector, skb);
+		return;
+	}
+
+	if (unlikely(!ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS)))
+		return;
+
+	ixgbe_ptp_rx_rgtstamp(rx_ring->q_vector, skb);
+
+	/* Note when (in jiffies) this ring last handled such a timestamp, so
+	 * the watchdog check can catch one left latched by a dropped packet.
+	 */
+	rx_ring->last_rx_timestamp = jiffies;
+}
+
 int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr);
 int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr);
 void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter);
 void ixgbe_ptp_reset(struct ixgbe_adapter *adapter);
-void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr);
+void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter);
 #ifdef CONFIG_PCI_IOV
 void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter);
 #endif
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
index 65db69b862fb..d8a9fb8a59e2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
@@ -1,7 +1,7 @@
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2014 Intel Corporation.
+  Copyright(c) 1999 - 2015 Intel Corporation.
 
   This program is free software; you can redistribute it and/or modify it
   under the terms and conditions of the GNU General Public License,
@@ -765,13 +765,14 @@ mac_reset_top:
 	ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL) | IXGBE_CTRL_RST;
 	IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
 	IXGBE_WRITE_FLUSH(hw);
+	usleep_range(1000, 1200);
 
 	/* Poll for reset bit to self-clear indicating reset is complete */
 	for (i = 0; i < 10; i++) {
-		udelay(1);
 		ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
 		if (!(ctrl & IXGBE_CTRL_RST))
 			break;
+		udelay(1);
 	}
 	if (ctrl & IXGBE_CTRL_RST) {
 		status = IXGBE_ERR_RESET_FAILED;
@@ -879,11 +880,12 @@ static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
  *  @vlan: VLAN id to write to VLAN filter
  *  @vind: VMDq output index that maps queue to VLAN id in VFTA
  *  @vlan_on: boolean flag to turn on/off VLAN in VFTA
+ *  @vlvf_bypass: boolean flag - unused
  *
  *  Turn on/off specified VLAN in the VLAN filter table.
  **/
 static s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind,
-				bool vlan_on)
+				bool vlan_on, bool vlvf_bypass)
 {
 	u32 regindex;
 	u32 bitindex;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index a39afcf03e2c..fa8d4f40ac2a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -990,13 +990,14 @@ mac_reset_top:
 	ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
 	IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
 	IXGBE_WRITE_FLUSH(hw);
+	usleep_range(1000, 1200);
 
 	/* Poll for reset bit to self-clear indicating reset is complete */
 	for (i = 0; i < 10; i++) {
-		udelay(1);
 		ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
 		if (!(ctrl & IXGBE_CTRL_RST_MASK))
 			break;
+		udelay(1);
 	}
 
 	if (ctrl & IXGBE_CTRL_RST_MASK) {
@@ -1082,12 +1083,16 @@ mac_reset_top:
 
 	/* Add the SAN MAC address to the RAR only if it's a valid address */
 	if (is_valid_ether_addr(hw->mac.san_addr)) {
-		hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
-				    hw->mac.san_addr, 0, IXGBE_RAH_AV);
-
 		/* Save the SAN MAC RAR index */
 		hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
 
+		hw->mac.ops.set_rar(hw, hw->mac.san_mac_rar_index,
+				    hw->mac.san_addr, 0, IXGBE_RAH_AV);
+
+		/* clear VMDq pool/queue selection for this RAR */
+		hw->mac.ops.clear_vmdq(hw, hw->mac.san_mac_rar_index,
+				       IXGBE_CLEAR_VMDQ_ALL);
+
 		/* Reserve the last RAR for the SAN MAC address */
 		hw->mac.num_rar_entries--;
 	}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index ce61b36b94f1..64045053e874 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -1,7 +1,7 @@
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2014 Intel Corporation.
+  Copyright(c) 1999 - 2015 Intel Corporation.
 
   This program is free software; you can redistribute it and/or modify it
   under the terms and conditions of the GNU General Public License,
@@ -1884,10 +1884,11 @@ s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw)
 		hw_dbg(hw, " New MAC Addr =%pM\n", hw->mac.addr);
 
 		hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
-
-		/*  clear VMDq pool/queue selection for RAR 0 */
-		hw->mac.ops.clear_vmdq(hw, 0, IXGBE_CLEAR_VMDQ_ALL);
 	}
+
+	/*  clear VMDq pool/queue selection for RAR 0 */
+	hw->mac.ops.clear_vmdq(hw, 0, IXGBE_CLEAR_VMDQ_ALL);
+
 	hw->addr_ctrl.overflow_promisc = 0;
 
 	hw->addr_ctrl.rar_used_count = 1;
@@ -2454,6 +2455,17 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw)
 	/* Always set this bit to ensure any future transactions are blocked */
 	IXGBE_WRITE_REG(hw, IXGBE_CTRL, IXGBE_CTRL_GIO_DIS);
 
+	/* Poll for bit to read as set */
+	for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) {
+		if (IXGBE_READ_REG(hw, IXGBE_CTRL) & IXGBE_CTRL_GIO_DIS)
+			break;
+		usleep_range(100, 120);
+	}
+	if (i >= IXGBE_PCI_MASTER_DISABLE_TIMEOUT) {
+		hw_dbg(hw, "GIO disable did not set - requesting resets\n");
+		goto gio_disable_fail;
+	}
+
 	/* Exit if master requests are blocked */
 	if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO) ||
 	    ixgbe_removed(hw->hw_addr))
@@ -2475,6 +2487,7 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw)
 	 * again to clear out any effects they may have had on our device.
 	 */
 	hw_dbg(hw, "GIO Master Disable bit didn't clear - requesting resets\n");
+gio_disable_fail:
 	hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
 
 	if (hw->mac.type >= ixgbe_mac_X550)
@@ -2987,43 +3000,44 @@ s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw)
  *  return the VLVF index where this VLAN id should be placed
  *
  **/
-static s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan)
+static s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan, bool vlvf_bypass)
 {
-	u32 bits = 0;
-	u32 first_empty_slot = 0;
-	s32 regindex;
+	s32 regindex, first_empty_slot;
+	u32 bits;
 
 	/* short cut the special case */
 	if (vlan == 0)
 		return 0;
 
-	/*
-	  * Search for the vlan id in the VLVF entries. Save off the first empty
-	  * slot found along the way
-	  */
-	for (regindex = 1; regindex < IXGBE_VLVF_ENTRIES; regindex++) {
+	/* if vlvf_bypass is set we don't want to use an empty slot, we
+	 * will simply bypass the VLVF if there are no entries present in the
+	 * VLVF that contain our VLAN
+	 */
+	first_empty_slot = vlvf_bypass ? IXGBE_ERR_NO_SPACE : 0;
+
+	/* add VLAN enable bit for comparison */
+	vlan |= IXGBE_VLVF_VIEN;
+
+	/* Search for the vlan id in the VLVF entries. Save off the first empty
+	 * slot found along the way.
+	 *
+	 * pre-decrement loop covering (IXGBE_VLVF_ENTRIES - 1) .. 1
+	 */
+	for (regindex = IXGBE_VLVF_ENTRIES; --regindex;) {
 		bits = IXGBE_READ_REG(hw, IXGBE_VLVF(regindex));
-		if (!bits && !(first_empty_slot))
+		if (bits == vlan)
+			return regindex;
+		if (!first_empty_slot && !bits)
 			first_empty_slot = regindex;
-		else if ((bits & 0x0FFF) == vlan)
-			break;
 	}
 
-	/*
-	  * If regindex is less than IXGBE_VLVF_ENTRIES, then we found the vlan
-	  * in the VLVF. Else use the first empty VLVF register for this
-	  * vlan id.
-	  */
-	if (regindex >= IXGBE_VLVF_ENTRIES) {
-		if (first_empty_slot)
-			regindex = first_empty_slot;
-		else {
-			hw_dbg(hw, "No space in VLVF.\n");
-			regindex = IXGBE_ERR_NO_SPACE;
-		}
-	}
+	/* If we are here then we didn't find the VLAN.  Return first empty
+	 * slot we found during our search, else error.
+	 */
+	if (!first_empty_slot)
+		hw_dbg(hw, "No space in VLVF.\n");
 
-	return regindex;
+	return first_empty_slot ? : IXGBE_ERR_NO_SPACE;
 }
 
 /**
@@ -3032,21 +3046,17 @@ static s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan)
  *  @vlan: VLAN id to write to VLAN filter
  *  @vind: VMDq output index that maps queue to VLAN id in VFVFB
  *  @vlan_on: boolean flag to turn on/off VLAN in VFVF
+ *  @vlvf_bypass: if set, skip the VLVF update when no entry holds this VLAN
  *
  *  Turn on/off specified VLAN in the VLAN filter table.
  **/
 s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
-			   bool vlan_on)
+			   bool vlan_on, bool vlvf_bypass)
 {
-	s32 regindex;
-	u32 bitindex;
-	u32 vfta;
-	u32 bits;
-	u32 vt;
-	u32 targetbit;
-	bool vfta_changed = false;
+	u32 regidx, vfta_delta, vfta, bits;
+	s32 vlvf_index;
 
-	if (vlan > 4095)
+	if ((vlan > 4095) || (vind > 63))
 		return IXGBE_ERR_PARAM;
 
 	/*
@@ -3061,22 +3071,16 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
 	 *    bits[11-5]: which register
 	 *    bits[4-0]:  which bit in the register
 	 */
-	regindex = (vlan >> 5) & 0x7F;
-	bitindex = vlan & 0x1F;
-	targetbit = (1 << bitindex);
-	vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex));
-
-	if (vlan_on) {
-		if (!(vfta & targetbit)) {
-			vfta |= targetbit;
-			vfta_changed = true;
-		}
-	} else {
-		if ((vfta & targetbit)) {
-			vfta &= ~targetbit;
-			vfta_changed = true;
-		}
-	}
+	regidx = vlan / 32;
+	vfta_delta = 1 << (vlan % 32);
+	vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regidx));
+
+	/* vfta_delta represents the difference between the current value
+	 * of vfta and the value we want in the register.  Since the diff
+	 * is an XOR mask we can just update vfta using an XOR.
+	 */
+	vfta_delta &= vlan_on ? ~vfta : vfta;
+	vfta ^= vfta_delta;
 
 	/* Part 2
 	 * If VT Mode is set
@@ -3086,85 +3090,67 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
 	 *   Or !vlan_on
 	 *     clear the pool bit and possibly the vind
 	 */
-	vt = IXGBE_READ_REG(hw, IXGBE_VT_CTL);
-	if (vt & IXGBE_VT_CTL_VT_ENABLE) {
-		s32 vlvf_index;
-
-		vlvf_index = ixgbe_find_vlvf_slot(hw, vlan);
-		if (vlvf_index < 0)
-			return vlvf_index;
-
-		if (vlan_on) {
-			/* set the pool bit */
-			if (vind < 32) {
-				bits = IXGBE_READ_REG(hw,
-						IXGBE_VLVFB(vlvf_index*2));
-				bits |= (1 << vind);
-				IXGBE_WRITE_REG(hw,
-						IXGBE_VLVFB(vlvf_index*2),
-						bits);
-			} else {
-				bits = IXGBE_READ_REG(hw,
-						IXGBE_VLVFB((vlvf_index*2)+1));
-				bits |= (1 << (vind-32));
-				IXGBE_WRITE_REG(hw,
-						IXGBE_VLVFB((vlvf_index*2)+1),
-						bits);
-			}
-		} else {
-			/* clear the pool bit */
-			if (vind < 32) {
-				bits = IXGBE_READ_REG(hw,
-						IXGBE_VLVFB(vlvf_index*2));
-				bits &= ~(1 << vind);
-				IXGBE_WRITE_REG(hw,
-						IXGBE_VLVFB(vlvf_index*2),
-						bits);
-				bits |= IXGBE_READ_REG(hw,
-						IXGBE_VLVFB((vlvf_index*2)+1));
-			} else {
-				bits = IXGBE_READ_REG(hw,
-						IXGBE_VLVFB((vlvf_index*2)+1));
-				bits &= ~(1 << (vind-32));
-				IXGBE_WRITE_REG(hw,
-						IXGBE_VLVFB((vlvf_index*2)+1),
-						bits);
-				bits |= IXGBE_READ_REG(hw,
-						IXGBE_VLVFB(vlvf_index*2));
-			}
-		}
+	if (!(IXGBE_READ_REG(hw, IXGBE_VT_CTL) & IXGBE_VT_CTL_VT_ENABLE))
+		goto vfta_update;
+
+	vlvf_index = ixgbe_find_vlvf_slot(hw, vlan, vlvf_bypass);
+	if (vlvf_index < 0) {
+		if (vlvf_bypass)
+			goto vfta_update;
+		return vlvf_index;
+	}
 
-		/*
-		 * If there are still bits set in the VLVFB registers
-		 * for the VLAN ID indicated we need to see if the
-		 * caller is requesting that we clear the VFTA entry bit.
-		 * If the caller has requested that we clear the VFTA
-		 * entry bit but there are still pools/VFs using this VLAN
-		 * ID entry then ignore the request.  We're not worried
-		 * about the case where we're turning the VFTA VLAN ID
-		 * entry bit on, only when requested to turn it off as
-		 * there may be multiple pools and/or VFs using the
-		 * VLAN ID entry.  In that case we cannot clear the
-		 * VFTA bit until all pools/VFs using that VLAN ID have also
-		 * been cleared.  This will be indicated by "bits" being
-		 * zero.
+	bits = IXGBE_READ_REG(hw, IXGBE_VLVFB(vlvf_index * 2 + vind / 32));
+
+	/* set the pool bit */
+	bits |= 1 << (vind % 32);
+	if (vlan_on)
+		goto vlvf_update;
+
+	/* clear the pool bit */
+	bits ^= 1 << (vind % 32);
+
+	if (!bits &&
+	    !IXGBE_READ_REG(hw, IXGBE_VLVFB(vlvf_index * 2 + 1 - vind / 32))) {
+		/* Clear VFTA first, then disable VLVF.  Otherwise
+		 * we run the risk of stray packets leaking into
+		 * the PF via the default pool
 		 */
-		if (bits) {
-			IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index),
-					(IXGBE_VLVF_VIEN | vlan));
-			if (!vlan_on) {
-				/* someone wants to clear the vfta entry
-				 * but some pools/VFs are still using it.
-				 * Ignore it. */
-				vfta_changed = false;
-			}
-		} else {
-			IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), 0);
-		}
+		if (vfta_delta)
+			IXGBE_WRITE_REG(hw, IXGBE_VFTA(regidx), vfta);
+
+		/* disable VLVF and clear remaining bit from pool */
+		IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_VLVFB(vlvf_index * 2 + vind / 32), 0);
+
+		return 0;
 	}
 
-	if (vfta_changed)
-		IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), vfta);
+	/* If there are still bits set in the VLVFB registers
+	 * for the VLAN ID indicated we need to see if the
+	 * caller is requesting that we clear the VFTA entry bit.
+	 * If the caller has requested that we clear the VFTA
+	 * entry bit but there are still pools/VFs using this VLAN
+	 * ID entry then ignore the request.  We're not worried
+	 * about the case where we're turning the VFTA VLAN ID
+	 * entry bit on, only when requested to turn it off as
+	 * there may be multiple pools and/or VFs using the
+	 * VLAN ID entry.  In that case we cannot clear the
+	 * VFTA bit until all pools/VFs using that VLAN ID have also
+	 * been cleared.  This will be indicated by "bits" being
+	 * zero.
+	 */
+	vfta_delta = 0;
+
+vlvf_update:
+	/* record pool change and enable VLAN ID if not already enabled */
+	IXGBE_WRITE_REG(hw, IXGBE_VLVFB(vlvf_index * 2 + vind / 32), bits);
+	IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), IXGBE_VLVF_VIEN | vlan);
+
+vfta_update:
+	/* Update VFTA now that we are ready for traffic */
+	if (vfta_delta)
+		IXGBE_WRITE_REG(hw, IXGBE_VFTA(regidx), vfta);
 
 	return 0;
 }
@@ -3184,8 +3170,8 @@ s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw)
 
 	for (offset = 0; offset < IXGBE_VLVF_ENTRIES; offset++) {
 		IXGBE_WRITE_REG(hw, IXGBE_VLVF(offset), 0);
-		IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset*2), 0);
-		IXGBE_WRITE_REG(hw, IXGBE_VLVFB((offset*2)+1), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset * 2), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset * 2 + 1), 0);
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
index a0044e4a8b90..2b9563137fd8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
@@ -92,7 +92,7 @@ s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq);
 s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
 s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw);
 s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan,
-			   u32 vind, bool vlan_on);
+			   u32 vind, bool vlan_on, bool vlvf_bypass);
 s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw);
 s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw,
 				 ixgbe_link_speed *speed,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index d681273bd39d..1ed4c9add00d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -166,8 +166,6 @@ static int ixgbe_get_settings(struct net_device *netdev,
 	/* set the supported link speeds */
 	if (supported_link & IXGBE_LINK_SPEED_10GB_FULL)
 		ecmd->supported |= SUPPORTED_10000baseT_Full;
-	if (supported_link & IXGBE_LINK_SPEED_2_5GB_FULL)
-		ecmd->supported |= SUPPORTED_2500baseX_Full;
 	if (supported_link & IXGBE_LINK_SPEED_1GB_FULL)
 		ecmd->supported |= SUPPORTED_1000baseT_Full;
 	if (supported_link & IXGBE_LINK_SPEED_100_FULL)
@@ -179,8 +177,6 @@ static int ixgbe_get_settings(struct net_device *netdev,
 			ecmd->advertising |= ADVERTISED_100baseT_Full;
 		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL)
 			ecmd->advertising |= ADVERTISED_10000baseT_Full;
-		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_2_5GB_FULL)
-			ecmd->advertising |= ADVERTISED_2500baseX_Full;
 		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL)
 			ecmd->advertising |= ADVERTISED_1000baseT_Full;
 	} else {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
index 631c603fc966..5f988703e1b7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
@@ -620,8 +620,7 @@ static void ixgbe_fcoe_dma_pool_free(struct ixgbe_fcoe *fcoe, unsigned int cpu)
 	struct ixgbe_fcoe_ddp_pool *ddp_pool;
 
 	ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
-	if (ddp_pool->pool)
-		dma_pool_destroy(ddp_pool->pool);
+	dma_pool_destroy(ddp_pool->pool);
 	ddp_pool->pool = NULL;
 }
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 4089d776d01a..66c64a376719 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -65,9 +65,6 @@
 #include "ixgbe_common.h"
 #include "ixgbe_dcb_82599.h"
 #include "ixgbe_sriov.h"
-#ifdef CONFIG_IXGBE_VXLAN
-#include <net/vxlan.h>
-#endif
 
 char ixgbe_driver_name[] = "ixgbe";
 static const char ixgbe_driver_string[] =
@@ -175,6 +172,8 @@ MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 
+static struct workqueue_struct *ixgbe_wq;
+
 static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev);
 
 static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter,
@@ -316,7 +315,7 @@ static void ixgbe_service_event_schedule(struct ixgbe_adapter *adapter)
 	if (!test_bit(__IXGBE_DOWN, &adapter->state) &&
 	    !test_bit(__IXGBE_REMOVING, &adapter->state) &&
 	    !test_and_set_bit(__IXGBE_SERVICE_SCHED, &adapter->state))
-		schedule_work(&adapter->service_task);
+		queue_work(ixgbe_wq, &adapter->service_task);
 }
 
 static void ixgbe_remove_adapter(struct ixgbe_hw *hw)
@@ -1635,6 +1634,7 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
 				     struct sk_buff *skb)
 {
 	struct net_device *dev = rx_ring->netdev;
+	u32 flags = rx_ring->q_vector->adapter->flags;
 
 	ixgbe_update_rsc_stats(rx_ring, skb);
 
@@ -1642,8 +1642,8 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
 
 	ixgbe_rx_checksum(rx_ring, rx_desc, skb);
 
-	if (unlikely(ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS)))
-		ixgbe_ptp_rx_hwtstamp(rx_ring->q_vector->adapter, skb);
+	if (unlikely(flags & IXGBE_FLAG_RX_HWTSTAMP_ENABLED))
+		ixgbe_ptp_rx_hwtstamp(rx_ring, rx_desc, skb);
 
 	if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
 	    ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) {
@@ -2741,7 +2741,7 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data)
 	ixgbe_check_fan_failure(adapter, eicr);
 
 	if (unlikely(eicr & IXGBE_EICR_TIMESYNC))
-		ixgbe_ptp_check_pps_event(adapter, eicr);
+		ixgbe_ptp_check_pps_event(adapter);
 
 	/* re-enable the original interrupt state, no lsc, no queues */
 	if (!test_bit(__IXGBE_DOWN, &adapter->state))
@@ -2757,7 +2757,7 @@ static irqreturn_t ixgbe_msix_clean_rings(int irq, void *data)
 	/* EIAM disabled interrupts (on this vector) for us */
 
 	if (q_vector->rx.ring || q_vector->tx.ring)
-		napi_schedule(&q_vector->napi);
+		napi_schedule_irqoff(&q_vector->napi);
 
 	return IRQ_HANDLED;
 }
@@ -2786,7 +2786,8 @@ int ixgbe_poll(struct napi_struct *napi, int budget)
 	ixgbe_for_each_ring(ring, q_vector->tx)
 		clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring);
 
-	if (!ixgbe_qv_lock_napi(q_vector))
+	/* Exit if we are called by netpoll or busy polling is active */
+	if ((budget <= 0) || !ixgbe_qv_lock_napi(q_vector))
 		return budget;
 
 	/* attempt to distribute budget to each queue fairly, but don't allow
@@ -2947,10 +2948,10 @@ static irqreturn_t ixgbe_intr(int irq, void *data)
 
 	ixgbe_check_fan_failure(adapter, eicr);
 	if (unlikely(eicr & IXGBE_EICR_TIMESYNC))
-		ixgbe_ptp_check_pps_event(adapter, eicr);
+		ixgbe_ptp_check_pps_event(adapter);
 
 	/* would disable interrupts here but EIAM disabled it */
-	napi_schedule(&q_vector->napi);
+	napi_schedule_irqoff(&q_vector->napi);
 
 	/*
 	 * re-enable link(maybe) and non-queue interrupts, no flush.
@@ -3315,8 +3316,7 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter,
 }
 
 /**
- * Return a number of entries in the RSS indirection table
- *
+ * ixgbe_rss_indir_tbl_entries - Return RSS indirection table entries
  * @adapter: device handle
  *
  *  - 82598/82599/X540:     128
@@ -3334,8 +3334,7 @@ u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter)
 }
 
 /**
- * Write the RETA table to HW
- *
+ * ixgbe_store_reta - Write the RETA table to HW
  * @adapter: device handle
  *
  * Write the RSS redirection table stored in adapter.rss_indir_tbl[] to HW.
@@ -3374,8 +3373,7 @@ void ixgbe_store_reta(struct ixgbe_adapter *adapter)
 }
 
 /**
- * Write the RETA table to HW (for x550 devices in SRIOV mode)
- *
+ * ixgbe_store_vfreta - Write the RETA table to HW (x550 devices in SRIOV mode)
  * @adapter: device handle
  *
  * Write the RSS redirection table stored in adapter.rss_indir_tbl[] to HW.
@@ -3704,6 +3702,9 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
 	/* Map PF MAC address in RAR Entry 0 to first pool following VFs */
 	hw->mac.ops.set_vmdq(hw, 0, VMDQ_P(0));
 
+	/* clear VLAN promisc flag so VFTA will be updated if necessary */
+	adapter->flags2 &= ~IXGBE_FLAG2_VLAN_PROMISC;
+
 	/*
 	 * Set up VF register offsets for selected VT Mode,
 	 * i.e. 32 or 64 VFs for SR-IOV
@@ -3901,12 +3902,56 @@ static int ixgbe_vlan_rx_add_vid(struct net_device *netdev,
 	struct ixgbe_hw *hw = &adapter->hw;
 
 	/* add VID to filter table */
-	hw->mac.ops.set_vfta(&adapter->hw, vid, VMDQ_P(0), true);
+	hw->mac.ops.set_vfta(&adapter->hw, vid, VMDQ_P(0), true, true);
 	set_bit(vid, adapter->active_vlans);
 
 	return 0;
 }
 
+static int ixgbe_find_vlvf_entry(struct ixgbe_hw *hw, u32 vlan)
+{
+	u32 vlvf;
+	int idx;
+
+	/* VLAN 0 is special-cased: report index 0 without touching HW */
+	if (vlan == 0)
+		return 0;
+
+	/* Scan VLVF entries from the top down to 1; idx is 0 if none match */
+	for (idx = IXGBE_VLVF_ENTRIES; --idx;) {
+		vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(idx));
+		if ((vlvf & VLAN_VID_MASK) == vlan)
+			break;
+	}
+
+	return idx;
+}
+
+void ixgbe_update_pf_promisc_vlvf(struct ixgbe_adapter *adapter, u32 vid)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 bits, word;
+	int idx;
+
+	idx = ixgbe_find_vlvf_entry(hw, vid);
+	if (!idx)
+		return;
+
+	/* Mask the PF's own bit out of its VLVFB word to see whether any
+	 * other pool is still set for this VLAN filter entry.
+	 */
+	word = idx * 2 + (VMDQ_P(0) / 32);
+	bits = ~(1U << (VMDQ_P(0) % 32));
+	bits &= IXGBE_READ_REG(hw, IXGBE_VLVFB(word));
+
+	/* Disable the filter so this falls into the default pool. */
+	if (!bits && !IXGBE_READ_REG(hw, IXGBE_VLVFB(word ^ 1))) {
+		if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
+			IXGBE_WRITE_REG(hw, IXGBE_VLVFB(word), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_VLVF(idx), 0);
+	}
+}
+
 static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev,
 				  __be16 proto, u16 vid)
 {
@@ -3914,7 +3959,11 @@ static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev,
 	struct ixgbe_hw *hw = &adapter->hw;
 
 	/* remove VID from filter table */
-	hw->mac.ops.set_vfta(&adapter->hw, vid, VMDQ_P(0), false);
+	if (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC)
+		ixgbe_update_pf_promisc_vlvf(adapter, vid);
+	else
+		hw->mac.ops.set_vfta(hw, vid, VMDQ_P(0), false, true);
+
 	clear_bit(vid, adapter->active_vlans);
 
 	return 0;
@@ -3992,6 +4041,129 @@ static void ixgbe_vlan_strip_enable(struct ixgbe_adapter *adapter)
 	}
 }
 
+static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vlnctrl, i;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	default:
+		if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED)
+			break;
+		/* fall through */
+	case ixgbe_mac_82598EB:
+		/* 82598 or VMDq off: just disable VLAN filtering */
+		vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+		vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN);
+		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+		return;
+	}
+
+	/* We are already in VLAN promisc, nothing to do */
+	if (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC)
+		return;
+
+	/* Set flag so we don't redo unnecessary work */
+	adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC;
+
+	/* Add PF to the pool bitmap of every VLVF entry except entry 0 */
+	for (i = IXGBE_VLVF_ENTRIES; --i;) {
+		u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32);
+		u32 vlvfb = IXGBE_READ_REG(hw, reg_offset);
+
+		vlvfb |= 1U << (VMDQ_P(0) % 32);
+		IXGBE_WRITE_REG(hw, reg_offset, vlvfb);
+	}
+
+	/* Set all bits in the VLAN filter table array */
+	for (i = hw->mac.vft_size; i--;)
+		IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), ~0U);
+}
+
+#define VFTA_BLOCK_SIZE 8
+static void ixgbe_scrub_vfta(struct ixgbe_adapter *adapter, u32 vfta_offset)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vfta[VFTA_BLOCK_SIZE] = { 0 };
+	u32 vid_start = vfta_offset * 32;
+	u32 vid_end = vid_start + (VFTA_BLOCK_SIZE * 32);
+	u32 i, vid, word, bits;
+
+	for (i = IXGBE_VLVF_ENTRIES; --i;) {
+		u32 vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(i));
+
+		/* pull VLAN ID from VLVF */
+		vid = vlvf & VLAN_VID_MASK;
+
+		/* only concern ourselves with this block's VLAN ID range */
+		if (vid < vid_start || vid >= vid_end)
+			continue;
+
+		if (vlvf) {
+			/* record VLAN ID in VFTA */
+			vfta[(vid - vid_start) / 32] |= 1U << (vid % 32);
+
+			/* if PF is part of this then continue */
+			if (test_bit(vid, adapter->active_vlans))
+				continue;
+		}
+
+		/* remove PF from the pool */
+		word = i * 2 + VMDQ_P(0) / 32;
+		bits = ~(1U << (VMDQ_P(0) % 32));
+		bits &= IXGBE_READ_REG(hw, IXGBE_VLVFB(word));
+		IXGBE_WRITE_REG(hw, IXGBE_VLVFB(word), bits);
+	}
+
+	/* extract values from active_vlans and write back to VFTA */
+	for (i = VFTA_BLOCK_SIZE; i--;) {
+		vid = (vfta_offset + i) * 32;
+		word = vid / BITS_PER_LONG;
+		bits = vid % BITS_PER_LONG;
+
+		vfta[i] |= adapter->active_vlans[word] >> bits;
+
+		IXGBE_WRITE_REG(hw, IXGBE_VFTA(vfta_offset + i), vfta[i]);
+	}
+}
+
+static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vlnctrl, i;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	default:
+		if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED)
+			break;
+		/* fall through */
+	case ixgbe_mac_82598EB:
+		vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+		vlnctrl &= ~IXGBE_VLNCTRL_CFIEN;
+		vlnctrl |= IXGBE_VLNCTRL_VFE;
+		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+		return;
+	}
+
+	/* We are not in VLAN promisc, nothing to do */
+	if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
+		return;
+
+	/* Clear flag so we don't redo unnecessary work */
+	adapter->flags2 &= ~IXGBE_FLAG2_VLAN_PROMISC;
+
+	for (i = 0; i < hw->mac.vft_size; i += VFTA_BLOCK_SIZE)
+		ixgbe_scrub_vfta(adapter, i);
+}
+
 static void ixgbe_restore_vlan(struct ixgbe_adapter *adapter)
 {
 	u16 vid;
@@ -4034,124 +4206,156 @@ static int ixgbe_write_mc_addr_list(struct net_device *netdev)
 #ifdef CONFIG_PCI_IOV
 void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter)
 {
+	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
 	struct ixgbe_hw *hw = &adapter->hw;
 	int i;
-	for (i = 0; i < hw->mac.num_rar_entries; i++) {
-		if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE)
-			hw->mac.ops.set_rar(hw, i, adapter->mac_table[i].addr,
-					    adapter->mac_table[i].queue,
+
+	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+		mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED;
+
+		if (mac_table->state & IXGBE_MAC_STATE_IN_USE)
+			hw->mac.ops.set_rar(hw, i,
+					    mac_table->addr,
+					    mac_table->pool,
 					    IXGBE_RAH_AV);
 		else
 			hw->mac.ops.clear_rar(hw, i);
-
-		adapter->mac_table[i].state &= ~(IXGBE_MAC_STATE_MODIFIED);
 	}
 }
-#endif
 
+#endif
 static void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter)
 {
+	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
 	struct ixgbe_hw *hw = &adapter->hw;
 	int i;
-	for (i = 0; i < hw->mac.num_rar_entries; i++) {
-		if (adapter->mac_table[i].state & IXGBE_MAC_STATE_MODIFIED) {
-			if (adapter->mac_table[i].state &
-			    IXGBE_MAC_STATE_IN_USE)
-				hw->mac.ops.set_rar(hw, i,
-						adapter->mac_table[i].addr,
-						adapter->mac_table[i].queue,
-						IXGBE_RAH_AV);
-			else
-				hw->mac.ops.clear_rar(hw, i);
 
-			adapter->mac_table[i].state &=
-						~(IXGBE_MAC_STATE_MODIFIED);
-		}
+	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+		if (!(mac_table->state & IXGBE_MAC_STATE_MODIFIED))
+			continue;
+
+		mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED;
+
+		if (mac_table->state & IXGBE_MAC_STATE_IN_USE)
+			hw->mac.ops.set_rar(hw, i,
+					    mac_table->addr,
+					    mac_table->pool,
+					    IXGBE_RAH_AV);
+		else
+			hw->mac.ops.clear_rar(hw, i);
 	}
 }
 
 static void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter)
 {
-	int i;
+	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
 	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
 
-	for (i = 0; i < hw->mac.num_rar_entries; i++) {
-		adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED;
-		adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE;
-		eth_zero_addr(adapter->mac_table[i].addr);
-		adapter->mac_table[i].queue = 0;
+	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+		mac_table->state |= IXGBE_MAC_STATE_MODIFIED;
+		mac_table->state &= ~IXGBE_MAC_STATE_IN_USE;
 	}
+
 	ixgbe_sync_mac_table(adapter);
 }
 
-static int ixgbe_available_rars(struct ixgbe_adapter *adapter)
+static int ixgbe_available_rars(struct ixgbe_adapter *adapter, u16 pool)
 {
+	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
 	struct ixgbe_hw *hw = &adapter->hw;
 	int i, count = 0;
 
-	for (i = 0; i < hw->mac.num_rar_entries; i++) {
-		if (adapter->mac_table[i].state == 0)
-			count++;
+	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+		/* do not count default RAR as available */
+		if (mac_table->state & IXGBE_MAC_STATE_DEFAULT)
+			continue;
+
+		/* only count unused and addresses that belong to us */
+		if (mac_table->state & IXGBE_MAC_STATE_IN_USE) {
+			if (mac_table->pool != pool)
+				continue;
+		}
+
+		count++;
 	}
+
 	return count;
 }
 
 /* this function destroys the first RAR entry */
-static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter,
-					 u8 *addr)
+static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter)
 {
+	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
 	struct ixgbe_hw *hw = &adapter->hw;
 
-	memcpy(&adapter->mac_table[0].addr, addr, ETH_ALEN);
-	adapter->mac_table[0].queue = VMDQ_P(0);
-	adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT |
-				       IXGBE_MAC_STATE_IN_USE);
-	hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr,
-			    adapter->mac_table[0].queue,
+	memcpy(&mac_table->addr, hw->mac.addr, ETH_ALEN);
+	mac_table->pool = VMDQ_P(0);
+
+	mac_table->state = IXGBE_MAC_STATE_DEFAULT | IXGBE_MAC_STATE_IN_USE;
+
+	hw->mac.ops.set_rar(hw, 0, mac_table->addr, mac_table->pool,
 			    IXGBE_RAH_AV);
 }
 
-int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue)
+int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter,
+			 const u8 *addr, u16 pool)
 {
+	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
 	struct ixgbe_hw *hw = &adapter->hw;
 	int i;
 
 	if (is_zero_ether_addr(addr))
 		return -EINVAL;
 
-	for (i = 0; i < hw->mac.num_rar_entries; i++) {
-		if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE)
+	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+		if (mac_table->state & IXGBE_MAC_STATE_IN_USE)
 			continue;
-		adapter->mac_table[i].state |= (IXGBE_MAC_STATE_MODIFIED |
-						IXGBE_MAC_STATE_IN_USE);
-		ether_addr_copy(adapter->mac_table[i].addr, addr);
-		adapter->mac_table[i].queue = queue;
+
+		ether_addr_copy(mac_table->addr, addr);
+		mac_table->pool = pool;
+
+		mac_table->state |= IXGBE_MAC_STATE_MODIFIED |
+				    IXGBE_MAC_STATE_IN_USE;
+
 		ixgbe_sync_mac_table(adapter);
+
 		return i;
 	}
+
 	return -ENOMEM;
 }
 
-int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue)
+int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter,
+			 const u8 *addr, u16 pool)
 {
-	/* search table for addr, if found, set to 0 and sync */
-	int i;
+	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
 	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
 
 	if (is_zero_ether_addr(addr))
 		return -EINVAL;
 
-	for (i = 0; i < hw->mac.num_rar_entries; i++) {
-		if (ether_addr_equal(addr, adapter->mac_table[i].addr) &&
-		    adapter->mac_table[i].queue == queue) {
-			adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED;
-			adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE;
-			eth_zero_addr(adapter->mac_table[i].addr);
-			adapter->mac_table[i].queue = 0;
-			ixgbe_sync_mac_table(adapter);
-			return 0;
-		}
+	/* search table for addr, if found clear IN_USE flag and sync */
+	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+		/* we can only delete an entry if it is in use */
+		if (!(mac_table->state & IXGBE_MAC_STATE_IN_USE))
+			continue;
+		/* we only care about entries that belong to the given pool */
+		if (mac_table->pool != pool)
+			continue;
+		/* we only care about a specific MAC address */
+		if (!ether_addr_equal(addr, mac_table->addr))
+			continue;
+
+		mac_table->state |= IXGBE_MAC_STATE_MODIFIED;
+		mac_table->state &= ~IXGBE_MAC_STATE_IN_USE;
+
+		ixgbe_sync_mac_table(adapter);
+
+		return 0;
 	}
+
 	return -ENOMEM;
 }
 /**
@@ -4169,7 +4373,7 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn)
 	int count = 0;
 
 	/* return ENOMEM indicating insufficient memory for addresses */
-	if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter))
+	if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter, vfn))
 		return -ENOMEM;
 
 	if (!netdev_uc_empty(netdev)) {
@@ -4183,6 +4387,25 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn)
 	return count;
 }
 
+static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	int ret;
+
+	ret = ixgbe_add_mac_filter(adapter, addr, VMDQ_P(0));
+	/* a table index (>= 0) means success; only errors pass through */
+	return min_t(int, ret, 0);
+}
+
+static int ixgbe_uc_unsync(struct net_device *netdev, const unsigned char *addr)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	ixgbe_del_mac_filter(adapter, addr, VMDQ_P(0));
+	/* result ignored: unsync always reports success */
+	return 0;
+}
+
 /**
  * ixgbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set
  * @netdev: network interface device structure
@@ -4197,12 +4420,10 @@ void ixgbe_set_rx_mode(struct net_device *netdev)
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE;
-	u32 vlnctrl;
 	int count;
 
 	/* Check for Promiscuous and All Multicast modes */
 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
-	vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
 
 	/* set all bits that we expect to always be set */
 	fctrl &= ~IXGBE_FCTRL_SBP; /* disable store-bad-packets */
@@ -4212,25 +4433,18 @@ void ixgbe_set_rx_mode(struct net_device *netdev)
 
 	/* clear the bits we are changing the status of */
 	fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
-	vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN);
 	if (netdev->flags & IFF_PROMISC) {
 		hw->addr_ctrl.user_set_promisc = true;
 		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
 		vmolr |= IXGBE_VMOLR_MPE;
-		/* Only disable hardware filter vlans in promiscuous mode
-		 * if SR-IOV and VMDQ are disabled - otherwise ensure
-		 * that hardware VLAN filters remain enabled.
-		 */
-		if (adapter->flags & (IXGBE_FLAG_VMDQ_ENABLED |
-				      IXGBE_FLAG_SRIOV_ENABLED))
-			vlnctrl |= (IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN);
+		ixgbe_vlan_promisc_enable(adapter);
 	} else {
 		if (netdev->flags & IFF_ALLMULTI) {
 			fctrl |= IXGBE_FCTRL_MPE;
 			vmolr |= IXGBE_VMOLR_MPE;
 		}
-		vlnctrl |= IXGBE_VLNCTRL_VFE;
 		hw->addr_ctrl.user_set_promisc = false;
+		ixgbe_vlan_promisc_disable(adapter);
 	}
 
 	/*
@@ -4238,8 +4452,7 @@ void ixgbe_set_rx_mode(struct net_device *netdev)
 	 * sufficient space to store all the addresses then enable
 	 * unicast promiscuous mode
 	 */
-	count = ixgbe_write_uc_addr_list(netdev, VMDQ_P(0));
-	if (count < 0) {
+	if (__dev_uc_sync(netdev, ixgbe_uc_sync, ixgbe_uc_unsync)) {
 		fctrl |= IXGBE_FCTRL_UPE;
 		vmolr |= IXGBE_VMOLR_ROPE;
 	}
@@ -4275,7 +4488,6 @@ void ixgbe_set_rx_mode(struct net_device *netdev)
 		/* NOTE:  VLAN filtering is disabled by setting PROMISC */
 	}
 
-	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
 
 	if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
@@ -5042,7 +5254,6 @@ void ixgbe_reset(struct ixgbe_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct net_device *netdev = adapter->netdev;
 	int err;
-	u8 old_addr[ETH_ALEN];
 
 	if (ixgbe_removed(hw->hw_addr))
 		return;
@@ -5078,10 +5289,13 @@ void ixgbe_reset(struct ixgbe_adapter *adapter)
 	}
 
 	clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
-	/* do not flush user set addresses */
-	memcpy(old_addr, &adapter->mac_table[0].addr, netdev->addr_len);
+
+	/* flush entries out of MAC table */
 	ixgbe_flush_sw_mac_table(adapter);
-	ixgbe_mac_set_default_filter(adapter, old_addr);
+	__dev_uc_unsync(netdev, NULL);
+
+	/* do not flush user set addresses */
+	ixgbe_mac_set_default_filter(adapter);
 
 	/* update SAN MAC vmdq pool selection */
 	if (hw->mac.san_mac_rar_index)
@@ -5331,6 +5545,8 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
 	adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) *
 				     hw->mac.num_rar_entries,
 				     GFP_ATOMIC);
+	if (!adapter->mac_table)
+		return -ENOMEM;
 
 	/* Set MAC specific capability flags and exceptions */
 	switch (hw->mac.type) {
@@ -6616,10 +6832,8 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct pci_dev *pdev = adapter->pdev;
-	struct pci_dev *vfdev;
+	unsigned int vf;
 	u32 gpc;
-	int pos;
-	unsigned short vf_id;
 
 	if (!(netif_carrier_ok(adapter->netdev)))
 		return;
@@ -6636,26 +6850,17 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter)
 	if (!pdev)
 		return;
 
-	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
-	if (!pos)
-		return;
-
-	/* get the device ID for the VF */
-	pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id);
-
 	/* check status reg for all VFs owned by this PF */
-	vfdev = pci_get_device(pdev->vendor, vf_id, NULL);
-	while (vfdev) {
-		if (vfdev->is_virtfn && (vfdev->physfn == pdev)) {
-			u16 status_reg;
-
-			pci_read_config_word(vfdev, PCI_STATUS, &status_reg);
-			if (status_reg & PCI_STATUS_REC_MASTER_ABORT)
-				/* issue VFLR */
-				ixgbe_issue_vf_flr(adapter, vfdev);
-		}
+	for (vf = 0; vf < adapter->num_vfs; ++vf) {
+		struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev;
+		u16 status_reg;
 
-		vfdev = pci_get_device(pdev->vendor, vf_id, vfdev);
+		if (!vfdev)
+			continue;
+		pci_read_config_word(vfdev, PCI_STATUS, &status_reg);
+		if (status_reg != IXGBE_FAILED_READ_CFG_WORD &&
+		    status_reg & PCI_STATUS_REC_MASTER_ABORT)
+			ixgbe_issue_vf_flr(adapter, vfdev);
 	}
 }
 
@@ -7024,6 +7229,7 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
 			struct tcphdr *tcphdr;
 			u8 *raw;
 		} transport_hdr;
+		__be16 frag_off;
 
 		if (skb->encapsulation) {
 			network_hdr.raw = skb_inner_network_header(skb);
@@ -7047,13 +7253,17 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
 		case 6:
 			vlan_macip_lens |= transport_hdr.raw - network_hdr.raw;
 			l4_hdr = network_hdr.ipv6->nexthdr;
+			if (likely((transport_hdr.raw - network_hdr.raw) ==
+				   sizeof(struct ipv6hdr)))
+				break;
+			ipv6_skip_exthdr(skb, network_hdr.raw - skb->data +
+					      sizeof(struct ipv6hdr),
+					 &l4_hdr, &frag_off);
+			if (unlikely(frag_off))
+				l4_hdr = NEXTHDR_FRAGMENT;
 			break;
 		default:
-			if (unlikely(net_ratelimit())) {
-				dev_warn(tx_ring->dev,
-					 "partial checksum but version=%d\n",
-					 network_hdr.ipv4->version);
-			}
+			break;
 		}
 
 		switch (l4_hdr) {
@@ -7074,16 +7284,18 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
 		default:
 			if (unlikely(net_ratelimit())) {
 				dev_warn(tx_ring->dev,
-				 "partial checksum but l4 proto=%x!\n",
-				 l4_hdr);
+					 "partial checksum, version=%d, l4 proto=%x\n",
+					 network_hdr.ipv4->version, l4_hdr);
 			}
-			break;
+			skb_checksum_help(skb);
+			goto no_csum;
 		}
 
 		/* update TX checksum flag */
 		first->tx_flags |= IXGBE_TX_FLAGS_CSUM;
 	}
 
+no_csum:
 	/* vlan_macip_lens: MACLEN, VLAN tag */
 	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
 
@@ -7659,17 +7871,16 @@ static int ixgbe_set_mac(struct net_device *netdev, void *p)
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct sockaddr *addr = p;
-	int ret;
 
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
-	ixgbe_del_mac_filter(adapter, hw->mac.addr, VMDQ_P(0));
 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
 	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
 
-	ret = ixgbe_add_mac_filter(adapter, hw->mac.addr, VMDQ_P(0));
-	return ret > 0 ? 0 : ret;
+	ixgbe_mac_set_default_filter(adapter);
+
+	return 0;
 }
 
 static int
@@ -8152,7 +8363,10 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 {
 	/* guarantee we can provide a unique filter for the unicast address */
 	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
-		if (IXGBE_MAX_PF_MACVLANS <= netdev_uc_count(dev))
+		struct ixgbe_adapter *adapter = netdev_priv(dev);
+		u16 pool = VMDQ_P(0);
+
+		if (netdev_uc_count(dev) >= ixgbe_available_rars(adapter, pool))
 			return -ENOMEM;
 	}
 
@@ -8870,7 +9084,7 @@ skip_sriov:
 		goto err_sw_init;
 	}
 
-	ixgbe_mac_set_default_filter(adapter, hw->mac.perm_addr);
+	ixgbe_mac_set_default_filter(adapter);
 
 	setup_timer(&adapter->service_timer, &ixgbe_service_timer,
 		    (unsigned long) adapter);
@@ -9325,6 +9539,12 @@ static int __init ixgbe_init_module(void)
 	pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version);
 	pr_info("%s\n", ixgbe_copyright);
 
+	ixgbe_wq = create_singlethread_workqueue(ixgbe_driver_name);
+	if (!ixgbe_wq) {
+		pr_err("%s: Failed to create workqueue\n", ixgbe_driver_name);
+		return -ENOMEM;
+	}
+
 	ixgbe_dbg_init();
 
 	ret = pci_register_driver(&ixgbe_driver);
@@ -9356,6 +9576,10 @@ static void __exit ixgbe_exit_module(void)
 	pci_unregister_driver(&ixgbe_driver);
 
 	ixgbe_dbg_exit();
+	if (ixgbe_wq) {
+		destroy_workqueue(ixgbe_wq);
+		ixgbe_wq = NULL;
+	}
 }
 
 #ifdef CONFIG_IXGBE_DCA
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index fb8673d63806..db0731e05401 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -2393,6 +2393,9 @@ s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on)
 	if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
 		return 0;
 
+	if (!on && ixgbe_mng_present(hw))
+		return 0;
+
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL,
 				      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
 				      &reg);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index e5ba04025e2b..ef1504d41890 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -1,7 +1,7 @@
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2013 Intel Corporation.
+  Copyright(c) 1999 - 2015 Intel Corporation.
 
   This program is free software; you can redistribute it and/or modify it
   under the terms and conditions of the GNU General Public License,
@@ -27,6 +27,7 @@
 *******************************************************************************/
 #include "ixgbe.h"
 #include <linux/ptp_classify.h>
+#include <linux/clocksource.h>
 
 /*
  * The 82599 and the X540 do not have true 64bit nanosecond scale
@@ -93,7 +94,6 @@
 
 #define IXGBE_INCVAL_SHIFT_82599 7
 #define IXGBE_INCPER_SHIFT_82599 24
-#define IXGBE_MAX_TIMEADJ_VALUE  0x7FFFFFFFFFFFFFFFULL
 
 #define IXGBE_OVERFLOW_PERIOD    (HZ * 30)
 #define IXGBE_PTP_TX_TIMEOUT     (HZ * 15)
@@ -104,8 +104,68 @@
  */
 #define IXGBE_PTP_PPS_HALF_SECOND 500000000ULL
 
+/* In contrast, the X550 controller has two registers, SYSTIMEH and SYSTIMEL
+ * which contain measurements of seconds and nanoseconds respectively. This
+ * matches the standard linux representation of time in the kernel. In addition,
+ * the X550 also has a SYSTIMER register which represents residue, or
+ * subnanosecond overflow adjustments. To control clock adjustment, the TIMINCA
+ * register is used, but it is unlike the X540 and 82599 devices. TIMINCA
+ * represents units of 2^-32 nanoseconds, and uses 31 bits for this, with the
+ * high bit representing whether the adjustment is positive or negative. Every
+ * clock cycle, the X550 will add 12.5 ns + TIMINCA which can result in a range
+ * of 12 to 13 nanoseconds adjustment. Unlike the 82599 and X540 devices, the
+ * X550's clock for purposes of SYSTIME generation is constant and not dependent
+ * on the link speed.
+ *
+ *           SYSTIMEH           SYSTIMEL        SYSTIMER
+ *       +--------------+  +--------------+  +-------------+
+ * X550  |      32      |  |      32      |  |     32      |
+ *       +--------------+  +--------------+  +-------------+
+ *       \____seconds___/   \_nanoseconds_/  \__2^-32 ns__/
+ *
+ * This results in a full 96 bits to represent the clock, with 32 bits for
+ * seconds, 32 bits for nanoseconds (largest value is 0d999999999 or just under
+ * 1 second) and an additional 32 bits to measure sub nanosecond adjustments for
+ * underflow of adjustments.
+ *
+ * The 32 bits of seconds for the X550 overflows every
+ *   2^32 / ( 365.25 * 24 * 60 * 60 ) = ~136 years.
+ *
+ * In order to adjust the clock frequency for the X550, the TIMINCA register is
+ * provided. This register represents an adjustment of nearly +/- 0.5 ns to
+ * the base frequency. It is measured in 2^-32 ns units, with the high bit being
+ * the sign bit. This register enables software to calculate frequency
+ * adjustments and apply them directly to the clock rate.
+ *
+ * The math for converting ppb into TIMINCA values is fairly straightforward.
+ *   TIMINCA value = ( Base_Frequency * ppb ) / 1000000000ULL
+ *
+ * This assumes that ppb is never high enough to create a value bigger than
+ * TIMINCA's 31 bits can store. This is ensured by the stack. Calculating this
+ * value is also simple.
+ *   Max ppb = ( Max Adjustment / Base Frequency ) * 1000000000ULL
+ *
+ * For the X550, the Max adjustment is +/- 0.5 ns, and the base frequency is
+ * 12.5 nanoseconds. This means that the Max ppb is 39999999
+ *   Note: We subtract one in order to ensure no overflow, because the TIMINCA
+ *         register can only hold slightly under 0.5 nanoseconds.
+ *
+ * Because TIMINCA is measured in 2^-32 ns units, we have to convert 12.5 ns
+ * into 2^-32 units, which is
+ *
+ *  12.5 * 2^32 = 0xC80000000
+ *
+ * Some revisions of hardware have a faster base frequency than the registers
+ * were defined for. To fix this, we use a timecounter structure with the
+ * proper mult and shift to convert the cycles into nanoseconds of time.
+ */
+#define IXGBE_X550_BASE_PERIOD 0xC80000000ULL
+#define INCVALUE_MASK	0x7FFFFFFF
+#define ISGN		0x80000000
+#define MAX_TIMADJ	0x7FFFFFFF
+
 /**
- * ixgbe_ptp_setup_sdp
+ * ixgbe_ptp_setup_sdp_x540
  * @hw: the hardware private structure
  *
  * this function enables or disables the clock out feature on SDP0 for
@@ -116,83 +176,116 @@
  * aligns the start of the PPS signal to that value. The shift is
  * necessary because it can change based on the link speed.
  */
-static void ixgbe_ptp_setup_sdp(struct ixgbe_adapter *adapter)
+static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
-	int shift = adapter->cc.shift;
+	int shift = adapter->hw_cc.shift;
 	u32 esdp, tsauxc, clktiml, clktimh, trgttiml, trgttimh, rem;
 	u64 ns = 0, clock_edge = 0;
 
-	if ((adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED) &&
-	    (hw->mac.type == ixgbe_mac_X540)) {
+	/* disable the pin first */
+	IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0);
+	IXGBE_WRITE_FLUSH(hw);
 
-		/* disable the pin first */
-		IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0);
-		IXGBE_WRITE_FLUSH(hw);
+	if (!(adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED))
+		return;
 
-		esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+	esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
 
-		/*
-		 * enable the SDP0 pin as output, and connected to the
-		 * native function for Timesync (ClockOut)
-		 */
-		esdp |= (IXGBE_ESDP_SDP0_DIR |
-			 IXGBE_ESDP_SDP0_NATIVE);
+	/* enable the SDP0 pin as output, and connected to the
+	 * native function for Timesync (ClockOut)
+	 */
+	esdp |= IXGBE_ESDP_SDP0_DIR |
+		IXGBE_ESDP_SDP0_NATIVE;
 
-		/*
-		 * enable the Clock Out feature on SDP0, and allow
-		 * interrupts to occur when the pin changes
-		 */
-		tsauxc = (IXGBE_TSAUXC_EN_CLK |
-			  IXGBE_TSAUXC_SYNCLK |
-			  IXGBE_TSAUXC_SDP0_INT);
+	/* enable the Clock Out feature on SDP0, and allow
+	 * interrupts to occur when the pin changes
+	 */
+	tsauxc = IXGBE_TSAUXC_EN_CLK |
+		 IXGBE_TSAUXC_SYNCLK |
+		 IXGBE_TSAUXC_SDP0_INT;
 
-		/* clock period (or pulse length) */
-		clktiml = (u32)(IXGBE_PTP_PPS_HALF_SECOND << shift);
-		clktimh = (u32)((IXGBE_PTP_PPS_HALF_SECOND << shift) >> 32);
+	/* clock period (or pulse length) */
+	clktiml = (u32)(IXGBE_PTP_PPS_HALF_SECOND << shift);
+	clktimh = (u32)((IXGBE_PTP_PPS_HALF_SECOND << shift) >> 32);
 
-		/*
-		 * Account for the cyclecounter wrap-around value by
-		 * using the converted ns value of the current time to
-		 * check for when the next aligned second would occur.
-		 */
-		clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML);
-		clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32;
-		ns = timecounter_cyc2time(&adapter->tc, clock_edge);
+	/* Account for the cyclecounter wrap-around value by
+	 * using the converted ns value of the current time to
+	 * check for when the next aligned second would occur.
+	 */
+	clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+	clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32;
+	ns = timecounter_cyc2time(&adapter->hw_tc, clock_edge);
 
-		div_u64_rem(ns, IXGBE_PTP_PPS_HALF_SECOND, &rem);
-		clock_edge += ((IXGBE_PTP_PPS_HALF_SECOND - (u64)rem) << shift);
+	div_u64_rem(ns, IXGBE_PTP_PPS_HALF_SECOND, &rem);
+	clock_edge += ((IXGBE_PTP_PPS_HALF_SECOND - (u64)rem) << shift);
 
-		/* specify the initial clock start time */
-		trgttiml = (u32)clock_edge;
-		trgttimh = (u32)(clock_edge >> 32);
+	/* specify the initial clock start time */
+	trgttiml = (u32)clock_edge;
+	trgttimh = (u32)(clock_edge >> 32);
 
-		IXGBE_WRITE_REG(hw, IXGBE_CLKTIML, clktiml);
-		IXGBE_WRITE_REG(hw, IXGBE_CLKTIMH, clktimh);
-		IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml);
-		IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh);
+	IXGBE_WRITE_REG(hw, IXGBE_CLKTIML, clktiml);
+	IXGBE_WRITE_REG(hw, IXGBE_CLKTIMH, clktimh);
+	IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml);
+	IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh);
 
-		IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
-		IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc);
-	} else {
-		IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0);
-	}
+	IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+	IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc);
 
 	IXGBE_WRITE_FLUSH(hw);
 }
 
 /**
- * ixgbe_ptp_read - read raw cycle counter (to be used by time counter)
+ * ixgbe_ptp_read_X550 - read cycle counter value
+ * @hw_cc: cyclecounter structure
+ *
+ * This function reads SYSTIME registers. It is called by the cyclecounter
+ * structure to convert from internal representation into nanoseconds. We need
+ * this for X550 since some SKUs do not have the expected clock frequency and
+ * the result of SYSTIME is 32 bits of "billions of cycles" and 32 bits of
+ * "cycles", rather than seconds and nanoseconds.
+ */
+static cycle_t ixgbe_ptp_read_X550(const struct cyclecounter *hw_cc)
+{
+	struct ixgbe_adapter *adapter =
+			container_of(hw_cc, struct ixgbe_adapter, hw_cc);
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct timespec64 ts;
+
+	/* storage is 32 bits of 'billions of cycles' and 32 bits of 'cycles'.
+	 * Some revisions of hardware run at a higher frequency and so the
+	 * cycles are not guaranteed to be nanoseconds. The timespec64 created
+	 * here is used for its math/conversions but does not necessarily
+	 * represent nominal time.
+	 *
+	 * It should be noted that this cyclecounter will overflow at a
+	 * non-bitmask field since we have to convert our billions of cycles
+	 * into an actual cycles count. This results in some possible weird
+	 * situations at high cycle counter stamps. However given that 32 bits
+	 * of "seconds" is ~136 years this isn't a problem. Even at the
+	 * increased frequency of some revisions, this is still ~103 years.
+	 * Since the SYSTIME values start at 0 and we never write them, it is
+	 * highly unlikely for the cyclecounter to overflow in practice.
+	 */
+	IXGBE_READ_REG(hw, IXGBE_SYSTIMR);
+	ts.tv_nsec = IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+	ts.tv_sec = IXGBE_READ_REG(hw, IXGBE_SYSTIMH);
+
+	return (u64)timespec64_to_ns(&ts);
+}
+
+/**
+ * ixgbe_ptp_read_82599 - read raw cycle counter (to be used by time counter)
  * @cc: the cyclecounter structure
  *
  * this function reads the cyclecounter registers and is called by the
  * cyclecounter structure used to construct a ns counter from the
  * arbitrary fixed point registers
  */
-static cycle_t ixgbe_ptp_read(const struct cyclecounter *cc)
+static cycle_t ixgbe_ptp_read_82599(const struct cyclecounter *cc)
 {
 	struct ixgbe_adapter *adapter =
-		container_of(cc, struct ixgbe_adapter, cc);
+		container_of(cc, struct ixgbe_adapter, hw_cc);
 	struct ixgbe_hw *hw = &adapter->hw;
 	u64 stamp = 0;
 
@@ -203,20 +296,79 @@ static cycle_t ixgbe_ptp_read(const struct cyclecounter *cc)
 }
 
 /**
- * ixgbe_ptp_adjfreq
+ * ixgbe_ptp_convert_to_hwtstamp - convert register value to hw timestamp
+ * @adapter: private adapter structure
+ * @hwtstamp: stack timestamp structure; zeroed, then its hwtstamp is set
+ * @timestamp: unsigned 64bit raw timestamp (register pair or packet trailer)
+ *
+ * We need to convert the adapter's RX/TXSTMP registers into a hwtstamp value
+ * which can be used by the stack's ptp functions.
+ *
+ * The lock is used to protect consistency of the cyclecounter and the SYSTIME
+ * registers. However, it does not need to protect against the Rx or Tx
+ * timestamp registers, as there can't be a new timestamp until the old one is
+ * unlatched by reading.
+ *
+ * In addition to the timestamp in hardware, some controllers need a software
+ * overflow cyclecounter, and this function takes this into account as well.
+ **/
+static void ixgbe_ptp_convert_to_hwtstamp(struct ixgbe_adapter *adapter,
+					  struct skb_shared_hwtstamps *hwtstamp,
+					  u64 timestamp)
+{
+	unsigned long flags;
+	struct timespec64 systime;
+	u64 ns;
+
+	memset(hwtstamp, 0, sizeof(*hwtstamp));
+
+	switch (adapter->hw.mac.type) {
+	/* X550 and later hardware supposedly represent time using a seconds
+	 * and nanoseconds counter, instead of raw 64bits nanoseconds. We need
+	 * to convert the timestamp into cycles before it can be fed to the
+	 * cyclecounter. We need an actual cyclecounter because some revisions
+	 * of hardware run at a higher frequency and thus the counter does
+	 * not represent seconds/nanoseconds. Instead it can be thought of as
+	 * cycles and billions of cycles.
+	 */
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+		/* Upper 32 bits represent billions of cycles, lower 32 bits
+		 * represent cycles. However, we use timespec64_to_ns for the
+		 * correct math even though the units haven't been corrected
+		 * yet.
+		 */
+		systime.tv_sec = timestamp >> 32;
+		systime.tv_nsec = timestamp & 0xFFFFFFFF;
+
+		timestamp = timespec64_to_ns(&systime);
+		break;
+	default:
+		break;
+	}
+
+	spin_lock_irqsave(&adapter->tmreg_lock, flags);
+	ns = timecounter_cyc2time(&adapter->hw_tc, timestamp);
+	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+	hwtstamp->hwtstamp = ns_to_ktime(ns);
+}
+
+/**
+ * ixgbe_ptp_adjfreq_82599
  * @ptp: the ptp clock structure
  * @ppb: parts per billion adjustment from base
  *
  * adjust the frequency of the ptp cycle counter by the
  * indicated ppb from the base frequency.
  */
-static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+static int ixgbe_ptp_adjfreq_82599(struct ptp_clock_info *ptp, s32 ppb)
 {
 	struct ixgbe_adapter *adapter =
 		container_of(ptp, struct ixgbe_adapter, ptp_caps);
 	struct ixgbe_hw *hw = &adapter->hw;
-	u64 freq;
-	u32 diff, incval;
+	u64 freq, incval;
+	u32 diff;
 	int neg_adj = 0;
 
 	if (ppb < 0) {
@@ -235,12 +387,16 @@ static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 
 	switch (hw->mac.type) {
 	case ixgbe_mac_X540:
-		IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, incval);
+		if (incval > 0xFFFFFFFFULL)
+			e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n");
+		IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, (u32)incval);
 		break;
 	case ixgbe_mac_82599EB:
+		if (incval > 0x00FFFFFFULL)
+			e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n");
 		IXGBE_WRITE_REG(hw, IXGBE_TIMINCA,
 				(1 << IXGBE_INCPER_SHIFT_82599) |
-				incval);
+				((u32)incval & 0x00FFFFFFUL));
 		break;
 	default:
 		break;
@@ -250,6 +406,43 @@ static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 }
 
 /**
+ * ixgbe_ptp_adjfreq_X550 - program TIMINCA with a ppb-scaled base period
+ * @ptp: the ptp clock structure
+ * @ppb: parts per billion adjustment from base; its sign is written as ISGN
+ *
+ * adjust the frequency of the SYSTIME registers by the indicated ppb from base
+ * frequency. Always returns 0; an out-of-range rate is warned about only.
+ */
+static int ixgbe_ptp_adjfreq_X550(struct ptp_clock_info *ptp, s32 ppb)
+{
+	struct ixgbe_adapter *adapter =
+			container_of(ptp, struct ixgbe_adapter, ptp_caps);
+	struct ixgbe_hw *hw = &adapter->hw;
+	int neg_adj = 0;
+	u64 rate = IXGBE_X550_BASE_PERIOD;
+	u32 inca;
+
+	if (ppb < 0) {
+		neg_adj = 1;
+		ppb = -ppb;
+	}
+	rate *= ppb;
+	rate = div_u64(rate, 1000000000ULL);
+
+	/* warn if rate is too large; it is masked below, not rejected */
+	if (rate >= INCVALUE_MASK)
+		e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n");
+
+	inca = rate & INCVALUE_MASK;
+	if (neg_adj)
+		inca |= ISGN;
+
+	IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, inca);
+
+	return 0;
+}
+
+/**
  * ixgbe_ptp_adjtime
  * @ptp: the ptp clock structure
  * @delta: offset to adjust the cycle counter by
@@ -263,10 +456,11 @@ static int ixgbe_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 	unsigned long flags;
 
 	spin_lock_irqsave(&adapter->tmreg_lock, flags);
-	timecounter_adjtime(&adapter->tc, delta);
+	timecounter_adjtime(&adapter->hw_tc, delta);
 	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
 
-	ixgbe_ptp_setup_sdp(adapter);
+	if (adapter->ptp_setup_sdp)
+		adapter->ptp_setup_sdp(adapter);
 
 	return 0;
 }
@@ -283,11 +477,11 @@ static int ixgbe_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 {
 	struct ixgbe_adapter *adapter =
 		container_of(ptp, struct ixgbe_adapter, ptp_caps);
-	u64 ns;
 	unsigned long flags;
+	u64 ns;
 
 	spin_lock_irqsave(&adapter->tmreg_lock, flags);
-	ns = timecounter_read(&adapter->tc);
+	ns = timecounter_read(&adapter->hw_tc);
 	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
 
 	*ts = ns_to_timespec64(ns);
@@ -308,17 +502,16 @@ static int ixgbe_ptp_settime(struct ptp_clock_info *ptp,
 {
 	struct ixgbe_adapter *adapter =
 		container_of(ptp, struct ixgbe_adapter, ptp_caps);
-	u64 ns;
 	unsigned long flags;
-
-	ns = timespec64_to_ns(ts);
+	u64 ns = timespec64_to_ns(ts);
 
 	/* reset the timecounter */
 	spin_lock_irqsave(&adapter->tmreg_lock, flags);
-	timecounter_init(&adapter->tc, &adapter->cc, ns);
+	timecounter_init(&adapter->hw_tc, &adapter->hw_cc, ns);
 	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
 
-	ixgbe_ptp_setup_sdp(adapter);
+	if (adapter->ptp_setup_sdp)
+		adapter->ptp_setup_sdp(adapter);
 	return 0;
 }
 
@@ -343,33 +536,26 @@ static int ixgbe_ptp_feature_enable(struct ptp_clock_info *ptp,
 	 * event when the clock SDP triggers. Clear mask when PPS is
 	 * disabled
 	 */
-	if (rq->type == PTP_CLK_REQ_PPS) {
-		switch (adapter->hw.mac.type) {
-		case ixgbe_mac_X540:
-			if (on)
-				adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED;
-			else
-				adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED;
-
-			ixgbe_ptp_setup_sdp(adapter);
-			return 0;
-		default:
-			break;
-		}
-	}
+	if (rq->type != PTP_CLK_REQ_PPS || !adapter->ptp_setup_sdp)
+		return -ENOTSUPP;
+
+	if (on)
+		adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED;
+	else
+		adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED;
 
-	return -ENOTSUPP;
+	adapter->ptp_setup_sdp(adapter);
+	return 0;
 }
 
 /**
  * ixgbe_ptp_check_pps_event
  * @adapter: the private adapter structure
- * @eicr: the interrupt cause register value
  *
  * This function is called by the interrupt routine when checking for
  * interrupts. It will check and handle a pps event.
  */
-void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr)
+void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct ptp_clock_event event;
@@ -425,7 +611,9 @@ void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL);
+	struct ixgbe_ring *rx_ring;
 	unsigned long rx_event;
+	int n;
 
 	/* if we don't have a valid timestamp in the registers, just update the
 	 * timeout counter and exit
@@ -437,19 +625,43 @@ void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter)
 
 	/* determine the most recent watchdog or rx_timestamp event */
 	rx_event = adapter->last_rx_ptp_check;
-	if (time_after(adapter->last_rx_timestamp, rx_event))
-		rx_event = adapter->last_rx_timestamp;
+	for (n = 0; n < adapter->num_rx_queues; n++) {
+		rx_ring = adapter->rx_ring[n];
+		if (time_after(rx_ring->last_rx_timestamp, rx_event))
+			rx_event = rx_ring->last_rx_timestamp;
+	}
 
 	/* only need to read the high RXSTMP register to clear the lock */
-	if (time_is_before_jiffies(rx_event + 5*HZ)) {
+	if (time_is_before_jiffies(rx_event + 5 * HZ)) {
 		IXGBE_READ_REG(hw, IXGBE_RXSTMPH);
 		adapter->last_rx_ptp_check = jiffies;
 
+		adapter->rx_hwtstamp_cleared++;
 		e_warn(drv, "clearing RX Timestamp hang\n");
 	}
 }
 
 /**
+ * ixgbe_ptp_clear_tx_timestamp - utility function to clear Tx timestamp state
+ * @adapter: the private adapter structure
+ *
+ * This function should be called whenever the state related to a Tx timestamp
+ * needs to be cleared. It reads TXSTMPH, frees any skb still held in
+ * ptp_tx_skb and releases the __IXGBE_PTP_TX_IN_PROGRESS state bit.
+ */
+static void ixgbe_ptp_clear_tx_timestamp(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	IXGBE_READ_REG(hw, IXGBE_TXSTMPH);
+	if (adapter->ptp_tx_skb) {
+		dev_kfree_skb_any(adapter->ptp_tx_skb);
+		adapter->ptp_tx_skb = NULL;
+	}
+	clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state);
+}
+
+/**
  * ixgbe_ptp_tx_hwtstamp - utility function which checks for TX time stamp
  * @adapter: the private adapter struct
  *
@@ -461,23 +673,15 @@ static void ixgbe_ptp_tx_hwtstamp(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct skb_shared_hwtstamps shhwtstamps;
-	u64 regval = 0, ns;
-	unsigned long flags;
+	u64 regval = 0;
 
 	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPL);
 	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPH) << 32;
 
-	spin_lock_irqsave(&adapter->tmreg_lock, flags);
-	ns = timecounter_cyc2time(&adapter->tc, regval);
-	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
-
-	memset(&shhwtstamps, 0, sizeof(shhwtstamps));
-	shhwtstamps.hwtstamp = ns_to_ktime(ns);
+	ixgbe_ptp_convert_to_hwtstamp(adapter, &shhwtstamps, regval);
 	skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps);
 
-	dev_kfree_skb_any(adapter->ptp_tx_skb);
-	adapter->ptp_tx_skb = NULL;
-	clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state);
+	ixgbe_ptp_clear_tx_timestamp(adapter);
 }
 
 /**
@@ -497,38 +701,85 @@ static void ixgbe_ptp_tx_hwtstamp_work(struct work_struct *work)
 					      IXGBE_PTP_TX_TIMEOUT);
 	u32 tsynctxctl;
 
-	if (timeout) {
-		dev_kfree_skb_any(adapter->ptp_tx_skb);
-		adapter->ptp_tx_skb = NULL;
-		clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state);
-		e_warn(drv, "clearing Tx Timestamp hang\n");
+	/* we have to have a valid skb to poll for a timestamp */
+	if (!adapter->ptp_tx_skb) {
+		ixgbe_ptp_clear_tx_timestamp(adapter);
 		return;
 	}
 
+	/* stop polling once we have a valid timestamp */
 	tsynctxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL);
-	if (tsynctxctl & IXGBE_TSYNCTXCTL_VALID)
+	if (tsynctxctl & IXGBE_TSYNCTXCTL_VALID) {
 		ixgbe_ptp_tx_hwtstamp(adapter);
-	else
+		return;
+	}
+
+	if (timeout) {
+		ixgbe_ptp_clear_tx_timestamp(adapter);
+		adapter->tx_hwtstamp_timeouts++;
+		e_warn(drv, "clearing Tx Timestamp hang\n");
+	} else {
 		/* reschedule to keep checking if it's not available yet */
 		schedule_work(&adapter->ptp_tx_work);
+	}
 }
 
 /**
- * ixgbe_ptp_rx_hwtstamp - utility function which checks for RX time stamp
- * @adapter: pointer to adapter struct
+ * ixgbe_ptp_rx_pktstamp - utility function to get RX time stamp from buffer
+ * @q_vector: structure containing interrupt and ring information
+ * @skb: the packet; its trailing IXGBE_TS_HDR_LEN bytes are trimmed off
+ *
+ * This function will be called by the Rx routine if the timestamp for this
+ * packet is stored in the buffer. The value is stored in little endian format
+ * starting at the end of the packet data.
+ */
+void ixgbe_ptp_rx_pktstamp(struct ixgbe_q_vector *q_vector,
+			   struct sk_buff *skb)
+{
+	__le64 regval;
+
+	/* copy the bits out of the skb, and then trim the skb length */
+	skb_copy_bits(skb, skb->len - IXGBE_TS_HDR_LEN, &regval,
+		      IXGBE_TS_HDR_LEN);
+	__pskb_trim(skb, skb->len - IXGBE_TS_HDR_LEN);
+
+	/* The timestamp is recorded in little endian format, and is stored at
+	 * the end of the packet.
+	 *
+	 * DWORD: N              N + 1      N + 2
+	 * Field: End of Packet  SYSTIMH    SYSTIML
+	 */
+	ixgbe_ptp_convert_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb),
+				      le64_to_cpu(regval));
+}
+
+/**
+ * ixgbe_ptp_rx_rgtstamp - utility function which checks for RX time stamp
+ * @q_vector: structure containing interrupt and ring information
  * @skb: particular skb to send timestamp with
  *
  * if the timestamp is valid, we convert it into the timecounter ns
  * value, then store that result into the shhwtstamps structure which
  * is passed up the network stack
  */
-void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb)
+void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *q_vector,
+			   struct sk_buff *skb)
 {
-	struct ixgbe_hw *hw = &adapter->hw;
-	struct skb_shared_hwtstamps *shhwtstamps;
-	u64 regval = 0, ns;
+	struct ixgbe_adapter *adapter;
+	struct ixgbe_hw *hw;
+	u64 regval = 0;
 	u32 tsyncrxctl;
-	unsigned long flags;
+
+	/* we cannot process timestamps on a ring without a q_vector */
+	if (!q_vector || !q_vector->adapter)
+		return;
+
+	adapter = q_vector->adapter;
+	hw = &adapter->hw;
+
+	/* Read the tsyncrxctl register afterwards in order to prevent taking an
+	 * I/O hit on every packet.
+	 */
 
 	tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL);
 	if (!(tsyncrxctl & IXGBE_TSYNCRXCTL_VALID))
@@ -537,17 +788,7 @@ void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb)
 	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPL);
 	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPH) << 32;
 
-	spin_lock_irqsave(&adapter->tmreg_lock, flags);
-	ns = timecounter_cyc2time(&adapter->tc, regval);
-	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
-
-	shhwtstamps = skb_hwtstamps(skb);
-	shhwtstamps->hwtstamp = ns_to_ktime(ns);
-
-	/* Update the last_rx_timestamp timer in order to enable watchdog check
-	 * for error case of latched timestamp on a dropped packet.
-	 */
-	adapter->last_rx_timestamp = jiffies;
+	ixgbe_ptp_convert_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
 }
 
 int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr)
@@ -610,14 +851,20 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
 	case HWTSTAMP_FILTER_NONE:
 		tsync_rx_ctl = 0;
 		tsync_rx_mtrl = 0;
+		adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+				    IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
 		break;
 	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
 		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1;
 		tsync_rx_mtrl |= IXGBE_RXMTRL_V1_SYNC_MSG;
+		adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+				    IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
 		break;
 	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
 		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1;
 		tsync_rx_mtrl |= IXGBE_RXMTRL_V1_DELAY_REQ_MSG;
+		adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+				    IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
 		break;
 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
 	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
@@ -631,9 +878,21 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
 		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_EVENT_V2;
 		is_l2 = true;
 		config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+				    IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
 		break;
 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
 	case HWTSTAMP_FILTER_ALL:
+		/* The X550 controller is capable of timestamping all packets,
+		 * which allows it to accept any filter.
+		 */
+		if (hw->mac.type >= ixgbe_mac_X550) {
+			tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_ALL;
+			config->rx_filter = HWTSTAMP_FILTER_ALL;
+			adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED;
+			break;
+		}
+		/* fall through */
 	default:
 		/*
 		 * register RXMTRL must be set in order to do V1 packets,
@@ -641,16 +900,46 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
 		 * Delay_Req messages and hardware does not support
 		 * timestamping all packets => return error
 		 */
+		adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+				    IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
 		config->rx_filter = HWTSTAMP_FILTER_NONE;
 		return -ERANGE;
 	}
 
 	if (hw->mac.type == ixgbe_mac_82598EB) {
+		adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+				    IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
 		if (tsync_rx_ctl | tsync_tx_ctl)
 			return -ERANGE;
 		return 0;
 	}
 
+	/* Per-packet timestamping only works if the filter is set to all
+	 * packets. Since this is desired, always timestamp all packets as long
+	 * as any Rx filter was configured.
+	 */
+	switch (hw->mac.type) {
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+		/* enable timestamping all packets only if at least some
+		 * packets were requested. Otherwise, play nice and disable
+		 * timestamping
+		 */
+		if (config->rx_filter == HWTSTAMP_FILTER_NONE)
+			break;
+
+		tsync_rx_ctl = IXGBE_TSYNCRXCTL_ENABLED |
+			       IXGBE_TSYNCRXCTL_TYPE_ALL |
+			       IXGBE_TSYNCRXCTL_TSIP_UT_EN;
+		config->rx_filter = HWTSTAMP_FILTER_ALL;
+		adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED;
+		adapter->flags &= ~IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER;
+		is_l2 = true;
+		break;
+	default:
+		break;
+	}
+
 	/* define ethertype filter for timestamping L2 packets */
 	if (is_l2)
 		IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_1588),
@@ -678,8 +967,8 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
 	IXGBE_WRITE_FLUSH(hw);
 
 	/* clear TX/RX time stamp registers, just to be sure */
-	regval = IXGBE_READ_REG(hw, IXGBE_TXSTMPH);
-	regval = IXGBE_READ_REG(hw, IXGBE_RXSTMPH);
+	ixgbe_ptp_clear_tx_timestamp(adapter);
+	IXGBE_READ_REG(hw, IXGBE_RXSTMPH);
 
 	return 0;
 }
@@ -712,23 +1001,9 @@ int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr)
 		-EFAULT : 0;
 }
 
-/**
- * ixgbe_ptp_start_cyclecounter - create the cycle counter from hw
- * @adapter: pointer to the adapter structure
- *
- * This function should be called to set the proper values for the TIMINCA
- * register and tell the cyclecounter structure what the tick rate of SYSTIME
- * is. It does not directly modify SYSTIME registers or the timecounter
- * structure. It should be called whenever a new TIMINCA value is necessary,
- * such as during initialization or when the link speed changes.
- */
-void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
+static void ixgbe_ptp_link_speed_adjust(struct ixgbe_adapter *adapter,
+					u32 *shift, u32 *incval)
 {
-	struct ixgbe_hw *hw = &adapter->hw;
-	u32 incval = 0;
-	u32 shift = 0;
-	unsigned long flags;
-
 	/**
 	 * Scale the NIC cycle counter by a large factor so that
 	 * relatively small corrections to the frequency can be added
@@ -745,36 +1020,98 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
 	 */
 	switch (adapter->link_speed) {
 	case IXGBE_LINK_SPEED_100_FULL:
-		incval = IXGBE_INCVAL_100;
-		shift = IXGBE_INCVAL_SHIFT_100;
+		*shift = IXGBE_INCVAL_SHIFT_100;
+		*incval = IXGBE_INCVAL_100;
 		break;
 	case IXGBE_LINK_SPEED_1GB_FULL:
-		incval = IXGBE_INCVAL_1GB;
-		shift = IXGBE_INCVAL_SHIFT_1GB;
+		*shift = IXGBE_INCVAL_SHIFT_1GB;
+		*incval = IXGBE_INCVAL_1GB;
 		break;
 	case IXGBE_LINK_SPEED_10GB_FULL:
 	default:
-		incval = IXGBE_INCVAL_10GB;
-		shift = IXGBE_INCVAL_SHIFT_10GB;
+		*shift = IXGBE_INCVAL_SHIFT_10GB;
+		*incval = IXGBE_INCVAL_10GB;
 		break;
 	}
+}
 
-	/**
-	 * Modify the calculated values to fit within the correct
-	 * number of bits specified by the hardware. The 82599 doesn't
-	 * have the same space as the X540, so bitshift the calculated
-	 * values to fit.
+/**
+ * ixgbe_ptp_start_cyclecounter - create the cycle counter from hw
+ * @adapter: pointer to the adapter structure
+ *
+ * This function should be called to set the proper values for the TIMINCA
+ * register and tell the cyclecounter structure what the tick rate of SYSTIME
+ * is. It does not directly modify SYSTIME registers or the timecounter
+ * structure. It should be called whenever a new TIMINCA value is necessary,
+ * such as during initialization or when the link speed changes.
+ */
+void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct cyclecounter cc;
+	unsigned long flags;
+	u32 incval = 0;
+	u32 tsauxc = 0;
+	u32 fuse0 = 0;
+
+	/* For some of the boards below this mask is technically incorrect.
+	 * The timestamp mask overflows at approximately 61bits. However the
+	 * particular hardware does not overflow on an even bitmask value.
+	 * Instead, it overflows due to conversion of upper 32bits billions of
+	 * cycles. Timecounters are not really intended for this purpose so
+	 * they do not properly function if the overflow point isn't 2^N-1.
+	 * However, the actual SYSTIME values in question take ~138 years to
+	 * overflow. In practice this means they won't actually overflow. A
+	 * proper fix to this problem would require modification of the
+	 * timecounter delta calculations.
 	 */
+	cc.mask = CLOCKSOURCE_MASK(64);
+	cc.mult = 1;
+	cc.shift = 0;
+
 	switch (hw->mac.type) {
+	case ixgbe_mac_X550EM_x:
+		/* SYSTIME assumes X550EM_x board frequency is 300Mhz, and is
+		 * designed to represent seconds and nanoseconds when this is
+		 * the case. However, some revisions of hardware have a 400Mhz
+		 * clock and we have to compensate for this frequency
+		 * variation using corrected mult and shift values.
+		 */
+		fuse0 = IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0));
+		if (!(fuse0 & IXGBE_FUSES0_300MHZ)) {
+			cc.mult = 3;
+			cc.shift = 2;
+		}
+		/* fallthrough */
+	case ixgbe_mac_X550:
+		cc.read = ixgbe_ptp_read_X550;
+
+		/* enable SYSTIME counter */
+		IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0);
+		IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
+		IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
+		tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC);
+		IXGBE_WRITE_REG(hw, IXGBE_TSAUXC,
+				tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME);
+		IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS);
+		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC);
+
+		IXGBE_WRITE_FLUSH(hw);
+		break;
 	case ixgbe_mac_X540:
+		cc.read = ixgbe_ptp_read_82599;
+
+		ixgbe_ptp_link_speed_adjust(adapter, &cc.shift, &incval);
 		IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, incval);
 		break;
 	case ixgbe_mac_82599EB:
+		cc.read = ixgbe_ptp_read_82599;
+
+		ixgbe_ptp_link_speed_adjust(adapter, &cc.shift, &incval);
 		incval >>= IXGBE_INCVAL_SHIFT_82599;
-		shift -= IXGBE_INCVAL_SHIFT_82599;
+		cc.shift -= IXGBE_INCVAL_SHIFT_82599;
 		IXGBE_WRITE_REG(hw, IXGBE_TIMINCA,
-				(1 << IXGBE_INCPER_SHIFT_82599) |
-				incval);
+				(1 << IXGBE_INCPER_SHIFT_82599) | incval);
 		break;
 	default:
 		/* other devices aren't supported */
@@ -787,13 +1124,7 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
 
 	/* need lock to prevent incorrect read while modifying cyclecounter */
 	spin_lock_irqsave(&adapter->tmreg_lock, flags);
-
-	memset(&adapter->cc, 0, sizeof(adapter->cc));
-	adapter->cc.read = ixgbe_ptp_read;
-	adapter->cc.mask = CYCLECOUNTER_MASK(64);
-	adapter->cc.shift = shift;
-	adapter->cc.mult = 1;
-
+	memcpy(&adapter->hw_cc, &cc, sizeof(adapter->hw_cc));
 	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
 }
 
@@ -814,29 +1145,27 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	unsigned long flags;
 
-	/* set SYSTIME registers to 0 just in case */
-	IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0x00000000);
-	IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0x00000000);
-	IXGBE_WRITE_FLUSH(hw);
-
 	/* reset the hardware timestamping mode */
 	ixgbe_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config);
 
+	/* 82598 does not support PTP */
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return;
+
 	ixgbe_ptp_start_cyclecounter(adapter);
 
 	spin_lock_irqsave(&adapter->tmreg_lock, flags);
-
-	/* reset the ns time counter */
-	timecounter_init(&adapter->tc, &adapter->cc,
+	timecounter_init(&adapter->hw_tc, &adapter->hw_cc,
 			 ktime_to_ns(ktime_get_real()));
-
 	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
 
-	/*
-	 * Now that the shift has been calculated and the systime
+	adapter->last_overflow_check = jiffies;
+
+	/* Now that the shift has been calculated and the systime
 	 * registers reset, (re-)enable the Clock out feature
 	 */
-	ixgbe_ptp_setup_sdp(adapter);
+	if (adapter->ptp_setup_sdp)
+		adapter->ptp_setup_sdp(adapter);
 }
 
 /**
@@ -845,11 +1174,11 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter)
  *
  * This function performs setup of the user entry point function table and
  * initializes the PTP clock device, which is used to access the clock-like
- * features of the PTP core. It will be called by ixgbe_ptp_init, only if
- * there isn't already a clock device (such as after a suspend/resume cycle,
- * where the clock device wasn't destroyed).
+ * features of the PTP core. It will be called by ixgbe_ptp_init, and may
+ * reuse a previously initialized clock (such as during a suspend/resume
+ * cycle).
  */
-static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter)
+static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 	long err;
@@ -869,11 +1198,12 @@ static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter)
 		adapter->ptp_caps.n_ext_ts = 0;
 		adapter->ptp_caps.n_per_out = 0;
 		adapter->ptp_caps.pps = 1;
-		adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq;
+		adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599;
 		adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
 		adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime;
 		adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
 		adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
+		adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_x540;
 		break;
 	case ixgbe_mac_82599EB:
 		snprintf(adapter->ptp_caps.name,
@@ -885,14 +1215,31 @@ static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter)
 		adapter->ptp_caps.n_ext_ts = 0;
 		adapter->ptp_caps.n_per_out = 0;
 		adapter->ptp_caps.pps = 0;
-		adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq;
+		adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599;
+		adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
+		adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime;
+		adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
+		adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
+		break;
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+		snprintf(adapter->ptp_caps.name, 16, "%s", netdev->name);
+		adapter->ptp_caps.owner = THIS_MODULE;
+		adapter->ptp_caps.max_adj = 30000000;
+		adapter->ptp_caps.n_alarm = 0;
+		adapter->ptp_caps.n_ext_ts = 0;
+		adapter->ptp_caps.n_per_out = 0;
+		adapter->ptp_caps.pps = 0;
+		adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_X550;
 		adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
 		adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime;
 		adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
 		adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
+		adapter->ptp_setup_sdp = NULL;
 		break;
 	default:
 		adapter->ptp_clock = NULL;
+		adapter->ptp_setup_sdp = NULL;
 		return -EOPNOTSUPP;
 	}
 
@@ -961,18 +1308,13 @@ void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter)
 	if (!test_and_clear_bit(__IXGBE_PTP_RUNNING, &adapter->state))
 		return;
 
-	/* since this might be called in suspend, we don't clear the state,
-	 * but simply reset the auxiliary PPS signal control register
-	 */
-	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TSAUXC, 0x0);
+	adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED;
+	if (adapter->ptp_setup_sdp)
+		adapter->ptp_setup_sdp(adapter);
 
 	/* ensure that we cancel any pending PTP Tx work item in progress */
 	cancel_work_sync(&adapter->ptp_tx_work);
-	if (adapter->ptp_tx_skb) {
-		dev_kfree_skb_any(adapter->ptp_tx_skb);
-		adapter->ptp_tx_skb = NULL;
-		clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state);
-	}
+	ixgbe_ptp_clear_tx_timestamp(adapter);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index fcd8b27a0ccb..eeff3d075bf8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -1,7 +1,7 @@
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2014 Intel Corporation.
+  Copyright(c) 1999 - 2015 Intel Corporation.
 
   This program is free software; you can redistribute it and/or modify it
   under the terms and conditions of the GNU General Public License,
@@ -130,6 +130,38 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter)
 	return -ENOMEM;
 }
 
+/**
+ * ixgbe_get_vfs - Find and take references to all vf devices
+ * @adapter: Pointer to adapter struct; vfinfo[].vfdev gets up to num_vfs VFs
+ */
+static void ixgbe_get_vfs(struct ixgbe_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	u16 vendor = pdev->vendor;
+	struct pci_dev *vfdev;
+	int vf = 0;
+	u16 vf_id;
+	int pos;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+	if (!pos)
+		return;
+	pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id);
+
+	vfdev = pci_get_device(vendor, vf_id, NULL);
+	for (; vfdev; vfdev = pci_get_device(vendor, vf_id, vfdev)) {
+		if (!vfdev->is_virtfn)
+			continue;
+		if (vfdev->physfn != pdev)
+			continue;
+		if (vf >= adapter->num_vfs)
+			continue;
+		pci_dev_get(vfdev);
+		adapter->vfinfo[vf].vfdev = vfdev;
+		++vf;
+	}
+}
+
 /* Note this function is called when the user wants to enable SR-IOV
  * VFs using the now deprecated module parameter
  */
@@ -170,8 +202,10 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter)
 		}
 	}
 
-	if (!__ixgbe_enable_sriov(adapter))
+	if (!__ixgbe_enable_sriov(adapter)) {
+		ixgbe_get_vfs(adapter);
 		return;
+	}
 
 	/* If we have gotten to this point then there is no memory available
 	 * to manage the VF devices - print message and bail.
@@ -184,6 +218,7 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter)
 #endif /* #ifdef CONFIG_PCI_IOV */
 int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
 {
+	unsigned int num_vfs = adapter->num_vfs, vf;
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 gpie;
 	u32 vmdctl;
@@ -192,6 +227,16 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
 	/* set num VFs to 0 to prevent access to vfinfo */
 	adapter->num_vfs = 0;
 
+	/* put the reference to all of the vf devices */
+	for (vf = 0; vf < num_vfs; ++vf) {
+		struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev;
+
+		if (!vfdev)
+			continue;
+		adapter->vfinfo[vf].vfdev = NULL;
+		pci_dev_put(vfdev);
+	}
+
 	/* free VF control structures */
 	kfree(adapter->vfinfo);
 	adapter->vfinfo = NULL;
@@ -289,6 +334,7 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs)
 		e_dev_warn("Failed to enable PCI sriov: %d\n", err);
 		return err;
 	}
+	ixgbe_get_vfs(adapter);
 	ixgbe_sriov_reinit(adapter);
 
 	return num_vfs;
@@ -406,11 +452,34 @@ void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter)
 static int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid,
 			     u32 vf)
 {
-	/* VLAN 0 is a special case, don't allow it to be removed */
-	if (!vid && !add)
-		return 0;
+	struct ixgbe_hw *hw = &adapter->hw;
+	int err;
+
+	/* If VLAN overlaps with one the PF is currently monitoring make
+	 * sure that we are able to allocate a VLVF entry.  This may be
+	 * redundant but it guarantees PF will maintain visibility to
+	 * the VLAN.
+	 */
+	if (add && test_bit(vid, adapter->active_vlans)) {
+		err = hw->mac.ops.set_vfta(hw, vid, VMDQ_P(0), true, false);
+		if (err)
+			return err;
+	}
+
+	err = hw->mac.ops.set_vfta(hw, vid, vf, !!add, false);
 
-	return adapter->hw.mac.ops.set_vfta(&adapter->hw, vid, vf, (bool)add);
+	if (add && !err)
+		return err;
+
+	/* If we failed to add the VF VLAN or we are removing the VF VLAN
+	 * we may need to drop the PF pool bit in order to allow us to free
+	 * up the VLVF resources.
+	 */
+	if (test_bit(vid, adapter->active_vlans) ||
+	    (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
+		ixgbe_update_pf_promisc_vlvf(adapter, vid);
+
+	return err;
 }
 
 static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
@@ -516,13 +585,75 @@ static void ixgbe_clear_vmvir(struct ixgbe_adapter *adapter, u32 vf)
 
 	IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf), 0);
 }
+
+/* Remove VF pool @vf from every VLVF entry that includes it.  When no other
+ * pool (ignoring the PF in VLAN promiscuous mode) remains, the VLVF entry is
+ * cleared; outside promiscuous mode the matching VFTA bit is cleared too.
+ */
+static void ixgbe_clear_vf_vlans(struct ixgbe_adapter *adapter, u32 vf)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 i;
+
+	/* post-decrement loop, covers VLVF_ENTRIES - 1 to 0 */
+	for (i = IXGBE_VLVF_ENTRIES; i--;) {
+		u32 word = i * 2 + vf / 32;	/* VLVFB index, not address */
+		u32 bits[2], vlvfb, vid, vfta, vlvf;
+		u32 mask = 1u << (vf % 32);
+
+		vlvfb = IXGBE_READ_REG(hw, IXGBE_VLVFB(word));
+		/* if our bit isn't set we can skip it */
+		if (!(vlvfb & mask))
+			continue;
+
+		/* clear our bit from vlvfb */
+		vlvfb ^= mask;
+		/* create 64b mask to check to see if we should clear VLVF */
+		bits[word % 2] = vlvfb;
+		bits[~word % 2] = IXGBE_READ_REG(hw, IXGBE_VLVFB(word ^ 1));
+
+		/* if promisc is enabled, PF will be present, leave VFTA */
+		if (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC) {
+			bits[VMDQ_P(0) / 32] &= ~(1u << (VMDQ_P(0) % 32));
+			if (bits[0] || bits[1])
+				goto update_vlvfb;
+			goto update_vlvf;
+		}
+
+		/* if other pools are present, just remove ourselves */
+		if (bits[0] || bits[1])
+			goto update_vlvfb;
+
+		/* if we cannot determine VLAN just remove ourselves */
+		vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(i));
+		if (!vlvf)
+			goto update_vlvfb;
+
+		vid = vlvf & VLAN_VID_MASK;
+		mask = 1u << (vid % 32);
+		/* clear bit from VFTA */
+		vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(vid / 32));
+		if (vfta & mask)
+			IXGBE_WRITE_REG(hw, IXGBE_VFTA(vid / 32), vfta ^ mask);
+update_vlvf:
+		/* clear POOL selection enable */
+		IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), 0);
+update_vlvfb:
+		/* clear pool bits */
+		IXGBE_WRITE_REG(hw, IXGBE_VLVFB(word), vlvfb);
+	}
+}
+
 static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
 	u8 num_tcs = netdev_get_num_tc(adapter->netdev);
 
-	/* add PF assigned VLAN or VLAN 0 */
+	/* remove VLAN filters belonging to this VF */
+	ixgbe_clear_vf_vlans(adapter, vf);
+
+	/* add back PF assigned VLAN or VLAN 0 */
 	ixgbe_set_vf_vlan(adapter, true, vfinfo->pf_vlan, vf);
 
 	/* reset offloads to defaults */
@@ -768,40 +899,14 @@ static int ixgbe_set_vf_mac_addr(struct ixgbe_adapter *adapter,
 	return ixgbe_set_vf_mac(adapter, vf, new_mac) < 0;
 }
 
-static int ixgbe_find_vlvf_entry(struct ixgbe_hw *hw, u32 vlan)
-{
-	u32 vlvf;
-	s32 regindex;
-
-	/* short cut the special case */
-	if (vlan == 0)
-		return 0;
-
-	/* Search for the vlan id in the VLVF entries */
-	for (regindex = 1; regindex < IXGBE_VLVF_ENTRIES; regindex++) {
-		vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(regindex));
-		if ((vlvf & VLAN_VID_MASK) == vlan)
-			break;
-	}
-
-	/* Return a negative value if not found */
-	if (regindex >= IXGBE_VLVF_ENTRIES)
-		regindex = -1;
-
-	return regindex;
-}
-
 static int ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter,
 				 u32 *msgbuf, u32 vf)
 {
+	u32 add = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT;
+	u32 vid = (msgbuf[1] & IXGBE_VLVF_VLANID_MASK);
+	u8 tcs = netdev_get_num_tc(adapter->netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
-	int add = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT;
-	int vid = (msgbuf[1] & IXGBE_VLVF_VLANID_MASK);
 	int err;
-	s32 reg_ndx;
-	u32 vlvf;
-	u32 bits;
-	u8 tcs = netdev_get_num_tc(adapter->netdev);
 
 	if (adapter->vfinfo[vf].pf_vlan || tcs) {
 		e_warn(drv,
@@ -811,54 +916,23 @@ static int ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter,
 		return -1;
 	}
 
-	if (add)
-		adapter->vfinfo[vf].vlan_count++;
-	else if (adapter->vfinfo[vf].vlan_count)
-		adapter->vfinfo[vf].vlan_count--;
-
-	/* in case of promiscuous mode any VLAN filter set for a VF must
-	 * also have the PF pool added to it.
-	 */
-	if (add && adapter->netdev->flags & IFF_PROMISC)
-		err = ixgbe_set_vf_vlan(adapter, add, vid, VMDQ_P(0));
+	/* VLAN 0 is a special case, don't allow it to be removed */
+	if (!vid && !add)
+		return 0;
 
 	err = ixgbe_set_vf_vlan(adapter, add, vid, vf);
-	if (!err && adapter->vfinfo[vf].spoofchk_enabled)
-		hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf);
+	if (err)
+		return err;
 
-	/* Go through all the checks to see if the VLAN filter should
-	 * be wiped completely.
-	 */
-	if (!add && adapter->netdev->flags & IFF_PROMISC) {
-		reg_ndx = ixgbe_find_vlvf_entry(hw, vid);
-		if (reg_ndx < 0)
-			return err;
-		vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(reg_ndx));
-		/* See if any other pools are set for this VLAN filter
-		 * entry other than the PF.
-		 */
-		if (VMDQ_P(0) < 32) {
-			bits = IXGBE_READ_REG(hw, IXGBE_VLVFB(reg_ndx * 2));
-			bits &= ~(1 << VMDQ_P(0));
-			bits |= IXGBE_READ_REG(hw,
-					       IXGBE_VLVFB(reg_ndx * 2) + 1);
-		} else {
-			bits = IXGBE_READ_REG(hw,
-					      IXGBE_VLVFB(reg_ndx * 2) + 1);
-			bits &= ~(1 << (VMDQ_P(0) - 32));
-			bits |= IXGBE_READ_REG(hw, IXGBE_VLVFB(reg_ndx * 2));
-		}
+	if (adapter->vfinfo[vf].spoofchk_enabled)
+		hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf);
 
-		/* If the filter was removed then ensure PF pool bit
-		 * is cleared if the PF only added itself to the pool
-		 * because the PF is in promiscuous mode.
-		 */
-		if ((vlvf & VLAN_VID_MASK) == vid &&
-		    !test_bit(vid, adapter->active_vlans) && !bits)
-			ixgbe_set_vf_vlan(adapter, add, vid, VMDQ_P(0));
-	}
+	if (add)
+		adapter->vfinfo[vf].vlan_count++;
+	else if (adapter->vfinfo[vf].vlan_count)
+		adapter->vfinfo[vf].vlan_count--;
 
-	return err;
+	return 0;
 }
 
 static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter,
@@ -1239,6 +1313,9 @@ static int ixgbe_enable_port_vlan(struct ixgbe_adapter *adapter, int vf,
 	if (err)
 		goto out;
 
+	/* Revoke tagless access via VLAN 0 */
+	ixgbe_set_vf_vlan(adapter, false, 0, vf);
+
 	ixgbe_set_vmvir(adapter, vlan, qos, vf);
 	ixgbe_set_vmolr(hw, vf, false);
 	if (adapter->vfinfo[vf].spoofchk_enabled)
@@ -1272,6 +1349,8 @@ static int ixgbe_disable_port_vlan(struct ixgbe_adapter *adapter, int vf)
 
 	err = ixgbe_set_vf_vlan(adapter, false,
 				adapter->vfinfo[vf].pf_vlan, vf);
+	/* Restore tagless access via VLAN 0 */
+	ixgbe_set_vf_vlan(adapter, true, 0, vf);
 	ixgbe_clear_vmvir(adapter, vf);
 	ixgbe_set_vmolr(hw, vf, true);
 	hw->mac.ops.set_vlan_anti_spoofing(hw, false, vf);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 995f03107eac..06add27c8b8c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -1020,6 +1020,7 @@ struct ixgbe_thermal_sensor_data {
 #define IXGBE_TXSTMPH    0x08C08 /* Tx timestamp value High - RO */
 #define IXGBE_SYSTIML    0x08C0C /* System time register Low - RO */
 #define IXGBE_SYSTIMH    0x08C10 /* System time register High - RO */
+#define IXGBE_SYSTIMR    0x08C58 /* System time register Residue - RO */
 #define IXGBE_TIMINCA    0x08C14 /* Increment attributes register - RW */
 #define IXGBE_TIMADJL    0x08C18 /* Time Adjustment Offset register Low - RW */
 #define IXGBE_TIMADJH    0x08C1C /* Time Adjustment Offset register High - RW */
@@ -1036,6 +1037,7 @@ struct ixgbe_thermal_sensor_data {
 #define IXGBE_AUXSTMPH0  0x08C40 /* Auxiliary Time Stamp 0 register High - RO */
 #define IXGBE_AUXSTMPL1  0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */
 #define IXGBE_AUXSTMPH1  0x08C48 /* Auxiliary Time Stamp 1 register High - RO */
+#define IXGBE_TSIM       0x08C68 /* TimeSync Interrupt Mask Register - RW */
 
 /* Diagnostic Registers */
 #define IXGBE_RDSTATCTL   0x02C20
@@ -1345,7 +1347,10 @@ struct ixgbe_thermal_sensor_data {
 #define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK	0xFF01 /* int chip-wide mask */
 #define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_FLAG	0xFC01 /* int chip-wide mask */
 #define IXGBE_MDIO_GLOBAL_ALARM_1		0xCC00 /* Global alarm 1 */
+#define IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT	0x0010 /* device fault */
 #define IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL	0x4000 /* high temp failure */
+#define IXGBE_MDIO_GLOBAL_FAULT_MSG		0xC850 /* global fault msg */
+#define IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP	0x8007 /* high temp failure */
 #define IXGBE_MDIO_GLOBAL_INT_MASK		0xD400 /* Global int mask */
 /* autoneg vendor alarm int enable */
 #define IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN	0x1000
@@ -1353,6 +1358,7 @@ struct ixgbe_thermal_sensor_data {
 #define IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN	0x1 /* vendor alarm int enable */
 #define IXGBE_MDIO_GLOBAL_STD_ALM2_INT		0x200 /* vendor alarm2 int mask */
 #define IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN	0x4000 /* int high temp enable */
+#define IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN	0x0010 /* int dev fault enable */
 
 #define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR	0xC30A /* PHY_XS SDA/SCL Addr Reg */
 #define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA	0xC30B /* PHY_XS SDA/SCL Data Reg */
@@ -2209,6 +2215,7 @@ enum {
 #define IXGBE_TSAUXC_EN_CLK   0x00000004
 #define IXGBE_TSAUXC_SYNCLK   0x00000008
 #define IXGBE_TSAUXC_SDP0_INT 0x00000040
+#define IXGBE_TSAUXC_DISABLE_SYSTIME	0x80000000
 
 #define IXGBE_TSYNCTXCTL_VALID		0x00000001 /* Tx timestamp valid */
 #define IXGBE_TSYNCTXCTL_ENABLED	0x00000010 /* Tx timestamping enabled */
@@ -2218,8 +2225,12 @@ enum {
 #define IXGBE_TSYNCRXCTL_TYPE_L2_V2	0x00
 #define IXGBE_TSYNCRXCTL_TYPE_L4_V1	0x02
 #define IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2	0x04
+#define IXGBE_TSYNCRXCTL_TYPE_ALL	0x08
 #define IXGBE_TSYNCRXCTL_TYPE_EVENT_V2	0x0A
 #define IXGBE_TSYNCRXCTL_ENABLED	0x00000010 /* Rx Timestamping enabled */
+#define IXGBE_TSYNCRXCTL_TSIP_UT_EN	0x00800000 /* Rx Timestamp in Packet */
+
+#define IXGBE_TSIM_TXTS			0x00000002
 
 #define IXGBE_RXMTRL_V1_CTRLT_MASK	0x000000FF
 #define IXGBE_RXMTRL_V1_SYNC_MSG	0x00
@@ -2332,6 +2343,7 @@ enum {
 #define IXGBE_RXD_STAT_UDPV     0x400   /* Valid UDP checksum */
 #define IXGBE_RXD_STAT_DYNINT   0x800   /* Pkt caused INT via DYNINT */
 #define IXGBE_RXD_STAT_LLINT    0x800   /* Pkt caused Low Latency Interrupt */
+#define IXGBE_RXD_STAT_TSIP     0x08000 /* Time Stamp in packet buffer */
 #define IXGBE_RXD_STAT_TS       0x10000 /* Time Stamp */
 #define IXGBE_RXD_STAT_SECP     0x20000 /* Security Processing */
 #define IXGBE_RXD_STAT_LB       0x40000 /* Loopback Status */
@@ -3288,7 +3300,7 @@ struct ixgbe_mac_operations {
 	s32 (*enable_mc)(struct ixgbe_hw *);
 	s32 (*disable_mc)(struct ixgbe_hw *);
 	s32 (*clear_vfta)(struct ixgbe_hw *);
-	s32 (*set_vfta)(struct ixgbe_hw *, u32, u32, bool);
+	s32 (*set_vfta)(struct ixgbe_hw *, u32, u32, bool, bool);
 	s32 (*init_uta_tables)(struct ixgbe_hw *);
 	void (*set_mac_anti_spoofing)(struct ixgbe_hw *, bool, int);
 	void (*set_vlan_anti_spoofing)(struct ixgbe_hw *, bool, int);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
index c1d4584f6469..2358c1b7d586 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
@@ -57,8 +57,7 @@ s32 ixgbe_get_invariants_X540(struct ixgbe_hw *hw)
 	struct ixgbe_phy_info *phy = &hw->phy;
 
 	/* set_phy_power was set by default to NULL */
-	if (!ixgbe_mng_present(hw))
-		phy->ops.set_phy_power = ixgbe_set_copper_phy_power;
+	phy->ops.set_phy_power = ixgbe_set_copper_phy_power;
 
 	mac->mcft_size = IXGBE_X540_MC_TBL_SIZE;
 	mac->vft_size = IXGBE_X540_VFT_TBL_SIZE;
@@ -110,13 +109,14 @@ mac_reset_top:
 	ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
 	IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
 	IXGBE_WRITE_FLUSH(hw);
+	usleep_range(1000, 1200);
 
 	/* Poll for reset bit to self-clear indicating reset is complete */
 	for (i = 0; i < 10; i++) {
-		udelay(1);
 		ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
 		if (!(ctrl & IXGBE_CTRL_RST_MASK))
 			break;
+		udelay(1);
 	}
 
 	if (ctrl & IXGBE_CTRL_RST_MASK) {
@@ -154,12 +154,16 @@ mac_reset_top:
 
 	/* Add the SAN MAC address to the RAR only if it's a valid address */
 	if (is_valid_ether_addr(hw->mac.san_addr)) {
-		hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
-				    hw->mac.san_addr, 0, IXGBE_RAH_AV);
-
 		/* Save the SAN MAC RAR index */
 		hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
 
+		hw->mac.ops.set_rar(hw, hw->mac.san_mac_rar_index,
+				    hw->mac.san_addr, 0, IXGBE_RAH_AV);
+
+		/* clear VMDq pool/queue selection for this RAR */
+		hw->mac.ops.clear_vmdq(hw, hw->mac.san_mac_rar_index,
+				       IXGBE_CLEAR_VMDQ_ALL);
+
 		/* Reserve the last RAR for the SAN MAC address */
 		hw->mac.num_rar_entries--;
 	}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index ebe0ac950b14..f4ef0d1a5dbe 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -26,6 +26,8 @@
 #include "ixgbe_common.h"
 #include "ixgbe_phy.h"
 
+static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *, ixgbe_link_speed);
+
 static s32 ixgbe_get_invariants_X550_x(struct ixgbe_hw *hw)
 {
 	struct ixgbe_mac_info *mac = &hw->mac;
@@ -85,79 +87,6 @@ static s32 ixgbe_write_cs4227(struct ixgbe_hw *hw, u16 reg, u16 value)
 }
 
 /**
- * ixgbe_check_cs4227_reg - Perform diag on a CS4227 register
- * @hw: pointer to hardware structure
- * @reg: the register to check
- *
- * Performs a diagnostic on a register in the CS4227 chip. Returns an error
- * if it is not operating correctly.
- * This function assumes that the caller has acquired the proper semaphore.
- */
-static s32 ixgbe_check_cs4227_reg(struct ixgbe_hw *hw, u16 reg)
-{
-	s32 status;
-	u32 retry;
-	u16 reg_val;
-
-	reg_val = (IXGBE_CS4227_EDC_MODE_DIAG << 1) | 1;
-	status = ixgbe_write_cs4227(hw, reg, reg_val);
-	if (status)
-		return status;
-	for (retry = 0; retry < IXGBE_CS4227_RETRIES; retry++) {
-		msleep(IXGBE_CS4227_CHECK_DELAY);
-		reg_val = 0xFFFF;
-		ixgbe_read_cs4227(hw, reg, &reg_val);
-		if (!reg_val)
-			break;
-	}
-	if (reg_val) {
-		hw_err(hw, "CS4227 reg 0x%04X failed diagnostic\n", reg);
-		return status;
-	}
-
-	return 0;
-}
-
-/**
- * ixgbe_get_cs4227_status - Return CS4227 status
- * @hw: pointer to hardware structure
- *
- * Performs a diagnostic on the CS4227 chip. Returns an error if it is
- * not operating correctly.
- * This function assumes that the caller has acquired the proper semaphore.
- */
-static s32 ixgbe_get_cs4227_status(struct ixgbe_hw *hw)
-{
-	s32 status;
-	u16 value = 0;
-
-	/* Exit if the diagnostic has already been performed. */
-	status = ixgbe_read_cs4227(hw, IXGBE_CS4227_SCRATCH, &value);
-	if (status)
-		return status;
-	if (value == IXGBE_CS4227_RESET_COMPLETE)
-		return 0;
-
-	/* Check port 0. */
-	status = ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_LINE_SPARE24_LSB);
-	if (status)
-		return status;
-
-	status = ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_HOST_SPARE24_LSB);
-	if (status)
-		return status;
-
-	/* Check port 1. */
-	status = ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_LINE_SPARE24_LSB +
-					(1 << 12));
-	if (status)
-		return status;
-
-	return ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_HOST_SPARE24_LSB +
-				      (1 << 12));
-}
-
-/**
  * ixgbe_read_pe - Read register from port expander
  * @hw: pointer to hardware structure
  * @reg: register number to read
@@ -326,13 +255,6 @@ static void ixgbe_check_cs4227(struct ixgbe_hw *hw)
 		return;
 	}
 
-	/* Is the CS4227 working correctly? */
-	status = ixgbe_get_cs4227_status(hw);
-	if (status) {
-		hw_err(hw, "CS4227 status failed: %d", status);
-		goto out;
-	}
-
 	/* Record completion for next time. */
 	status = ixgbe_write_cs4227(hw, IXGBE_CS4227_SCRATCH,
 				    IXGBE_CS4227_RESET_COMPLETE);
@@ -1257,31 +1179,71 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw,
 	if (status)
 		return status;
 
-	/* Configure CS4227 LINE side to 10G SR. */
-	slice = IXGBE_CS4227_LINE_SPARE22_MSB + (hw->bus.lan_id << 12);
-	value = IXGBE_CS4227_SPEED_10G;
-	status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, slice,
-						  value);
-
-	/* Configure CS4227 for HOST connection rate then type. */
-	slice = IXGBE_CS4227_HOST_SPARE22_MSB + (hw->bus.lan_id << 12);
-	value = speed & IXGBE_LINK_SPEED_10GB_FULL ?
-		IXGBE_CS4227_SPEED_10G : IXGBE_CS4227_SPEED_1G;
-	status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, slice,
-						  value);
+	if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
+		/* Configure CS4227 LINE side to 10G SR. */
+		slice = IXGBE_CS4227_LINE_SPARE22_MSB + (hw->bus.lan_id << 12);
+		value = IXGBE_CS4227_SPEED_10G;
+		status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227,
+							  slice, value);
+		if (status)
+			goto i2c_err;
 
-	slice = IXGBE_CS4227_HOST_SPARE24_LSB + (hw->bus.lan_id << 12);
-	if (setup_linear)
-		value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
-	else
+		slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12);
 		value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
-	status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, slice,
-						  value);
+		status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227,
+							  slice, value);
+		if (status)
+			goto i2c_err;
+
+		/* Configure CS4227 for HOST connection rate then type. */
+		slice = IXGBE_CS4227_HOST_SPARE22_MSB + (hw->bus.lan_id << 12);
+		value = speed & IXGBE_LINK_SPEED_10GB_FULL ?
+			IXGBE_CS4227_SPEED_10G : IXGBE_CS4227_SPEED_1G;
+		status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227,
+							  slice, value);
+		if (status)
+			goto i2c_err;
 
-	/* If internal link mode is XFI, then setup XFI internal link. */
-	if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE))
+		slice = IXGBE_CS4227_HOST_SPARE24_LSB + (hw->bus.lan_id << 12);
+		if (setup_linear)
+			value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
+		else
+			value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
+		status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227,
+							  slice, value);
+		if (status)
+			goto i2c_err;
+
+		/* Setup XFI internal link. */
 		status = ixgbe_setup_ixfi_x550em(hw, &speed);
+		if (status) {
+			hw_dbg(hw, "setup_ixfi failed with %d\n", status);
+			return status;
+		}
+	} else {
+		/* Configure internal PHY for KR/KX. */
+		status = ixgbe_setup_kr_speed_x550em(hw, speed);
+		if (status) {
+			hw_dbg(hw, "setup_kr_speed failed with %d\n", status);
+			return status;
+		}
+
+		/* Configure CS4227 LINE side to proper mode. */
+		slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12);
+		if (setup_linear)
+			value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
+		else
+			value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
+		status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227,
+							  slice, value);
+		if (status)
+			goto i2c_err;
+	}
 
+	return 0;
+
+i2c_err:
+	hw_dbg(hw, "combined i2c access failed with %d\n", status);
 	return status;
 }
 
@@ -1482,7 +1444,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
 				IXGBE_MDIO_GLOBAL_ALARM_1_INT)))
 		return status;
 
-	/* High temperature failure alarm triggered */
+	/* Global alarm triggered */
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_ALARM_1,
 				      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
 				      &reg);
@@ -1496,6 +1458,21 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
 		ixgbe_set_copper_phy_power(hw, false);
 		return IXGBE_ERR_OVERTEMP;
 	}
+	if (reg & IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT) {
+		/* device fault alarm triggered */
+		status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_FAULT_MSG,
+					  IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+					  &reg);
+		if (status)
+			return status;
+
+		/* if device fault was due to high temp alarm handle and exit */
+		if (reg == IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP) {
+			/* power down the PHY in case the PHY FW didn't */
+			ixgbe_set_copper_phy_power(hw, false);
+			return IXGBE_ERR_OVERTEMP;
+		}
+	}
 
 	/* Vendor alarm 2 triggered */
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG,
@@ -1549,14 +1526,15 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
 	if (status)
 		return status;
 
-	/* Enables high temperature failure alarm */
+	/* Enable high temperature failure and global fault alarms */
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
 				      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
 				      &reg);
 	if (status)
 		return status;
 
-	reg |= IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN;
+	reg |= (IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN |
+		IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN);
 
 	status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
 				       IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
@@ -1765,6 +1743,12 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw)
 	if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
 		return IXGBE_ERR_CONFIG;
 
+	if (hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) {
+		speed = IXGBE_LINK_SPEED_10GB_FULL |
+			IXGBE_LINK_SPEED_1GB_FULL;
+		return ixgbe_setup_kr_speed_x550em(hw, speed);
+	}
+
 	/* If link is not up, then there is no setup necessary so return  */
 	status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up);
 	if (status)
@@ -1969,7 +1953,6 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw)
 static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
 {
 	struct ixgbe_phy_info *phy = &hw->phy;
-	ixgbe_link_speed speed;
 	s32 ret_val;
 
 	hw->mac.ops.set_lan_id(hw);
@@ -1982,13 +1965,6 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
 		 * to determine internal PHY mode.
 		 */
 		phy->nw_mng_if_sel = IXGBE_READ_REG(hw, IXGBE_NW_MNG_IF_SEL);
-
-		/* If internal PHY mode is KR, then initialize KR link */
-		if (phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) {
-			speed = IXGBE_LINK_SPEED_10GB_FULL |
-				IXGBE_LINK_SPEED_1GB_FULL;
-			ret_val = ixgbe_setup_kr_speed_x550em(hw, speed);
-		}
 	}
 
 	/* Identify the PHY or SFP module */
@@ -2020,14 +1996,8 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
 		/* If internal link mode is XFI, then setup iXFI internal link,
 		 * else setup KR now.
 		 */
-		if (!(phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
-			phy->ops.setup_internal_link =
-					ixgbe_setup_internal_phy_t_x550em;
-		} else {
-			speed = IXGBE_LINK_SPEED_10GB_FULL |
-				IXGBE_LINK_SPEED_1GB_FULL;
-			ret_val = ixgbe_setup_kr_speed_x550em(hw, speed);
-		}
+		phy->ops.setup_internal_link =
+					      ixgbe_setup_internal_phy_t_x550em;
 
 		/* setup SW LPLU only for first revision */
 		if (!(IXGBE_FUSES0_REV1 & IXGBE_READ_REG(hw,
@@ -2176,13 +2146,14 @@ mac_reset_top:
 	ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
 	IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
 	IXGBE_WRITE_FLUSH(hw);
+	usleep_range(1000, 1200);
 
 	/* Poll for reset bit to self-clear meaning reset is complete */
 	for (i = 0; i < 10; i++) {
-		udelay(1);
 		ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
 		if (!(ctrl & IXGBE_CTRL_RST_MASK))
 			break;
+		udelay(1);
 	}
 
 	if (ctrl & IXGBE_CTRL_RST_MASK) {
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index d3e5f5b37999..c48aef613b0a 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -774,7 +774,7 @@ static int ixgbevf_set_coalesce(struct net_device *netdev,
 		adapter->tx_itr_setting = ec->tx_coalesce_usecs;
 
 	if (adapter->tx_itr_setting == 1)
-		tx_itr_param = IXGBE_10K_ITR;
+		tx_itr_param = IXGBE_12K_ITR;
 	else
 		tx_itr_param = adapter->tx_itr_setting;
 
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index ec3147279621..68ec7daa04fd 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -326,8 +326,7 @@ static inline bool ixgbevf_qv_disable(struct ixgbevf_q_vector *q_vector)
 #define IXGBE_MIN_RSC_ITR	24
 #define IXGBE_100K_ITR		40
 #define IXGBE_20K_ITR		200
-#define IXGBE_10K_ITR		400
-#define IXGBE_8K_ITR		500
+#define IXGBE_12K_ITR		336
 
 /* Helper macros to switch between ints/sec and what the register uses.
  * And yes, it's the same math going both ways.  The lowest value
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 2955186cd4f6..f098952d4fb4 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -59,7 +59,7 @@ static const char ixgbevf_driver_string[] =
 #define DRV_VERSION "2.12.1-k"
 const char ixgbevf_driver_version[] = DRV_VERSION;
 static char ixgbevf_copyright[] =
-	"Copyright (c) 2009 - 2012 Intel Corporation.";
+	"Copyright (c) 2009 - 2015 Intel Corporation.";
 
 static const struct ixgbevf_info *ixgbevf_info_tbl[] = {
 	[board_82599_vf] = &ixgbevf_82599_vf_info,
@@ -96,12 +96,14 @@ static int debug = -1;
 module_param(debug, int, 0);
 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
 
+static struct workqueue_struct *ixgbevf_wq;
+
 static void ixgbevf_service_event_schedule(struct ixgbevf_adapter *adapter)
 {
 	if (!test_bit(__IXGBEVF_DOWN, &adapter->state) &&
 	    !test_bit(__IXGBEVF_REMOVING, &adapter->state) &&
 	    !test_and_set_bit(__IXGBEVF_SERVICE_SCHED, &adapter->state))
-		schedule_work(&adapter->service_task);
+		queue_work(ixgbevf_wq, &adapter->service_task);
 }
 
 static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter)
@@ -1138,7 +1140,7 @@ static void ixgbevf_configure_msix(struct ixgbevf_adapter *adapter)
 		if (q_vector->tx.ring && !q_vector->rx.ring) {
 			/* Tx only vector */
 			if (adapter->tx_itr_setting == 1)
-				q_vector->itr = IXGBE_10K_ITR;
+				q_vector->itr = IXGBE_12K_ITR;
 			else
 				q_vector->itr = adapter->tx_itr_setting;
 		} else {
@@ -1196,7 +1198,7 @@ static void ixgbevf_update_itr(struct ixgbevf_q_vector *q_vector,
 	/* simple throttle rate management
 	 *    0-20MB/s lowest (100000 ints/s)
 	 *   20-100MB/s low   (20000 ints/s)
-	 *  100-1249MB/s bulk (8000 ints/s)
+	 *  100-1249MB/s bulk (12000 ints/s)
 	 */
 	/* what was last interrupt timeslice? */
 	timepassed_us = q_vector->itr >> 2;
@@ -1247,7 +1249,7 @@ static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector)
 		break;
 	case bulk_latency:
 	default:
-		new_itr = IXGBE_8K_ITR;
+		new_itr = IXGBE_12K_ITR;
 		break;
 	}
 
@@ -1288,7 +1290,7 @@ static irqreturn_t ixgbevf_msix_clean_rings(int irq, void *data)
 
 	/* EIAM disabled interrupts (on this vector) for us */
 	if (q_vector->rx.ring || q_vector->tx.ring)
-		napi_schedule(&q_vector->napi);
+		napi_schedule_irqoff(&q_vector->napi);
 
 	return IRQ_HANDLED;
 }
@@ -1332,7 +1334,6 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter)
 	int txr_remaining = adapter->num_tx_queues;
 	int i, j;
 	int rqpv, tqpv;
-	int err = 0;
 
 	q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
 
@@ -1345,7 +1346,7 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter)
 
 		for (; txr_idx < txr_remaining; v_start++, txr_idx++)
 			map_vector_to_txq(adapter, v_start, txr_idx);
-		goto out;
+		return 0;
 	}
 
 	/* If we don't have enough vectors for a 1-to-1
@@ -1370,8 +1371,7 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter)
 		}
 	}
 
-out:
-	return err;
+	return 0;
 }
 
 /**
@@ -1469,9 +1469,7 @@ static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter)
  **/
 static int ixgbevf_request_irq(struct ixgbevf_adapter *adapter)
 {
-	int err = 0;
-
-	err = ixgbevf_request_msix_irqs(adapter);
+	int err = ixgbevf_request_msix_irqs(adapter);
 
 	if (err)
 		hw_dbg(&adapter->hw, "request_irq failed, Error %d\n", err);
@@ -1830,7 +1828,7 @@ static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev,
 {
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
-	int err = -EOPNOTSUPP;
+	int err;
 
 	spin_lock_bh(&adapter->mbx_lock);
 
@@ -2046,7 +2044,7 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
 		      ixgbe_mbox_api_11,
 		      ixgbe_mbox_api_10,
 		      ixgbe_mbox_api_unknown };
-	int err = 0, idx = 0;
+	int err, idx = 0;
 
 	spin_lock_bh(&adapter->mbx_lock);
 
@@ -2260,10 +2258,8 @@ void ixgbevf_reset(struct ixgbevf_adapter *adapter)
 	}
 
 	if (is_valid_ether_addr(adapter->hw.mac.addr)) {
-		memcpy(netdev->dev_addr, adapter->hw.mac.addr,
-		       netdev->addr_len);
-		memcpy(netdev->perm_addr, adapter->hw.mac.addr,
-		       netdev->addr_len);
+		ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+		ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr);
 	}
 
 	adapter->last_reset = jiffies;
@@ -2421,7 +2417,7 @@ err_allocation:
 static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
-	int err = 0;
+	int err;
 	int vector, v_budget;
 
 	/* It's easy to be greedy for MSI-X vectors, but it really
@@ -2439,26 +2435,21 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
 	 */
 	adapter->msix_entries = kcalloc(v_budget,
 					sizeof(struct msix_entry), GFP_KERNEL);
-	if (!adapter->msix_entries) {
-		err = -ENOMEM;
-		goto out;
-	}
+	if (!adapter->msix_entries)
+		return -ENOMEM;
 
 	for (vector = 0; vector < v_budget; vector++)
 		adapter->msix_entries[vector].entry = vector;
 
 	err = ixgbevf_acquire_msix_vectors(adapter, v_budget);
 	if (err)
-		goto out;
+		return err;
 
 	err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
 	if (err)
-		goto out;
-
-	err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+		return err;
 
-out:
-	return err;
+	return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
 }
 
 /**
@@ -2659,13 +2650,14 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter)
 		else if (is_zero_ether_addr(adapter->hw.mac.addr))
 			dev_info(&pdev->dev,
 				 "MAC address not assigned by administrator.\n");
-		memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
+		ether_addr_copy(netdev->dev_addr, hw->mac.addr);
 	}
 
 	if (!is_valid_ether_addr(netdev->dev_addr)) {
 		dev_info(&pdev->dev, "Assigning random MAC address\n");
 		eth_hw_addr_random(netdev);
-		memcpy(hw->mac.addr, netdev->dev_addr, netdev->addr_len);
+		ether_addr_copy(hw->mac.addr, netdev->dev_addr);
+		ether_addr_copy(hw->mac.perm_addr, netdev->dev_addr);
 	}
 
 	/* Enable dynamic interrupt throttling rates */
@@ -3352,6 +3344,7 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring,
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u8 l4_hdr = 0;
+		__be16 frag_off;
 
 		switch (first->protocol) {
 		case htons(ETH_P_IP):
@@ -3362,13 +3355,16 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring,
 		case htons(ETH_P_IPV6):
 			vlan_macip_lens |= skb_network_header_len(skb);
 			l4_hdr = ipv6_hdr(skb)->nexthdr;
+			if (likely(skb_network_header_len(skb) ==
+				   sizeof(struct ipv6hdr)))
+				break;
+			ipv6_skip_exthdr(skb, skb_network_offset(skb) +
+					      sizeof(struct ipv6hdr),
+					 &l4_hdr, &frag_off);
+			if (unlikely(frag_off))
+				l4_hdr = NEXTHDR_FRAGMENT;
 			break;
 		default:
-			if (unlikely(net_ratelimit())) {
-				dev_warn(tx_ring->dev,
-					 "partial checksum but proto=%x!\n",
-					 first->protocol);
-			}
 			break;
 		}
 
@@ -3390,16 +3386,18 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring,
 		default:
 			if (unlikely(net_ratelimit())) {
 				dev_warn(tx_ring->dev,
-					 "partial checksum but l4 proto=%x!\n",
-					 l4_hdr);
+					 "partial checksum, l3 proto=%x, l4 proto=%x\n",
+					 first->protocol, l4_hdr);
 			}
-			break;
+			skb_checksum_help(skb);
+			goto no_csum;
 		}
 
 		/* update TX checksum flag */
 		first->tx_flags |= IXGBE_TX_FLAGS_CSUM;
 	}
 
+no_csum:
 	/* vlan_macip_lens: MACLEN, VLAN tag */
 	vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT;
 	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
@@ -3695,8 +3693,8 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p)
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
-	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
-	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
+	ether_addr_copy(netdev->dev_addr, addr->sa_data);
+	ether_addr_copy(hw->mac.addr, addr->sa_data);
 
 	spin_lock_bh(&adapter->mbx_lock);
 
@@ -4245,15 +4243,26 @@ static struct pci_driver ixgbevf_driver = {
  **/
 static int __init ixgbevf_init_module(void)
 {
 	int ret;
 
 	pr_info("%s - version %s\n", ixgbevf_driver_string,
 		ixgbevf_driver_version);
 
 	pr_info("%s\n", ixgbevf_copyright);
+	ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name);
+	if (!ixgbevf_wq) {
+		pr_err("%s: Failed to create workqueue\n", ixgbevf_driver_name);
+		return -ENOMEM;
+	}
 
 	ret = pci_register_driver(&ixgbevf_driver);
+	if (ret) {
+		/* Registration failed: release the workqueue created above */
+		destroy_workqueue(ixgbevf_wq);
+		ixgbevf_wq = NULL;
+	}
+
 	return ret;
 }
 
 module_init(ixgbevf_init_module);
@@ -4267,6 +4267,10 @@ module_init(ixgbevf_init_module);
 static void __exit ixgbevf_exit_module(void)
 {
 	pci_unregister_driver(&ixgbevf_driver);
+	if (ixgbevf_wq) {
+		destroy_workqueue(ixgbevf_wq);
+		ixgbevf_wq = NULL;
+	}
 }
 
 #ifdef DEBUG
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index 427f3605cbfc..61a98f4c5746 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -117,7 +117,9 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw)
 	    msgbuf[0] != (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_NACK))
 		return IXGBE_ERR_INVALID_MAC_ADDR;
 
-	ether_addr_copy(hw->mac.perm_addr, addr);
+	if (msgbuf[0] == (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_ACK))
+		ether_addr_copy(hw->mac.perm_addr, addr);
+
 	hw->mac.mc_filter_type = msgbuf[IXGBE_VF_MC_TYPE_WORD];
 
 	return 0;
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 4182290fdbcf..4eba2ed53052 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -3257,25 +3257,20 @@ static struct platform_driver mv643xx_eth_driver = {
 	},
 };
 
+static struct platform_driver * const drivers[] = {
+	&mv643xx_eth_shared_driver,
+	&mv643xx_eth_driver,
+};
+
 static int __init mv643xx_eth_init_module(void)
 {
-	int rc;
-
-	rc = platform_driver_register(&mv643xx_eth_shared_driver);
-	if (!rc) {
-		rc = platform_driver_register(&mv643xx_eth_driver);
-		if (rc)
-			platform_driver_unregister(&mv643xx_eth_shared_driver);
-	}
-
-	return rc;
+	return platform_register_drivers(drivers, ARRAY_SIZE(drivers));
 }
 module_init(mv643xx_eth_init_module);
 
 static void __exit mv643xx_eth_cleanup_module(void)
 {
-	platform_driver_unregister(&mv643xx_eth_driver);
-	platform_driver_unregister(&mv643xx_eth_shared_driver);
+	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
 }
 module_exit(mv643xx_eth_cleanup_module);
 
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index e84c7f2634d3..15b1f6bbd92d 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -36,7 +36,7 @@
 
 /* Registers */
 #define MVNETA_RXQ_CONFIG_REG(q)                (0x1400 + ((q) << 2))
-#define      MVNETA_RXQ_HW_BUF_ALLOC            BIT(1)
+#define      MVNETA_RXQ_HW_BUF_ALLOC            BIT(0)
 #define      MVNETA_RXQ_PKT_OFFSET_ALL_MASK     (0xf    << 8)
 #define      MVNETA_RXQ_PKT_OFFSET_MASK(offs)   ((offs) << 8)
 #define MVNETA_RXQ_THRESHOLD_REG(q)             (0x14c0 + ((q) << 2))
@@ -62,6 +62,7 @@
 #define MVNETA_WIN_SIZE(w)                      (0x2204 + ((w) << 3))
 #define MVNETA_WIN_REMAP(w)                     (0x2280 + ((w) << 2))
 #define MVNETA_BASE_ADDR_ENABLE                 0x2290
+#define MVNETA_ACCESS_PROTECT_ENABLE            0x2294
 #define MVNETA_PORT_CONFIG                      0x2400
 #define      MVNETA_UNI_PROMISC_MODE            BIT(0)
 #define      MVNETA_DEF_RXQ(q)                  ((q) << 1)
@@ -109,9 +110,17 @@
 #define MVNETA_CPU_MAP(cpu)                      (0x2540 + ((cpu) << 2))
 #define      MVNETA_CPU_RXQ_ACCESS_ALL_MASK      0x000000ff
 #define      MVNETA_CPU_TXQ_ACCESS_ALL_MASK      0x0000ff00
+#define      MVNETA_CPU_RXQ_ACCESS(rxq)		 BIT(rxq)
+#define      MVNETA_CPU_TXQ_ACCESS(txq)		 BIT(txq + 8)
 #define MVNETA_RXQ_TIME_COAL_REG(q)              (0x2580 + ((q) << 2))
 
-/* Exception Interrupt Port/Queue Cause register */
+/* Exception Interrupt Port/Queue Cause register
+ *
+ * Their behavior depends on the mapping done using the PCPX2Q
+ * registers. For a given CPU, if the bit associated with a queue is
+ * not set, then a read of the register from this CPU will always
+ * return 0 and a write won't do anything.
+ */
 
 #define MVNETA_INTR_NEW_CAUSE                    0x25a0
 #define MVNETA_INTR_NEW_MASK                     0x25a4
@@ -159,7 +168,7 @@
 
 #define MVNETA_INTR_ENABLE                       0x25b8
 #define      MVNETA_TXQ_INTR_ENABLE_ALL_MASK     0x0000ff00
-#define      MVNETA_RXQ_INTR_ENABLE_ALL_MASK     0xff000000  // note: neta says it's 0x000000FF
+#define      MVNETA_RXQ_INTR_ENABLE_ALL_MASK     0x000000ff
 
 #define MVNETA_RXQ_CMD                           0x2680
 #define      MVNETA_RXQ_DISABLE_SHIFT            8
@@ -242,6 +251,7 @@
 #define MVNETA_VLAN_TAG_LEN             4
 
 #define MVNETA_CPU_D_CACHE_LINE_SIZE    32
+#define MVNETA_TX_CSUM_DEF_SIZE		1600
 #define MVNETA_TX_CSUM_MAX_SIZE		9800
 #define MVNETA_ACC_MODE_EXT		1
 
@@ -252,6 +262,11 @@
 
 #define MVNETA_TX_MTU_MAX		0x3ffff
 
+/* The RSS lookup table actually has 256 entries but we do not use
+ * them yet
+ */
+#define MVNETA_RSS_LU_TABLE_SIZE	1
+
 /* TSO header size */
 #define TSO_HEADER_SIZE 128
 
@@ -354,6 +369,7 @@ struct mvneta_port {
 	struct mvneta_tx_queue *txqs;
 	struct net_device *dev;
 	struct notifier_block cpu_notifier;
+	int rxq_def;
 
 	/* Core clock */
 	struct clk *clk;
@@ -369,9 +385,11 @@ struct mvneta_port {
 	unsigned int duplex;
 	unsigned int speed;
 	unsigned int tx_csum_limit;
-	int use_inband_status:1;
+	unsigned int use_inband_status:1;
 
 	u64 ethtool_stats[ARRAY_SIZE(mvneta_statistics)];
+
+	u32 indir[MVNETA_RSS_LU_TABLE_SIZE];
 };
 
 /* The mvneta_tx_desc and mvneta_rx_desc structures describe the
@@ -497,6 +515,9 @@ struct mvneta_tx_queue {
 
 	/* DMA address of TSO headers */
 	dma_addr_t tso_hdrs_phys;
+
+	/* Affinity mask for CPUs */
+	cpumask_t affinity_mask;
 };
 
 struct mvneta_rx_queue {
@@ -817,7 +838,13 @@ static void mvneta_port_up(struct mvneta_port *pp)
 	mvreg_write(pp, MVNETA_TXQ_CMD, q_map);
 
 	/* Enable all initialized RXQs. */
-	mvreg_write(pp, MVNETA_RXQ_CMD, BIT(rxq_def));
+	for (queue = 0; queue < rxq_number; queue++) {
+		struct mvneta_rx_queue *rxq = &pp->rxqs[queue];
+
+		if (rxq->descs != NULL)
+			q_map |= (1 << queue);
+	}
+	mvreg_write(pp, MVNETA_RXQ_CMD, q_map);
 }
 
 /* Stop the Ethernet port activity */
@@ -971,6 +998,44 @@ static void mvneta_set_other_mcast_table(struct mvneta_port *pp, int queue)
 		mvreg_write(pp, MVNETA_DA_FILT_OTH_MCAST + offset, val);
 }
 
+static void mvneta_set_autoneg(struct mvneta_port *pp, int enable)
+{
+	u32 val;
+
+	if (enable) {
+		val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
+		val &= ~(MVNETA_GMAC_FORCE_LINK_PASS |
+			 MVNETA_GMAC_FORCE_LINK_DOWN |
+			 MVNETA_GMAC_AN_FLOW_CTRL_EN);
+		val |= MVNETA_GMAC_INBAND_AN_ENABLE |
+		       MVNETA_GMAC_AN_SPEED_EN |
+		       MVNETA_GMAC_AN_DUPLEX_EN;
+		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
+
+		val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER);
+		val |= MVNETA_GMAC_1MS_CLOCK_ENABLE;
+		mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val);
+
+		val = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
+		val |= MVNETA_GMAC2_INBAND_AN_ENABLE;
+		mvreg_write(pp, MVNETA_GMAC_CTRL_2, val);
+	} else {
+		val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
+		val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE |
+		       MVNETA_GMAC_AN_SPEED_EN |
+		       MVNETA_GMAC_AN_DUPLEX_EN);
+		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
+
+		val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER);
+		val &= ~MVNETA_GMAC_1MS_CLOCK_ENABLE;
+		mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val);
+
+		val = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
+		val &= ~MVNETA_GMAC2_INBAND_AN_ENABLE;
+		mvreg_write(pp, MVNETA_GMAC_CTRL_2, val);
+	}
+}
+
 /* This method sets defaults to the NETA port:
  *	Clears interrupt Cause and Mask registers.
  *	Clears all MAC tables.
@@ -985,6 +1050,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
 	int cpu;
 	int queue;
 	u32 val;
+	int max_cpu = num_present_cpus();
 
 	/* Clear all Cause registers */
 	mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0);
@@ -1000,13 +1066,33 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
 	/* Enable MBUS Retry bit16 */
 	mvreg_write(pp, MVNETA_MBUS_RETRY, 0x20);
 
-	/* Set CPU queue access map - all CPUs have access to all RX
-	 * queues and to all TX queues
+	/* Set CPU queue access map. CPUs are assigned to the RX and
+	 * TX queues modulo their number. If there is only one TX
+	 * queue then it is assigned to the CPU associated to the
+	 * default RX queue.
 	 */
-	for_each_present_cpu(cpu)
-		mvreg_write(pp, MVNETA_CPU_MAP(cpu),
-			    (MVNETA_CPU_RXQ_ACCESS_ALL_MASK |
-			     MVNETA_CPU_TXQ_ACCESS_ALL_MASK));
+	for_each_present_cpu(cpu) {
+		int rxq_map = 0, txq_map = 0;
+		int rxq, txq;
+
+		for (rxq = 0; rxq < rxq_number; rxq++)
+			if ((rxq % max_cpu) == cpu)
+				rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
+
+		for (txq = 0; txq < txq_number; txq++)
+			if ((txq % max_cpu) == cpu)
+				txq_map |= MVNETA_CPU_TXQ_ACCESS(txq);
+
+		/* With only one TX queue we configure a special case
+		 * which will allow to get all the irq on a single
+		 * CPU
+		 */
+		if (txq_number == 1)
+			txq_map = (cpu == pp->rxq_def) ?
+				MVNETA_CPU_TXQ_ACCESS(1) : 0;
+
+		mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map);
+	}
 
 	/* Reset RX and TX DMAs */
 	mvreg_write(pp, MVNETA_PORT_RX_RESET, MVNETA_PORT_RX_DMA_RESET);
@@ -1027,7 +1113,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
 	mvreg_write(pp, MVNETA_ACC_MODE, val);
 
 	/* Update val of portCfg register accordingly with all RxQueue types */
-	val = MVNETA_PORT_CONFIG_DEFL_VALUE(rxq_def);
+	val = MVNETA_PORT_CONFIG_DEFL_VALUE(pp->rxq_def);
 	mvreg_write(pp, MVNETA_PORT_CONFIG, val);
 
 	val = 0;
@@ -1056,26 +1142,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
 	val &= ~MVNETA_PHY_POLLING_ENABLE;
 	mvreg_write(pp, MVNETA_UNIT_CONTROL, val);
 
-	if (pp->use_inband_status) {
-		val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
-		val &= ~(MVNETA_GMAC_FORCE_LINK_PASS |
-			 MVNETA_GMAC_FORCE_LINK_DOWN |
-			 MVNETA_GMAC_AN_FLOW_CTRL_EN);
-		val |= MVNETA_GMAC_INBAND_AN_ENABLE |
-		       MVNETA_GMAC_AN_SPEED_EN |
-		       MVNETA_GMAC_AN_DUPLEX_EN;
-		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
-		val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER);
-		val |= MVNETA_GMAC_1MS_CLOCK_ENABLE;
-		mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val);
-	} else {
-		val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
-		val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE |
-		       MVNETA_GMAC_AN_SPEED_EN |
-		       MVNETA_GMAC_AN_DUPLEX_EN);
-		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
-	}
-
+	mvneta_set_autoneg(pp, pp->use_inband_status);
 	mvneta_set_ucast_table(pp, -1);
 	mvneta_set_special_mcast_table(pp, -1);
 	mvneta_set_other_mcast_table(pp, -1);
@@ -1579,12 +1646,16 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
 		}
 
 		skb = build_skb(data, pp->frag_size > PAGE_SIZE ? 0 : pp->frag_size);
-		if (!skb)
-			goto err_drop_frame;
 
+		/* After refill, the old buffer has to be unmapped regardless
+		 * of whether the skb was successfully built or not.
+		 */
 		dma_unmap_single(dev->dev.parent, phys_addr,
 				 MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
 
+		if (!skb)
+			goto err_drop_frame;
+
 		rcvd_pkts++;
 		rcvd_bytes += rx_bytes;
 
@@ -2076,19 +2147,19 @@ static void mvneta_set_rx_mode(struct net_device *dev)
 	if (dev->flags & IFF_PROMISC) {
 		/* Accept all: Multicast + Unicast */
 		mvneta_rx_unicast_promisc_set(pp, 1);
-		mvneta_set_ucast_table(pp, rxq_def);
-		mvneta_set_special_mcast_table(pp, rxq_def);
-		mvneta_set_other_mcast_table(pp, rxq_def);
+		mvneta_set_ucast_table(pp, pp->rxq_def);
+		mvneta_set_special_mcast_table(pp, pp->rxq_def);
+		mvneta_set_other_mcast_table(pp, pp->rxq_def);
 	} else {
 		/* Accept single Unicast */
 		mvneta_rx_unicast_promisc_set(pp, 0);
 		mvneta_set_ucast_table(pp, -1);
-		mvneta_mac_addr_set(pp, dev->dev_addr, rxq_def);
+		mvneta_mac_addr_set(pp, dev->dev_addr, pp->rxq_def);
 
 		if (dev->flags & IFF_ALLMULTI) {
 			/* Accept all multicast */
-			mvneta_set_special_mcast_table(pp, rxq_def);
-			mvneta_set_other_mcast_table(pp, rxq_def);
+			mvneta_set_special_mcast_table(pp, pp->rxq_def);
+			mvneta_set_other_mcast_table(pp, pp->rxq_def);
 		} else {
 			/* Accept only initialized multicast */
 			mvneta_set_special_mcast_table(pp, -1);
@@ -2097,7 +2168,7 @@ static void mvneta_set_rx_mode(struct net_device *dev)
 			if (!netdev_mc_empty(dev)) {
 				netdev_for_each_mc_addr(ha, dev) {
 					mvneta_mcast_addr_set(pp, ha->addr,
-							      rxq_def);
+							      pp->rxq_def);
 				}
 			}
 		}
@@ -2148,6 +2219,7 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 {
 	int rx_done = 0;
 	u32 cause_rx_tx;
+	int rx_queue;
 	struct mvneta_port *pp = netdev_priv(napi->dev);
 	struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
 
@@ -2179,8 +2251,15 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 	/* For the case where the last mvneta_poll did not process all
 	 * RX packets
 	 */
+	rx_queue = fls(((cause_rx_tx >> 8) & 0xff));
+
 	cause_rx_tx |= port->cause_rx_tx;
-	rx_done = mvneta_rx(pp, budget, &pp->rxqs[rxq_def]);
+
+	if (rx_queue) {
+		rx_queue = rx_queue - 1;
+		rx_done = mvneta_rx(pp, budget, &pp->rxqs[rx_queue]);
+	}
+
 	budget -= rx_done;
 
 	if (budget > 0) {
@@ -2297,6 +2376,8 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
 static int mvneta_txq_init(struct mvneta_port *pp,
 			   struct mvneta_tx_queue *txq)
 {
+	int cpu;
+
 	txq->size = pp->tx_ring_size;
 
 	/* A queue must always have room for at least one skb.
@@ -2349,6 +2430,14 @@ static int mvneta_txq_init(struct mvneta_port *pp,
 	}
 	mvneta_tx_done_pkts_coal_set(pp, txq, txq->done_pkts_coal);
 
+	/* Setup XPS mapping */
+	if (txq_number > 1)
+		cpu = txq->id % num_present_cpus();
+	else
+		cpu = pp->rxq_def % num_present_cpus();
+	cpumask_set_cpu(cpu, &txq->affinity_mask);
+	netif_set_xps_queue(pp->dev, &txq->affinity_mask, txq->id);
+
 	return 0;
 }
 
@@ -2393,19 +2482,27 @@ static void mvneta_cleanup_txqs(struct mvneta_port *pp)
 /* Cleanup all Rx queues */
 static void mvneta_cleanup_rxqs(struct mvneta_port *pp)
 {
-	mvneta_rxq_deinit(pp, &pp->rxqs[rxq_def]);
+	int queue;
+
+	for (queue = 0; queue < rxq_number; queue++)
+		mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
 }
 
 
 /* Init all Rx queues */
 static int mvneta_setup_rxqs(struct mvneta_port *pp)
 {
-	int err = mvneta_rxq_init(pp, &pp->rxqs[rxq_def]);
-	if (err) {
-		netdev_err(pp->dev, "%s: can't create rxq=%d\n",
-			   __func__, rxq_def);
-		mvneta_cleanup_rxqs(pp);
-		return err;
+	int queue;
+
+	for (queue = 0; queue < rxq_number; queue++) {
+		int err = mvneta_rxq_init(pp, &pp->rxqs[queue]);
+
+		if (err) {
+			netdev_err(pp->dev, "%s: can't create rxq=%d\n",
+				   __func__, queue);
+			mvneta_cleanup_rxqs(pp);
+			return err;
+		}
 	}
 
 	return 0;
@@ -2429,6 +2526,31 @@ static int mvneta_setup_txqs(struct mvneta_port *pp)
 	return 0;
 }
 
+static void mvneta_percpu_unmask_interrupt(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	/* All the queues are unmasked, but actually only the ones
+	 * mapped to this CPU will be unmasked
+	 */
+	mvreg_write(pp, MVNETA_INTR_NEW_MASK,
+		    MVNETA_RX_INTR_MASK_ALL |
+		    MVNETA_TX_INTR_MASK_ALL |
+		    MVNETA_MISCINTR_INTR_MASK);
+}
+
+static void mvneta_percpu_mask_interrupt(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	/* All the queues are masked, but actually only the ones
+	 * mapped to this CPU will be masked
+	 */
+	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
+	mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
+	mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+}
+
 static void mvneta_start_dev(struct mvneta_port *pp)
 {
 	unsigned int cpu;
@@ -2446,11 +2568,10 @@ static void mvneta_start_dev(struct mvneta_port *pp)
 		napi_enable(&port->napi);
 	}
 
-	/* Unmask interrupts */
-	mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-		    MVNETA_RX_INTR_MASK(rxq_number) |
-		    MVNETA_TX_INTR_MASK(txq_number) |
-		    MVNETA_MISCINTR_INTR_MASK);
+	/* Unmask interrupts. It has to be done from each CPU */
+	for_each_online_cpu(cpu)
+		smp_call_function_single(cpu, mvneta_percpu_unmask_interrupt,
+					 pp, true);
 	mvreg_write(pp, MVNETA_INTR_MISC_MASK,
 		    MVNETA_CAUSE_PHY_STATUS_CHANGE |
 		    MVNETA_CAUSE_LINK_CHANGE |
@@ -2609,7 +2730,7 @@ static int mvneta_set_mac_addr(struct net_device *dev, void *addr)
 	mvneta_mac_addr_set(pp, dev->dev_addr, -1);
 
 	/* Set new addr in hw */
-	mvneta_mac_addr_set(pp, sockaddr->sa_data, rxq_def);
+	mvneta_mac_addr_set(pp, sockaddr->sa_data, pp->rxq_def);
 
 	eth_commit_mac_addr_change(dev, addr);
 	return 0;
@@ -2726,22 +2847,45 @@ static void mvneta_percpu_disable(void *arg)
 
 static void mvneta_percpu_elect(struct mvneta_port *pp)
 {
-	int online_cpu_idx, cpu, i = 0;
+	int online_cpu_idx, max_cpu, cpu, i = 0;
 
-	online_cpu_idx = rxq_def % num_online_cpus();
+	online_cpu_idx = pp->rxq_def % num_online_cpus();
+	max_cpu = num_present_cpus();
 
 	for_each_online_cpu(cpu) {
+		int rxq_map = 0, txq_map = 0;
+		int rxq;
+
+		for (rxq = 0; rxq < rxq_number; rxq++)
+			if ((rxq % max_cpu) == cpu)
+				rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
+
 		if (i == online_cpu_idx)
-			/* Enable per-CPU interrupt on the one CPU we
-			 * just elected
+			/* Map the default receive queue to the
+			 * elected CPU
+			 */
-			smp_call_function_single(cpu, mvneta_percpu_enable,
-						pp, true);
+			rxq_map |= MVNETA_CPU_RXQ_ACCESS(pp->rxq_def);
+
+		/* We update the TX queue map only if we have one
+		 * queue. In this case we associate the TX queue to
+		 * the CPU bound to the default RX queue
+		 */
+		if (txq_number == 1)
+			txq_map = (i == online_cpu_idx) ?
+				MVNETA_CPU_TXQ_ACCESS(1) : 0;
 		else
-			/* Disable per-CPU interrupt on all the other CPU */
-			smp_call_function_single(cpu, mvneta_percpu_disable,
-						pp, true);
+			txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) &
+				MVNETA_CPU_TXQ_ACCESS_ALL_MASK;
+
+		mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map);
+
+		/* Update the interrupt mask on each CPU according to
+		 * the new mapping
+		 */
+		smp_call_function_single(cpu, mvneta_percpu_unmask_interrupt,
+					 pp, true);
 		i++;
+
 	}
 };
 
@@ -2776,12 +2920,22 @@ static int mvneta_percpu_notifier(struct notifier_block *nfb,
 		mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
 		napi_enable(&port->napi);
 
+
+		/* Enable per-CPU interrupts on the CPU that is
+		 * brought up.
+		 */
+		smp_call_function_single(cpu, mvneta_percpu_enable,
+					 pp, true);
+
 		/* Enable per-CPU interrupt on the one CPU we care
 		 * about.
 		 */
 		mvneta_percpu_elect(pp);
 
-		/* Unmask all ethernet port interrupts */
+		/* Unmask all ethernet port interrupts. As this
+		 * notifier is called for each CPU, the CPU to
+		 * queue mapping is applied
+		 */
 		mvreg_write(pp, MVNETA_INTR_NEW_MASK,
 			MVNETA_RX_INTR_MASK(rxq_number) |
 			MVNETA_TX_INTR_MASK(txq_number) |
@@ -2832,7 +2986,7 @@ static int mvneta_percpu_notifier(struct notifier_block *nfb,
 static int mvneta_open(struct net_device *dev)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
-	int ret;
+	int ret, cpu;
 
 	pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu);
 	pp->frag_size = SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(pp->pkt_size)) +
@@ -2862,8 +3016,13 @@ static int mvneta_open(struct net_device *dev)
 	 */
 	mvneta_percpu_disable(pp);
 
-	/* Elect a CPU to handle our RX queue interrupt */
-	mvneta_percpu_elect(pp);
+	/* Enable per-CPU interrupt on all the CPU to handle our RX
+	 * queue interrupts
+	 */
+	for_each_online_cpu(cpu)
+		smp_call_function_single(cpu, mvneta_percpu_enable,
+					 pp, true);
+
 
 	/* Register a CPU notifier to handle the case where our CPU
 	 * might be taken offline.
@@ -2937,10 +3096,43 @@ int mvneta_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
+	struct phy_device *phydev = pp->phy_dev;
 
-	if (!pp->phy_dev)
+	if (!phydev)
 		return -ENODEV;
 
+	if ((cmd->autoneg == AUTONEG_ENABLE) != pp->use_inband_status) {
+		u32 val;
+
+		mvneta_set_autoneg(pp, cmd->autoneg == AUTONEG_ENABLE);
+
+		if (cmd->autoneg == AUTONEG_DISABLE) {
+			val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
+			val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED |
+				 MVNETA_GMAC_CONFIG_GMII_SPEED |
+				 MVNETA_GMAC_CONFIG_FULL_DUPLEX);
+
+			if (phydev->duplex)
+				val |= MVNETA_GMAC_CONFIG_FULL_DUPLEX;
+
+			if (phydev->speed == SPEED_1000)
+				val |= MVNETA_GMAC_CONFIG_GMII_SPEED;
+			else if (phydev->speed == SPEED_100)
+				val |= MVNETA_GMAC_CONFIG_MII_SPEED;
+
+			mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
+		}
+
+		pp->use_inband_status = (cmd->autoneg == AUTONEG_ENABLE);
+		netdev_info(pp->dev, "autoneg status set to %i\n",
+			    pp->use_inband_status);
+
+		if (netif_running(dev)) {
+			mvneta_port_down(pp);
+			mvneta_port_up(pp);
+		}
+	}
+
 	return phy_ethtool_sset(pp->phy_dev, cmd);
 }
 
@@ -3092,6 +3284,106 @@ static int mvneta_ethtool_get_sset_count(struct net_device *dev, int sset)
 	return -EOPNOTSUPP;
 }
 
+static u32 mvneta_ethtool_get_rxfh_indir_size(struct net_device *dev)
+{
+	return MVNETA_RSS_LU_TABLE_SIZE;
+}
+
+static int mvneta_ethtool_get_rxnfc(struct net_device *dev,
+				    struct ethtool_rxnfc *info,
+				    u32 *rules __always_unused)
+{
+	switch (info->cmd) {
+	case ETHTOOL_GRXRINGS:
+		info->data =  rxq_number;
+		return 0;
+	case ETHTOOL_GRXFH:
+		return -EOPNOTSUPP;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int  mvneta_config_rss(struct mvneta_port *pp)
+{
+	int cpu;
+	u32 val;
+
+	netif_tx_stop_all_queues(pp->dev);
+
+	for_each_online_cpu(cpu)
+		smp_call_function_single(cpu, mvneta_percpu_mask_interrupt,
+					 pp, true);
+
+	/* We have to synchronise on the napi of each CPU */
+	for_each_online_cpu(cpu) {
+		struct mvneta_pcpu_port *pcpu_port =
+			per_cpu_ptr(pp->ports, cpu);
+
+		napi_synchronize(&pcpu_port->napi);
+		napi_disable(&pcpu_port->napi);
+	}
+
+	pp->rxq_def = pp->indir[0];
+
+	/* Update unicast mapping */
+	mvneta_set_rx_mode(pp->dev);
+
+	/* Update val of portCfg register accordingly with all RxQueue types */
+	val = MVNETA_PORT_CONFIG_DEFL_VALUE(pp->rxq_def);
+	mvreg_write(pp, MVNETA_PORT_CONFIG, val);
+
+	/* Update the elected CPU matching the new rxq_def */
+	mvneta_percpu_elect(pp);
+
+	/* Re-enable the napi of each CPU */
+	for_each_online_cpu(cpu) {
+		struct mvneta_pcpu_port *pcpu_port =
+			per_cpu_ptr(pp->ports, cpu);
+
+		napi_enable(&pcpu_port->napi);
+	}
+
+	netif_tx_start_all_queues(pp->dev);
+
+	return 0;
+}
+
+static int mvneta_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
+				   const u8 *key, const u8 hfunc)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+	/* We require at least one supported parameter to be changed
+	 * and no change in any of the unsupported parameters
+	 */
+	if (key ||
+	    (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP))
+		return -EOPNOTSUPP;
+
+	if (!indir)
+		return 0;
+
+	/* memcpy() takes a byte count: copy the whole u32 table */
+	memcpy(pp->indir, indir, sizeof(pp->indir));
+	return mvneta_config_rss(pp);
+}
+
+static int mvneta_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
+				   u8 *hfunc)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	if (!indir)
+		return 0;
+
+	/* memcpy() takes a byte count: copy the whole u32 table */
+	memcpy(indir, pp->indir, sizeof(pp->indir));
+	return 0;
+}
+
 static const struct net_device_ops mvneta_netdev_ops = {
 	.ndo_open            = mvneta_open,
 	.ndo_stop            = mvneta_stop,
@@ -3116,6 +3408,10 @@ const struct ethtool_ops mvneta_eth_tool_ops = {
 	.get_strings	= mvneta_ethtool_get_strings,
 	.get_ethtool_stats = mvneta_ethtool_get_stats,
 	.get_sset_count	= mvneta_ethtool_get_sset_count,
+	.get_rxfh_indir_size = mvneta_ethtool_get_rxfh_indir_size,
+	.get_rxnfc	= mvneta_ethtool_get_rxnfc,
+	.get_rxfh	= mvneta_ethtool_get_rxfh,
+	.set_rxfh	= mvneta_ethtool_set_rxfh,
 };
 
 /* Initialize hw */
@@ -3191,6 +3487,7 @@ static void mvneta_conf_mbus_windows(struct mvneta_port *pp,
 	}
 
 	mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable);
+	mvreg_write(pp, MVNETA_ACCESS_PROTECT_ENABLE, win_protect);
 }
 
 /* Power up the port */
@@ -3223,9 +3520,6 @@ static int mvneta_port_power_up(struct mvneta_port *pp, int phy_mode)
 		return -EINVAL;
 	}
 
-	if (pp->use_inband_status)
-		ctrl |= MVNETA_GMAC2_INBAND_AN_ENABLE;
-
 	/* Cancel Port Reset */
 	ctrl &= ~MVNETA_GMAC2_PORT_RESET;
 	mvreg_write(pp, MVNETA_GMAC_CTRL_2, ctrl);
@@ -3250,6 +3544,7 @@ static int mvneta_probe(struct platform_device *pdev)
 	char hw_mac_addr[ETH_ALEN];
 	const char *mac_from;
 	const char *managed;
+	int tx_csum_limit;
 	int phy_mode;
 	int err;
 	int cpu;
@@ -3306,6 +3601,10 @@ static int mvneta_probe(struct platform_device *pdev)
 				 strcmp(managed, "in-band-status") == 0);
 	pp->cpu_notifier.notifier_call = mvneta_percpu_notifier;
 
+	pp->rxq_def = rxq_def;
+
+	pp->indir[0] = rxq_def;
+
 	pp->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(pp->clk)) {
 		err = PTR_ERR(pp->clk);
@@ -3350,8 +3649,21 @@ static int mvneta_probe(struct platform_device *pdev)
 		}
 	}
 
-	if (of_device_is_compatible(dn, "marvell,armada-370-neta"))
-		pp->tx_csum_limit = 1600;
+	if (!of_property_read_u32(dn, "tx-csum-limit", &tx_csum_limit)) {
+		if (tx_csum_limit < 0 ||
+		    tx_csum_limit > MVNETA_TX_CSUM_MAX_SIZE) {
+			tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE;
+			dev_info(&pdev->dev,
+				 "Wrong TX csum limit in DT, set to %dB\n",
+				 MVNETA_TX_CSUM_DEF_SIZE);
+		}
+	} else if (of_device_is_compatible(dn, "marvell,armada-370-neta")) {
+		tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE;
+	} else {
+		tx_csum_limit = MVNETA_TX_CSUM_MAX_SIZE;
+	}
+
+	pp->tx_csum_limit = tx_csum_limit;
 
 	pp->tx_ring_size = MVNETA_MAX_TXD;
 	pp->rx_ring_size = MVNETA_MAX_RXD;
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 603d1c3d3b2e..4696053165f8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -151,6 +151,17 @@ void mlx4_gen_slave_eqe(struct work_struct *work)
 	      eqe = next_slave_event_eqe(slave_eq)) {
 		slave = eqe->slave_id;
 
+		if (eqe->type == MLX4_EVENT_TYPE_PORT_CHANGE &&
+		    eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN &&
+		    mlx4_is_bonded(dev)) {
+			struct mlx4_port_cap port_cap;
+
+			if (!mlx4_QUERY_PORT(dev, 1, &port_cap) && port_cap.link_state)
+				goto consume;
+
+			if (!mlx4_QUERY_PORT(dev, 2, &port_cap) && port_cap.link_state)
+				goto consume;
+		}
 		/* All active slaves need to receive the event */
 		if (slave == ALL_SLAVES) {
 			for (i = 0; i <= dev->persist->num_vfs; i++) {
@@ -174,6 +185,7 @@ void mlx4_gen_slave_eqe(struct work_struct *work)
 				mlx4_warn(dev, "Failed to generate event for slave %d\n",
 					  slave);
 		}
+consume:
 		++slave_eq->cons;
 	}
 }
@@ -594,7 +606,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 					break;
 				for (i = 0; i < dev->persist->num_vfs + 1;
 				     i++) {
-					if (!test_bit(i, slaves_port.slaves))
+					int reported_port = mlx4_is_bonded(dev) ? 1 : mlx4_phys_to_slave_port(dev, i, port);
+
+					if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev))
 						continue;
 					if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
 						if (i == mlx4_master_func_num(dev))
@@ -606,7 +620,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 							eqe->event.port_change.port =
 								cpu_to_be32(
 								(be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF)
-								| (mlx4_phys_to_slave_port(dev, i, port) << 28));
+								| (reported_port << 28));
 							mlx4_slave_event(dev, i, eqe);
 						}
 					} else {  /* IB port */
@@ -636,7 +650,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 					for (i = 0;
 					     i < dev->persist->num_vfs + 1;
 					     i++) {
-						if (!test_bit(i, slaves_port.slaves))
+						int reported_port = mlx4_is_bonded(dev) ? 1 : mlx4_phys_to_slave_port(dev, i, port);
+
+						if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev))
 							continue;
 						if (i == mlx4_master_func_num(dev))
 							continue;
@@ -645,7 +661,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 							eqe->event.port_change.port =
 								cpu_to_be32(
 								(be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF)
-								| (mlx4_phys_to_slave_port(dev, i, port) << 28));
+								| (reported_port << 28));
 							mlx4_slave_event(dev, i, eqe);
 						}
 					}
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 90db94e83fde..2c2baab9d880 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -1104,6 +1104,7 @@ int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_c
 			goto out;
 
 		MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET);
+		port_cap->link_state = (field & 0x80) >> 7;
 		port_cap->supported_port_types = field & 3;
 		port_cap->suggested_type = (field >> 3) & 1;
 		port_cap->default_sense = (field >> 4) & 1;
@@ -1310,6 +1311,15 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
 			port_type |= MLX4_PORT_LINK_UP_MASK;
 		else if (IFLA_VF_LINK_STATE_DISABLE == admin_link_state)
 			port_type &= ~MLX4_PORT_LINK_UP_MASK;
+		else if (IFLA_VF_LINK_STATE_AUTO == admin_link_state && mlx4_is_bonded(dev)) {
+			int other_port = (port == 1) ? 2 : 1;
+			struct mlx4_port_cap port_cap;
+
+			err = mlx4_QUERY_PORT(dev, other_port, &port_cap);
+			if (err)
+				goto out;
+			port_type |= (port_cap.link_state << 7);
+		}
 
 		MLX4_PUT(outbox->buf, port_type,
 			 QUERY_PORT_SUPPORTED_TYPE_OFFSET);
@@ -1325,7 +1335,7 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
 		MLX4_PUT(outbox->buf, short_field,
 			 QUERY_PORT_CUR_MAX_PKEY_OFFSET);
 	}
-
+out:
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 08de5555c2f4..7ea258af636a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -44,6 +44,7 @@ struct mlx4_mod_stat_cfg {
 };
 
 struct mlx4_port_cap {
+	u8  link_state;
 	u8  supported_port_types;
 	u8  suggested_type;
 	u8  default_sense;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 31c491e02e69..f1b6d219e445 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1221,6 +1221,76 @@ err_set_port:
 	return err ? err : count;
 }
 
+/* bond for multi-function device */
+#define MAX_MF_BOND_ALLOWED_SLAVES 63
+static int mlx4_mf_bond(struct mlx4_dev *dev)
+{
+	int err = 0;
+	struct mlx4_slaves_pport slaves_port1;
+	struct mlx4_slaves_pport slaves_port2;
+	DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX);
+
+	slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1);
+	slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2);
+	bitmap_and(slaves_port_1_2,
+		   slaves_port1.slaves, slaves_port2.slaves,
+		   dev->persist->num_vfs + 1);
+
+	/* only single port vfs are allowed: at most one function may appear on both ports */
+	if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) {
+		mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n");
+		return -EINVAL;
+	}
+
+	/* limit on maximum allowed VFs (rejected silently, no warning is logged) */
+	if ((bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
+	    bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1)) >
+	    MAX_MF_BOND_ALLOWED_SLAVES)
+		return -EINVAL;
+	/* mirroring is refused unless steering is device managed (DMFS) */
+	if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
+		mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n");
+		return -EINVAL;
+	}
+	/* mirror MAC table, VLAN table, then flow rules; on failure undo earlier steps in reverse */
+	err = mlx4_bond_mac_table(dev);
+	if (err)
+		return err;
+	err = mlx4_bond_vlan_table(dev);
+	if (err)
+		goto err1;
+	err = mlx4_bond_fs_rules(dev);
+	if (err)
+		goto err2;
+
+	return 0;
+err2:
+	(void)mlx4_unbond_vlan_table(dev);
+err1:
+	(void)mlx4_unbond_mac_table(dev);
+	return err;
+}
+
+static int mlx4_mf_unbond(struct mlx4_dev *dev)
+{
+	int ret, ret1;
+
+	/* Best effort: every step runs even if an earlier one failed; the last error wins */
+	ret = mlx4_unbond_fs_rules(dev);
+	if (ret)
+		mlx4_warn(dev, "multifunction unbond for flow rules failed (%d)\n", ret);
+	ret1 = mlx4_unbond_mac_table(dev);
+	if (ret1)
+		mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1);
+	ret = ret1 ? ret1 : ret;
+	ret1 = mlx4_unbond_vlan_table(dev);
+	if (ret1)
+		mlx4_warn(dev, "multifunction unbond for VLAN table failed (%d)\n", ret1);
+	ret = ret1 ? ret1 : ret;
+
+	return ret;
+}
+
 int mlx4_bond(struct mlx4_dev *dev)
 {
 	int ret = 0;
@@ -1228,16 +1298,23 @@ int mlx4_bond(struct mlx4_dev *dev)
 
 	mutex_lock(&priv->bond_mutex);
 
-	if (!mlx4_is_bonded(dev))
+	if (!mlx4_is_bonded(dev)) {
 		ret = mlx4_do_bond(dev, true);
-	else
-		ret = 0;
+		if (ret)
+			mlx4_err(dev, "Failed to bond device: %d\n", ret);
+		if (!ret && mlx4_is_master(dev)) {
+			ret = mlx4_mf_bond(dev);
+			if (ret) {
+				mlx4_err(dev, "bond for multifunction failed\n");
+				mlx4_do_bond(dev, false);
+			}
+		}
+	}
 
 	mutex_unlock(&priv->bond_mutex);
-	if (ret)
-		mlx4_err(dev, "Failed to bond device: %d\n", ret);
-	else
+	if (!ret)
 		mlx4_dbg(dev, "Device is bonded\n");
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(mlx4_bond);
@@ -1249,14 +1326,24 @@ int mlx4_unbond(struct mlx4_dev *dev)
 
 	mutex_lock(&priv->bond_mutex);
 
-	if (mlx4_is_bonded(dev))
+	if (mlx4_is_bonded(dev)) {
+		int ret2 = 0;
+
 		ret = mlx4_do_bond(dev, false);
+		if (ret)
+			mlx4_err(dev, "Failed to unbond device: %d\n", ret);
+		if (mlx4_is_master(dev))
+			ret2 = mlx4_mf_unbond(dev);
+		if (ret2) {
+			mlx4_warn(dev, "Failed to unbond device for multifunction (%d)\n", ret2);
+			ret = ret2;
+		}
+	}
 
 	mutex_unlock(&priv->bond_mutex);
-	if (ret)
-		mlx4_err(dev, "Failed to unbond device: %d\n", ret);
-	else
+	if (!ret)
 		mlx4_dbg(dev, "Device is unbonded\n");
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(mlx4_unbond);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index e1cf9036af22..2404c22ad2b2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -736,6 +736,7 @@ struct mlx4_catas_err {
 struct mlx4_mac_table {
 	__be64			entries[MLX4_MAX_MAC_NUM];
 	int			refs[MLX4_MAX_MAC_NUM];
+	bool			is_dup[MLX4_MAX_MAC_NUM];
 	struct mutex		mutex;
 	int			total;
 	int			max;
@@ -758,6 +759,7 @@ struct mlx4_roce_gid_table {
 struct mlx4_vlan_table {
 	__be32			entries[MLX4_MAX_VLAN_NUM];
 	int			refs[MLX4_MAX_VLAN_NUM];
+	int			is_dup[MLX4_MAX_VLAN_NUM];
 	struct mutex		mutex;
 	int			total;
 	int			max;
@@ -1225,6 +1227,10 @@ void mlx4_init_roce_gid_table(struct mlx4_dev *dev,
 			      struct mlx4_roce_gid_table *table);
 void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
 int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
+int mlx4_bond_vlan_table(struct mlx4_dev *dev);
+int mlx4_unbond_vlan_table(struct mlx4_dev *dev);
+int mlx4_bond_mac_table(struct mlx4_dev *dev);
+int mlx4_unbond_mac_table(struct mlx4_dev *dev);
 
 int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz);
 /* resource tracker functions*/
@@ -1385,6 +1391,8 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port);
 int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave);
 int mlx4_config_mad_demux(struct mlx4_dev *dev);
 int mlx4_do_bond(struct mlx4_dev *dev, bool enable);
+int mlx4_bond_fs_rules(struct mlx4_dev *dev);
+int mlx4_unbond_fs_rules(struct mlx4_dev *dev);
 
 enum mlx4_zone_flags {
 	MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO	= 1UL << 0,
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index c2b21313dba7..f2550425c251 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -61,6 +61,7 @@ void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table)
 	for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
 		table->entries[i] = 0;
 		table->refs[i]	 = 0;
+		table->is_dup[i] = false;
 	}
 	table->max   = 1 << dev->caps.log_num_macs;
 	table->total = 0;
@@ -74,6 +75,7 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table)
 	for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) {
 		table->entries[i] = 0;
 		table->refs[i]	 = 0;
+		table->is_dup[i] = false;
 	}
 	table->max   = (1 << dev->caps.log_num_vlans) - MLX4_VLAN_REGULAR;
 	table->total = 0;
@@ -159,21 +161,94 @@ int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx)
 }
 EXPORT_SYMBOL_GPL(mlx4_find_cached_mac);
 
+static bool mlx4_need_mf_bond(struct mlx4_dev *dev)
+{
+	int port, eth_ports = 0;
+
+	/* Only a multi-function device with exactly two Ethernet ports qualifies */
+	if (!mlx4_is_mfunc(dev))
+		return false;
+	mlx4_foreach_port(port, dev, MLX4_PORT_TYPE_ETH)
+		eth_ports++;
+	return eth_ports == 2;
+}
+
 int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 {
 	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
 	struct mlx4_mac_table *table = &info->mac_table;
 	int i, err = 0;
 	int free = -1;
+	int free_for_dup = -1;
+	bool dup = mlx4_is_mf_bonded(dev);
+	u8 dup_port = (port == 1) ? 2 : 1;
+	struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table;
+	bool need_mf_bond = mlx4_need_mf_bond(dev);
+	bool can_mf_bond = true;
+
+	mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d %s duplicate\n",
+		 (unsigned long long)mac, port,
+		 dup ? "with" : "without");
+
+	if (need_mf_bond) {
+		if (port == 1) {
+			mutex_lock(&table->mutex);
+			mutex_lock(&dup_table->mutex);
+		} else {
+			mutex_lock(&dup_table->mutex);
+			mutex_lock(&table->mutex);
+		}
+	} else {
+		mutex_lock(&table->mutex);
+	}
+
+	if (need_mf_bond) {
+		int index_at_port = -1;
+		int index_at_dup_port = -1;
 
-	mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d\n",
-		 (unsigned long long) mac, port);
+		for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+			if (((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))))
+				index_at_port = i;
+			if (((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(dup_table->entries[i]))))
+				index_at_dup_port = i;
+		}
+
+		/* check that same mac is not in the tables at different indices */
+		if ((index_at_port != index_at_dup_port) &&
+		    (index_at_port >= 0) &&
+		    (index_at_dup_port >= 0))
+			can_mf_bond = false;
+
+		/* If the mac is already in the primary table, the slot must be
+		 * available in the duplicate table as well.
+		 */
+		if (index_at_port >= 0 && index_at_dup_port < 0 &&
+		    dup_table->refs[index_at_port]) {
+			can_mf_bond = false;
+		}
+		/* If the mac is already in the duplicate table, check that the
+		 * corresponding index is not occupied in the primary table, or
+		 * the primary table already contains the mac at the same index.
+		 * Otherwise, you cannot bond (primary contains a different mac
+		 * at that index).
+		 */
+		if (index_at_dup_port >= 0) {
+			if (!table->refs[index_at_dup_port] ||
+			    ((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(table->entries[index_at_dup_port]))))
+				free_for_dup = index_at_dup_port;
+			else
+				can_mf_bond = false;
+		}
+	}
 
-	mutex_lock(&table->mutex);
 	for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
 		if (!table->refs[i]) {
 			if (free < 0)
 				free = i;
+			if (free_for_dup < 0 && need_mf_bond && can_mf_bond) {
+				if (!dup_table->refs[i])
+					free_for_dup = i;
+			}
 			continue;
 		}
 
@@ -182,10 +257,30 @@ int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 			/* MAC already registered, increment ref count */
 			err = i;
 			++table->refs[i];
+			if (dup) {
+				u64 dup_mac = MLX4_MAC_MASK & be64_to_cpu(dup_table->entries[i]);
+
+				if (dup_mac != mac || !dup_table->is_dup[i]) {
+					mlx4_warn(dev, "register mac: expect duplicate mac 0x%llx on port %d index %d\n",
+						  mac, dup_port, i);
+				}
+			}
 			goto out;
 		}
 	}
 
+	if (need_mf_bond && (free_for_dup < 0)) {
+		if (dup) {
+			mlx4_warn(dev, "Fail to allocate duplicate MAC table entry\n");
+			mlx4_warn(dev, "High Availability for virtual functions may not work as expected\n");
+			dup = false;
+		}
+		can_mf_bond = false;
+	}
+
+	if (need_mf_bond && can_mf_bond)
+		free = free_for_dup;
+
 	mlx4_dbg(dev, "Free MAC index is %d\n", free);
 
 	if (table->total == table->max) {
@@ -205,10 +300,35 @@ int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 		goto out;
 	}
 	table->refs[free] = 1;
-	err = free;
+	table->is_dup[free] = false;
 	++table->total;
+	if (dup) {
+		dup_table->refs[free] = 0;
+		dup_table->is_dup[free] = true;
+		dup_table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID);
+
+		err = mlx4_set_port_mac_table(dev, dup_port, dup_table->entries);
+		if (unlikely(err)) {
+			mlx4_warn(dev, "Failed adding duplicate mac: 0x%llx\n", mac);
+			dup_table->is_dup[free] = false;
+			dup_table->entries[free] = 0;
+			goto out;
+		}
+		++dup_table->total;
+	}
+	err = free;
 out:
-	mutex_unlock(&table->mutex);
+	if (need_mf_bond) {
+		if (port == 2) {
+			mutex_unlock(&table->mutex);
+			mutex_unlock(&dup_table->mutex);
+		} else {
+			mutex_unlock(&dup_table->mutex);
+			mutex_unlock(&table->mutex);
+		}
+	} else {
+		mutex_unlock(&table->mutex);
+	}
 	return err;
 }
 EXPORT_SYMBOL_GPL(__mlx4_register_mac);
@@ -255,6 +375,9 @@ void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 	struct mlx4_port_info *info;
 	struct mlx4_mac_table *table;
 	int index;
+	bool dup = mlx4_is_mf_bonded(dev);
+	u8 dup_port = (port == 1) ? 2 : 1;
+	struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table;
 
 	if (port < 1 || port > dev->caps.num_ports) {
 		mlx4_warn(dev, "invalid port number (%d), aborting...\n", port);
@@ -262,22 +385,59 @@ void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 	}
 	info = &mlx4_priv(dev)->port[port];
 	table = &info->mac_table;
-	mutex_lock(&table->mutex);
+
+	if (dup) {
+		if (port == 1) {
+			mutex_lock(&table->mutex);
+			mutex_lock(&dup_table->mutex);
+		} else {
+			mutex_lock(&dup_table->mutex);
+			mutex_lock(&table->mutex);
+		}
+	} else {
+		mutex_lock(&table->mutex);
+	}
+
 	index = find_index(dev, table, mac);
 
 	if (validate_index(dev, table, index))
 		goto out;
-	if (--table->refs[index]) {
+
+	if (--table->refs[index] || table->is_dup[index]) {
 		mlx4_dbg(dev, "Have more references for index %d, no need to modify mac table\n",
 			 index);
+		if (!table->refs[index])
+			dup_table->is_dup[index] = false;
 		goto out;
 	}
 
 	table->entries[index] = 0;
-	mlx4_set_port_mac_table(dev, port, table->entries);
+	if (mlx4_set_port_mac_table(dev, port, table->entries))
+		mlx4_warn(dev, "Fail to set mac in port %d during unregister\n", port);
 	--table->total;
+
+	if (dup) {
+		dup_table->is_dup[index] = false;
+		if (dup_table->refs[index])
+			goto out;
+		dup_table->entries[index] = 0;
+		if (mlx4_set_port_mac_table(dev, dup_port, dup_table->entries))
+			mlx4_warn(dev, "Fail to set mac in duplicate port %d during unregister\n", dup_port);
+		/* the mirrored entry is counted in dup_table->total by __mlx4_register_mac() */
+		--dup_table->total;
+	}
 out:
-	mutex_unlock(&table->mutex);
+	if (dup) {
+		if (port == 2) {
+			mutex_unlock(&table->mutex);
+			mutex_unlock(&dup_table->mutex);
+		} else {
+			mutex_unlock(&dup_table->mutex);
+			mutex_unlock(&table->mutex);
+		}
+	} else {
+		mutex_unlock(&table->mutex);
+	}
 }
 EXPORT_SYMBOL_GPL(__mlx4_unregister_mac);
 
@@ -311,9 +471,22 @@ int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac)
 	struct mlx4_mac_table *table = &info->mac_table;
 	int index = qpn - info->base_qpn;
 	int err = 0;
+	bool dup = mlx4_is_mf_bonded(dev);
+	u8 dup_port = (port == 1) ? 2 : 1;
+	struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table;
 
 	/* CX1 doesn't support multi-functions */
-	mutex_lock(&table->mutex);
+	if (dup) {
+		if (port == 1) {
+			mutex_lock(&table->mutex);
+			mutex_lock(&dup_table->mutex);
+		} else {
+			mutex_lock(&dup_table->mutex);
+			mutex_lock(&table->mutex);
+		}
+	} else {
+		mutex_lock(&table->mutex);
+	}
 
 	err = validate_index(dev, table, index);
 	if (err)
@@ -326,9 +499,30 @@ int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac)
 		mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
 			 (unsigned long long) new_mac);
 		table->entries[index] = 0;
+	} else {
+		if (dup) {
+			dup_table->entries[index] = cpu_to_be64(new_mac | MLX4_MAC_VALID);
+
+			err = mlx4_set_port_mac_table(dev, dup_port, dup_table->entries);
+			if (unlikely(err)) {
+				mlx4_err(dev, "Failed adding duplicate MAC: 0x%llx\n",
+					 (unsigned long long)new_mac);
+				dup_table->entries[index] = 0;
+			}
+		}
 	}
 out:
-	mutex_unlock(&table->mutex);
+	if (dup) {
+		if (port == 2) {
+			mutex_unlock(&table->mutex);
+			mutex_unlock(&dup_table->mutex);
+		} else {
+			mutex_unlock(&dup_table->mutex);
+			mutex_unlock(&table->mutex);
+		}
+	} else {
+		mutex_unlock(&table->mutex);
+	}
 	return err;
 }
 EXPORT_SYMBOL_GPL(__mlx4_replace_mac);
@@ -380,8 +574,28 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan,
 	struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
 	int i, err = 0;
 	int free = -1;
-
-	mutex_lock(&table->mutex);
+	int free_for_dup = -1;
+	bool dup = mlx4_is_mf_bonded(dev);
+	u8 dup_port = (port == 1) ? 2 : 1;
+	struct mlx4_vlan_table *dup_table = &mlx4_priv(dev)->port[dup_port].vlan_table;
+	bool need_mf_bond = mlx4_need_mf_bond(dev);
+	bool can_mf_bond = true;
+
+	mlx4_dbg(dev, "Registering VLAN: %d for port %d %s duplicate\n",
+		 vlan, port,
+		 dup ? "with" : "without");
+
+	if (need_mf_bond) {
+		if (port == 1) {
+			mutex_lock(&table->mutex);
+			mutex_lock(&dup_table->mutex);
+		} else {
+			mutex_lock(&dup_table->mutex);
+			mutex_lock(&table->mutex);
+		}
+	} else {
+		mutex_lock(&table->mutex);
+	}
 
 	if (table->total == table->max) {
 		/* No free vlan entries */
@@ -389,22 +603,85 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan,
 		goto out;
 	}
 
+	if (need_mf_bond) {
+		int index_at_port = -1;
+		int index_at_dup_port = -1;
+
+		for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) {
+			if ((vlan == (MLX4_VLAN_MASK & be32_to_cpu(table->entries[i]))))
+				index_at_port = i;
+			if ((vlan == (MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[i]))))
+				index_at_dup_port = i;
+		}
+		/* check that same vlan is not in the tables at different indices */
+		if ((index_at_port != index_at_dup_port) &&
+		    (index_at_port >= 0) &&
+		    (index_at_dup_port >= 0))
+			can_mf_bond = false;
+
+		/* If the vlan is already in the primary table, the slot must be
+		 * available in the duplicate table as well.
+		 */
+		if (index_at_port >= 0 && index_at_dup_port < 0 &&
+		    dup_table->refs[index_at_port]) {
+			can_mf_bond = false;
+		}
+		/* If the vlan is already in the duplicate table, check that the
+		 * corresponding index is not occupied in the primary table, or
+		 * the primary table already contains the vlan at the same index.
+		 * Otherwise, you cannot bond (primary contains a different vlan
+		 * at that index).
+		 */
+		if (index_at_dup_port >= 0) {
+			if (!table->refs[index_at_dup_port] ||
+			    (vlan == (MLX4_VLAN_MASK & be32_to_cpu(table->entries[index_at_dup_port]))))
+				free_for_dup = index_at_dup_port;
+			else
+				can_mf_bond = false;
+		}
+	}
+
 	for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) {
-		if (free < 0 && (table->refs[i] == 0)) {
-			free = i;
-			continue;
+		if (!table->refs[i]) {
+			if (free < 0)
+				free = i;
+			if (free_for_dup < 0 && need_mf_bond && can_mf_bond) {
+				if (!dup_table->refs[i])
+					free_for_dup = i;
+			}
 		}
 
-		if (table->refs[i] &&
+		if ((table->refs[i] || table->is_dup[i]) &&
 		    (vlan == (MLX4_VLAN_MASK &
 			      be32_to_cpu(table->entries[i])))) {
 			/* Vlan already registered, increase references count */
+			mlx4_dbg(dev, "vlan %u is already registered.\n", vlan);
 			*index = i;
 			++table->refs[i];
+			if (dup) {
+				u16 dup_vlan = MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[i]);
+
+				if (dup_vlan != vlan || !dup_table->is_dup[i]) {
+					mlx4_warn(dev, "register vlan: expected duplicate vlan %u on port %d index %d\n",
+						  vlan, dup_port, i);
+				}
+			}
 			goto out;
 		}
 	}
 
+	if (need_mf_bond && (free_for_dup < 0)) {
+		if (dup) {
+			mlx4_warn(dev, "Fail to allocate duplicate VLAN table entry\n");
+			mlx4_warn(dev, "High Availability for virtual functions may not work as expected\n");
+			dup = false;
+		}
+		can_mf_bond = false;
+	}
+
+	if (need_mf_bond && can_mf_bond)
+		free = free_for_dup;
+
 	if (free < 0) {
 		err = -ENOMEM;
 		goto out;
@@ -412,6 +689,7 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan,
 
 	/* Register new VLAN */
 	table->refs[free] = 1;
+	table->is_dup[free] = false;
 	table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID);
 
 	err = mlx4_set_port_vlan_table(dev, port, table->entries);
@@ -421,11 +699,35 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan,
 		table->entries[free] = 0;
 		goto out;
 	}
+	++table->total;
+	if (dup) {
+		dup_table->refs[free] = 0;
+		dup_table->is_dup[free] = true;
+		dup_table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID);
+
+		err = mlx4_set_port_vlan_table(dev, dup_port, dup_table->entries);
+		if (unlikely(err)) {
+			mlx4_warn(dev, "Failed adding duplicate vlan: %u\n", vlan);
+			dup_table->is_dup[free] = false;
+			dup_table->entries[free] = 0;
+			goto out;
+		}
+		++dup_table->total;
+	}
 
 	*index = free;
-	++table->total;
 out:
-	mutex_unlock(&table->mutex);
+	if (need_mf_bond) {
+		if (port == 2) {
+			mutex_unlock(&table->mutex);
+			mutex_unlock(&dup_table->mutex);
+		} else {
+			mutex_unlock(&dup_table->mutex);
+			mutex_unlock(&table->mutex);
+		}
+	} else {
+		mutex_unlock(&table->mutex);
+	}
 	return err;
 }
 
@@ -455,8 +757,22 @@ void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
 {
 	struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
 	int index;
+	bool dup = mlx4_is_mf_bonded(dev);
+	u8 dup_port = (port == 1) ? 2 : 1;
+	struct mlx4_vlan_table *dup_table = &mlx4_priv(dev)->port[dup_port].vlan_table;
+
+	if (dup) {
+		if (port == 1) {
+			mutex_lock(&table->mutex);
+			mutex_lock(&dup_table->mutex);
+		} else {
+			mutex_lock(&dup_table->mutex);
+			mutex_lock(&table->mutex);
+		}
+	} else {
+		mutex_lock(&table->mutex);
+	}
 
-	mutex_lock(&table->mutex);
 	if (mlx4_find_cached_vlan(dev, port, vlan, &index)) {
 		mlx4_warn(dev, "vlan 0x%x is not in the vlan table\n", vlan);
 		goto out;
@@ -467,16 +783,38 @@ void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
 		goto out;
 	}
 
-	if (--table->refs[index]) {
+	if (--table->refs[index] || table->is_dup[index]) {
 		mlx4_dbg(dev, "Have %d more references for index %d, no need to modify vlan table\n",
 			 table->refs[index], index);
+		if (!table->refs[index])
+			dup_table->is_dup[index] = false;
 		goto out;
 	}
 	table->entries[index] = 0;
-	mlx4_set_port_vlan_table(dev, port, table->entries);
+	if (mlx4_set_port_vlan_table(dev, port, table->entries))
+		mlx4_warn(dev, "Fail to set vlan in port %d during unregister\n", port);
 	--table->total;
+	if (dup) {
+		dup_table->is_dup[index] = false;
+		if (dup_table->refs[index])
+			goto out;
+		dup_table->entries[index] = 0;
+		if (mlx4_set_port_vlan_table(dev, dup_port, dup_table->entries))
+			mlx4_warn(dev, "Fail to set vlan in duplicate port %d during unregister\n", dup_port);
+		--dup_table->total;
+	}
 out:
-	mutex_unlock(&table->mutex);
+	if (dup) {
+		if (port == 2) {
+			mutex_unlock(&table->mutex);
+			mutex_unlock(&dup_table->mutex);
+		} else {
+			mutex_unlock(&dup_table->mutex);
+			mutex_unlock(&table->mutex);
+		}
+	} else {
+		mutex_unlock(&table->mutex);
+	}
 }
 
 void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
@@ -495,6 +833,220 @@ void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
 }
 EXPORT_SYMBOL_GPL(mlx4_unregister_vlan);
 
+int mlx4_bond_mac_table(struct mlx4_dev *dev)
+{
+	struct mlx4_mac_table *t1 = &mlx4_priv(dev)->port[1].mac_table;
+	struct mlx4_mac_table *t2 = &mlx4_priv(dev)->port[2].mac_table;
+	int ret = 0;
+	int i;
+	bool update1 = false;
+	bool update2 = false;
+
+	mutex_lock(&t1->mutex);
+	mutex_lock(&t2->mutex);
+	for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+		if ((t1->entries[i] != t2->entries[i]) &&
+		    t1->entries[i] && t2->entries[i]) {
+			mlx4_warn(dev, "can't duplicate entry %d in mac table\n", i);
+			ret = -EINVAL;
+			goto unlock;
+		}
+	}
+	/* copy one-sided entries across (the copy is is_dup); entries in both are is_dup on both */
+	for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+		if (t1->entries[i] && !t2->entries[i]) {
+			t2->entries[i] = t1->entries[i];
+			t2->is_dup[i] = true;
+			update2 = true;
+		} else if (!t1->entries[i] && t2->entries[i]) {
+			t1->entries[i] = t2->entries[i];
+			t1->is_dup[i] = true;
+			update1 = true;
+		} else if (t1->entries[i] && t2->entries[i]) {
+			t1->is_dup[i] = true;
+			t2->is_dup[i] = true;
+		}
+	}
+	/* write back only tables that received a copy; port 2 is skipped once port 1 has failed */
+	if (update1) {
+		ret = mlx4_set_port_mac_table(dev, 1, t1->entries);
+		if (ret)
+			mlx4_warn(dev, "failed to set MAC table for port 1 (%d)\n", ret);
+	}
+	if (!ret && update2) {
+		ret = mlx4_set_port_mac_table(dev, 2, t2->entries);
+		if (ret)
+			mlx4_warn(dev, "failed to set MAC table for port 2 (%d)\n", ret);
+	}
+	/* NOTE(review): in-memory copies and is_dup flags are not rolled back on failure - confirm */
+	if (ret)
+		mlx4_warn(dev, "failed to create mirror MAC tables\n");
+unlock:
+	mutex_unlock(&t2->mutex);
+	mutex_unlock(&t1->mutex);
+	return ret;
+}
+
+int mlx4_unbond_mac_table(struct mlx4_dev *dev)
+{
+	struct mlx4_mac_table *t1 = &mlx4_priv(dev)->port[1].mac_table;
+	struct mlx4_mac_table *t2 = &mlx4_priv(dev)->port[2].mac_table;
+	int ret = 0;
+	int ret1;
+	int i;
+	bool update1 = false;
+	bool update2 = false;
+
+	mutex_lock(&t1->mutex);
+	mutex_lock(&t2->mutex);
+	for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+		if (t1->entries[i] != t2->entries[i]) {
+			mlx4_warn(dev, "mac table is in an unexpected state when trying to unbond\n");
+			ret = -EINVAL;
+			goto unlock;
+		}
+	}
+	/* tables match: clear is_dup on populated slots, drop entries with no references left */
+	for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+		if (!t1->entries[i])
+			continue;
+		t1->is_dup[i] = false;
+		if (!t1->refs[i]) {
+			t1->entries[i] = 0;
+			update1 = true;
+		}
+		t2->is_dup[i] = false;
+		if (!t2->refs[i]) {
+			t2->entries[i] = 0;
+			update2 = true;
+		}
+	}
+	/* both writes are attempted; if port 2 fails its error replaces port 1's result */
+	if (update1) {
+		ret = mlx4_set_port_mac_table(dev, 1, t1->entries);
+		if (ret)
+			mlx4_warn(dev, "failed to unmirror MAC tables for port 1(%d)\n", ret);
+	}
+	if (update2) {
+		ret1 = mlx4_set_port_mac_table(dev, 2, t2->entries);
+		if (ret1) {
+			mlx4_warn(dev, "failed to unmirror MAC tables for port 2(%d)\n", ret1);
+			ret = ret1;
+		}
+	}
+unlock:
+	mutex_unlock(&t2->mutex);
+	mutex_unlock(&t1->mutex);
+	return ret;
+}
+
+int mlx4_bond_vlan_table(struct mlx4_dev *dev)
+{
+	struct mlx4_vlan_table *t1 = &mlx4_priv(dev)->port[1].vlan_table;
+	struct mlx4_vlan_table *t2 = &mlx4_priv(dev)->port[2].vlan_table;
+	int ret = 0;
+	int i;
+	bool update1 = false;
+	bool update2 = false;
+
+	mutex_lock(&t1->mutex);
+	mutex_lock(&t2->mutex);
+	for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) {
+		if ((t1->entries[i] != t2->entries[i]) &&
+		    t1->entries[i] && t2->entries[i]) {
+			mlx4_warn(dev, "can't duplicate entry %d in vlan table\n", i);
+			ret = -EINVAL;
+			goto unlock;
+		}
+	}
+	/* copy one-sided entries across (the copy is is_dup); entries in both are is_dup on both */
+	for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) {
+		if (t1->entries[i] && !t2->entries[i]) {
+			t2->entries[i] = t1->entries[i];
+			t2->is_dup[i] = true;
+			update2 = true;
+		} else if (!t1->entries[i] && t2->entries[i]) {
+			t1->entries[i] = t2->entries[i];
+			t1->is_dup[i] = true;
+			update1 = true;
+		} else if (t1->entries[i] && t2->entries[i]) {
+			t1->is_dup[i] = true;
+			t2->is_dup[i] = true;
+		}
+	}
+	/* write back only tables that received a copy; port 2 is skipped once port 1 has failed */
+	if (update1) {
+		ret = mlx4_set_port_vlan_table(dev, 1, t1->entries);
+		if (ret)
+			mlx4_warn(dev, "failed to set VLAN table for port 1 (%d)\n", ret);
+	}
+	if (!ret && update2) {
+		ret = mlx4_set_port_vlan_table(dev, 2, t2->entries);
+		if (ret)
+			mlx4_warn(dev, "failed to set VLAN table for port 2 (%d)\n", ret);
+	}
+	/* NOTE(review): in-memory copies and is_dup flags are not rolled back on failure - confirm */
+	if (ret)
+		mlx4_warn(dev, "failed to create mirror VLAN tables\n");
+unlock:
+	mutex_unlock(&t2->mutex);
+	mutex_unlock(&t1->mutex);
+	return ret;
+}
+
+int mlx4_unbond_vlan_table(struct mlx4_dev *dev)
+{
+	struct mlx4_vlan_table *t1 = &mlx4_priv(dev)->port[1].vlan_table;
+	struct mlx4_vlan_table *t2 = &mlx4_priv(dev)->port[2].vlan_table;
+	int ret = 0;
+	int ret1;
+	int i;
+	bool update1 = false;
+	bool update2 = false;
+
+	mutex_lock(&t1->mutex);
+	mutex_lock(&t2->mutex);
+	for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) {
+		if (t1->entries[i] != t2->entries[i]) {
+			mlx4_warn(dev, "vlan table is in an unexpected state when trying to unbond\n");
+			ret = -EINVAL;
+			goto unlock;
+		}
+	}
+	/* tables match: clear is_dup on populated slots, drop entries with no references left */
+	for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) {
+		if (!t1->entries[i])
+			continue;
+		t1->is_dup[i] = false;
+		if (!t1->refs[i]) {
+			t1->entries[i] = 0;
+			update1 = true;
+		}
+		t2->is_dup[i] = false;
+		if (!t2->refs[i]) {
+			t2->entries[i] = 0;
+			update2 = true;
+		}
+	}
+	/* both writes are attempted; if port 2 fails its error replaces port 1's result */
+	if (update1) {
+		ret = mlx4_set_port_vlan_table(dev, 1, t1->entries);
+		if (ret)
+			mlx4_warn(dev, "failed to unmirror VLAN tables for port 1(%d)\n", ret);
+	}
+	if (update2) {
+		ret1 = mlx4_set_port_vlan_table(dev, 2, t2->entries);
+		if (ret1) {
+			mlx4_warn(dev, "failed to unmirror VLAN tables for port 2(%d)\n", ret1);
+			ret = ret1;
+		}
+	}
+unlock:
+	mutex_unlock(&t2->mutex);
+	mutex_unlock(&t1->mutex);
+	return ret;
+}
+
 int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps)
 {
 	struct mlx4_cmd_mailbox *inmailbox, *outmailbox;
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 6fec3e993d02..da7f578a3fe1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -222,6 +222,13 @@ enum res_fs_rule_states {
 struct res_fs_rule {
 	struct res_common	com;
 	int			qpn;
+	/* VF DMFS mbox with port flipped */
+	void			*mirr_mbox;
+	/* > 0 --> apply mirror when getting into HA mode      */
+	/* = 0 --> un-apply mirror when getting out of HA mode */
+	u32			mirr_mbox_size;
+	struct list_head	mirr_list;
+	u64			mirr_rule_id;
 };
 
 static void *res_tracker_lookup(struct rb_root *root, u64 res_id)
@@ -4284,6 +4291,22 @@ err_mac:
 	return err;
 }
 
+static u32 qp_attach_mbox_size(void *mbox)
+{
+	/* The control segment sits at the start; rule headers are read right after it */
+	u32 len = sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+	struct _rule_hw *hdr = (struct _rule_hw *)(mbox + len);
+
+	/* ->size is scaled by sizeof(u32) into bytes; a zero size ends the walk */
+	for (; hdr->size; hdr++)
+		len += hdr->size * sizeof(u32);
+
+	/* NOTE(review): hdr steps by sizeof(*hdr), not by hdr->size words - confirm */
+	return len;
+}
+
+static int mlx4_do_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule);
+
 int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
 					 struct mlx4_vhcr *vhcr,
 					 struct mlx4_cmd_mailbox *inbox,
@@ -4300,6 +4323,8 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
 	struct mlx4_net_trans_rule_hw_ctrl *ctrl;
 	struct _rule_hw  *rule_header;
 	int header_id;
+	struct res_fs_rule *rrule;
+	u32 mbox_size;
 
 	if (dev->caps.steering_mode !=
 	    MLX4_STEERING_MODE_DEVICE_MANAGED)
@@ -4328,7 +4353,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
 	case MLX4_NET_TRANS_RULE_ID_ETH:
 		if (validate_eth_header_mac(slave, rule_header, rlist)) {
 			err = -EINVAL;
-			goto err_put;
+			goto err_put_qp;
 		}
 		break;
 	case MLX4_NET_TRANS_RULE_ID_IB:
@@ -4339,7 +4364,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
 		pr_warn("Can't attach FS rule without L2 headers, adding L2 header\n");
 		if (add_eth_header(dev, slave, inbox, rlist, header_id)) {
 			err = -EINVAL;
-			goto err_put;
+			goto err_put_qp;
 		}
 		vhcr->in_modifier +=
 			sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2;
@@ -4347,7 +4372,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
 	default:
 		pr_err("Corrupted mailbox\n");
 		err = -EINVAL;
-		goto err_put;
+		goto err_put_qp;
 	}
 
 execute:
@@ -4356,23 +4381,69 @@ execute:
 			   MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
 			   MLX4_CMD_NATIVE);
 	if (err)
-		goto err_put;
+		goto err_put_qp;
+
 
 	err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, qpn);
 	if (err) {
 		mlx4_err(dev, "Fail to add flow steering resources\n");
-		/* detach rule*/
+		goto err_detach;
+	}
+
+	err = get_res(dev, slave, vhcr->out_param, RES_FS_RULE, &rrule);
+	if (err)
+		goto err_detach;
+
+	mbox_size = qp_attach_mbox_size(inbox->buf);
+	rrule->mirr_mbox = kmalloc(mbox_size, GFP_KERNEL);
+	if (!rrule->mirr_mbox) {
+		err = -ENOMEM;
+		goto err_put_rule;
+	}
+	rrule->mirr_mbox_size = mbox_size;
+	rrule->mirr_rule_id = 0;
+	memcpy(rrule->mirr_mbox, inbox->buf, mbox_size);
+
+	/* set different port */
+	ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)rrule->mirr_mbox;
+	if (ctrl->port == 1)
+		ctrl->port = 2;
+	else
+		ctrl->port = 1;
+
+	if (mlx4_is_bonded(dev))
+		mlx4_do_mirror_rule(dev, rrule);
+
+	atomic_inc(&rqp->ref_count);
+
+err_put_rule:
+	put_res(dev, slave, vhcr->out_param, RES_FS_RULE);
+err_detach:
+	/* detach rule on error */
+	if (err)
 		mlx4_cmd(dev, vhcr->out_param, 0, 0,
 			 MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
 			 MLX4_CMD_NATIVE);
-		goto err_put;
-	}
-	atomic_inc(&rqp->ref_count);
-err_put:
+err_put_qp:
 	put_res(dev, slave, qpn, RES_QP);
 	return err;
 }
 
+static int mlx4_undo_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule)
+{
+	u64 reg_id = fs_rule->com.res_id;
+	int err;
+
+	/* NOTE(review): id is cached in case rem_res_range() releases fs_rule - confirm */
+	err = rem_res_range(dev, fs_rule->com.owner, reg_id, 1, RES_FS_RULE, 0);
+	if (!err)
+		mlx4_cmd(dev, reg_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH,
+			 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+	else
+		mlx4_err(dev, "Fail to remove flow steering resources\n");
+	return err;
+}
+
 int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
 					 struct mlx4_vhcr *vhcr,
 					 struct mlx4_cmd_mailbox *inbox,
@@ -4382,6 +4453,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
 	int err;
 	struct res_qp *rqp;
 	struct res_fs_rule *rrule;
+	u64 mirr_reg_id;
 
 	if (dev->caps.steering_mode !=
 	    MLX4_STEERING_MODE_DEVICE_MANAGED)
@@ -4390,12 +4462,30 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
 	err = get_res(dev, slave, vhcr->in_param, RES_FS_RULE, &rrule);
 	if (err)
 		return err;
+
+	if (!rrule->mirr_mbox) {
+		mlx4_err(dev, "Mirror rules cannot be removed explicitly\n");
+		put_res(dev, slave, vhcr->in_param, RES_FS_RULE);
+		return -EINVAL;
+	}
+	mirr_reg_id = rrule->mirr_rule_id;
+	kfree(rrule->mirr_mbox);
+
 	/* Release the rule form busy state before removal */
 	put_res(dev, slave, vhcr->in_param, RES_FS_RULE);
 	err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp);
 	if (err)
 		return err;
 
+	if (mirr_reg_id && mlx4_is_bonded(dev)) {
+		err = get_res(dev, slave, mirr_reg_id, RES_FS_RULE, &rrule);
+		if (err) {
+			mlx4_err(dev, "Fail to get resource of mirror rule\n");
+		} else {
+			put_res(dev, slave, mirr_reg_id, RES_FS_RULE);
+			mlx4_undo_mirror_rule(dev, rrule);
+		}
+	}
 	err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0);
 	if (err) {
 		mlx4_err(dev, "Fail to remove flow steering resources\n");
@@ -4833,6 +4923,91 @@ static void rem_slave_mtts(struct mlx4_dev *dev, int slave)
 	spin_unlock_irq(mlx4_tlock(dev));
 }
 
+/* Issue the ATTACH command stored in fs_rule->mirr_mbox and track the new rule. */
+static int mlx4_do_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct res_fs_rule *mirr_rule;
+	u64 reg_id;
+	int err;
+
+	/* Checked before allocating so this error path cannot leak a mailbox */
+	if (!fs_rule->mirr_mbox) {
+		mlx4_err(dev, "rule mirroring mailbox is null\n");
+		return -EINVAL;
+	}
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	memcpy(mailbox->buf, fs_rule->mirr_mbox, fs_rule->mirr_mbox_size);
+	err = mlx4_cmd_imm(dev, mailbox->dma, &reg_id, fs_rule->mirr_mbox_size >> 2, 0,
+			   MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+			   MLX4_CMD_NATIVE);
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	if (err)
+		return err;
+
+	err = add_res_range(dev, fs_rule->com.owner, reg_id, 1, RES_FS_RULE, fs_rule->qpn);
+	if (err)
+		goto err_detach;
+
+	err = get_res(dev, fs_rule->com.owner, reg_id, RES_FS_RULE, &mirr_rule);
+	if (err)
+		goto err_rem;
+
+	/* Link original -> mirror; the mirror entry gets no mailbox or mirror id */
+	fs_rule->mirr_rule_id = reg_id;
+	mirr_rule->mirr_rule_id = 0;
+	mirr_rule->mirr_mbox_size = 0;
+	mirr_rule->mirr_mbox = NULL;
+	put_res(dev, fs_rule->com.owner, reg_id, RES_FS_RULE);
+	return 0;
+err_rem:
+	rem_res_range(dev, fs_rule->com.owner, reg_id, 1, RES_FS_RULE, 0);
+err_detach:
+	mlx4_cmd(dev, reg_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH,
+		 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+	return err;
+}
+
+/* Mirror (bond) or un-mirror (!bond) tracked FS rules; returns the first error. */
+static int mlx4_mirror_fs_rules(struct mlx4_dev *dev, bool bond)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct rb_root *root = &priv->mfunc.master.res_tracker.res_tree[RES_FS_RULE];
+	struct res_fs_rule *fs_rule, *tmp;
+	struct rb_node *p;
+	int err = 0, ret;
+	LIST_HEAD(mirr_list);
+
+	/* bond: pick rules with a mirror mailbox size; unbond: pick those without */
+	for (p = rb_first(root); p; p = rb_next(p)) {
+		fs_rule = rb_entry(p, struct res_fs_rule, com.node);
+		if (bond == !!fs_rule->mirr_mbox_size)
+			list_add_tail(&fs_rule->mirr_list, &mirr_list);
+	}
+
+	/* _safe walk: undo calls rem_res_range() on the entry being visited */
+	list_for_each_entry_safe(fs_rule, tmp, &mirr_list, mirr_list) {
+		ret = bond ? mlx4_do_mirror_rule(dev, fs_rule) :
+			     mlx4_undo_mirror_rule(dev, fs_rule);
+		if (ret && !err)
+			err = ret;
+	}
+	return err;
+}
+
+int mlx4_bond_fs_rules(struct mlx4_dev *dev)
+{
+	return mlx4_mirror_fs_rules(dev, true);
+}
+
+int mlx4_unbond_fs_rules(struct mlx4_dev *dev)
+{
+	return mlx4_mirror_fs_rules(dev, false);
+}
+
 static void rem_slave_fs_rule(struct mlx4_dev *dev, int slave)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 26a68b8af2c5..fe11e967095f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE)		+= mlx5_core.o
 
 mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o   \
-		mad.o transobj.o vport.o
-mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o \
-		en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \
+		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o
+mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
+		en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
 		en_txrx.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 69f1c1a412b4..f689ce580b44 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -64,6 +64,8 @@
 #define MLX5E_UPDATE_STATS_INTERVAL    200 /* msecs */
 #define MLX5E_SQ_BF_BUDGET             16
 
+#define MLX5E_NUM_MAIN_GROUPS 9
+
 static const char vport_strings[][ETH_GSTRING_LEN] = {
 	/* vport statistics */
 	"rx_packets",
@@ -442,7 +444,7 @@ enum mlx5e_rqt_ix {
 struct mlx5e_eth_addr_info {
 	u8  addr[ETH_ALEN + 2];
 	u32 tt_vec;
-	u32 ft_ix[MLX5E_NUM_TT]; /* flow table index per traffic type */
+	struct mlx5_flow_rule *ft_rule[MLX5E_NUM_TT];
 };
 
 #define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE)
@@ -465,15 +467,23 @@ enum {
 };
 
 struct mlx5e_vlan_db {
-	u32           active_vlans_ft_ix[VLAN_N_VID];
-	u32           untagged_rule_ft_ix;
-	u32           any_vlan_rule_ft_ix;
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+	struct mlx5_flow_rule	*active_vlans_rule[VLAN_N_VID];
+	struct mlx5_flow_rule	*untagged_rule;
+	struct mlx5_flow_rule	*any_vlan_rule;
 	bool          filter_disabled;
 };
 
 struct mlx5e_flow_table {
-	void *vlan;
-	void *main;
+	int num_groups;
+	struct mlx5_flow_table		*t;
+	struct mlx5_flow_group		**g;
+};
+
+struct mlx5e_flow_tables {
+	struct mlx5_flow_namespace	*ns;
+	struct mlx5e_flow_table		vlan;
+	struct mlx5e_flow_table		main;
 };
 
 struct mlx5e_priv {
@@ -496,7 +506,7 @@ struct mlx5e_priv {
 	u32                        rqtn[MLX5E_NUM_RQT];
 	u32                        tirn[MLX5E_NUM_TT];
 
-	struct mlx5e_flow_table    ft;
+	struct mlx5e_flow_tables   fts;
 	struct mlx5e_eth_addr_db   eth_addr;
 	struct mlx5e_vlan_db       vlan;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c
deleted file mode 100644
index 22d603f78273..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c
+++ /dev/null
@@ -1,907 +0,0 @@
-/*
- * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/list.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/tcp.h>
-#include <linux/mlx5/flow_table.h>
-#include "en.h"
-
-enum {
-	MLX5E_FULLMATCH = 0,
-	MLX5E_ALLMULTI  = 1,
-	MLX5E_PROMISC   = 2,
-};
-
-enum {
-	MLX5E_UC        = 0,
-	MLX5E_MC_IPV4   = 1,
-	MLX5E_MC_IPV6   = 2,
-	MLX5E_MC_OTHER  = 3,
-};
-
-enum {
-	MLX5E_ACTION_NONE = 0,
-	MLX5E_ACTION_ADD  = 1,
-	MLX5E_ACTION_DEL  = 2,
-};
-
-struct mlx5e_eth_addr_hash_node {
-	struct hlist_node          hlist;
-	u8                         action;
-	struct mlx5e_eth_addr_info ai;
-};
-
-static inline int mlx5e_hash_eth_addr(u8 *addr)
-{
-	return addr[5];
-}
-
-static void mlx5e_add_eth_addr_to_hash(struct hlist_head *hash, u8 *addr)
-{
-	struct mlx5e_eth_addr_hash_node *hn;
-	int ix = mlx5e_hash_eth_addr(addr);
-	int found = 0;
-
-	hlist_for_each_entry(hn, &hash[ix], hlist)
-		if (ether_addr_equal_64bits(hn->ai.addr, addr)) {
-			found = 1;
-			break;
-		}
-
-	if (found) {
-		hn->action = MLX5E_ACTION_NONE;
-		return;
-	}
-
-	hn = kzalloc(sizeof(*hn), GFP_ATOMIC);
-	if (!hn)
-		return;
-
-	ether_addr_copy(hn->ai.addr, addr);
-	hn->action = MLX5E_ACTION_ADD;
-
-	hlist_add_head(&hn->hlist, &hash[ix]);
-}
-
-static void mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn)
-{
-	hlist_del(&hn->hlist);
-	kfree(hn);
-}
-
-static void mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv,
-					       struct mlx5e_eth_addr_info *ai)
-{
-	void *ft = priv->ft.main;
-
-	if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP))
-		mlx5_del_flow_table_entry(ft,
-					  ai->ft_ix[MLX5E_TT_IPV6_IPSEC_ESP]);
-
-	if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP))
-		mlx5_del_flow_table_entry(ft,
-					  ai->ft_ix[MLX5E_TT_IPV4_IPSEC_ESP]);
-
-	if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH))
-		mlx5_del_flow_table_entry(ft,
-					  ai->ft_ix[MLX5E_TT_IPV6_IPSEC_AH]);
-
-	if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH))
-		mlx5_del_flow_table_entry(ft,
-					  ai->ft_ix[MLX5E_TT_IPV4_IPSEC_AH]);
-
-	if (ai->tt_vec & BIT(MLX5E_TT_IPV6_TCP))
-		mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_TCP]);
-
-	if (ai->tt_vec & BIT(MLX5E_TT_IPV4_TCP))
-		mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_TCP]);
-
-	if (ai->tt_vec & BIT(MLX5E_TT_IPV6_UDP))
-		mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_UDP]);
-
-	if (ai->tt_vec & BIT(MLX5E_TT_IPV4_UDP))
-		mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_UDP]);
-
-	if (ai->tt_vec & BIT(MLX5E_TT_IPV6))
-		mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6]);
-
-	if (ai->tt_vec & BIT(MLX5E_TT_IPV4))
-		mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4]);
-
-	if (ai->tt_vec & BIT(MLX5E_TT_ANY))
-		mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_ANY]);
-}
-
-static int mlx5e_get_eth_addr_type(u8 *addr)
-{
-	if (is_unicast_ether_addr(addr))
-		return MLX5E_UC;
-
-	if ((addr[0] == 0x01) &&
-	    (addr[1] == 0x00) &&
-	    (addr[2] == 0x5e) &&
-	   !(addr[3] &  0x80))
-		return MLX5E_MC_IPV4;
-
-	if ((addr[0] == 0x33) &&
-	    (addr[1] == 0x33))
-		return MLX5E_MC_IPV6;
-
-	return MLX5E_MC_OTHER;
-}
-
-static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type)
-{
-	int eth_addr_type;
-	u32 ret;
-
-	switch (type) {
-	case MLX5E_FULLMATCH:
-		eth_addr_type = mlx5e_get_eth_addr_type(ai->addr);
-		switch (eth_addr_type) {
-		case MLX5E_UC:
-			ret =
-				BIT(MLX5E_TT_IPV4_TCP)       |
-				BIT(MLX5E_TT_IPV6_TCP)       |
-				BIT(MLX5E_TT_IPV4_UDP)       |
-				BIT(MLX5E_TT_IPV6_UDP)       |
-				BIT(MLX5E_TT_IPV4_IPSEC_AH)  |
-				BIT(MLX5E_TT_IPV6_IPSEC_AH)  |
-				BIT(MLX5E_TT_IPV4_IPSEC_ESP) |
-				BIT(MLX5E_TT_IPV6_IPSEC_ESP) |
-				BIT(MLX5E_TT_IPV4)           |
-				BIT(MLX5E_TT_IPV6)           |
-				BIT(MLX5E_TT_ANY)            |
-				0;
-			break;
-
-		case MLX5E_MC_IPV4:
-			ret =
-				BIT(MLX5E_TT_IPV4_UDP)       |
-				BIT(MLX5E_TT_IPV4)           |
-				0;
-			break;
-
-		case MLX5E_MC_IPV6:
-			ret =
-				BIT(MLX5E_TT_IPV6_UDP)       |
-				BIT(MLX5E_TT_IPV6)           |
-				0;
-			break;
-
-		case MLX5E_MC_OTHER:
-			ret =
-				BIT(MLX5E_TT_ANY)            |
-				0;
-			break;
-		}
-
-		break;
-
-	case MLX5E_ALLMULTI:
-		ret =
-			BIT(MLX5E_TT_IPV4_UDP) |
-			BIT(MLX5E_TT_IPV6_UDP) |
-			BIT(MLX5E_TT_IPV4)     |
-			BIT(MLX5E_TT_IPV6)     |
-			BIT(MLX5E_TT_ANY)      |
-			0;
-		break;
-
-	default: /* MLX5E_PROMISC */
-		ret =
-			BIT(MLX5E_TT_IPV4_TCP)       |
-			BIT(MLX5E_TT_IPV6_TCP)       |
-			BIT(MLX5E_TT_IPV4_UDP)       |
-			BIT(MLX5E_TT_IPV6_UDP)       |
-			BIT(MLX5E_TT_IPV4_IPSEC_AH)  |
-			BIT(MLX5E_TT_IPV6_IPSEC_AH)  |
-			BIT(MLX5E_TT_IPV4_IPSEC_ESP) |
-			BIT(MLX5E_TT_IPV6_IPSEC_ESP) |
-			BIT(MLX5E_TT_IPV4)           |
-			BIT(MLX5E_TT_IPV6)           |
-			BIT(MLX5E_TT_ANY)            |
-			0;
-		break;
-	}
-
-	return ret;
-}
-
-static int __mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv,
-				     struct mlx5e_eth_addr_info *ai, int type,
-				     void *flow_context, void *match_criteria)
-{
-	u8 match_criteria_enable = 0;
-	void *match_value;
-	void *dest;
-	u8   *dmac;
-	u8   *match_criteria_dmac;
-	void *ft   = priv->ft.main;
-	u32  *tirn = priv->tirn;
-	u32  *ft_ix;
-	u32  tt_vec;
-	int  err;
-
-	match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value);
-	dmac = MLX5_ADDR_OF(fte_match_param, match_value,
-			    outer_headers.dmac_47_16);
-	match_criteria_dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
-					   outer_headers.dmac_47_16);
-	dest = MLX5_ADDR_OF(flow_context, flow_context, destination);
-
-	MLX5_SET(flow_context, flow_context, action,
-		 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
-	MLX5_SET(flow_context, flow_context, destination_list_size, 1);
-	MLX5_SET(dest_format_struct, dest, destination_type,
-		 MLX5_FLOW_CONTEXT_DEST_TYPE_TIR);
-
-	switch (type) {
-	case MLX5E_FULLMATCH:
-		match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-		memset(match_criteria_dmac, 0xff, ETH_ALEN);
-		ether_addr_copy(dmac, ai->addr);
-		break;
-
-	case MLX5E_ALLMULTI:
-		match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-		match_criteria_dmac[0] = 0x01;
-		dmac[0] = 0x01;
-		break;
-
-	case MLX5E_PROMISC:
-		break;
-	}
-
-	tt_vec = mlx5e_get_tt_vec(ai, type);
-
-	ft_ix = &ai->ft_ix[MLX5E_TT_ANY];
-	if (tt_vec & BIT(MLX5E_TT_ANY)) {
-		MLX5_SET(dest_format_struct, dest, destination_id,
-			 tirn[MLX5E_TT_ANY]);
-		err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
-						match_criteria, flow_context,
-						ft_ix);
-		if (err)
-			goto err_del_ai;
-
-		ai->tt_vec |= BIT(MLX5E_TT_ANY);
-	}
-
-	match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	MLX5_SET_TO_ONES(fte_match_param, match_criteria,
-			 outer_headers.ethertype);
-
-	ft_ix = &ai->ft_ix[MLX5E_TT_IPV4];
-	if (tt_vec & BIT(MLX5E_TT_IPV4)) {
-		MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
-			 ETH_P_IP);
-		MLX5_SET(dest_format_struct, dest, destination_id,
-			 tirn[MLX5E_TT_IPV4]);
-		err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
-						match_criteria, flow_context,
-						ft_ix);
-		if (err)
-			goto err_del_ai;
-
-		ai->tt_vec |= BIT(MLX5E_TT_IPV4);
-	}
-
-	ft_ix = &ai->ft_ix[MLX5E_TT_IPV6];
-	if (tt_vec & BIT(MLX5E_TT_IPV6)) {
-		MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
-			 ETH_P_IPV6);
-		MLX5_SET(dest_format_struct, dest, destination_id,
-			 tirn[MLX5E_TT_IPV6]);
-		err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
-						match_criteria, flow_context,
-						ft_ix);
-		if (err)
-			goto err_del_ai;
-
-		ai->tt_vec |= BIT(MLX5E_TT_IPV6);
-	}
-
-	MLX5_SET_TO_ONES(fte_match_param, match_criteria,
-			 outer_headers.ip_protocol);
-	MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol,
-		 IPPROTO_UDP);
-
-	ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_UDP];
-	if (tt_vec & BIT(MLX5E_TT_IPV4_UDP)) {
-		MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
-			 ETH_P_IP);
-		MLX5_SET(dest_format_struct, dest, destination_id,
-			 tirn[MLX5E_TT_IPV4_UDP]);
-		err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
-						match_criteria, flow_context,
-						ft_ix);
-		if (err)
-			goto err_del_ai;
-
-		ai->tt_vec |= BIT(MLX5E_TT_IPV4_UDP);
-	}
-
-	ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_UDP];
-	if (tt_vec & BIT(MLX5E_TT_IPV6_UDP)) {
-		MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
-			 ETH_P_IPV6);
-		MLX5_SET(dest_format_struct, dest, destination_id,
-			 tirn[MLX5E_TT_IPV6_UDP]);
-		err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
-						match_criteria, flow_context,
-						ft_ix);
-		if (err)
-			goto err_del_ai;
-
-		ai->tt_vec |= BIT(MLX5E_TT_IPV6_UDP);
-	}
-
-	MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol,
-		 IPPROTO_TCP);
-
-	ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_TCP];
-	if (tt_vec & BIT(MLX5E_TT_IPV4_TCP)) {
-		MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
-			 ETH_P_IP);
-		MLX5_SET(dest_format_struct, dest, destination_id,
-			 tirn[MLX5E_TT_IPV4_TCP]);
-		err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
-						match_criteria, flow_context,
-						ft_ix);
-		if (err)
-			goto err_del_ai;
-
-		ai->tt_vec |= BIT(MLX5E_TT_IPV4_TCP);
-	}
-
-	ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_TCP];
-	if (tt_vec & BIT(MLX5E_TT_IPV6_TCP)) {
-		MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
-			 ETH_P_IPV6);
-		MLX5_SET(dest_format_struct, dest, destination_id,
-			 tirn[MLX5E_TT_IPV6_TCP]);
-		err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
-						match_criteria, flow_context,
-						ft_ix);
-		if (err)
-			goto err_del_ai;
-
-		ai->tt_vec |= BIT(MLX5E_TT_IPV6_TCP);
-	}
-
-	MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol,
-		 IPPROTO_AH);
-
-	ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_IPSEC_AH];
-	if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) {
-		MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
-			 ETH_P_IP);
-		MLX5_SET(dest_format_struct, dest, destination_id,
-			 tirn[MLX5E_TT_IPV4_IPSEC_AH]);
-		err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
-						match_criteria, flow_context,
-						ft_ix);
-		if (err)
-			goto err_del_ai;
-
-		ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_AH);
-	}
-
-	ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_IPSEC_AH];
-	if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) {
-		MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
-			 ETH_P_IPV6);
-		MLX5_SET(dest_format_struct, dest, destination_id,
-			 tirn[MLX5E_TT_IPV6_IPSEC_AH]);
-		err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
-						match_criteria, flow_context,
-						ft_ix);
-		if (err)
-			goto err_del_ai;
-
-		ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_AH);
-	}
-
-	MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol,
-		 IPPROTO_ESP);
-
-	ft_ix = &ai->ft_ix[MLX5E_TT_IPV4_IPSEC_ESP];
-	if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) {
-		MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
-			 ETH_P_IP);
-		MLX5_SET(dest_format_struct, dest, destination_id,
-			 tirn[MLX5E_TT_IPV4_IPSEC_ESP]);
-		err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
-						match_criteria, flow_context,
-						ft_ix);
-		if (err)
-			goto err_del_ai;
-
-		ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_ESP);
-	}
-
-	ft_ix = &ai->ft_ix[MLX5E_TT_IPV6_IPSEC_ESP];
-	if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) {
-		MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
-			 ETH_P_IPV6);
-		MLX5_SET(dest_format_struct, dest, destination_id,
-			 tirn[MLX5E_TT_IPV6_IPSEC_ESP]);
-		err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
-						match_criteria, flow_context,
-						ft_ix);
-		if (err)
-			goto err_del_ai;
-
-		ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_ESP);
-	}
-
-	return 0;
-
-err_del_ai:
-	mlx5e_del_eth_addr_from_flow_table(priv, ai);
-
-	return err;
-}
-
-static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv,
-				   struct mlx5e_eth_addr_info *ai, int type)
-{
-	u32 *flow_context;
-	u32 *match_criteria;
-	int err;
-
-	flow_context   = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) +
-				      MLX5_ST_SZ_BYTES(dest_format_struct));
-	match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
-	if (!flow_context || !match_criteria) {
-		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
-		err = -ENOMEM;
-		goto add_eth_addr_rule_out;
-	}
-
-	err = __mlx5e_add_eth_addr_rule(priv, ai, type, flow_context,
-					match_criteria);
-	if (err)
-		netdev_err(priv->netdev, "%s: failed\n", __func__);
-
-add_eth_addr_rule_out:
-	kvfree(match_criteria);
-	kvfree(flow_context);
-	return err;
-}
-
-enum mlx5e_vlan_rule_type {
-	MLX5E_VLAN_RULE_TYPE_UNTAGGED,
-	MLX5E_VLAN_RULE_TYPE_ANY_VID,
-	MLX5E_VLAN_RULE_TYPE_MATCH_VID,
-};
-
-static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
-			       enum mlx5e_vlan_rule_type rule_type, u16 vid)
-{
-	u8 match_criteria_enable = 0;
-	u32 *flow_context;
-	void *match_value;
-	void *dest;
-	u32 *match_criteria;
-	u32 *ft_ix;
-	int err;
-
-	flow_context   = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) +
-				      MLX5_ST_SZ_BYTES(dest_format_struct));
-	match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
-	if (!flow_context || !match_criteria) {
-		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
-		err = -ENOMEM;
-		goto add_vlan_rule_out;
-	}
-	match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value);
-	dest = MLX5_ADDR_OF(flow_context, flow_context, destination);
-
-	MLX5_SET(flow_context, flow_context, action,
-		 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
-	MLX5_SET(flow_context, flow_context, destination_list_size, 1);
-	MLX5_SET(dest_format_struct, dest, destination_type,
-		 MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE);
-	MLX5_SET(dest_format_struct, dest, destination_id,
-		 mlx5_get_flow_table_id(priv->ft.main));
-
-	match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	MLX5_SET_TO_ONES(fte_match_param, match_criteria,
-			 outer_headers.vlan_tag);
-
-	switch (rule_type) {
-	case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
-		ft_ix = &priv->vlan.untagged_rule_ft_ix;
-		break;
-	case MLX5E_VLAN_RULE_TYPE_ANY_VID:
-		ft_ix = &priv->vlan.any_vlan_rule_ft_ix;
-		MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag,
-			 1);
-		break;
-	default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */
-		ft_ix = &priv->vlan.active_vlans_ft_ix[vid];
-		MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag,
-			 1);
-		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
-				 outer_headers.first_vid);
-		MLX5_SET(fte_match_param, match_value, outer_headers.first_vid,
-			 vid);
-		break;
-	}
-
-	err = mlx5_add_flow_table_entry(priv->ft.vlan, match_criteria_enable,
-					match_criteria, flow_context, ft_ix);
-	if (err)
-		netdev_err(priv->netdev, "%s: failed\n", __func__);
-
-add_vlan_rule_out:
-	kvfree(match_criteria);
-	kvfree(flow_context);
-	return err;
-}
-
-static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv,
-				enum mlx5e_vlan_rule_type rule_type, u16 vid)
-{
-	switch (rule_type) {
-	case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
-		mlx5_del_flow_table_entry(priv->ft.vlan,
-					  priv->vlan.untagged_rule_ft_ix);
-		break;
-	case MLX5E_VLAN_RULE_TYPE_ANY_VID:
-		mlx5_del_flow_table_entry(priv->ft.vlan,
-					  priv->vlan.any_vlan_rule_ft_ix);
-		break;
-	case MLX5E_VLAN_RULE_TYPE_MATCH_VID:
-		mlx5_del_flow_table_entry(priv->ft.vlan,
-					  priv->vlan.active_vlans_ft_ix[vid]);
-		break;
-	}
-}
-
-void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv)
-{
-	if (!priv->vlan.filter_disabled)
-		return;
-
-	priv->vlan.filter_disabled = false;
-	if (priv->netdev->flags & IFF_PROMISC)
-		return;
-	mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0);
-}
-
-void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv)
-{
-	if (priv->vlan.filter_disabled)
-		return;
-
-	priv->vlan.filter_disabled = true;
-	if (priv->netdev->flags & IFF_PROMISC)
-		return;
-	mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0);
-}
-
-int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
-			  u16 vid)
-{
-	struct mlx5e_priv *priv = netdev_priv(dev);
-
-	return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid);
-}
-
-int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
-			   u16 vid)
-{
-	struct mlx5e_priv *priv = netdev_priv(dev);
-
-	mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid);
-
-	return 0;
-}
-
-#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \
-	for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \
-		hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist)
-
-static void mlx5e_execute_action(struct mlx5e_priv *priv,
-				 struct mlx5e_eth_addr_hash_node *hn)
-{
-	switch (hn->action) {
-	case MLX5E_ACTION_ADD:
-		mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH);
-		hn->action = MLX5E_ACTION_NONE;
-		break;
-
-	case MLX5E_ACTION_DEL:
-		mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai);
-		mlx5e_del_eth_addr_from_hash(hn);
-		break;
-	}
-}
-
-static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv)
-{
-	struct net_device *netdev = priv->netdev;
-	struct netdev_hw_addr *ha;
-
-	netif_addr_lock_bh(netdev);
-
-	mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc,
-				   priv->netdev->dev_addr);
-
-	netdev_for_each_uc_addr(ha, netdev)
-		mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, ha->addr);
-
-	netdev_for_each_mc_addr(ha, netdev)
-		mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_mc, ha->addr);
-
-	netif_addr_unlock_bh(netdev);
-}
-
-static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv)
-{
-	struct mlx5e_eth_addr_hash_node *hn;
-	struct hlist_node *tmp;
-	int i;
-
-	mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i)
-		mlx5e_execute_action(priv, hn);
-
-	mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i)
-		mlx5e_execute_action(priv, hn);
-}
-
-static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv)
-{
-	struct mlx5e_eth_addr_hash_node *hn;
-	struct hlist_node *tmp;
-	int i;
-
-	mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i)
-		hn->action = MLX5E_ACTION_DEL;
-	mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i)
-		hn->action = MLX5E_ACTION_DEL;
-
-	if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state))
-		mlx5e_sync_netdev_addr(priv);
-
-	mlx5e_apply_netdev_addr(priv);
-}
-
-void mlx5e_set_rx_mode_work(struct work_struct *work)
-{
-	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
-					       set_rx_mode_work);
-
-	struct mlx5e_eth_addr_db *ea = &priv->eth_addr;
-	struct net_device *ndev = priv->netdev;
-
-	bool rx_mode_enable   = !test_bit(MLX5E_STATE_DESTROYING, &priv->state);
-	bool promisc_enabled   = rx_mode_enable && (ndev->flags & IFF_PROMISC);
-	bool allmulti_enabled  = rx_mode_enable && (ndev->flags & IFF_ALLMULTI);
-	bool broadcast_enabled = rx_mode_enable;
-
-	bool enable_promisc    = !ea->promisc_enabled   &&  promisc_enabled;
-	bool disable_promisc   =  ea->promisc_enabled   && !promisc_enabled;
-	bool enable_allmulti   = !ea->allmulti_enabled  &&  allmulti_enabled;
-	bool disable_allmulti  =  ea->allmulti_enabled  && !allmulti_enabled;
-	bool enable_broadcast  = !ea->broadcast_enabled &&  broadcast_enabled;
-	bool disable_broadcast =  ea->broadcast_enabled && !broadcast_enabled;
-
-	if (enable_promisc) {
-		mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC);
-		if (!priv->vlan.filter_disabled)
-			mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID,
-					    0);
-	}
-	if (enable_allmulti)
-		mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI);
-	if (enable_broadcast)
-		mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH);
-
-	mlx5e_handle_netdev_addr(priv);
-
-	if (disable_broadcast)
-		mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast);
-	if (disable_allmulti)
-		mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti);
-	if (disable_promisc) {
-		if (!priv->vlan.filter_disabled)
-			mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID,
-					    0);
-		mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc);
-	}
-
-	ea->promisc_enabled   = promisc_enabled;
-	ea->allmulti_enabled  = allmulti_enabled;
-	ea->broadcast_enabled = broadcast_enabled;
-}
-
-void mlx5e_init_eth_addr(struct mlx5e_priv *priv)
-{
-	ether_addr_copy(priv->eth_addr.broadcast.addr, priv->netdev->broadcast);
-}
-
-static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv)
-{
-	struct mlx5_flow_table_group *g;
-	u8 *dmac;
-
-	g = kcalloc(9, sizeof(*g), GFP_KERNEL);
-	if (!g)
-		return -ENOMEM;
-
-	g[0].log_sz = 3;
-	g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria,
-			 outer_headers.ethertype);
-	MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria,
-			 outer_headers.ip_protocol);
-
-	g[1].log_sz = 1;
-	g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria,
-			 outer_headers.ethertype);
-
-	g[2].log_sz = 0;
-
-	g[3].log_sz = 14;
-	g[3].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	dmac = MLX5_ADDR_OF(fte_match_param, g[3].match_criteria,
-			    outer_headers.dmac_47_16);
-	memset(dmac, 0xff, ETH_ALEN);
-	MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria,
-			 outer_headers.ethertype);
-	MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria,
-			 outer_headers.ip_protocol);
-
-	g[4].log_sz = 13;
-	g[4].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	dmac = MLX5_ADDR_OF(fte_match_param, g[4].match_criteria,
-			    outer_headers.dmac_47_16);
-	memset(dmac, 0xff, ETH_ALEN);
-	MLX5_SET_TO_ONES(fte_match_param, g[4].match_criteria,
-			 outer_headers.ethertype);
-
-	g[5].log_sz = 11;
-	g[5].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	dmac = MLX5_ADDR_OF(fte_match_param, g[5].match_criteria,
-			    outer_headers.dmac_47_16);
-	memset(dmac, 0xff, ETH_ALEN);
-
-	g[6].log_sz = 2;
-	g[6].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	dmac = MLX5_ADDR_OF(fte_match_param, g[6].match_criteria,
-			    outer_headers.dmac_47_16);
-	dmac[0] = 0x01;
-	MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria,
-			 outer_headers.ethertype);
-	MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria,
-			 outer_headers.ip_protocol);
-
-	g[7].log_sz = 1;
-	g[7].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	dmac = MLX5_ADDR_OF(fte_match_param, g[7].match_criteria,
-			    outer_headers.dmac_47_16);
-	dmac[0] = 0x01;
-	MLX5_SET_TO_ONES(fte_match_param, g[7].match_criteria,
-			 outer_headers.ethertype);
-
-	g[8].log_sz = 0;
-	g[8].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	dmac = MLX5_ADDR_OF(fte_match_param, g[8].match_criteria,
-			    outer_headers.dmac_47_16);
-	dmac[0] = 0x01;
-	priv->ft.main = mlx5_create_flow_table(priv->mdev, 1,
-					       MLX5_FLOW_TABLE_TYPE_NIC_RCV,
-					       9, g);
-	kfree(g);
-
-	return priv->ft.main ? 0 : -ENOMEM;
-}
-
-static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv)
-{
-	mlx5_destroy_flow_table(priv->ft.main);
-}
-
-static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv)
-{
-	struct mlx5_flow_table_group *g;
-
-	g = kcalloc(2, sizeof(*g), GFP_KERNEL);
-	if (!g)
-		return -ENOMEM;
-
-	g[0].log_sz = 12;
-	g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria,
-			 outer_headers.vlan_tag);
-	MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria,
-			 outer_headers.first_vid);
-
-	/* untagged + any vlan id */
-	g[1].log_sz = 1;
-	g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria,
-			 outer_headers.vlan_tag);
-
-	priv->ft.vlan = mlx5_create_flow_table(priv->mdev, 0,
-					       MLX5_FLOW_TABLE_TYPE_NIC_RCV,
-					       2, g);
-
-	kfree(g);
-	return priv->ft.vlan ? 0 : -ENOMEM;
-}
-
-static void mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv)
-{
-	mlx5_destroy_flow_table(priv->ft.vlan);
-}
-
-int mlx5e_create_flow_tables(struct mlx5e_priv *priv)
-{
-	int err;
-
-	err = mlx5e_create_main_flow_table(priv);
-	if (err)
-		return err;
-
-	err = mlx5e_create_vlan_flow_table(priv);
-	if (err)
-		goto err_destroy_main_flow_table;
-
-	err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
-	if (err)
-		goto err_destroy_vlan_flow_table;
-
-	return 0;
-
-err_destroy_vlan_flow_table:
-	mlx5e_destroy_vlan_flow_table(priv);
-
-err_destroy_main_flow_table:
-	mlx5e_destroy_main_flow_table(priv);
-
-	return err;
-}
-
-void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv)
-{
-	mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
-	mlx5e_destroy_vlan_flow_table(priv);
-	mlx5e_destroy_main_flow_table(priv);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
new file mode 100644
index 000000000000..80d81abc4820
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -0,0 +1,1224 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/list.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/mlx5/fs.h>
+#include "en.h"
+
+#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) /* set field f of a create_flow_group_in buffer p */
+
+enum { /* rule match type: the 'type' argument of the eth-addr rule helpers */
+	MLX5E_FULLMATCH = 0,
+	MLX5E_ALLMULTI  = 1,
+	MLX5E_PROMISC   = 2,
+};
+
+enum { /* address class, as returned by mlx5e_get_eth_addr_type() */
+	MLX5E_UC        = 0,
+	MLX5E_MC_IPV4   = 1,
+	MLX5E_MC_IPV6   = 2,
+	MLX5E_MC_OTHER  = 3,
+};
+
+enum { /* pending action kept in mlx5e_eth_addr_hash_node.action */
+	MLX5E_ACTION_NONE = 0,
+	MLX5E_ACTION_ADD  = 1,
+	MLX5E_ACTION_DEL  = 2,
+};
+
+struct mlx5e_eth_addr_hash_node {
+	struct hlist_node          hlist;  /* link in a hash bucket */
+	u8                         action; /* one of MLX5E_ACTION_* */
+	struct mlx5e_eth_addr_info ai;     /* the address and its flow rules */
+};
+
+static inline int mlx5e_hash_eth_addr(u8 *addr)
+{
+	return addr[5]; /* bucket index is the last byte of the MAC address */
+}
+
+static void mlx5e_add_eth_addr_to_hash(struct hlist_head *hash, u8 *addr)
+{
+	struct hlist_head *bucket = &hash[mlx5e_hash_eth_addr(addr)];
+	struct mlx5e_eth_addr_hash_node *node;
+
+	/*
+	 * Address already tracked: reset its pending action to NONE (the
+	 * re-sync path marks every node for deletion beforehand).
+	 */
+	hlist_for_each_entry(node, bucket, hlist) {
+		if (!ether_addr_equal_64bits(node->ai.addr, addr))
+			continue;
+		node->action = MLX5E_ACTION_NONE;
+		return;
+	}
+
+	/* New address: queue it for addition; silently skipped on OOM. */
+	node = kzalloc(sizeof(*node), GFP_ATOMIC);
+	if (!node)
+		return;
+
+	ether_addr_copy(node->ai.addr, addr);
+	node->action = MLX5E_ACTION_ADD;
+
+	hlist_add_head(&node->hlist, bucket);
+}
+
+static void mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn)
+{
+	hlist_del(&hn->hlist); /* unlink the node from its bucket ... */
+	kfree(hn);             /* ... and free it; hn must not be used afterwards */
+}
+
+static void mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv,
+					       struct mlx5e_eth_addr_info *ai)
+{
+	/* Traffic types in the order their rules are torn down. */
+	static const u8 del_order[] = {
+		MLX5E_TT_IPV6_IPSEC_ESP,
+		MLX5E_TT_IPV4_IPSEC_ESP,
+		MLX5E_TT_IPV6_IPSEC_AH,
+		MLX5E_TT_IPV4_IPSEC_AH,
+		MLX5E_TT_IPV6_TCP,
+		MLX5E_TT_IPV4_TCP,
+		MLX5E_TT_IPV6_UDP,
+		MLX5E_TT_IPV4_UDP,
+		MLX5E_TT_IPV6,
+		MLX5E_TT_IPV4,
+		MLX5E_TT_ANY,
+	};
+	int i;
+
+	/*
+	 * Remove every rule recorded in ai->tt_vec and forget it right away.
+	 * ai can be passed here again later (the promisc/allmulti/broadcast
+	 * entries are reused, and a node whose rule installation failed
+	 * half-way is cleaned up and later deleted), so a stale bit or
+	 * pointer would make a later call delete the same rule twice.
+	 */
+	for (i = 0; i < ARRAY_SIZE(del_order); i++) {
+		u8 tt = del_order[i];
+
+		if (!(ai->tt_vec & BIT(tt)))
+			continue;
+		mlx5_del_flow_rule(ai->ft_rule[tt]);
+		ai->ft_rule[tt] = NULL;
+		ai->tt_vec &= ~BIT(tt);
+	}
+}
+
+static int mlx5e_get_eth_addr_type(u8 *addr)
+{
+	/* 01:00:5e with the top bit of byte 3 clear: classed as IPv4 mcast. */
+	bool ipv4_mc = addr[0] == 0x01 && addr[1] == 0x00 &&
+		       addr[2] == 0x5e && !(addr[3] & 0x80);
+	/* 33:33 prefix: classed as IPv6 multicast. */
+	bool ipv6_mc = addr[0] == 0x33 && addr[1] == 0x33;
+
+	if (is_unicast_ether_addr(addr))
+		return MLX5E_UC;
+	if (ipv4_mc)
+		return MLX5E_MC_IPV4;
+	if (ipv6_mc)
+		return MLX5E_MC_IPV6;
+
+	return MLX5E_MC_OTHER;
+}
+
+static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type)
+{
+	int eth_addr_type;
+	u32 ret = 0; /* stays empty if the address class matches no case below */
+
+	switch (type) {
+	case MLX5E_FULLMATCH: /* rule set depends on the class of ai->addr */
+		eth_addr_type = mlx5e_get_eth_addr_type(ai->addr);
+		switch (eth_addr_type) {
+		case MLX5E_UC:
+			ret =
+				BIT(MLX5E_TT_IPV4_TCP)       |
+				BIT(MLX5E_TT_IPV6_TCP)       |
+				BIT(MLX5E_TT_IPV4_UDP)       |
+				BIT(MLX5E_TT_IPV6_UDP)       |
+				BIT(MLX5E_TT_IPV4_IPSEC_AH)  |
+				BIT(MLX5E_TT_IPV6_IPSEC_AH)  |
+				BIT(MLX5E_TT_IPV4_IPSEC_ESP) |
+				BIT(MLX5E_TT_IPV6_IPSEC_ESP) |
+				BIT(MLX5E_TT_IPV4)           |
+				BIT(MLX5E_TT_IPV6)           |
+				BIT(MLX5E_TT_ANY)            |
+				0;
+			break;
+
+		case MLX5E_MC_IPV4:
+			ret =
+				BIT(MLX5E_TT_IPV4_UDP)       |
+				BIT(MLX5E_TT_IPV4)           |
+				0;
+			break;
+
+		case MLX5E_MC_IPV6:
+			ret =
+				BIT(MLX5E_TT_IPV6_UDP)       |
+				BIT(MLX5E_TT_IPV6)           |
+				0;
+			break;
+
+		case MLX5E_MC_OTHER:
+			ret =
+				BIT(MLX5E_TT_ANY)            |
+				0;
+			break;
+		}
+
+		break;
+
+	case MLX5E_ALLMULTI: /* UDP, plain IP and non-IP; no TCP or IPsec */
+		ret =
+			BIT(MLX5E_TT_IPV4_UDP) |
+			BIT(MLX5E_TT_IPV6_UDP) |
+			BIT(MLX5E_TT_IPV4)     |
+			BIT(MLX5E_TT_IPV6)     |
+			BIT(MLX5E_TT_ANY)      |
+			0;
+		break;
+
+	default: /* MLX5E_PROMISC */
+		ret =
+			BIT(MLX5E_TT_IPV4_TCP)       |
+			BIT(MLX5E_TT_IPV6_TCP)       |
+			BIT(MLX5E_TT_IPV4_UDP)       |
+			BIT(MLX5E_TT_IPV6_UDP)       |
+			BIT(MLX5E_TT_IPV4_IPSEC_AH)  |
+			BIT(MLX5E_TT_IPV6_IPSEC_AH)  |
+			BIT(MLX5E_TT_IPV4_IPSEC_ESP) |
+			BIT(MLX5E_TT_IPV6_IPSEC_ESP) |
+			BIT(MLX5E_TT_IPV4)           |
+			BIT(MLX5E_TT_IPV6)           |
+			BIT(MLX5E_TT_ANY)            |
+			0;
+		break;
+	}
+
+	return ret; /* bitmask of BIT(MLX5E_TT_*) to install */
+}
+
+static int __mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv,
+				     struct mlx5e_eth_addr_info *ai,
+				     int type, u32 *mc, u32 *mv)
+{
+	struct mlx5_flow_destination dest;
+	u8 match_criteria_enable = 0;
+	struct mlx5_flow_rule **rule_p;
+	struct mlx5_flow_table *ft = priv->fts.main.t;
+	u8 *mc_dmac = MLX5_ADDR_OF(fte_match_param, mc,
+				   outer_headers.dmac_47_16);
+	u8 *mv_dmac = MLX5_ADDR_OF(fte_match_param, mv,
+				   outer_headers.dmac_47_16);
+	u32 *tirn = priv->tirn;
+	u32 tt_vec;
+	int err = 0;
+
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+
+	switch (type) {
+	case MLX5E_FULLMATCH: /* match all 48 bits of the destination MAC */
+		match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+		eth_broadcast_addr(mc_dmac);
+		ether_addr_copy(mv_dmac, ai->addr);
+		break;
+
+	case MLX5E_ALLMULTI: /* match only the 0x01 bit of the first byte */
+		match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+		mc_dmac[0] = 0x01;
+		mv_dmac[0] = 0x01;
+		break;
+
+	case MLX5E_PROMISC: /* no destination MAC criteria at all */
+		break;
+	}
+
+	tt_vec = mlx5e_get_tt_vec(ai, type); /* rules wanted for this entry */
+
+	if (tt_vec & BIT(MLX5E_TT_ANY)) {
+		rule_p = &ai->ft_rule[MLX5E_TT_ANY];
+		dest.tir_num = tirn[MLX5E_TT_ANY];
+		*rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv,
+					     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+					     MLX5_FS_DEFAULT_FLOW_TAG, &dest);
+		if (IS_ERR_OR_NULL(*rule_p))
+			goto err_del_ai;
+		ai->tt_vec |= BIT(MLX5E_TT_ANY);
+	}
+
+	match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+
+	if (tt_vec & BIT(MLX5E_TT_IPV4)) {
+		rule_p = &ai->ft_rule[MLX5E_TT_IPV4];
+		dest.tir_num = tirn[MLX5E_TT_IPV4];
+		MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+			 ETH_P_IP);
+		*rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv,
+					     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+					     MLX5_FS_DEFAULT_FLOW_TAG, &dest);
+		if (IS_ERR_OR_NULL(*rule_p))
+			goto err_del_ai;
+		ai->tt_vec |= BIT(MLX5E_TT_IPV4);
+	}
+
+	if (tt_vec & BIT(MLX5E_TT_IPV6)) {
+		rule_p = &ai->ft_rule[MLX5E_TT_IPV6];
+		dest.tir_num = tirn[MLX5E_TT_IPV6];
+		MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+			 ETH_P_IPV6);
+		*rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv,
+					     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+					     MLX5_FS_DEFAULT_FLOW_TAG, &dest);
+		if (IS_ERR_OR_NULL(*rule_p))
+			goto err_del_ai;
+		ai->tt_vec |= BIT(MLX5E_TT_IPV6);
+	}
+
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+	MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_UDP);
+
+	if (tt_vec & BIT(MLX5E_TT_IPV4_UDP)) {
+		rule_p = &ai->ft_rule[MLX5E_TT_IPV4_UDP];
+		dest.tir_num = tirn[MLX5E_TT_IPV4_UDP];
+		MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+			 ETH_P_IP);
+		*rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv,
+					     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+					     MLX5_FS_DEFAULT_FLOW_TAG, &dest);
+		if (IS_ERR_OR_NULL(*rule_p))
+			goto err_del_ai;
+		ai->tt_vec |= BIT(MLX5E_TT_IPV4_UDP);
+	}
+
+	if (tt_vec & BIT(MLX5E_TT_IPV6_UDP)) {
+		rule_p = &ai->ft_rule[MLX5E_TT_IPV6_UDP];
+		dest.tir_num = tirn[MLX5E_TT_IPV6_UDP];
+		MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+			 ETH_P_IPV6);
+		*rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv,
+					     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+					     MLX5_FS_DEFAULT_FLOW_TAG, &dest);
+		if (IS_ERR_OR_NULL(*rule_p))
+			goto err_del_ai;
+		ai->tt_vec |= BIT(MLX5E_TT_IPV6_UDP);
+	}
+
+	MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_TCP);
+
+	if (tt_vec & BIT(MLX5E_TT_IPV4_TCP)) {
+		rule_p = &ai->ft_rule[MLX5E_TT_IPV4_TCP];
+		dest.tir_num = tirn[MLX5E_TT_IPV4_TCP];
+		MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+			 ETH_P_IP);
+		*rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv,
+					     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+					     MLX5_FS_DEFAULT_FLOW_TAG, &dest);
+		if (IS_ERR_OR_NULL(*rule_p))
+			goto err_del_ai;
+		ai->tt_vec |= BIT(MLX5E_TT_IPV4_TCP);
+	}
+
+	if (tt_vec & BIT(MLX5E_TT_IPV6_TCP)) {
+		rule_p = &ai->ft_rule[MLX5E_TT_IPV6_TCP];
+		dest.tir_num = tirn[MLX5E_TT_IPV6_TCP];
+		MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+			 ETH_P_IPV6);
+		*rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv,
+					     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+					     MLX5_FS_DEFAULT_FLOW_TAG, &dest);
+		if (IS_ERR_OR_NULL(*rule_p))
+			goto err_del_ai;
+
+		ai->tt_vec |= BIT(MLX5E_TT_IPV6_TCP);
+	}
+
+	MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_AH);
+
+	if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) {
+		rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH];
+		dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_AH];
+		MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+			 ETH_P_IP);
+		*rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv,
+					     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+					     MLX5_FS_DEFAULT_FLOW_TAG, &dest);
+		if (IS_ERR_OR_NULL(*rule_p))
+			goto err_del_ai;
+		ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_AH);
+	}
+
+	if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) {
+		rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH];
+		dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_AH];
+		MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+			 ETH_P_IPV6);
+		*rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv,
+					     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+					     MLX5_FS_DEFAULT_FLOW_TAG, &dest);
+		if (IS_ERR_OR_NULL(*rule_p))
+			goto err_del_ai;
+		ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_AH);
+	}
+
+	MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_ESP);
+
+	if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) {
+		rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP];
+		dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_ESP];
+		MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+			 ETH_P_IP);
+		*rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv,
+					     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+					     MLX5_FS_DEFAULT_FLOW_TAG, &dest);
+		if (IS_ERR_OR_NULL(*rule_p))
+			goto err_del_ai;
+		ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_ESP);
+	}
+
+	if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) {
+		rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP];
+		dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_ESP];
+		MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+			 ETH_P_IPV6);
+		*rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv,
+					     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+					     MLX5_FS_DEFAULT_FLOW_TAG, &dest);
+		if (IS_ERR_OR_NULL(*rule_p))
+			goto err_del_ai;
+		ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_ESP);
+	}
+
+	return 0;
+
+err_del_ai: /* undo the rules installed so far and report the failure */
+	err = *rule_p ? PTR_ERR(*rule_p) : -EINVAL; /* NULL is a failure too */
+	*rule_p = NULL;
+	mlx5e_del_eth_addr_from_flow_table(priv, ai);
+
+	return err;
+}
+
+static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv,
+				   struct mlx5e_eth_addr_info *ai, int type)
+{
+	u32 *match_criteria;
+	u32 *match_value;
+	int err = 0;
+
+	match_value	= mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
+	match_criteria	= mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
+	if (!match_value || !match_criteria) {
+		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
+		err = -ENOMEM;
+		goto add_eth_addr_rule_out;
+	}
+
+	err = __mlx5e_add_eth_addr_rule(priv, ai, type, match_criteria,
+					match_value); /* does the actual rule installation */
+
+add_eth_addr_rule_out: /* both buffers are scratch space, freed on every path */
+	kvfree(match_criteria);
+	kvfree(match_value);
+
+	return err;
+}
+
+static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
+{
+	struct net_device *ndev = priv->netdev;
+	int max_list_size;
+	int list_size;
+	u16 *vlans;
+	int vlan;
+	int err;
+	int i;
+
+	list_size = 0; /* first pass: count the active vlan ids */
+	for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID)
+		list_size++;
+
+	max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list);
+
+	if (list_size > max_list_size) { /* truncate to the device limit */
+		netdev_warn(ndev,
+			    "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n",
+			    list_size, max_list_size);
+		list_size = max_list_size;
+	}
+
+	vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL);
+	if (!vlans)
+		return -ENOMEM;
+
+	i = 0; /* second pass: copy out the first list_size active ids */
+	for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) {
+		if (i >= list_size)
+			break;
+		vlans[i++] = vlan;
+	}
+
+	err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size);
+	if (err)
+		netdev_err(ndev, "Failed to modify vport vlans list err(%d)\n",
+			   err);
+
+	kfree(vlans);
+	return err; /* 0, -ENOMEM, or the error from the modify call */
+}
+
+enum mlx5e_vlan_rule_type {
+	MLX5E_VLAN_RULE_TYPE_UNTAGGED,  /* stored in priv->vlan.untagged_rule */
+	MLX5E_VLAN_RULE_TYPE_ANY_VID,   /* stored in priv->vlan.any_vlan_rule */
+	MLX5E_VLAN_RULE_TYPE_MATCH_VID, /* stored in priv->vlan.active_vlans_rule[vid] */
+};
+
+static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
+				 enum mlx5e_vlan_rule_type rule_type,
+				 u16 vid, u32 *mc, u32 *mv)
+{
+	struct mlx5_flow_table *ft = priv->fts.vlan.t;
+	struct mlx5_flow_destination dest;
+	u8 match_criteria_enable = 0;
+	struct mlx5_flow_rule **rule_p;
+	int err = 0;
+
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dest.ft = priv->fts.main.t; /* every vlan rule forwards to the main table */
+
+	match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag);
+
+	switch (rule_type) {
+	case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
+		rule_p = &priv->vlan.untagged_rule; /* vlan_tag value is not set here (presumably 0 from the caller) */
+		break;
+	case MLX5E_VLAN_RULE_TYPE_ANY_VID:
+		rule_p = &priv->vlan.any_vlan_rule;
+		MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1);
+		break;
+	default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */
+		rule_p = &priv->vlan.active_vlans_rule[vid];
+		MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1);
+		MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid);
+		MLX5_SET(fte_match_param, mv, outer_headers.first_vid, vid);
+		break;
+	}
+
+	*rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv,
+				     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+				     MLX5_FS_DEFAULT_FLOW_TAG,
+				     &dest);
+
+	if (IS_ERR(*rule_p)) {
+		err = PTR_ERR(*rule_p);
+		*rule_p = NULL; /* never leave an ERR_PTR in the stored slot */
+		netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
+	}
+
+	return err;
+}
+
+static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
+			       enum mlx5e_vlan_rule_type rule_type, u16 vid)
+{
+	u32 *match_criteria;
+	u32 *match_value;
+	int err = 0;
+
+	match_value	= mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
+	match_criteria	= mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
+	if (!match_value || !match_criteria) {
+		netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
+		err = -ENOMEM;
+		goto add_vlan_rule_out;
+	}
+
+	if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_VID)
+		mlx5e_vport_context_update_vlans(priv); /* result is ignored */
+
+	err = __mlx5e_add_vlan_rule(priv, rule_type, vid, match_criteria,
+				    match_value);
+
+add_vlan_rule_out: /* both buffers are scratch space, freed on every path */
+	kvfree(match_criteria);
+	kvfree(match_value);
+
+	return err;
+}
+
+static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv,
+				enum mlx5e_vlan_rule_type rule_type, u16 vid)
+{
+	switch (rule_type) { /* each case is a no-op if the rule is absent */
+	case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
+		if (priv->vlan.untagged_rule) {
+			mlx5_del_flow_rule(priv->vlan.untagged_rule);
+			priv->vlan.untagged_rule = NULL;
+		}
+		break;
+	case MLX5E_VLAN_RULE_TYPE_ANY_VID:
+		if (priv->vlan.any_vlan_rule) {
+			mlx5_del_flow_rule(priv->vlan.any_vlan_rule);
+			priv->vlan.any_vlan_rule = NULL;
+		}
+		break;
+	case MLX5E_VLAN_RULE_TYPE_MATCH_VID:
+		/* Remove the rule, then push the vport vlan list exactly once. */
+		if (priv->vlan.active_vlans_rule[vid]) {
+			mlx5_del_flow_rule(priv->vlan.active_vlans_rule[vid]);
+			priv->vlan.active_vlans_rule[vid] = NULL;
+		}
+		mlx5e_vport_context_update_vlans(priv);
+		break;
+	}
+}
+
+void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv)
+{
+	if (!priv->vlan.filter_disabled)
+		return;
+
+	priv->vlan.filter_disabled = false;
+	/* While the netdev is promiscuous the any-vid rule is left alone. */
+	if (!(priv->netdev->flags & IFF_PROMISC))
+		mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0);
+}
+
+void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv)
+{
+	if (priv->vlan.filter_disabled)
+		return;
+
+	priv->vlan.filter_disabled = true;
+	/* While the netdev is promiscuous no any-vid rule is added here. */
+	if (!(priv->netdev->flags & IFF_PROMISC))
+		mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0);
+}
+
+int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
+			  u16 vid)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	set_bit(vid, priv->vlan.active_vlans);
+	/* NOTE(review): if adding the rule fails the bit above stays set. */
+	return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid);
+}
+
+int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
+			   u16 vid)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	clear_bit(vid, priv->vlan.active_vlans); /* cleared before the vport list is rebuilt */
+
+	mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid);
+
+	return 0; /* always reports success */
+}
+
+#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \
+	for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \
+		hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) /* _safe: the loop body may free hn */
+
+static void mlx5e_execute_action(struct mlx5e_priv *priv,
+				 struct mlx5e_eth_addr_hash_node *hn)
+{
+	switch (hn->action) { /* MLX5E_ACTION_NONE falls through untouched */
+	case MLX5E_ACTION_ADD:
+		mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH); /* result is ignored */
+		hn->action = MLX5E_ACTION_NONE;
+		break;
+
+	case MLX5E_ACTION_DEL:
+		mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai);
+		mlx5e_del_eth_addr_from_hash(hn); /* frees hn */
+		break;
+	}
+}
+
+static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv)
+{
+	struct net_device *netdev = priv->netdev;
+	struct netdev_hw_addr *ha;
+
+	netif_addr_lock_bh(netdev); /* held while the address lists are walked */
+
+	mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc,
+				   priv->netdev->dev_addr); /* own MAC joins the UC hash */
+
+	netdev_for_each_uc_addr(ha, netdev)
+		mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, ha->addr);
+
+	netdev_for_each_mc_addr(ha, netdev)
+		mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_mc, ha->addr);
+
+	netif_addr_unlock_bh(netdev);
+}
+
+static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type,
+				  u8 addr_array[][ETH_ALEN], int size)
+{
+	bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC);
+	struct net_device *ndev = priv->netdev;
+	struct mlx5e_eth_addr_hash_node *hn;
+	struct hlist_head *addr_list;
+	struct hlist_node *tmp;
+	int i = 0;
+	int hi;
+
+	addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc;
+
+	if (is_uc) /* Make sure our own address is pushed first */
+		ether_addr_copy(addr_array[i++], ndev->dev_addr);
+	else if (priv->eth_addr.broadcast_enabled)
+		ether_addr_copy(addr_array[i++], ndev->broadcast);
+
+	mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) {
+		if (ether_addr_equal(ndev->dev_addr, hn->ai.addr))
+			continue; /* own address: skipped in both lists */
+		if (i >= size)
+			break; /* leaves only the inner hlist loop; no later copy can pass this check */
+		ether_addr_copy(addr_array[i++], hn->ai.addr);
+	}
+}
+
+static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv,
+						 int list_type)
+{
+	bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC);
+	struct mlx5e_eth_addr_hash_node *hn;
+	u8 (*addr_array)[ETH_ALEN] = NULL;
+	struct hlist_head *addr_list;
+	struct hlist_node *tmp;
+	int max_size;
+	int size;
+	int err;
+	int hi;
+
+	size = is_uc ? 0 : (priv->eth_addr.broadcast_enabled ? 1 : 0); /* MC list reserves a slot for broadcast */
+	max_size = is_uc ?
+		1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) :
+		1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list);
+
+	addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc;
+	mlx5e_for_each_hash_node(hn, tmp, addr_list, hi)
+		size++; /* one slot per tracked address */
+
+	if (size > max_size) { /* truncate to the device limit */
+		netdev_warn(priv->netdev,
+			    "netdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n",
+			    is_uc ? "UC" : "MC", size, max_size);
+		size = max_size;
+	}
+
+	if (size) { /* with size == 0 the list is pushed with a NULL array */
+		addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL);
+		if (!addr_array) {
+			err = -ENOMEM;
+			goto out;
+		}
+		mlx5e_fill_addr_array(priv, list_type, addr_array, size);
+	}
+
+	err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size);
+out:
+	if (err) /* errors are only logged; nothing is returned to the caller */
+		netdev_err(priv->netdev,
+			   "Failed to modify vport %s list err(%d)\n",
+			   is_uc ? "UC" : "MC", err);
+	kfree(addr_array);
+}
+
+static void mlx5e_vport_context_update(struct mlx5e_priv *priv)
+{
+	struct mlx5e_eth_addr_db *ea = &priv->eth_addr;
+
+	mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC);
+	mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC);
+	mlx5_modify_nic_vport_promisc(priv->mdev, 0,
+				      ea->allmulti_enabled,
+				      ea->promisc_enabled); /* result is ignored */
+}
+
+static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv)
+{
+	struct mlx5e_eth_addr_hash_node *hn;
+	struct hlist_node *tmp;
+	int i;
+
+	mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i)
+		mlx5e_execute_action(priv, hn); /* may free hn (DEL action) */
+
+	mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i)
+		mlx5e_execute_action(priv, hn); /* may free hn (DEL action) */
+}
+
+static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv)
+{
+	struct mlx5e_eth_addr_hash_node *hn;
+	struct hlist_node *tmp;
+	int i;
+
+	mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i)
+		hn->action = MLX5E_ACTION_DEL; /* step 1: mark everything stale */
+	mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i)
+		hn->action = MLX5E_ACTION_DEL;
+
+	if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state))
+		mlx5e_sync_netdev_addr(priv); /* step 2: un-mark or add current addresses */
+
+	mlx5e_apply_netdev_addr(priv); /* step 3: install ADDs, remove what is still DEL */
+}
+
+void mlx5e_set_rx_mode_work(struct work_struct *work)
+{
+	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+					       set_rx_mode_work);
+
+	struct mlx5e_eth_addr_db *ea = &priv->eth_addr;
+	struct net_device *ndev = priv->netdev;
+
+	bool rx_mode_enable   = !test_bit(MLX5E_STATE_DESTROYING, &priv->state); /* false: everything gets torn down */
+	bool promisc_enabled   = rx_mode_enable && (ndev->flags & IFF_PROMISC);
+	bool allmulti_enabled  = rx_mode_enable && (ndev->flags & IFF_ALLMULTI);
+	bool broadcast_enabled = rx_mode_enable;
+
+	bool enable_promisc    = !ea->promisc_enabled   &&  promisc_enabled; /* edges: requested state vs. state in ea */
+	bool disable_promisc   =  ea->promisc_enabled   && !promisc_enabled;
+	bool enable_allmulti   = !ea->allmulti_enabled  &&  allmulti_enabled;
+	bool disable_allmulti  =  ea->allmulti_enabled  && !allmulti_enabled;
+	bool enable_broadcast  = !ea->broadcast_enabled &&  broadcast_enabled;
+	bool disable_broadcast =  ea->broadcast_enabled && !broadcast_enabled;
+
+	if (enable_promisc) { /* enables run before the address sync */
+		mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC);
+		if (!priv->vlan.filter_disabled)
+			mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID,
+					    0);
+	}
+	if (enable_allmulti)
+		mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI);
+	if (enable_broadcast)
+		mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH);
+
+	mlx5e_handle_netdev_addr(priv);
+
+	if (disable_broadcast) /* disables run after the address sync */
+		mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast);
+	if (disable_allmulti)
+		mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti);
+	if (disable_promisc) {
+		if (!priv->vlan.filter_disabled)
+			mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID,
+					    0);
+		mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc);
+	}
+
+	ea->promisc_enabled   = promisc_enabled; /* record the state now applied */
+	ea->allmulti_enabled  = allmulti_enabled;
+	ea->broadcast_enabled = broadcast_enabled;
+
+	mlx5e_vport_context_update(priv); /* reads the ea flags stored just above */
+}
+
+static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft)
+{
+	int i;
+
+	for (i = ft->num_groups - 1; i >= 0; i--) { /* last created, first destroyed */
+		if (!IS_ERR_OR_NULL(ft->g[i]))
+			mlx5_destroy_flow_group(ft->g[i]);
+		ft->g[i] = NULL;
+	}
+	ft->num_groups = 0;
+}
+
+void mlx5e_init_eth_addr(struct mlx5e_priv *priv)
+{
+	ether_addr_copy(priv->eth_addr.broadcast.addr, priv->netdev->broadcast); /* seed the broadcast entry's address */
+}
+
+#define MLX5E_MAIN_GROUP0_SIZE	BIT(3) /* groups 0-2: no dmac criteria */
+#define MLX5E_MAIN_GROUP1_SIZE	BIT(1)
+#define MLX5E_MAIN_GROUP2_SIZE	BIT(0)
+#define MLX5E_MAIN_GROUP3_SIZE	BIT(14) /* groups 3-5: full dmac match */
+#define MLX5E_MAIN_GROUP4_SIZE	BIT(13)
+#define MLX5E_MAIN_GROUP5_SIZE	BIT(11)
+#define MLX5E_MAIN_GROUP6_SIZE	BIT(2) /* groups 6-8: dmac 0x01 bit only */
+#define MLX5E_MAIN_GROUP7_SIZE	BIT(1)
+#define MLX5E_MAIN_GROUP8_SIZE	BIT(0)
+#define MLX5E_MAIN_TABLE_SIZE	(MLX5E_MAIN_GROUP0_SIZE +\
+				 MLX5E_MAIN_GROUP1_SIZE +\
+				 MLX5E_MAIN_GROUP2_SIZE +\
+				 MLX5E_MAIN_GROUP3_SIZE +\
+				 MLX5E_MAIN_GROUP4_SIZE +\
+				 MLX5E_MAIN_GROUP5_SIZE +\
+				 MLX5E_MAIN_GROUP6_SIZE +\
+				 MLX5E_MAIN_GROUP7_SIZE +\
+				 MLX5E_MAIN_GROUP8_SIZE)
+
+static int __mlx5e_create_main_groups(struct mlx5e_flow_table *ft, u32 *in,
+				      int inlen)
+{
+	u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+	u8 *dmac = MLX5_ADDR_OF(create_flow_group_in, in,
+				match_criteria.outer_headers.dmac_47_16);
+	int err;
+	int ix = 0; /* next free flow index; groups are laid out back to back */
+
+	memset(in, 0, inlen); /* group 0: ethertype + ip_protocol */
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_MAIN_GROUP0_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err_destroy_groups;
+	ft->num_groups++;
+
+	memset(in, 0, inlen); /* group 1: ethertype */
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_MAIN_GROUP1_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err_destroy_groups;
+	ft->num_groups++;
+
+	memset(in, 0, inlen); /* group 2: no match criteria */
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_MAIN_GROUP2_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err_destroy_groups;
+	ft->num_groups++;
+
+	memset(in, 0, inlen); /* group 3: full dmac + ethertype + ip_protocol */
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+	eth_broadcast_addr(dmac);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_MAIN_GROUP3_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err_destroy_groups;
+	ft->num_groups++;
+
+	memset(in, 0, inlen); /* group 4: full dmac + ethertype */
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+	eth_broadcast_addr(dmac);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_MAIN_GROUP4_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err_destroy_groups;
+	ft->num_groups++;
+
+	memset(in, 0, inlen); /* group 5: full dmac */
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	eth_broadcast_addr(dmac);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_MAIN_GROUP5_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err_destroy_groups;
+	ft->num_groups++;
+
+	memset(in, 0, inlen); /* group 6: dmac 0x01 bit + ethertype + ip_protocol */
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+	dmac[0] = 0x01;
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_MAIN_GROUP6_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err_destroy_groups;
+	ft->num_groups++;
+
+	memset(in, 0, inlen); /* group 7: dmac 0x01 bit + ethertype */
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+	dmac[0] = 0x01;
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_MAIN_GROUP7_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err_destroy_groups;
+	ft->num_groups++;
+
+	memset(in, 0, inlen); /* group 8: dmac 0x01 bit */
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	dmac[0] = 0x01;
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_MAIN_GROUP8_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err_destroy_groups;
+	ft->num_groups++;
+
+	return 0;
+
+err_destroy_groups: /* ft->g[num_groups] holds the ERR_PTR of the failed group */
+	err = PTR_ERR(ft->g[ft->num_groups]);
+	ft->g[ft->num_groups] = NULL;
+	mlx5e_destroy_groups(ft); /* releases the groups created before the failure */
+
+	return err;
+}
+
+static int mlx5e_create_main_groups(struct mlx5e_flow_table *ft)
+{
+	u32 *in;
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	int err;
+
+	in = mlx5_vzalloc(inlen); /* scratch command buffer reused for every group */
+	if (!in)
+		return -ENOMEM;
+
+	err = __mlx5e_create_main_groups(ft, in, inlen);
+
+	kvfree(in);
+	return err;
+}
+
+static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv)
+{
+	struct mlx5e_flow_table *ft = &priv->fts.main;
+	int err;
+
+	ft->num_groups = 0;
+	ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_MAIN_TABLE_SIZE);
+
+	if (IS_ERR(ft->t)) {
+		err = PTR_ERR(ft->t);
+		ft->t = NULL; /* do not leave an ERR_PTR in ft->t */
+		return err;
+	}
+	ft->g = kcalloc(MLX5E_NUM_MAIN_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+	if (!ft->g) {
+		err = -ENOMEM;
+		goto err_destroy_main_flow_table;
+	}
+
+	err = mlx5e_create_main_groups(ft);
+	if (err)
+		goto err_free_g;
+	return 0;
+
+err_free_g:
+	kfree(ft->g); /* NOTE(review): ft->g is left dangling here, not reset to NULL */
+
+err_destroy_main_flow_table:
+	mlx5_destroy_flow_table(ft->t);
+	ft->t = NULL;
+
+	return err;
+}
+
+static void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft)
+{
+	mlx5e_destroy_groups(ft); /* groups go first, then the table itself */
+	kfree(ft->g);
+	mlx5_destroy_flow_table(ft->t);
+	ft->t = NULL;
+}
+
+static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv)
+{
+	mlx5e_destroy_flow_table(&priv->fts.main); /* groups, group array and table */
+}
+
+#define MLX5E_NUM_VLAN_GROUPS	2
+#define MLX5E_VLAN_GROUP0_SIZE	BIT(12) /* group matching vlan_tag + first_vid */
+#define MLX5E_VLAN_GROUP1_SIZE	BIT(1) /* group matching vlan_tag only */
+#define MLX5E_VLAN_TABLE_SIZE	(MLX5E_VLAN_GROUP0_SIZE +\
+				 MLX5E_VLAN_GROUP1_SIZE)
+
+static int __mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft, u32 *in,
+				      int inlen)
+{
+	int err;
+	int ix = 0; /* next free flow index */
+	u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+	memset(in, 0, inlen); /* group 0: vlan_tag + first_vid */
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_VLAN_GROUP0_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err_destroy_groups;
+	ft->num_groups++;
+
+	memset(in, 0, inlen); /* group 1: vlan_tag only */
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_VLAN_GROUP1_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err_destroy_groups;
+	ft->num_groups++;
+
+	return 0;
+
+err_destroy_groups: /* ft->g[num_groups] holds the ERR_PTR of the failed group */
+	err = PTR_ERR(ft->g[ft->num_groups]);
+	ft->g[ft->num_groups] = NULL;
+	mlx5e_destroy_groups(ft);
+
+	return err;
+}
+
+static int mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft)
+{
+	u32 *in;
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	int err;
+
+	in = mlx5_vzalloc(inlen); /* scratch command buffer reused for both groups */
+	if (!in)
+		return -ENOMEM;
+
+	err = __mlx5e_create_vlan_groups(ft, in, inlen);
+
+	kvfree(in);
+	return err;
+}
+
+static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv)
+{
+	struct mlx5e_flow_table *ft = &priv->fts.vlan;
+	int err;
+
+	ft->num_groups = 0;
+	ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_VLAN_TABLE_SIZE);
+
+	if (IS_ERR(ft->t)) {
+		err = PTR_ERR(ft->t);
+		ft->t = NULL; /* do not leave an ERR_PTR in ft->t */
+		return err;
+	}
+	ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+	if (!ft->g) {
+		err = -ENOMEM;
+		goto err_destroy_vlan_flow_table;
+	}
+
+	err = mlx5e_create_vlan_groups(ft);
+	if (err)
+		goto err_free_g;
+
+	return 0;
+
+err_free_g:
+	kfree(ft->g); /* NOTE(review): ft->g is left dangling here, not reset to NULL */
+
+err_destroy_vlan_flow_table:
+	mlx5_destroy_flow_table(ft->t);
+	ft->t = NULL;
+
+	return err;
+}
+
+static void mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv)
+{
+	mlx5e_destroy_flow_table(&priv->fts.vlan); /* groups, group array and table */
+}
+
+int mlx5e_create_flow_tables(struct mlx5e_priv *priv)
+{
+	int err;
+
+	priv->fts.ns = mlx5_get_flow_namespace(priv->mdev,
+					       MLX5_FLOW_NAMESPACE_KERNEL);
+
+	if (!priv->fts.ns)
+		return -EINVAL;
+
+	err = mlx5e_create_vlan_flow_table(priv); /* vlan table first, main table second */
+	if (err)
+		return err;
+
+	err = mlx5e_create_main_flow_table(priv);
+	if (err)
+		goto err_destroy_vlan_flow_table;
+
+	err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); /* needs both tables to exist */
+	if (err)
+		goto err_destroy_main_flow_table;
+
+	return 0;
+
+err_destroy_main_flow_table: /* unwind in reverse order of creation */
+	mlx5e_destroy_main_flow_table(priv);
+err_destroy_vlan_flow_table:
+	mlx5e_destroy_vlan_flow_table(priv);
+
+	return err;
+}
+
+void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv)
+{
+	mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); /* mirror of mlx5e_create_flow_tables(), reversed */
+	mlx5e_destroy_main_flow_table(priv);
+	mlx5e_destroy_vlan_flow_table(priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index f6a8cc787603..d4601a564699 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -30,8 +30,9 @@
  * SOFTWARE.
  */
 
-#include <linux/mlx5/flow_table.h>
+#include <linux/mlx5/fs.h>
 #include "en.h"
+#include "eswitch.h"
 
 struct mlx5e_rq_param {
 	u32                        rqc[MLX5_ST_SZ_DW(rqc)];
@@ -63,7 +64,7 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv)
 	u8 port_state;
 
 	port_state = mlx5_query_vport_state(mdev,
-		MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT);
+		MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
 
 	if (port_state == VPORT_STATE_UP)
 		netif_carrier_on(priv->netdev);
@@ -1931,6 +1932,79 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
 	return err;
 }
 
+static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+{
+	struct mlx5e_priv *en_priv = netdev_priv(dev);
+	struct mlx5_eswitch *esw = en_priv->mdev->priv.eswitch;
+
+	return mlx5_eswitch_set_vport_mac(esw, vf + 1, mac);
+}
+
+static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos)
+{
+	struct mlx5e_priv *en_priv = netdev_priv(dev);
+	struct mlx5_eswitch *esw = en_priv->mdev->priv.eswitch;
+	int vport = vf + 1; /* vport 0 is the PF, VFs follow */
+
+	return mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos);
+}
+
+static int mlx5_vport_link2ifla(u8 esw_link)
+{
+	/* Anything other than an explicit DOWN/UP is reported as AUTO. */
+	if (esw_link == MLX5_ESW_VPORT_ADMIN_STATE_DOWN)
+		return IFLA_VF_LINK_STATE_DISABLE;
+	if (esw_link == MLX5_ESW_VPORT_ADMIN_STATE_UP)
+		return IFLA_VF_LINK_STATE_ENABLE;
+
+	return IFLA_VF_LINK_STATE_AUTO;
+}
+
+static int mlx5_ifla_link2vport(u8 ifla_link)
+{
+	/* Anything other than an explicit DISABLE/ENABLE maps to AUTO. */
+	if (ifla_link == IFLA_VF_LINK_STATE_DISABLE)
+		return MLX5_ESW_VPORT_ADMIN_STATE_DOWN;
+	if (ifla_link == IFLA_VF_LINK_STATE_ENABLE)
+		return MLX5_ESW_VPORT_ADMIN_STATE_UP;
+
+	return MLX5_ESW_VPORT_ADMIN_STATE_AUTO;
+}
+
+static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
+				   int link_state)
+{
+	struct mlx5e_priv *en_priv = netdev_priv(dev);
+	struct mlx5_eswitch *esw = en_priv->mdev->priv.eswitch;
+	int admin_state = mlx5_ifla_link2vport(link_state);
+
+	return mlx5_eswitch_set_vport_state(esw, vf + 1, admin_state);
+}
+
+static int mlx5e_get_vf_config(struct net_device *dev,
+			       int vf, struct ifla_vf_info *ivi)
+{
+	struct mlx5e_priv *en_priv = netdev_priv(dev);
+	struct mlx5_eswitch *esw = en_priv->mdev->priv.eswitch;
+	int ret = mlx5_eswitch_get_vport_config(esw, vf + 1, ivi);
+
+	/* On success, translate the e-switch admin state to IFLA terms. */
+	if (!ret)
+		ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate);
+
+	return ret;
+}
+
+static int mlx5e_get_vf_stats(struct net_device *dev,
+			      int vf, struct ifla_vf_stats *vf_stats)
+{
+	struct mlx5e_priv *en_priv = netdev_priv(dev);
+	struct mlx5_eswitch *esw = en_priv->mdev->priv.eswitch;
+	int vport = vf + 1; /* vport 0 is the PF, VFs follow */
+
+	return mlx5_eswitch_get_vport_stats(esw, vport, vf_stats);
+}
+
 static struct net_device_ops mlx5e_netdev_ops = {
 	.ndo_open                = mlx5e_open,
 	.ndo_stop                = mlx5e_close,
@@ -1941,7 +2015,7 @@ static struct net_device_ops mlx5e_netdev_ops = {
 	.ndo_vlan_rx_add_vid	 = mlx5e_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	 = mlx5e_vlan_rx_kill_vid,
 	.ndo_set_features        = mlx5e_set_features,
-	.ndo_change_mtu		 = mlx5e_change_mtu,
+	.ndo_change_mtu		 = mlx5e_change_mtu
 };
 
 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -2028,7 +2102,12 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
-	mlx5_query_nic_vport_mac_address(priv->mdev, netdev->dev_addr);
+	mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr);
+	if (is_zero_ether_addr(netdev->dev_addr) &&
+	    !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) {
+		eth_hw_addr_random(netdev);
+		mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr);
+	}
 }
 
 static void mlx5e_build_netdev(struct net_device *netdev)
@@ -2041,6 +2120,14 @@ static void mlx5e_build_netdev(struct net_device *netdev)
 	if (priv->params.num_tc > 1)
 		mlx5e_netdev_ops.ndo_select_queue = mlx5e_select_queue;
 
+	if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
+		mlx5e_netdev_ops.ndo_set_vf_mac = mlx5e_set_vf_mac;
+		mlx5e_netdev_ops.ndo_set_vf_vlan = mlx5e_set_vf_vlan;
+		mlx5e_netdev_ops.ndo_get_vf_config = mlx5e_get_vf_config;
+		mlx5e_netdev_ops.ndo_set_vf_link_state = mlx5e_set_vf_link_state;
+		mlx5e_netdev_ops.ndo_get_vf_stats = mlx5e_get_vf_stats;
+	}
+
 	netdev->netdev_ops        = &mlx5e_netdev_ops;
 	netdev->watchdog_timeo    = 15 * HZ;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 713ead583347..23c244a7e5d7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -35,6 +35,9 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
+#ifdef CONFIG_MLX5_CORE_EN
+#include "eswitch.h"
+#endif
 
 enum {
 	MLX5_EQE_SIZE		= sizeof(struct mlx5_eqe),
@@ -287,6 +290,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 			break;
 #endif
 
+#ifdef CONFIG_MLX5_CORE_EN
+		case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
+			mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
+			break;
+#endif
 		default:
 			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
 				       eqe->type, eq->eqn);
@@ -459,6 +467,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 	if (MLX5_CAP_GEN(dev, pg))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT);
 
+	if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
+	    MLX5_CAP_GEN(dev, vport_group_manager) &&
+	    mlx5_core_is_pf(dev))
+		async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
+
 	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
 				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
 				 "mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
new file mode 100644
index 000000000000..bc3d9f8a75c1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -0,0 +1,1097 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_core.h"
+#include "eswitch.h"
+
+#define UPLINK_VPORT 0xFFFF /* destination vport of forward-to-uplink rules */
+
+#define MLX5_DEBUG_ESWITCH_MASK BIT(3) /* mask handed to mlx5_core_dbg_mask() */
+
+#define esw_info(dev, format, ...)				\
+	pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
+
+#define esw_warn(dev, format, ...)				\
+	pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
+
+#define esw_debug(dev, format, ...)				\
+	mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
+
+enum {
+	MLX5_ACTION_NONE = 0, /* entry is in sync, nothing to apply */
+	MLX5_ACTION_ADD  = 1, /* pending: added on the next apply pass */
+	MLX5_ACTION_DEL  = 2, /* pending: removed on the next apply pass */
+};
+
+/* E-Switch UC L2 table hash node */
+struct esw_uc_addr {
+	struct l2addr_node node;
+	u32                table_index; /* slot allocated in the HW L2 table */
+	u32                vport;       /* vport that currently owns the MAC */
+};
+
+/* E-Switch MC FDB table hash node */
+struct esw_mc_addr { /* SRIOV only */
+	struct l2addr_node     node;
+	struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */
+	u32                    refcnt; /* one reference per vport MC entry */
+};
+
+/* Vport UC/MC hash node */
+struct vport_addr {
+	struct l2addr_node     node;
+	u8                     action; /* pending MLX5_ACTION_* for this entry */
+	u32                    vport;  /* vport number the address belongs to */
+	struct mlx5_flow_rule *flow_rule; /* SRIOV only */
+};
+
+enum {
+	UC_ADDR_CHANGE = BIT(0), /* arms event_on_uc_address_change */
+	MC_ADDR_CHANGE = BIT(1), /* arms event_on_mc_address_change */
+};
+
+/* Vport context events enabled on every vport when SRIOV is turned on */
+#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \
+			    MC_ADDR_CHANGE)
+
+/*
+ * Arm the change event of a NIC vport context.
+ * @dev: device used to execute the command
+ * @vport: vport number; non-zero values are sent with other_vport set
+ * @events_mask: UC_ADDR_CHANGE / MC_ADDR_CHANGE bits selecting the triggers
+ * Returns 0 on success or the error from command execution/status.
+ */
+static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
+					u32 events_mask)
+{
+	int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
+	int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0};
+	void *ctx;
+	int ret;
+
+	/* Command header: modify the change_event field of the vport. */
+	MLX5_SET(modify_nic_vport_context_in, in,
+		 opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+	MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1);
+	MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+	/* A non-zero vport number is flagged as "other vport". */
+	if (vport)
+		MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+
+	/* Context payload: arm the event and pick which changes raise it. */
+	ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context);
+	MLX5_SET(nic_vport_context, ctx, arm_change_event, 1);
+	if (events_mask & UC_ADDR_CHANGE)
+		MLX5_SET(nic_vport_context, ctx, event_on_uc_address_change, 1);
+	if (events_mask & MC_ADDR_CHANGE)
+		MLX5_SET(nic_vport_context, ctx, event_on_mc_address_change, 1);
+
+	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (ret)
+		return ret;
+
+	/* Execution succeeded; report the status found in the output. */
+	return mlx5_cmd_status_to_err_v2(out);
+}
+
+/* E-Switch vport context HW commands */
+/* Query the e-switch context of @vport into @out (@outlen bytes). */
+static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport,
+				       u32 *out, int outlen)
+{
+	u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)];
+
+	memset(in, 0, sizeof(in));
+	/* Every field is set through the layout "in" was sized with. */
+	MLX5_SET(query_esw_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+	MLX5_SET(query_esw_vport_context_in, in, vport_number, vport);
+	if (vport)
+		MLX5_SET(query_esw_vport_context_in, in, other_vport, 1);
+
+	return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
+}
+
+/*
+ * Read the C-VLAN id and PCP programmed on @vport's e-switch context.
+ * *vlan and *qos are zeroed first and stay zero unless strip or insert
+ * is enabled on the vport.
+ */
+static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
+				 u16 *vlan, u8 *qos)
+{
+	u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {0};
+	bool strip, insert;
+	int ret;
+
+	*vlan = 0;
+	*qos = 0;
+
+	if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
+	    !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
+		return -ENOTSUPP;
+
+	ret = query_esw_vport_context_cmd(dev, vport, out, sizeof(out));
+	if (ret)
+		return ret;
+
+	strip = MLX5_GET(query_esw_vport_context_out, out,
+			 esw_vport_context.vport_cvlan_strip);
+	insert = MLX5_GET(query_esw_vport_context_out, out,
+			  esw_vport_context.vport_cvlan_insert);
+	if (strip || insert) {
+		*vlan = MLX5_GET(query_esw_vport_context_out, out,
+				 esw_vport_context.cvlan_id);
+		*qos = MLX5_GET(query_esw_vport_context_out, out,
+				esw_vport_context.cvlan_pcp);
+	}
+
+	esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n",
+		  vport, *vlan, *qos);
+
+	return 0;
+}
+
+/* Stamp opcode and vport addressing into @in, then execute it. */
+static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
+					void *in, int inlen)
+{
+	u32 reply[MLX5_ST_SZ_DW(modify_esw_vport_context_out)] = {0};
+
+	MLX5_SET(modify_esw_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
+	MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
+
+	/* other_vport is only set for a non-zero vport number. */
+	if (vport)
+		MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
+
+	return mlx5_cmd_exec_check_status(dev, in, inlen,
+					  reply, sizeof(reply));
+}
+
+/*
+ * Program (@set) or clear (!@set) C-VLAN strip plus insert-if-untagged on
+ * @vport; both field_select bits are always raised, so clearing writes 0.
+ */
+static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
+				  u16 vlan, u8 qos, bool set)
+{
+	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};
+
+	if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
+	    !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
+		return -ENOTSUPP;
+
+	esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n",
+		  vport, vlan, qos, set);
+
+	MLX5_SET(modify_esw_vport_context_in, in,
+		 field_select.vport_cvlan_strip, 1);
+	MLX5_SET(modify_esw_vport_context_in, in,
+		 field_select.vport_cvlan_insert, 1);
+	if (set) {
+		MLX5_SET(modify_esw_vport_context_in, in,
+			 esw_vport_context.vport_cvlan_strip, 1);
+		MLX5_SET(modify_esw_vport_context_in, in,
+			 esw_vport_context.vport_cvlan_insert, 1);
+		MLX5_SET(modify_esw_vport_context_in, in,
+			 esw_vport_context.cvlan_id, vlan);
+		MLX5_SET(modify_esw_vport_context_in, in,
+			 esw_vport_context.cvlan_pcp, qos);
+	}
+
+	return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in));
+}
+
+/* HW L2 Table (MPFS) management */
+/* Write @mac (and optional @vlan) into HW L2 table slot @index. */
+static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index,
+				  u8 *mac, u8 vlan_valid, u16 vlan)
+{
+	u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0};
+	u8 *mac_field;
+
+	/* Command header and entry attributes. */
+	MLX5_SET(set_l2_table_entry_in, in, opcode,
+		 MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
+	MLX5_SET(set_l2_table_entry_in, in, table_index, index);
+	MLX5_SET(set_l2_table_entry_in, in, vlan_valid, vlan_valid);
+	MLX5_SET(set_l2_table_entry_in, in, vlan, vlan);
+
+	/* The 6-byte MAC is copied 2 bytes into the mac_address field. */
+	mac_field = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address);
+	ether_addr_copy(&mac_field[2], mac);
+
+	return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
+					  out, sizeof(out));
+}
+
+/* Remove the HW L2 table entry stored at slot @index. */
+static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
+{
+	u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0};
+
+	MLX5_SET(delete_l2_table_entry_in, in, opcode,
+		 MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
+	/* The slot index is the only entry-specific field that is set. */
+	MLX5_SET(delete_l2_table_entry_in, in, table_index, index);
+
+	return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
+					  out, sizeof(out));
+}
+
+/* Claim the lowest free L2 table slot; *ix is written even on failure. */
+static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix)
+{
+	*ix = find_first_zero_bit(l2_table->bitmap, l2_table->size);
+	if (*ix >= l2_table->size)
+		return -ENOSPC;	/* every slot is taken */
+
+	/* Mark the slot as used. */
+	__set_bit(*ix, l2_table->bitmap);
+
+	return 0;
+}
+
+static void free_l2_table_index(struct mlx5_l2_table *l2_table, u32 ix)
+{
+	__clear_bit(ix, l2_table->bitmap); /* slot can be allocated again */
+}
+
+/* Allocate an L2 table slot and program @mac into it; slot goes to *index. */
+static int set_l2_table_entry(struct mlx5_core_dev *dev, u8 *mac,
+			      u8 vlan_valid, u16 vlan,
+			      u32 *index)
+{
+	struct mlx5_l2_table *tbl = &dev->priv.eswitch->l2_table;
+	int ret = alloc_l2_table_index(tbl, index);
+
+	if (ret)
+		return ret;
+
+	ret = set_l2_table_entry_cmd(dev, *index, mac, vlan_valid, vlan);
+	if (ret)
+		free_l2_table_index(tbl, *index); /* give the slot back */
+
+	return ret;
+}
+
+static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index)
+{
+	/* The command's status is not checked; the slot is freed regardless. */
+	del_l2_table_entry_cmd(dev, index);
+
+	free_l2_table_index(&dev->priv.eswitch->l2_table, index);
+}
+
+/* E-Switch FDB */
+/*
+ * Add an FDB rule forwarding frames whose destination MAC equals @mac to
+ * @vport. Returns the new rule, or NULL on any failure (a warning is logged).
+ */
+static struct mlx5_flow_rule *
+esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
+{
+	struct mlx5_flow_destination dest;
+	struct mlx5_flow_rule *rule = NULL;
+	u32 *value;
+	u32 *mask;
+	u8 *dmac_v;
+	u8 *dmac_c;
+
+	value = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+	mask = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+	if (!value || !mask) {
+		pr_warn("FDB: Failed to alloc match parameters\n");
+		goto free_match;
+	}
+
+	/* Exact match on the full destination MAC of the outer headers. */
+	dmac_v = MLX5_ADDR_OF(fte_match_param, value,
+			      outer_headers.dmac_47_16);
+	dmac_c = MLX5_ADDR_OF(fte_match_param, mask,
+			      outer_headers.dmac_47_16);
+	ether_addr_copy(dmac_v, mac);
+	memset(dmac_c, 0xff, 6);
+
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+	dest.vport_num = vport;
+
+	esw_debug(esw->dev,
+		  "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n",
+		  dmac_v, dmac_c, vport);
+	rule = mlx5_add_flow_rule(esw->fdb_table.fdb, MLX5_MATCH_OUTER_HEADERS,
+				  mask, value,
+				  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, &dest);
+	if (IS_ERR_OR_NULL(rule)) {
+		pr_warn(
+			"FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
+			 dmac_v, dmac_c, vport, PTR_ERR(rule));
+		rule = NULL;
+	}
+
+free_match:
+	kfree(value);
+	kfree(mask);
+	return rule;
+}
+
+/* Create the FDB table plus one table-wide flow group matching on DMAC. */
+static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
+{
+	struct mlx5_core_dev *dev = esw->dev;
+	struct mlx5_flow_namespace *root_ns;
+	struct mlx5_flow_table *fdb;
+	struct mlx5_flow_group *g;
+	void *match_criteria;
+	int table_size;
+	u32 *flow_group_in;
+	u8 *dmac;
+	int err = 0;
+
+	esw_debug(dev, "Create FDB log_max_size(%d)\n",
+		  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+
+	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+	if (!root_ns) {
+		esw_warn(dev, "Failed to get FDB flow namespace\n");
+		return -ENOMEM;
+	}
+
+	/* Zeroed by mlx5_vzalloc(); may be vmalloc memory, so kvfree() it. */
+	flow_group_in = mlx5_vzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in));
+	if (!flow_group_in)
+		return -ENOMEM;
+
+	table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+	fdb = mlx5_create_flow_table(root_ns, 0, table_size);
+	if (IS_ERR_OR_NULL(fdb)) {
+		err = fdb ? PTR_ERR(fdb) : -ENOMEM;
+		esw_warn(dev, "Failed to create FDB Table err %d\n", err);
+		goto out;
+	}
+
+	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+		 MLX5_MATCH_OUTER_HEADERS);
+	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+	dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16);
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1);
+	eth_broadcast_addr(dmac);
+
+	g = mlx5_create_flow_group(fdb, flow_group_in);
+	if (IS_ERR_OR_NULL(g)) {
+		err = g ? PTR_ERR(g) : -ENOMEM;
+		esw_warn(dev, "Failed to create flow group err(%d)\n", err);
+		goto out;
+	}
+
+	esw->fdb_table.addr_grp = g;
+	esw->fdb_table.fdb = fdb;
+out:
+	kvfree(flow_group_in);
+	if (err && !IS_ERR_OR_NULL(fdb))
+		mlx5_destroy_flow_table(fdb);
+	return err;
+}
+
+static void esw_destroy_fdb_table(struct mlx5_eswitch *esw)
+{
+	if (!esw->fdb_table.fdb)
+		return; /* no FDB table exists, nothing to destroy */
+
+	esw_debug(esw->dev, "Destroy FDB Table\n");
+	mlx5_destroy_flow_group(esw->fdb_table.addr_grp);
+	mlx5_destroy_flow_table(esw->fdb_table.fdb);
+	esw->fdb_table.fdb = NULL; /* other code tests this for "FDB exists" */
+	esw->fdb_table.addr_grp = NULL;
+}
+
+/* E-Switch vport UC/MC lists management */
+typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,
+				 struct vport_addr *vaddr); /* add/del one address */
+
+/* Claim a UC MAC for a vport: L2 hash, HW L2 table and, if any, FDB rule. */
+static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+	struct hlist_head *hash = esw->l2_table.l2_hash;
+	struct esw_uc_addr *esw_uc;
+	u8 *mac = vaddr->node.addr;
+	u32 vport = vaddr->vport;
+	int err;
+
+	esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr);
+	if (esw_uc) {
+		esw_warn(esw->dev,
+			 "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n",
+			 mac, vport, esw_uc->vport);
+		return -EEXIST;
+	}
+
+	esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL);
+	if (!esw_uc)
+		return -ENOMEM;
+	esw_uc->vport = vport;
+
+	err = set_l2_table_entry(esw->dev, mac, 0, 0, &esw_uc->table_index);
+	if (err) {
+		l2addr_hash_del(esw_uc);
+		return err;
+	}
+
+	if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */
+		vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
+
+	esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n",
+		  vport, mac, esw_uc->table_index, vaddr->flow_rule);
+	return 0;
+}
+
+/* Undo esw_add_uc_addr(); refused unless the MAC is owned by this vport. */
+static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+	struct hlist_head *hash = esw->l2_table.l2_hash;
+	struct esw_uc_addr *esw_uc;
+	u8 *mac = vaddr->node.addr;
+	u32 vport = vaddr->vport;
+
+	esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr);
+	if (!esw_uc || esw_uc->vport != vport) {
+		esw_debug(esw->dev,
+			  "MAC(%pM) doesn't belong to vport (%d)\n",
+			  mac, vport);
+		return -EINVAL;
+	}
+	esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n",
+		  vport, mac, esw_uc->table_index, vaddr->flow_rule);
+
+	del_l2_table_entry(esw->dev, esw_uc->table_index);
+	if (vaddr->flow_rule) {
+		mlx5_del_flow_rule(vaddr->flow_rule);
+		vaddr->flow_rule = NULL;
+	}
+	l2addr_hash_del(esw_uc);
+	return 0;
+}
+
+/* Add an MC MAC for a vport; the first user also installs the uplink rule. */
+static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+	struct hlist_head *hash = esw->mc_table;
+	struct esw_mc_addr *esw_mc;
+	u8 *mac = vaddr->node.addr;
+	u32 vport = vaddr->vport;
+
+	if (!esw->fdb_table.fdb)
+		return 0;
+
+	esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr);
+	if (!esw_mc) {
+		esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr,
+					 GFP_KERNEL);
+		if (!esw_mc)
+			return -ENOMEM;
+		esw_mc->uplink_rule = /* Forward MC MAC to Uplink */
+			esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT);
+	}
+
+	esw_mc->refcnt++;
+	/* Forward MC MAC to vport */
+	vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
+	esw_debug(esw->dev,
+		  "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n",
+		  vport, mac, vaddr->flow_rule,
+		  esw_mc->refcnt, esw_mc->uplink_rule);
+	return 0;
+}
+
+/* Drop a vport's MC MAC; the last user also removes the uplink rule. */
+static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+	struct hlist_head *hash = esw->mc_table;
+	struct esw_mc_addr *esw_mc;
+	u8 *mac = vaddr->node.addr;
+	u32 vport = vaddr->vport;
+
+	if (!esw->fdb_table.fdb)
+		return 0;
+
+	esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr);
+	if (!esw_mc) {
+		esw_warn(esw->dev,
+			 "Failed to find eswitch MC addr for MAC(%pM) vport(%d)\n",
+			 mac, vport);
+		return -EINVAL;
+	}
+	esw_debug(esw->dev,
+		  "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n",
+		  vport, mac, vaddr->flow_rule, esw_mc->refcnt,
+		  esw_mc->uplink_rule);
+
+	if (vaddr->flow_rule)
+		mlx5_del_flow_rule(vaddr->flow_rule);
+	vaddr->flow_rule = NULL;
+
+	if (--esw_mc->refcnt)
+		return 0;
+
+	if (esw_mc->uplink_rule)
+		mlx5_del_flow_rule(esw_mc->uplink_rule);
+	l2addr_hash_del(esw_mc);
+	return 0;
+}
+
+/* Apply pending ADD/DEL actions of a vport UC/MC list to HW l2 table and FDB */
+static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw,
+				      u32 vport_num, int list_type)
+{
+	struct mlx5_vport *vport = &esw->vports[vport_num];
+	bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
+	vport_addr_action vport_addr_add;
+	vport_addr_action vport_addr_del;
+	struct vport_addr *addr;
+	struct l2addr_node *node;
+	struct hlist_head *hash;
+	struct hlist_node *tmp;
+	int hi;
+
+	vport_addr_add = is_uc ? esw_add_uc_addr :
+				 esw_add_mc_addr;
+	vport_addr_del = is_uc ? esw_del_uc_addr :
+				 esw_del_mc_addr;
+
+	hash = is_uc ? vport->uc_list : vport->mc_list;
+	for_each_l2hash_node(node, tmp, hash, hi) {
+		addr = container_of(node, struct vport_addr, node);
+		switch (addr->action) {
+		case MLX5_ACTION_ADD:
+			vport_addr_add(esw, addr); /* result is not checked */
+			addr->action = MLX5_ACTION_NONE;
+			break;
+		case MLX5_ACTION_DEL:
+			vport_addr_del(esw, addr); /* result is not checked */
+			l2addr_hash_del(addr); /* also leaves the vport list */
+			break;
+		}
+	}
+}
+
+/* Sync vport UC/MC list from vport context: stale -> DEL, new -> ADD */
+static void esw_update_vport_addr_list(struct mlx5_eswitch *esw,
+				       u32 vport_num, int list_type)
+{
+	struct mlx5_vport *vport = &esw->vports[vport_num];
+	bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
+	u8 (*mac_list)[ETH_ALEN];
+	struct l2addr_node *node;
+	struct vport_addr *addr;
+	struct hlist_head *hash;
+	struct hlist_node *tmp;
+	int size;
+	int err;
+	int hi;
+	int i;
+
+	size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) :
+		       MLX5_MAX_MC_PER_VPORT(esw->dev);
+
+	mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL);
+	if (!mac_list)
+		return;
+
+	hash = is_uc ? vport->uc_list : vport->mc_list;
+
+	for_each_l2hash_node(node, tmp, hash, hi) {
+		addr = container_of(node, struct vport_addr, node);
+		addr->action = MLX5_ACTION_DEL;
+	}
+
+	err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type,
+					    mac_list, &size);
+	if (err)
+		goto out; /* mac_list must still be freed */
+	esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n",
+		  vport_num, is_uc ? "UC" : "MC", size);
+
+	for (i = 0; i < size; i++) {
+		if (is_uc && !is_valid_ether_addr(mac_list[i]))
+			continue;
+		if (!is_uc && !is_multicast_ether_addr(mac_list[i]))
+			continue;
+
+		addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr);
+		if (addr) {
+			addr->action = MLX5_ACTION_NONE;
+			continue;
+		}
+
+		addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr,
+				       GFP_KERNEL);
+		if (!addr) {
+			esw_warn(esw->dev,
+				 "Failed to add MAC(%pM) to vport[%d] DB\n",
+				 mac_list[i], vport_num);
+			continue;
+		}
+		addr->vport = vport_num;
+		addr->action = MLX5_ACTION_ADD;
+	}
+out:
+	kfree(mac_list);
+}
+
+static void esw_vport_change_handler(struct work_struct *work)
+{
+	struct mlx5_vport *vport =
+		container_of(work, struct mlx5_vport, vport_change_handler);
+	struct mlx5_core_dev *dev = vport->dev;
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+	u8 mac[ETH_ALEN] = {0}; /* logged below even if the query fails */
+
+	mlx5_query_nic_vport_mac_address(dev, vport->vport, mac);
+	esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n",
+		  vport->vport, mac);
+	/* Re-sync each address list whose change event is enabled. */
+	if (vport->enabled_events & UC_ADDR_CHANGE) {
+		esw_update_vport_addr_list(esw, vport->vport,
+					   MLX5_NVPRT_LIST_TYPE_UC);
+		esw_apply_vport_addr_list(esw, vport->vport,
+					  MLX5_NVPRT_LIST_TYPE_UC);
+	}
+
+	if (vport->enabled_events & MC_ADDR_CHANGE) {
+		esw_update_vport_addr_list(esw, vport->vport,
+					   MLX5_NVPRT_LIST_TYPE_MC);
+		esw_apply_vport_addr_list(esw, vport->vport,
+					  MLX5_NVPRT_LIST_TYPE_MC);
+	}
+
+	esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport);
+	if (vport->enabled) /* arm again only while the vport is enabled */
+		arm_vport_context_events_cmd(dev, vport->vport,
+					     vport->enabled_events);
+}
+
+static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
+			     int enable_events)
+{
+	struct mlx5_vport *vport = &esw->vports[vport_num];
+	unsigned long flags;
+
+	WARN_ON(vport->enabled);
+
+	esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
+	mlx5_modify_vport_admin_state(esw->dev,
+				      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+				      vport_num,
+				      MLX5_ESW_VPORT_ADMIN_STATE_AUTO);
+
+	/* Sync with current vport context by calling the handler directly */
+	vport->enabled_events = enable_events;
+	esw_vport_change_handler(&vport->vport_change_handler);
+
+	spin_lock_irqsave(&vport->lock, flags);
+	vport->enabled = true; /* vport events get queued from now on */
+	spin_unlock_irqrestore(&vport->lock, flags);
+
+	arm_vport_context_events_cmd(esw->dev, vport_num, enable_events);
+
+	esw->enabled_vports++;
+	esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num);
+}
+
+static void esw_cleanup_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+	struct mlx5_vport *vport = &esw->vports[vport_num];
+	struct l2addr_node *node;
+	struct vport_addr *addr;
+	struct hlist_node *tmp;
+	int hi;
+
+	for_each_l2hash_node(node, tmp, vport->uc_list, hi) {
+		addr = container_of(node, struct vport_addr, node);
+		addr->action = MLX5_ACTION_DEL; /* every UC entry goes away */
+	}
+	esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_UC);
+
+	for_each_l2hash_node(node, tmp, vport->mc_list, hi) {
+		addr = container_of(node, struct vport_addr, node);
+		addr->action = MLX5_ACTION_DEL; /* every MC entry goes away */
+	}
+	esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_MC);
+}
+
+static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
+{
+	struct mlx5_vport *vport = &esw->vports[vport_num];
+	unsigned long flags;
+
+	if (!vport->enabled)
+		return; /* not enabled: nothing to undo */
+
+	esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num);
+	/* Mark this vport as disabled to discard new events */
+	spin_lock_irqsave(&vport->lock, flags);
+	vport->enabled = false;
+	vport->enabled_events = 0;
+	spin_unlock_irqrestore(&vport->lock, flags);
+
+	mlx5_modify_vport_admin_state(esw->dev,
+				      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+				      vport_num,
+				      MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
+	/* Wait for current already scheduled events to complete */
+	flush_workqueue(esw->work_queue);
+	/* Disable events from this vport */
+	arm_vport_context_events_cmd(esw->dev, vport->vport, 0);
+	/* We don't assume VFs will cleanup after themselves */
+	esw_cleanup_vport(esw, vport_num);
+	esw->enabled_vports--; /* pairs with the ++ in esw_enable_vport() */
+}
+
+/* Public E-Switch API */
+int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs)
+{
+	int err;
+	int i;
+
+	if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+	    MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return 0; /* no e-switch to configure; not an error */
+
+	if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) ||
+	    !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
+		esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
+		return -ENOTSUPP;
+	}
+
+	esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs);
+
+	esw_disable_vport(esw, 0); /* PF vport is enabled again below */
+
+	err = esw_create_fdb_table(esw, nvfs + 1);
+	if (err)
+		goto abort;
+
+	for (i = 0; i <= nvfs; i++) /* vport 0 (PF) plus vports 1..nvfs */
+		esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS);
+
+	esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
+		 esw->enabled_vports);
+	return 0;
+
+abort:
+	esw_enable_vport(esw, 0, UC_ADDR_CHANGE); /* back to non-SRIOV PF */
+	return err;
+}
+
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
+{
+	int i;
+
+	if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+	    MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return;
+
+	esw_info(esw->dev, "disable SRIOV: active vports(%d)\n",
+		 esw->enabled_vports);
+
+	for (i = 0; i < esw->total_vports; i++)
+		esw_disable_vport(esw, i); /* returns early if not enabled */
+
+	esw_destroy_fdb_table(esw);
+
+	/* VPORT 0 (PF) must be enabled back with non-sriov configuration */
+	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
+}
+
+int mlx5_eswitch_init(struct mlx5_core_dev *dev)
+{
+	int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
+	int total_vports = 1 + pci_sriov_get_totalvfs(dev->pdev);
+	struct mlx5_eswitch *esw;
+	int vport_num;
+	int err;
+
+	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+	    MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return 0; /* no e-switch is created; not an error */
+
+	esw_info(dev,
+		 "Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n",
+		 total_vports, l2_table_size,
+		 MLX5_MAX_UC_PER_VPORT(dev),
+		 MLX5_MAX_MC_PER_VPORT(dev));
+
+	esw = kzalloc(sizeof(*esw), GFP_KERNEL);
+	if (!esw)
+		return -ENOMEM;
+
+	esw->dev = dev;
+
+	esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size),
+				   sizeof(uintptr_t), GFP_KERNEL);
+	if (!esw->l2_table.bitmap) {
+		err = -ENOMEM;
+		goto abort;
+	}
+	esw->l2_table.size = l2_table_size;
+
+	esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
+	if (!esw->work_queue) {
+		err = -ENOMEM;
+		goto abort;
+	}
+
+	esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport),
+			      GFP_KERNEL);
+	if (!esw->vports) {
+		err = -ENOMEM;
+		goto abort;
+	}
+
+	for (vport_num = 0; vport_num < total_vports; vport_num++) {
+		struct mlx5_vport *vport = &esw->vports[vport_num];
+
+		vport->vport = vport_num;
+		vport->dev = dev;
+		INIT_WORK(&vport->vport_change_handler,
+			  esw_vport_change_handler);
+		spin_lock_init(&vport->lock);
+	}
+
+	esw->total_vports = total_vports;
+	esw->enabled_vports = 0;
+
+	dev->priv.eswitch = esw; /* set first: the change handler reads it */
+	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
+	/* VF Vports will be enabled when SRIOV is enabled */
+	return 0;
+abort:
+	if (esw->work_queue)
+		destroy_workqueue(esw->work_queue);
+	kfree(esw->l2_table.bitmap); /* still NULL (kzalloc) if never set */
+	kfree(esw->vports);
+	kfree(esw);
+	return err;
+}
+
+void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
+{
+	if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+	    MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return;
+	/* Disable vport 0 (PF), which init and disable_sriov leave enabled */
+	esw_info(esw->dev, "cleanup\n");
+	esw_disable_vport(esw, 0);
+	/* Unpublish the e-switch, then free what mlx5_eswitch_init() allocated */
+	esw->dev->priv.eswitch = NULL;
+	destroy_workqueue(esw->work_queue);
+	kfree(esw->l2_table.bitmap);
+	kfree(esw->vports);
+	kfree(esw);
+}
+
+void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
+{
+	u16 vport_num = be16_to_cpu(eqe->data.vport_change.vport_num);
+	struct mlx5_vport *vport;
+
+	if (!esw) {
+		pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n",
+			vport_num);
+		return;
+	}
+	if (vport_num >= esw->total_vports) /* index comes from the event: bound it */
+		return;
+	vport = &esw->vports[vport_num];
+	spin_lock(&vport->lock); /* vport->lock: see struct mlx5_vport */
+	if (vport->enabled) /* events for a disabled vport are dropped */
+		queue_work(esw->work_queue, &vport->vport_change_handler);
+	spin_unlock(&vport->lock);
+}
+
+/* Vport Administration: caller-permission and vport-range checks */
+#define ESW_ALLOWED(esw) \
+	((esw) && MLX5_CAP_GEN((esw)->dev, vport_group_manager) && mlx5_core_is_pf((esw)->dev))
+#define LEGAL_VPORT(esw, vport) ((vport) >= 0 && (vport) < (esw)->total_vports)
+
+/* Program @mac on @vport. -EPERM if !ESW_ALLOWED, -EINVAL if the vport is
+ * out of range, otherwise the result of the NIC vport modify command. */
+int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
+			       int vport, u8 mac[ETH_ALEN])
+{
+	int err;
+
+	if (!ESW_ALLOWED(esw))
+		return -EPERM;
+	if (!LEGAL_VPORT(esw, vport))
+		return -EINVAL;
+
+	err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
+	if (err) {
+		mlx5_core_warn(esw->dev,
+			       "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n",
+			       vport, err);
+	}
+	return err;
+}
+
+int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
+				 int vport, int link_state)
+{
+	if (!ESW_ALLOWED(esw))
+		return -EPERM; /* ESW_ALLOWED() rejected the caller */
+	if (!LEGAL_VPORT(esw, vport))
+		return -EINVAL; /* vport outside [0, total_vports) */
+	/* Set the admin state using the ESW_VPORT op_mod; result returned as-is */
+	return mlx5_modify_vport_admin_state(esw->dev,
+					     MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+					     vport, link_state);
+}
+
+int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
+				  int vport, struct ifla_vf_info *ivi)
+{
+	u16 vlan = 0; /* stay 0 if the cvlan query fails without writing them */
+	u8 qos = 0;
+
+	if (!ESW_ALLOWED(esw))
+		return -EPERM;
+	if (!LEGAL_VPORT(esw, vport))
+		return -EINVAL;
+
+	memset(ivi, 0, sizeof(*ivi));
+	ivi->vf = vport - 1; /* vport 0 is the PF, so VF n sits on vport n + 1 */
+	/* Best effort: the results of the queries below are not checked */
+	mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac);
+	ivi->linkstate = mlx5_query_vport_admin_state(esw->dev,
+						      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+						      vport);
+	query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos);
+	ivi->vlan = vlan;
+	ivi->qos = qos;
+	ivi->spoofchk = 0;
+
+	return 0;
+}
+
+/* Apply @vlan/@qos to @vport via modify_esw_vport_cvlan().
+ * -EPERM: caller not allowed; -EINVAL: bad vport, vlan > 4095 or qos > 7.
+ */
+int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+				int vport, u16 vlan, u8 qos)
+{
+	if (!ESW_ALLOWED(esw))
+		return -EPERM;
+	if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7))
+		return -EINVAL;
+
+	/* The "set" flag is 1 unless both vlan and qos are zero */
+	return modify_esw_vport_cvlan(esw->dev, vport, vlan, qos,
+				      vlan || qos);
+}
+
+/* Fill @vf_stats for @vport from QUERY_VPORT_COUNTER; rx/tx totals sum the
+ * unicast, multicast and broadcast counters. On error @vf_stats is untouched.
+ */
+int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
+				 int vport,
+				 struct ifla_vf_stats *vf_stats)
+{
+	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
+	int err = 0;
+	u32 *out;
+
+	if (!ESW_ALLOWED(esw))
+		return -EPERM;
+	if (!LEGAL_VPORT(esw, vport))
+		return -EINVAL;
+
+	out = mlx5_vzalloc(outlen);
+	if (!out)
+		return -ENOMEM;
+
+	memset(in, 0, sizeof(in));
+	MLX5_SET(query_vport_counter_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_VPORT_COUNTER);
+	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
+	MLX5_SET(query_vport_counter_in, in, vport_number, vport);
+	if (vport)
+		MLX5_SET(query_vport_counter_in, in, other_vport, 1);
+
+	/* out needs no memset: mlx5_vzalloc() returned it already zeroed */
+	err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen);
+	if (err)
+		goto free_out;
+
+	#define MLX5_GET_CTR(p, x) \
+		MLX5_GET64(query_vport_counter_out, p, x)
+
+	memset(vf_stats, 0, sizeof(*vf_stats));
+	vf_stats->rx_packets =
+		MLX5_GET_CTR(out, received_eth_unicast.packets) +
+		MLX5_GET_CTR(out, received_eth_multicast.packets) +
+		MLX5_GET_CTR(out, received_eth_broadcast.packets);
+
+	vf_stats->rx_bytes =
+		MLX5_GET_CTR(out, received_eth_unicast.octets) +
+		MLX5_GET_CTR(out, received_eth_multicast.octets) +
+		MLX5_GET_CTR(out, received_eth_broadcast.octets);
+
+	vf_stats->tx_packets =
+		MLX5_GET_CTR(out, transmitted_eth_unicast.packets) +
+		MLX5_GET_CTR(out, transmitted_eth_multicast.packets) +
+		MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
+
+	vf_stats->tx_bytes =
+		MLX5_GET_CTR(out, transmitted_eth_unicast.octets) +
+		MLX5_GET_CTR(out, transmitted_eth_multicast.octets) +
+		MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
+
+	vf_stats->multicast = MLX5_GET_CTR(out, received_eth_multicast.packets);
+	vf_stats->broadcast = MLX5_GET_CTR(out, received_eth_broadcast.packets);
+	#undef MLX5_GET_CTR
+
+free_out:
+	kvfree(out);
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
new file mode 100644
index 000000000000..3416a428f70f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_ESWITCH_H__
+#define __MLX5_ESWITCH_H__
+
+#include <linux/if_ether.h>
+#include <linux/if_link.h>
+#include <linux/mlx5/device.h>
+
+#define MLX5_MAX_UC_PER_VPORT(dev) \
+	(1 << MLX5_CAP_GEN(dev, log_max_current_uc_list))
+/* Both limits are read from the general caps as log2 values, hence 1 << */
+#define MLX5_MAX_MC_PER_VPORT(dev) \
+	(1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
+
+#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE))
+#define MLX5_L2_ADDR_HASH(addr) ((addr)[5])
+
+/* L2 MAC hash: entries embed "struct l2addr_node node"; bucket = last octet */
+struct l2addr_node {
+	struct hlist_node hlist;
+	u8                addr[ETH_ALEN];
+};
+
+#define for_each_l2hash_node(hn, tmp, hash, i) \
+	for ((i) = 0; (i) < MLX5_L2_ADDR_HASH_SIZE; (i)++) \
+		hlist_for_each_entry_safe(hn, tmp, &(hash)[i], hlist)
+/* The statement-expression macros below own the names ix, found and ptr */
+#define l2addr_hash_find(hash, mac, type) ({                \
+	int ix = MLX5_L2_ADDR_HASH(mac);                    \
+	bool found = false;                                 \
+	type *ptr = NULL;                                   \
+							    \
+	hlist_for_each_entry(ptr, &(hash)[ix], node.hlist)  \
+		if (ether_addr_equal(ptr->node.addr, mac)) {\
+			found = true;                       \
+			break;                              \
+		}                                           \
+	if (!found)                                         \
+		ptr = NULL;                                 \
+	ptr;                                                \
+})
+
+#define l2addr_hash_add(hash, mac, type, gfp) ({            \
+	int ix = MLX5_L2_ADDR_HASH(mac);                    \
+	type *ptr = NULL;                                   \
+							    \
+	ptr = kzalloc(sizeof(type), gfp);                   \
+	if (ptr) {                                          \
+		ether_addr_copy(ptr->node.addr, mac);       \
+		hlist_add_head(&ptr->node.hlist, &(hash)[ix]);\
+	}                                                   \
+	ptr;                                                \
+})
+
+#define l2addr_hash_del(ptr) ({                             \
+	hlist_del(&(ptr)->node.hlist);                      \
+	kfree(ptr);                                         \
+})
+
+struct mlx5_vport {
+	struct mlx5_core_dev    *dev; /* set by mlx5_eswitch_init() */
+	int                     vport; /* index into esw->vports[]; 0 is the PF */
+	struct hlist_head       uc_list[MLX5_L2_ADDR_HASH_SIZE]; /* presumably unicast MACs - confirm */
+	struct hlist_head       mc_list[MLX5_L2_ADDR_HASH_SIZE]; /* presumably multicast MACs - confirm */
+	struct work_struct      vport_change_handler; /* queued on esw->work_queue */
+
+	/* This spinlock protects access to vport data between
+	 * esw_disable_vport() and an ongoing interrupt in
+	 * mlx5_eswitch_vport_event(): once disabled, new events are discarded.
+	 */
+	spinlock_t              lock; /* vport events sync */
+	bool                    enabled; /* tested under lock before queueing work */
+	u16                     enabled_events; /* presumably an event mask such as UC_ADDR_CHANGE - confirm */
+};
+
+struct mlx5_l2_table {
+	struct hlist_head l2_hash[MLX5_L2_ADDR_HASH_SIZE]; /* buckets for the l2addr_hash_* helpers */
+	u32                  size; /* 1 << log_max_l2_table, set in mlx5_eswitch_init() */
+	unsigned long        *bitmap; /* BITS_TO_LONGS(size) words, kcalloc'ed in init */
+};
+
+struct mlx5_eswitch_fdb {
+	void *fdb; /* opaque table handle; NOTE(review): presumably torn down by esw_destroy_fdb_table() - confirm */
+	struct mlx5_flow_group *addr_grp; /* NOTE(review): presumably the address-match group of that table - confirm */
+};
+
+struct mlx5_eswitch {
+	struct mlx5_core_dev    *dev; /* owning device; points back via dev->priv.eswitch */
+	struct mlx5_l2_table    l2_table; /* bitmap and size are set up in mlx5_eswitch_init() */
+	struct mlx5_eswitch_fdb fdb_table;
+	struct hlist_head       mc_table[MLX5_L2_ADDR_HASH_SIZE]; /* presumably e-switch wide multicast MACs - confirm */
+	struct workqueue_struct *work_queue; /* single-threaded "mlx5_esw_wq" running vport change handlers */
+	struct mlx5_vport       *vports; /* total_vports entries, indexed by vport number */
+	int                     total_vports; /* 1 (PF) + pci_sriov_get_totalvfs() */
+	int                     enabled_vports; /* starts at 0 in mlx5_eswitch_init() */
+};
+
+/* E-Switch API */
+int mlx5_eswitch_init(struct mlx5_core_dev *dev);
+void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
+void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
+int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs);
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
+int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
+			       int vport, u8 mac[ETH_ALEN]);
+int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
+				 int vport, int link_state);
+int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+				int vport, u16 vlan, u8 qos);
+int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
+				  int vport, struct ifla_vf_info *ivi);
+int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
+				 int vport,
+				 struct ifla_vf_stats *vf_stats);
+
+#endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c
deleted file mode 100644
index ca90b9bc3b95..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c
+++ /dev/null
@@ -1,422 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/export.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/flow_table.h>
-#include "mlx5_core.h"
-
-struct mlx5_ftg {
-	struct mlx5_flow_table_group    g;
-	u32				id;
-	u32				start_ix;
-};
-
-struct mlx5_flow_table {
-	struct mlx5_core_dev	*dev;
-	u8			level;
-	u8			type;
-	u32			id;
-	struct mutex		mutex; /* sync bitmap alloc */
-	u16			num_groups;
-	struct mlx5_ftg		*group;
-	unsigned long		*bitmap;
-	u32			size;
-};
-
-static int mlx5_set_flow_entry_cmd(struct mlx5_flow_table *ft, u32 group_ix,
-				   u32 flow_index, void *flow_context)
-{
-	u32 out[MLX5_ST_SZ_DW(set_fte_out)];
-	u32 *in;
-	void *in_flow_context;
-	int fcdls =
-		MLX5_GET(flow_context, flow_context, destination_list_size) *
-		MLX5_ST_SZ_BYTES(dest_format_struct);
-	int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fcdls;
-	int err;
-
-	in = mlx5_vzalloc(inlen);
-	if (!in) {
-		mlx5_core_warn(ft->dev, "failed to allocate inbox\n");
-		return -ENOMEM;
-	}
-
-	MLX5_SET(set_fte_in, in, table_type, ft->type);
-	MLX5_SET(set_fte_in, in, table_id,   ft->id);
-	MLX5_SET(set_fte_in, in, flow_index, flow_index);
-	MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY);
-
-	in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
-	memcpy(in_flow_context, flow_context,
-	       MLX5_ST_SZ_BYTES(flow_context) + fcdls);
-
-	MLX5_SET(flow_context, in_flow_context, group_id,
-		 ft->group[group_ix].id);
-
-	memset(out, 0, sizeof(out));
-	err = mlx5_cmd_exec_check_status(ft->dev, in, inlen, out,
-					 sizeof(out));
-	kvfree(in);
-
-	return err;
-}
-
-static void mlx5_del_flow_entry_cmd(struct mlx5_flow_table *ft, u32 flow_index)
-{
-	u32 in[MLX5_ST_SZ_DW(delete_fte_in)];
-	u32 out[MLX5_ST_SZ_DW(delete_fte_out)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
-
-#define MLX5_SET_DFTEI(p, x, v) MLX5_SET(delete_fte_in, p, x, v)
-	MLX5_SET_DFTEI(in, table_type, ft->type);
-	MLX5_SET_DFTEI(in, table_id,   ft->id);
-	MLX5_SET_DFTEI(in, flow_index, flow_index);
-	MLX5_SET_DFTEI(in, opcode,     MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
-
-	mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out));
-}
-
-static void mlx5_destroy_flow_group_cmd(struct mlx5_flow_table *ft, int i)
-{
-	u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)];
-	u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
-
-#define MLX5_SET_DFGI(p, x, v) MLX5_SET(destroy_flow_group_in, p, x, v)
-	MLX5_SET_DFGI(in, table_type, ft->type);
-	MLX5_SET_DFGI(in, table_id,   ft->id);
-	MLX5_SET_DFGI(in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP);
-	MLX5_SET_DFGI(in, group_id, ft->group[i].id);
-	mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int mlx5_create_flow_group_cmd(struct mlx5_flow_table *ft, int i)
-{
-	u32 out[MLX5_ST_SZ_DW(create_flow_group_out)];
-	u32 *in;
-	void *in_match_criteria;
-	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-	struct mlx5_flow_table_group *g = &ft->group[i].g;
-	u32 start_ix = ft->group[i].start_ix;
-	u32 end_ix = start_ix + (1 << g->log_sz) - 1;
-	int err;
-
-	in = mlx5_vzalloc(inlen);
-	if (!in) {
-		mlx5_core_warn(ft->dev, "failed to allocate inbox\n");
-		return -ENOMEM;
-	}
-	in_match_criteria = MLX5_ADDR_OF(create_flow_group_in, in,
-					 match_criteria);
-
-	memset(out, 0, sizeof(out));
-
-#define MLX5_SET_CFGI(p, x, v) MLX5_SET(create_flow_group_in, p, x, v)
-	MLX5_SET_CFGI(in, table_type,            ft->type);
-	MLX5_SET_CFGI(in, table_id,              ft->id);
-	MLX5_SET_CFGI(in, opcode,                MLX5_CMD_OP_CREATE_FLOW_GROUP);
-	MLX5_SET_CFGI(in, start_flow_index,      start_ix);
-	MLX5_SET_CFGI(in, end_flow_index,        end_ix);
-	MLX5_SET_CFGI(in, match_criteria_enable, g->match_criteria_enable);
-
-	memcpy(in_match_criteria, g->match_criteria,
-	       MLX5_ST_SZ_BYTES(fte_match_param));
-
-	err = mlx5_cmd_exec_check_status(ft->dev, in, inlen, out,
-					 sizeof(out));
-	if (!err)
-		ft->group[i].id = MLX5_GET(create_flow_group_out, out,
-					   group_id);
-
-	kvfree(in);
-
-	return err;
-}
-
-static void mlx5_destroy_flow_table_groups(struct mlx5_flow_table *ft)
-{
-	int i;
-
-	for (i = 0; i < ft->num_groups; i++)
-		mlx5_destroy_flow_group_cmd(ft, i);
-}
-
-static int mlx5_create_flow_table_groups(struct mlx5_flow_table *ft)
-{
-	int err;
-	int i;
-
-	for (i = 0; i < ft->num_groups; i++) {
-		err = mlx5_create_flow_group_cmd(ft, i);
-		if (err)
-			goto err_destroy_flow_table_groups;
-	}
-
-	return 0;
-
-err_destroy_flow_table_groups:
-	for (i--; i >= 0; i--)
-		mlx5_destroy_flow_group_cmd(ft, i);
-
-	return err;
-}
-
-static int mlx5_create_flow_table_cmd(struct mlx5_flow_table *ft)
-{
-	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)];
-	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)];
-	int err;
-
-	memset(in, 0, sizeof(in));
-
-	MLX5_SET(create_flow_table_in, in, table_type, ft->type);
-	MLX5_SET(create_flow_table_in, in, level,      ft->level);
-	MLX5_SET(create_flow_table_in, in, log_size,   order_base_2(ft->size));
-
-	MLX5_SET(create_flow_table_in, in, opcode,
-		 MLX5_CMD_OP_CREATE_FLOW_TABLE);
-
-	memset(out, 0, sizeof(out));
-	err = mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out,
-					 sizeof(out));
-	if (err)
-		return err;
-
-	ft->id = MLX5_GET(create_flow_table_out, out, table_id);
-
-	return 0;
-}
-
-static void mlx5_destroy_flow_table_cmd(struct mlx5_flow_table *ft)
-{
-	u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)];
-	u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
-
-#define MLX5_SET_DFTI(p, x, v) MLX5_SET(destroy_flow_table_in, p, x, v)
-	MLX5_SET_DFTI(in, table_type, ft->type);
-	MLX5_SET_DFTI(in, table_id,   ft->id);
-	MLX5_SET_DFTI(in, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE);
-
-	mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int mlx5_find_group(struct mlx5_flow_table *ft, u8 match_criteria_enable,
-			   u32 *match_criteria, int *group_ix)
-{
-	void *mc_outer = MLX5_ADDR_OF(fte_match_param, match_criteria,
-				      outer_headers);
-	void *mc_misc  = MLX5_ADDR_OF(fte_match_param, match_criteria,
-				      misc_parameters);
-	void *mc_inner = MLX5_ADDR_OF(fte_match_param, match_criteria,
-				      inner_headers);
-	int mc_outer_sz = MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4);
-	int mc_misc_sz  = MLX5_ST_SZ_BYTES(fte_match_set_misc);
-	int mc_inner_sz = MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4);
-	int i;
-
-	for (i = 0; i < ft->num_groups; i++) {
-		struct mlx5_flow_table_group *g = &ft->group[i].g;
-		void *gmc_outer = MLX5_ADDR_OF(fte_match_param,
-					       g->match_criteria,
-					       outer_headers);
-		void *gmc_misc  = MLX5_ADDR_OF(fte_match_param,
-					       g->match_criteria,
-					       misc_parameters);
-		void *gmc_inner = MLX5_ADDR_OF(fte_match_param,
-					       g->match_criteria,
-					       inner_headers);
-
-		if (g->match_criteria_enable != match_criteria_enable)
-			continue;
-
-		if (match_criteria_enable & MLX5_MATCH_OUTER_HEADERS)
-			if (memcmp(mc_outer, gmc_outer, mc_outer_sz))
-				continue;
-
-		if (match_criteria_enable & MLX5_MATCH_MISC_PARAMETERS)
-			if (memcmp(mc_misc, gmc_misc, mc_misc_sz))
-				continue;
-
-		if (match_criteria_enable & MLX5_MATCH_INNER_HEADERS)
-			if (memcmp(mc_inner, gmc_inner, mc_inner_sz))
-				continue;
-
-		*group_ix = i;
-		return 0;
-	}
-
-	return -EINVAL;
-}
-
-static int alloc_flow_index(struct mlx5_flow_table *ft, int group_ix, u32 *ix)
-{
-	struct mlx5_ftg *g = &ft->group[group_ix];
-	int err = 0;
-
-	mutex_lock(&ft->mutex);
-
-	*ix = find_next_zero_bit(ft->bitmap, ft->size, g->start_ix);
-	if (*ix >= (g->start_ix + (1 << g->g.log_sz)))
-		err = -ENOSPC;
-	else
-		__set_bit(*ix, ft->bitmap);
-
-	mutex_unlock(&ft->mutex);
-
-	return err;
-}
-
-static void mlx5_free_flow_index(struct mlx5_flow_table *ft, u32 ix)
-{
-	__clear_bit(ix, ft->bitmap);
-}
-
-int mlx5_add_flow_table_entry(void *flow_table, u8 match_criteria_enable,
-			      void *match_criteria, void *flow_context,
-			      u32 *flow_index)
-{
-	struct mlx5_flow_table *ft = flow_table;
-	int group_ix;
-	int err;
-
-	err = mlx5_find_group(ft, match_criteria_enable, match_criteria,
-			      &group_ix);
-	if (err) {
-		mlx5_core_warn(ft->dev, "mlx5_find_group failed\n");
-		return err;
-	}
-
-	err = alloc_flow_index(ft, group_ix, flow_index);
-	if (err) {
-		mlx5_core_warn(ft->dev, "alloc_flow_index failed\n");
-		return err;
-	}
-
-	return mlx5_set_flow_entry_cmd(ft, group_ix, *flow_index, flow_context);
-}
-EXPORT_SYMBOL(mlx5_add_flow_table_entry);
-
-void mlx5_del_flow_table_entry(void *flow_table, u32 flow_index)
-{
-	struct mlx5_flow_table *ft = flow_table;
-
-	mlx5_del_flow_entry_cmd(ft, flow_index);
-	mlx5_free_flow_index(ft, flow_index);
-}
-EXPORT_SYMBOL(mlx5_del_flow_table_entry);
-
-void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type,
-			     u16 num_groups,
-			     struct mlx5_flow_table_group *group)
-{
-	struct mlx5_flow_table *ft;
-	u32 start_ix = 0;
-	u32 ft_size = 0;
-	void *gr;
-	void *bm;
-	int err;
-	int i;
-
-	for (i = 0; i < num_groups; i++)
-		ft_size += (1 << group[i].log_sz);
-
-	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
-	gr = kcalloc(num_groups, sizeof(struct mlx5_ftg), GFP_KERNEL);
-	bm = kcalloc(BITS_TO_LONGS(ft_size), sizeof(uintptr_t), GFP_KERNEL);
-	if (!ft || !gr || !bm)
-		goto err_free_ft;
-
-	ft->group	= gr;
-	ft->bitmap	= bm;
-	ft->num_groups	= num_groups;
-	ft->level	= level;
-	ft->type	= table_type;
-	ft->size	= ft_size;
-	ft->dev		= dev;
-	mutex_init(&ft->mutex);
-
-	for (i = 0; i < ft->num_groups; i++) {
-		memcpy(&ft->group[i].g, &group[i], sizeof(*group));
-		ft->group[i].start_ix = start_ix;
-		start_ix += 1 << group[i].log_sz;
-	}
-
-	err = mlx5_create_flow_table_cmd(ft);
-	if (err)
-		goto err_free_ft;
-
-	err = mlx5_create_flow_table_groups(ft);
-	if (err)
-		goto err_destroy_flow_table_cmd;
-
-	return ft;
-
-err_destroy_flow_table_cmd:
-	mlx5_destroy_flow_table_cmd(ft);
-
-err_free_ft:
-	mlx5_core_warn(dev, "failed to alloc flow table\n");
-	kfree(bm);
-	kfree(gr);
-	kfree(ft);
-
-	return NULL;
-}
-EXPORT_SYMBOL(mlx5_create_flow_table);
-
-void mlx5_destroy_flow_table(void *flow_table)
-{
-	struct mlx5_flow_table *ft = flow_table;
-
-	mlx5_destroy_flow_table_groups(ft);
-	mlx5_destroy_flow_table_cmd(ft);
-	kfree(ft->bitmap);
-	kfree(ft->group);
-	kfree(ft);
-}
-EXPORT_SYMBOL(mlx5_destroy_flow_table);
-
-u32 mlx5_get_flow_table_id(void *flow_table)
-{
-	struct mlx5_flow_table *ft = flow_table;
-
-	return ft->id;
-}
-EXPORT_SYMBOL(mlx5_get_flow_table_id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
new file mode 100644
index 000000000000..5096f4f336bd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/mlx5_ifc.h>
+
+#include "fs_core.h"
+#include "fs_cmd.h"
+#include "mlx5_core.h"
+
+/* Issue CREATE_FLOW_TABLE for a table of @type at @level with the given
+ * @log_size; on success the id of the new table is stored in *@table_id.
+ */
+int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
+			       enum fs_flow_table_type type, unsigned int level,
+			       unsigned int log_size, unsigned int *table_id)
+{
+	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
+	int err;
+
+	MLX5_SET(create_flow_table_in, in, opcode,
+		 MLX5_CMD_OP_CREATE_FLOW_TABLE);
+	MLX5_SET(create_flow_table_in, in, table_type, type);
+	MLX5_SET(create_flow_table_in, in, level, level);
+	MLX5_SET(create_flow_table_in, in, log_size, log_size);
+
+	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+					 sizeof(out));
+	if (err)
+		return err;
+
+	/* *table_id is written only when the command succeeded */
+	*table_id = MLX5_GET(create_flow_table_out, out, table_id);
+	return 0;
+}
+
+/* Issue DESTROY_FLOW_TABLE for @ft, identified to the device by its
+ * type and id. Returns the checked status of the command.
+ */
+int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
+				struct mlx5_flow_table *ft)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0};
+
+	MLX5_SET(destroy_flow_table_in, in, opcode,
+		 MLX5_CMD_OP_DESTROY_FLOW_TABLE);
+	MLX5_SET(destroy_flow_table_in, in, table_type, ft->type);
+	MLX5_SET(destroy_flow_table_in, in, table_id, ft->id);
+
+	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+					  sizeof(out));
+}
+
+/* Issue CREATE_FLOW_GROUP on @ft. @in is a caller-built create_flow_group_in
+ * box; only its opcode, table type and table id are filled in here.
+ */
+int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
+			       struct mlx5_flow_table *ft,
+			       u32 *in,
+			       unsigned int *group_id)
+{
+	u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0};
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	int err;
+
+	MLX5_SET(create_flow_group_in, in, opcode,
+		 MLX5_CMD_OP_CREATE_FLOW_GROUP);
+	MLX5_SET(create_flow_group_in, in, table_type, ft->type);
+	MLX5_SET(create_flow_group_in, in, table_id, ft->id);
+
+	err = mlx5_cmd_exec_check_status(dev, in, inlen, out,
+					 sizeof(out));
+	if (err)
+		return err;
+
+	*group_id = MLX5_GET(create_flow_group_out, out, group_id);
+	return 0;
+}
+
+/* Issue DESTROY_FLOW_GROUP for group @group_id of table @ft.
+ * Returns the checked status of the command.
+ */
+int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
+				struct mlx5_flow_table *ft,
+				unsigned int group_id)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0};
+
+	MLX5_SET(destroy_flow_group_in, in, opcode,
+		 MLX5_CMD_OP_DESTROY_FLOW_GROUP);
+	MLX5_SET(destroy_flow_group_in, in, table_type, ft->type);
+	MLX5_SET(destroy_flow_group_in, in, table_id, ft->id);
+	MLX5_SET(destroy_flow_group_in, in, group_id, group_id);
+
+	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+					  sizeof(out));
+}
+
+static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
+			    int opmod, int modify_mask,
+			    struct mlx5_flow_table *ft,
+			    unsigned group_id,
+			    struct fs_fte *fte)
+{
+	unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
+		fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct);
+	u32 out[MLX5_ST_SZ_DW(set_fte_out)];
+	struct mlx5_flow_rule *dst;
+	void *in_flow_context;
+	void *in_match_value;
+	void *in_dests;
+	u32 *in;
+	int err;
+
+	in = mlx5_vzalloc(inlen);
+	if (!in) {
+		mlx5_core_warn(dev, "failed to allocate inbox\n");
+		return -ENOMEM;
+	}
+	/* Command header: opcode, op_mod, modify mask and the entry to write */
+	MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY);
+	MLX5_SET(set_fte_in, in, op_mod, opmod);
+	MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask);
+	MLX5_SET(set_fte_in, in, table_type, ft->type);
+	MLX5_SET(set_fte_in, in, table_id,   ft->id);
+	MLX5_SET(set_fte_in, in, flow_index, fte->index);
+	/* Flow context: group, tag, action, destination count and match value */
+	in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
+	MLX5_SET(flow_context, in_flow_context, group_id, group_id);
+	MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag);
+	MLX5_SET(flow_context, in_flow_context, action, fte->action);
+	MLX5_SET(flow_context, in_flow_context, destination_list_size,
+		 fte->dests_size);
+	in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
+				      match_value);
+	memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param));
+	/* One dest_format_struct per child rule; assumes dests_size == child count - TODO confirm */
+	in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
+	list_for_each_entry(dst, &fte->node.children, node.list) {
+		unsigned int id;
+
+		MLX5_SET(dest_format_struct, in_dests, destination_type,
+			 dst->dest_attr.type);
+		if (dst->dest_attr.type ==
+		    MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
+			id = dst->dest_attr.ft->id;
+		else
+			id = dst->dest_attr.tir_num;
+		MLX5_SET(dest_format_struct, in_dests, destination_id, id);
+		in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+	}
+	memset(out, 0, sizeof(out));
+	err = mlx5_cmd_exec_check_status(dev, in, inlen, out,
+					 sizeof(out));
+	kvfree(in); /* the inbox is released whether or not the command failed */
+
+	return err;
+}
+
+int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
+			struct mlx5_flow_table *ft,
+			unsigned group_id,
+			struct fs_fte *fte)
+{
+	return	mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte); /* op_mod 0, empty modify mask */
+}
+
+/* Re-issue SET_FLOW_TABLE_ENTRY for @fte with op_mod 1 and only the
+ * destination-list modify bit set; -ENOTSUPP without flow_modify_en.
+ */
+int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
+			struct mlx5_flow_table *ft,
+			unsigned group_id,
+			struct fs_fte *fte)
+{
+	int modify_mask = 1 <<
+		MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST;
+
+	if (!MLX5_CAP_FLOWTABLE(dev,
+				flow_table_properties_nic_receive.
+				flow_modify_en))
+		return -ENOTSUPP;
+
+	return mlx5_cmd_set_fte(dev, 1, modify_mask, ft, group_id, fte);
+}
+
+/* Issue DELETE_FLOW_TABLE_ENTRY for flow entry @index of @ft.
+ *
+ * Returns the checked status of the command.
+ */
+int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
+			struct mlx5_flow_table *ft,
+			unsigned int index)
+{
+	u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0};
+
+	MLX5_SET(delete_fte_in, in, opcode,
+		 MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
+	MLX5_SET(delete_fte_in, in, table_type, ft->type);
+	MLX5_SET(delete_fte_in, in, table_id, ft->id);
+	MLX5_SET(delete_fte_in, in, flow_index, index);
+
+	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+					  sizeof(out));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
new file mode 100644
index 000000000000..f39304ede186
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MLX5_FS_CMD_
+#define _MLX5_FS_CMD_
+
+int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
+			       enum fs_flow_table_type type, unsigned int level,
+			       unsigned int log_size, unsigned int *table_id);
+
+int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
+				struct mlx5_flow_table *ft);
+
+int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
+			       struct mlx5_flow_table *ft,
+			       u32 *in, unsigned int *group_id);
+
+int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
+				struct mlx5_flow_table *ft,
+				unsigned int group_id);
+
+int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
+			struct mlx5_flow_table *ft,
+			unsigned group_id,
+			struct fs_fte *fte);
+
+int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
+			struct mlx5_flow_table *ft,
+			unsigned group_id,
+			struct fs_fte *fte);
+
+int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
+			struct mlx5_flow_table *ft,
+			unsigned int index);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
new file mode 100644
index 000000000000..f7d62fe595f6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -0,0 +1,1047 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mutex.h>
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "fs_cmd.h"
+
+/* Number of initializers in a variadic list of struct init_tree_node. */
+#define INIT_TREE_NODE_ARRAY_SIZE(...)	(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
+					 sizeof(struct init_tree_node))
+
+/* Initializer for a priority node; the variadic list holds its children. */
+#define INIT_PRIO(min_level_val, max_ft_val,\
+		  start_level_val, ...) {.type = FS_TYPE_PRIO,\
+	.min_ft_level = min_level_val,\
+	.start_level = start_level_val,\
+	.max_ft = max_ft_val,\
+	.children = (struct init_tree_node[]) {__VA_ARGS__},\
+	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
+}
+
+#define ADD_PRIO(min_level_val, max_ft_val, start_level_val, ...)\
+	INIT_PRIO(min_level_val, max_ft_val, start_level_val, __VA_ARGS__)
+
+#define ADD_FT_PRIO(max_ft_val, start_level_val, ...)\
+	INIT_PRIO(0, max_ft_val, start_level_val, __VA_ARGS__)
+
+#define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\
+	.children = (struct init_tree_node[]) {__VA_ARGS__},\
+	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
+}
+
+#define KERNEL_START_LEVEL 0
+#define KERNEL_P0_START_LEVEL KERNEL_START_LEVEL
+#define KERNEL_MAX_FT 2
+#define KERNEL_MIN_LEVEL 2
+static struct init_tree_node {
+	enum fs_node_type	type;
+	struct init_tree_node *children;
+	int ar_size;		/* number of entries in children */
+	int min_ft_level;	/* prio is rejected if the device max_ft_level is lower */
+	int prio;
+	int max_ft;
+	int start_level;
+} root_fs = {	/* template expanded by init_root_tree() for the NIC RX tree */
+	.type = FS_TYPE_NAMESPACE,
+	.ar_size = 1,
+	.children = (struct init_tree_node[]) {
+		ADD_PRIO(KERNEL_MIN_LEVEL, KERNEL_MAX_FT,
+			 KERNEL_START_LEVEL,
+			 ADD_NS(ADD_FT_PRIO(KERNEL_MAX_FT,
+					    KERNEL_P0_START_LEVEL))),
+	}
+};
+
+static void del_rule(struct fs_node *node);
+static void del_flow_table(struct fs_node *node);
+static void del_flow_group(struct fs_node *node);
+static void del_fte(struct fs_node *node);
+
+/* Set up refcount, list heads, lock and removal callback of a fresh node. */
+static void tree_init_node(struct fs_node *node, unsigned int refcount,
+			   void (*remove_func)(struct fs_node *))
+{
+	node->remove_func = remove_func;
+	mutex_init(&node->lock);
+	INIT_LIST_HEAD(&node->children);
+	INIT_LIST_HEAD(&node->list);
+	atomic_set(&node->refcount, refcount);
+}
+
+/* Attach @node below @parent (which gains a reference) and set its root. */
+static void tree_add_node(struct fs_node *node, struct fs_node *parent)
+{
+	node->parent = parent;
+	if (parent) {
+		atomic_inc(&parent->refcount);
+		node->root = parent->root;
+	} else {
+		/* No parent: the node is the root of its own tree */
+		node->root = node;
+	}
+}
+
+static void tree_get_node(struct fs_node *node)
+{
+	atomic_inc(&node->refcount);	/* extra reference; no lock is taken here */
+}
+
+static void nested_lock_ref_node(struct fs_node *node)
+{
+	if (!node)	/* NULL node: nothing to lock */
+		return;
+	mutex_lock_nested(&node->lock, SINGLE_DEPTH_NESTING);
+	atomic_inc(&node->refcount);	/* reference held while locked */
+}
+
+static void lock_ref_node(struct fs_node *node)
+{
+	if (!node)	/* NULL node (e.g. the parent of a root): nothing to lock */
+		return;
+	mutex_lock(&node->lock);
+	atomic_inc(&node->refcount);	/* reference held while locked */
+}
+
+static void unlock_ref_node(struct fs_node *node)
+{
+	if (!node)	/* NULL node: nothing was locked */
+		return;
+	atomic_dec(&node->refcount);	/* undo the reference taken when locking */
+	mutex_unlock(&node->lock);
+}
+
+static void tree_put_node(struct fs_node *node)
+{
+	struct fs_node *parent_node = node->parent;
+
+	lock_ref_node(parent_node);	/* parent is held while the child is unlinked */
+	if (atomic_dec_and_test(&node->refcount)) {	/* last reference dropped */
+		if (parent_node)
+			list_del_init(&node->list);
+		if (node->remove_func)
+			node->remove_func(node);
+		kfree(node);
+		node = NULL;	/* remembers that the node was freed */
+	}
+	unlock_ref_node(parent_node);
+	if (!node && parent_node)	/* release the child's reference on its parent */
+		tree_put_node(parent_node);
+}
+
+static int tree_remove_node(struct fs_node *node)
+{
+	if (atomic_read(&node->refcount) > 1)	/* still referenced, e.g. by children */
+		return -EPERM;
+	tree_put_node(node);	/* drops the remaining reference */
+	return 0;
+}
+
+/* Return the priority of @ns whose number is @prio, or NULL if there is none. */
+static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
+				 unsigned int prio)
+{
+	struct fs_prio *candidate;
+
+	fs_for_each_prio(candidate, ns)
+		if (candidate->prio == prio)
+			return candidate;
+
+	return NULL;
+}
+
+/* Level after the last table in @prio, or start_level if @prio is empty. */
+static unsigned int find_next_free_level(struct fs_prio *prio)
+{
+	struct list_head *tables = &prio->node.children;
+	struct mlx5_flow_table *last;
+
+	if (list_empty(tables))
+		return prio->start_level;
+
+	last = list_last_entry(tables, struct mlx5_flow_table, node.list);
+	return last->level + 1;
+}
+
+/* Return true when @val1 and @val2 agree on every bit set in @mask. */
+static bool masked_memcmp(void *mask, void *val1, void *val2, size_t size)
+{
+	const u8 *m = mask, *a = val1, *b = val2;
+	size_t i;
+
+	for (i = 0; i < size; i++)
+		if ((a[i] ^ b[i]) & m[i])	/* differ inside the mask */
+			return false;
+	return true;
+}
+
+/*
+ * Compare two fte_match_param buffers under the group's match criteria.
+ * Only the sections enabled in mask->match_criteria_enable are looked at
+ * (outer headers, misc parameters, inner headers). Returns true when every
+ * enabled section is equal on the masked bits.
+ */
+static bool compare_match_value(struct mlx5_flow_group_mask *mask,
+				void *fte_param1, void *fte_param2)
+{
+	u8 enabled = mask->match_criteria_enable;
+	void *criteria = mask->match_criteria;
+
+	if (enabled &
+	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) {
+		void *m = MLX5_ADDR_OF(fte_match_param, criteria, outer_headers);
+		void *a = MLX5_ADDR_OF(fte_match_param, fte_param1, outer_headers);
+		void *b = MLX5_ADDR_OF(fte_match_param, fte_param2, outer_headers);
+
+		if (!masked_memcmp(m, a, b,
+				   MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
+			return false;
+	}
+
+	if (enabled &
+	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) {
+		void *m = MLX5_ADDR_OF(fte_match_param, criteria, misc_parameters);
+		void *a = MLX5_ADDR_OF(fte_match_param, fte_param1, misc_parameters);
+		void *b = MLX5_ADDR_OF(fte_match_param, fte_param2, misc_parameters);
+
+		if (!masked_memcmp(m, a, b,
+				   MLX5_ST_SZ_BYTES(fte_match_set_misc)))
+			return false;
+	}
+
+	if (enabled &
+	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) {
+		void *m = MLX5_ADDR_OF(fte_match_param, criteria, inner_headers);
+		void *a = MLX5_ADDR_OF(fte_match_param, fte_param1, inner_headers);
+		void *b = MLX5_ADDR_OF(fte_match_param, fte_param2, inner_headers);
+
+		if (!masked_memcmp(m, a, b,
+				   MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
+			return false;
+	}
+	return true;
+}
+
+/* True when the enable bits and the whole fte_match_param masks are equal. */
+static bool compare_match_criteria(u8 match_criteria_enable1,
+				   u8 match_criteria_enable2, void *mask1, void *mask2)
+{
+	return !(match_criteria_enable1 != match_criteria_enable2 ||
+		 memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param)));
+}
+
+/* Map @node to its root namespace; warns and returns NULL on a bad root. */
+static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
+{
+	struct fs_node *root = node->root;
+	struct mlx5_flow_namespace *ns;
+
+	if (WARN_ON(root->type != FS_TYPE_NAMESPACE)) {
+		pr_warn("mlx5: flow steering node is not in tree or garbaged\n");
+		return NULL;
+	}
+
+	/* The root fs_node sits inside ns, which sits inside the root ns */
+	ns = container_of(root, struct mlx5_flow_namespace, node);
+	return container_of(ns, struct mlx5_flow_root_namespace, ns);
+}
+
+/* Device owning the tree that contains @node, or NULL if no root is found. */
+static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
+{
+	struct mlx5_flow_root_namespace *root_ns = find_root(node);
+
+	/* find_root() returns NULL when the root is not a namespace node */
+	return root_ns ? root_ns->dev : NULL;
+}
+
+/*
+ * remove_func of a flow table: issue the destroy command (a failure is
+ * only logged) and decrement the parent priority's table count.
+ */
+static void del_flow_table(struct fs_node *node)
+{
+	struct mlx5_flow_table *ft;
+	struct fs_prio *prio;
+
+	fs_get_obj(ft, node);
+	fs_get_obj(prio, ft->node.parent);
+
+	if (mlx5_cmd_destroy_flow_table(get_dev(&ft->node), ft))
+		pr_warn("flow steering can't destroy ft\n");
+	prio->num_ft--;
+}
+
+/*
+ * remove_func of a rule (destination): unlink it from its FTE and, while
+ * other destinations remain, rewrite the FTE's destination list.
+ *
+ * Nothing is allocated here. A scratch copy of fte->val used to be made
+ * but was never read, and its allocation failure skipped the unlink and
+ * the update even though the caller frees the rule regardless.
+ */
+static void del_rule(struct fs_node *node)
+{
+	struct mlx5_core_dev *dev = get_dev(node);
+	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_table *ft;
+	struct mlx5_flow_group *fg;
+	struct fs_fte *fte;
+	int err;
+
+	fs_get_obj(rule, node);
+	fs_get_obj(fte, rule->node.parent);
+	fs_get_obj(fg, fte->node.parent);
+	fs_get_obj(ft, fg->node.parent);
+
+	list_del(&rule->node.list);
+	fte->dests_size--;
+	/* No destination left: there is no list to rewrite */
+	if (!fte->dests_size)
+		return;
+
+	err = mlx5_cmd_update_fte(dev, ft, fg->id, fte);
+	if (err)
+		pr_warn("%s can't del rule fg id=%d fte_index=%d\n",
+			__func__, fg->id, fte->index);
+}
+
+/*
+ * remove_func of an FTE: issue the delete command for its index (a
+ * failure is only logged), clear the FTE status and decrement the
+ * owning group's FTE count.
+ */
+static void del_fte(struct fs_node *node)
+{
+	struct mlx5_flow_table *ft;
+	struct mlx5_flow_group *fg;
+	struct fs_fte *fte;
+
+	fs_get_obj(fte, node);
+	fs_get_obj(fg, fte->node.parent);
+	fs_get_obj(ft, fg->node.parent);
+
+	if (mlx5_cmd_delete_fte(get_dev(&ft->node), ft, fte->index))
+		pr_warn("flow steering can't delete fte in index %d of flow group id %d\n",
+			fte->index, fg->id);
+
+	fg->num_ftes--;
+	fte->status = 0;
+}
+
+/* remove_func of a flow group: issue the destroy command, log on failure. */
+static void del_flow_group(struct fs_node *node)
+{
+	struct mlx5_flow_group *fg;
+	struct mlx5_flow_table *ft;
+	int err;
+
+	fs_get_obj(fg, node);
+	fs_get_obj(ft, fg->node.parent);
+	err = mlx5_cmd_destroy_flow_group(get_dev(&ft->node), ft, fg->id);
+	if (err)
+		pr_warn("flow steering can't destroy fg %d of ft %d\n",
+			fg->id, ft->id);
+}
+
+/* Allocate a zeroed FTE and fill in match value, action, tag and index. */
+static struct fs_fte *alloc_fte(u8 action, u32 flow_tag,
+				u32 *match_value, unsigned int index)
+{
+	struct fs_fte *entry;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return ERR_PTR(-ENOMEM);
+
+	entry->node.type = FS_TYPE_FLOW_ENTRY;
+	entry->action = action;
+	entry->flow_tag = flow_tag;
+	entry->index = index;
+	/* sizeof(entry->val) bytes are read from match_value */
+	memcpy(entry->val, match_value, sizeof(entry->val));
+
+	return entry;
+}
+
+/* Allocate a flow group and fill it from a create_flow_group_in mailbox. */
+static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in)
+{
+	struct mlx5_flow_group *fg = kzalloc(sizeof(*fg), GFP_KERNEL);
+	u32 end_index;
+
+	if (!fg)
+		return ERR_PTR(-ENOMEM);
+
+	fg->node.type = FS_TYPE_FLOW_GROUP;
+	fg->mask.match_criteria_enable =
+		MLX5_GET(create_flow_group_in, create_fg_in, match_criteria_enable);
+	memcpy(&fg->mask.match_criteria,
+	       MLX5_ADDR_OF(create_flow_group_in, create_fg_in, match_criteria),
+	       sizeof(fg->mask.match_criteria));
+	fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in,
+				   start_flow_index);
+	end_index = MLX5_GET(create_flow_group_in, create_fg_in, end_flow_index);
+	fg->max_ftes = end_index - fg->start_index + 1;
+
+	return fg;
+}
+
+/* Allocate a zeroed flow table descriptor; returns NULL on failure. */
+static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte,
+						enum fs_flow_table_type table_type)
+{
+	struct mlx5_flow_table *table;
+
+	table = kzalloc(sizeof(*table), GFP_KERNEL);
+	if (table) {
+		table->node.type = FS_TYPE_FLOW_TABLE;
+		table->type = table_type;
+		table->level = level;
+		table->max_fte = max_fte;
+	}
+
+	return table;
+}
+
+/*
+ * Create a flow table at the next free level of priority @prio in @ns, with
+ * @max_fte rounded up to a power of two. Returns the table or an ERR_PTR.
+ */
+struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
+					       int prio, int max_fte)
+{
+	struct mlx5_flow_root_namespace *root = find_root(&ns->node);
+	struct fs_prio *fs_prio;
+	struct mlx5_flow_table *ft;
+	int err;
+
+	if (!root) {
+		pr_err("mlx5: flow steering failed to find root of namespace\n");
+		return ERR_PTR(-ENODEV);
+	}
+
+	fs_prio = find_prio(ns, prio);
+	if (!fs_prio)
+		return ERR_PTR(-EINVAL);
+
+	lock_ref_node(&fs_prio->node);
+	if (fs_prio->num_ft == fs_prio->max_ft) {
+		err = -ENOSPC;
+		goto unlock_prio;
+	}
+
+	ft = alloc_flow_table(find_next_free_level(fs_prio),
+			      roundup_pow_of_two(max_fte),
+			      root->table_type);
+	if (!ft) {
+		err = -ENOMEM;
+		goto unlock_prio;
+	}
+
+	tree_init_node(&ft->node, 1, del_flow_table);
+	err = mlx5_cmd_create_flow_table(root->dev, ft->type, ft->level,
+					 ilog2(ft->max_fte), &ft->id);
+	if (err)
+		goto free_ft;
+
+	tree_add_node(&ft->node, &fs_prio->node);
+	list_add_tail(&ft->node.list, &fs_prio->node.children);
+	fs_prio->num_ft++;
+	unlock_ref_node(&fs_prio->node);
+
+	return ft;
+
+free_ft:
+	kfree(ft);
+unlock_prio:
+	unlock_ref_node(&fs_prio->node);
+	return ERR_PTR(err);
+}
+
+/* Create a flow group in @ft from mailbox @fg_in and append it to the table. */
+struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
+					       u32 *fg_in)
+{
+	struct mlx5_core_dev *dev = get_dev(&ft->node);
+	struct mlx5_flow_group *fg;
+	int err;
+
+	if (!dev)
+		return ERR_PTR(-ENODEV);
+
+	fg = alloc_flow_group(fg_in);
+	if (IS_ERR(fg))
+		return fg;
+
+	lock_ref_node(&ft->node);
+	err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id);
+	if (!err) {
+		tree_init_node(&fg->node, 1, del_flow_group);
+		tree_add_node(&fg->node, &ft->node);
+		list_add_tail(&fg->node.list, &ft->node.children);
+	}
+	unlock_ref_node(&ft->node);
+
+	if (err) {
+		kfree(fg);
+		return ERR_PTR(err);
+	}
+	return fg;
+}
+
+/* Allocate a rule node holding a copy of @dest; returns NULL on failure. */
+static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
+{
+	struct mlx5_flow_rule *new_rule;
+
+	new_rule = kzalloc(sizeof(*new_rule), GFP_KERNEL);
+	if (new_rule) {
+		new_rule->node.type = FS_TYPE_FLOW_DEST;
+		memcpy(&new_rule->dest_attr, dest, sizeof(*dest));
+	}
+
+	return new_rule;
+}
+
+/* Add @dest to @fte and program the FTE; fte must not be deleted meanwhile */
+static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte,
+					   struct mlx5_flow_group *fg,
+					   struct mlx5_flow_destination *dest)
+{
+	struct mlx5_flow_table *ft;
+	struct mlx5_flow_rule *rule;
+	int err;
+
+	rule = alloc_rule(dest);
+	if (!rule)
+		return ERR_PTR(-ENOMEM);
+
+	fs_get_obj(ft, fg->node.parent);
+	/* Append dest to the FTE's destination list before issuing the command */
+	tree_init_node(&rule->node, 1, del_rule);
+	list_add_tail(&rule->node.list, &fte->node.children);
+	fte->dests_size++;
+	if (fte->dests_size == 1)	/* first destination: create the FTE */
+		err = mlx5_cmd_create_fte(get_dev(&ft->node),
+					  ft, fg->id, fte);
+	else				/* otherwise update its destination list */
+		err = mlx5_cmd_update_fte(get_dev(&ft->node),
+					  ft, fg->id, fte);
+	if (err)
+		goto free_rule;
+
+	fte->status |= FS_FTE_STATUS_EXISTING;
+
+	return rule;
+
+free_rule:
+	list_del(&rule->node.list);
+	kfree(rule);
+	fte->dests_size--;
+	return ERR_PTR(err);
+}
+
+/* Return the first unused index in @fg; assumes fg is locked */
+static unsigned int get_free_fte_index(struct mlx5_flow_group *fg,
+				       struct list_head **prev)
+{
+	struct fs_fte *fte;
+	unsigned int start = fg->start_index;
+
+	if (prev)	/* default insertion point: the list head */
+		*prev = &fg->node.children;
+
+	/* assumed list is sorted by index */
+	fs_for_each_fte(fte, fg) {
+		if (fte->index != start)	/* gap found before this entry */
+			return start;
+		start++;
+		if (prev)	/* a new entry would go after this one */
+			*prev = &fte->node.list;
+	}
+
+	return start;
+}
+
+/*
+ * Allocate an FTE at the first free index of @fg. @prev is an output: the
+ * list node after which the new FTE is to be linked (prev->next = new_fte).
+ * Returns whatever alloc_fte() returns: the FTE, or an ERR_PTR when the
+ * allocation failed.
+ */
+static struct fs_fte *create_fte(struct mlx5_flow_group *fg,
+				 u32 *match_value,
+				 u8 action,
+				 u32 flow_tag,
+				 struct list_head **prev)
+{
+	int index = get_free_fte_index(fg, prev);
+
+	/* alloc_fte() already yields either the FTE or an ERR_PTR */
+	return alloc_fte(action, flow_tag, match_value, index);
+}
+
+/* Add a rule to @fg, reusing a matching FTE when one exists; takes fg's lock */
+static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
+					  u32 *match_value,
+					  u8 action,
+					  u32 flow_tag,
+					  struct mlx5_flow_destination *dest)
+{
+	struct fs_fte *fte;
+	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_table *ft;
+	struct list_head *prev;
+
+	lock_ref_node(&fg->node);
+	fs_for_each_fte(fte, fg) {
+		nested_lock_ref_node(&fte->node);
+		if (compare_match_value(&fg->mask, match_value, &fte->val) &&
+		    action == fte->action && flow_tag == fte->flow_tag) {
+			rule = add_rule_fte(fte, fg, dest);
+			unlock_ref_node(&fte->node);
+			if (IS_ERR(rule))
+				goto unlock_fg;
+			else
+				goto add_rule;
+		}
+		unlock_ref_node(&fte->node);
+	}
+	fs_get_obj(ft, fg->node.parent);	/* ft is not used after this point */
+	if (fg->num_ftes >= fg->max_ftes) {
+		rule = ERR_PTR(-ENOSPC);
+		goto unlock_fg;
+	}
+
+	fte = create_fte(fg, match_value, action, flow_tag, &prev);
+	if (IS_ERR(fte)) {
+		rule = (void *)fte;	/* pass the ERR_PTR on under the rule type */
+		goto unlock_fg;
+	}
+	tree_init_node(&fte->node, 0, del_fte);	/* each attached rule adds a reference */
+	rule = add_rule_fte(fte, fg, dest);
+	if (IS_ERR(rule)) {
+		kfree(fte);
+		goto unlock_fg;
+	}
+
+	fg->num_ftes++;
+
+	tree_add_node(&fte->node, &fg->node);
+	list_add(&fte->node.list, prev);	/* after the node get_free_fte_index() chose */
+add_rule:
+	tree_add_node(&rule->node, &fte->node);
+unlock_fg:
+	unlock_ref_node(&fg->node);
+	return rule;
+}
+
+struct mlx5_flow_rule *
+mlx5_add_flow_rule(struct mlx5_flow_table *ft,
+		   u8 match_criteria_enable,
+		   u32 *match_criteria,
+		   u32 *match_value,
+		   u32 action,	/* NOTE(review): narrowed to u8 by add_rule_fg() */
+		   u32 flow_tag,
+		   struct mlx5_flow_destination *dest)
+{
+	struct mlx5_flow_group *g;
+	struct mlx5_flow_rule *rule = ERR_PTR(-EINVAL);	/* result if no group matches */
+
+	tree_get_node(&ft->node);	/* reference held until the put below */
+	lock_ref_node(&ft->node);
+	fs_for_each_fg(g, ft)
+		if (compare_match_criteria(g->mask.match_criteria_enable,
+					   match_criteria_enable,
+					   g->mask.match_criteria,
+					   match_criteria)) {
+			unlock_ref_node(&ft->node);	/* ft lock is dropped before adding */
+			rule = add_rule_fg(g, match_value,
+					   action, flow_tag, dest);
+			goto put;
+		}
+	unlock_ref_node(&ft->node);
+put:
+	tree_put_node(&ft->node);
+	return rule;
+}
+
+void mlx5_del_flow_rule(struct mlx5_flow_rule *rule)
+{
+	tree_remove_node(&rule->node);	/* an -EPERM result (refcount > 1) is ignored */
+}
+
+int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
+{
+	if (tree_remove_node(&ft->node))
+		mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n",
+			       ft->id);
+	/* Always reports success, even when the table was left in place */
+	return 0;
+}
+
+void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
+{
+	if (tree_remove_node(&fg->node))	/* refused while refcount > 1 */
+		mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n",
+			       fg->id);
+}
+
+/*
+ * Look up the namespace of @type: the FDB root namespace, or for the
+ * kernel type the first child of priority 0 under the NIC root namespace.
+ * Returns NULL for any other type, when the requested namespace is absent,
+ * or when the NIC root namespace is absent (this also applies to FDB).
+ */
+struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
+						    enum mlx5_flow_namespace_type type)
+{
+	struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns;
+	struct fs_prio *fs_prio;	/* per-call state, must not be static */
+
+	if (!root_ns)
+		return NULL;
+
+	switch (type) {
+	case MLX5_FLOW_NAMESPACE_KERNEL:
+		break;
+	case MLX5_FLOW_NAMESPACE_FDB:
+		if (dev->priv.fdb_root_ns)
+			return &dev->priv.fdb_root_ns->ns;
+		return NULL;
+	default:
+		return NULL;
+	}
+
+	/* The kernel namespace hangs off priority 0 of the NIC root */
+	fs_prio = find_prio(&root_ns->ns, 0);
+	if (!fs_prio)
+		return NULL;
+
+	return list_first_entry(&fs_prio->node.children,
+				struct mlx5_flow_namespace, node.list);
+}
+
+/* Allocate priority @prio and append it to @ns, which gains a reference. */
+static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
+				      unsigned prio, int max_ft,
+				      int start_level)
+{
+	struct fs_prio *new_prio = kzalloc(sizeof(*new_prio), GFP_KERNEL);
+
+	if (!new_prio)
+		return ERR_PTR(-ENOMEM);
+
+	new_prio->prio = prio;
+	new_prio->max_ft = max_ft;
+	new_prio->start_level = start_level;
+	new_prio->node.type = FS_TYPE_PRIO;
+	tree_init_node(&new_prio->node, 1, NULL);
+	tree_add_node(&new_prio->node, &ns->node);
+	list_add_tail(&new_prio->node.list, &ns->node.children);
+
+	return new_prio;
+}
+
+/* Tag @ns as a namespace node and hand it back. */
+static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace *ns)
+{
+	ns->node.type = FS_TYPE_NAMESPACE;
+
+	return ns;
+}
+
+/* Allocate a namespace and append it to @prio, which gains a reference. */
+static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
+{
+	struct mlx5_flow_namespace *new_ns = kzalloc(sizeof(*new_ns), GFP_KERNEL);
+
+	if (!new_ns)
+		return ERR_PTR(-ENOMEM);
+
+	fs_init_namespace(new_ns);
+	tree_init_node(&new_ns->node, 1, NULL);
+	tree_add_node(&new_ns->node, &prio->node);
+	list_add_tail(&new_ns->node.list, &prio->node.children);
+
+	return new_ns;
+}
+
+static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *init_node,
+				    struct fs_node *fs_parent_node,
+				    struct init_tree_node *init_parent_node,
+				    int index)
+{
+	struct mlx5_flow_namespace *fs_ns;
+	struct fs_prio *fs_prio;
+	struct fs_node *base;
+	int i;
+	int err;
+
+	if (init_node->type == FS_TYPE_PRIO) {	/* parent node is a namespace */
+		if (init_node->min_ft_level > max_ft_level)
+			return -ENOTSUPP;
+
+		fs_get_obj(fs_ns, fs_parent_node);
+		fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft,
+					 init_node->start_level);	/* index is the prio number */
+		if (IS_ERR(fs_prio))
+			return PTR_ERR(fs_prio);
+		base = &fs_prio->node;
+	} else if (init_node->type == FS_TYPE_NAMESPACE) {	/* parent node is a prio */
+		fs_get_obj(fs_prio, fs_parent_node);
+		fs_ns = fs_create_namespace(fs_prio);
+		if (IS_ERR(fs_ns))
+			return PTR_ERR(fs_ns);
+		base = &fs_ns->node;
+	} else {
+		return -EINVAL;
+	}
+	for (i = 0; i < init_node->ar_size; i++) {	/* children go under the new node */
+		err = init_root_tree_recursive(max_ft_level,
+					       &init_node->children[i], base,
+					       init_node, i);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Instantiate every child of template @init_node under @fs_parent_node. */
+static int init_root_tree(int max_ft_level, struct init_tree_node *init_node,
+			  struct fs_node *fs_parent_node)
+{
+	struct mlx5_flow_namespace *fs_ns;
+	int err = 0;
+	int i;
+
+	fs_get_obj(fs_ns, fs_parent_node);
+
+	for (i = 0; !err && i < init_node->ar_size; i++)
+		err = init_root_tree_recursive(max_ft_level,
+					       &init_node->children[i],
+					       &fs_ns->node,
+					       init_node, i);
+
+	return err;
+}
+
+/* Allocate and initialise the root namespace of a @table_type tree. */
+static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev,
+						       enum fs_flow_table_type
+						       table_type)
+{
+	struct mlx5_flow_root_namespace *root_ns;
+	struct mlx5_flow_namespace *ns;
+
+	/* kzalloc, not vzalloc: tree_put_node() releases the node with kfree() */
+	root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL);
+	if (!root_ns)
+		return NULL;
+
+	root_ns->dev = dev;
+	root_ns->table_type = table_type;
+	ns = &root_ns->ns;
+	fs_init_namespace(ns);
+	tree_init_node(&ns->node, 1, NULL);
+	tree_add_node(&ns->node, NULL);
+
+	return root_ns;
+}
+
+/* Build the NIC RX root namespace from root_fs; clean up on any failure. */
+static int init_root_ns(struct mlx5_core_dev *dev)
+{
+	int max_ft_level = MLX5_CAP_FLOWTABLE(dev,
+					      flow_table_properties_nic_receive.
+					      max_ft_level);
+	int err = -ENOMEM;	/* create_root_ns() fails only on allocation */
+
+	dev->priv.root_ns = create_root_ns(dev, FS_FT_NIC_RX);
+	if (IS_ERR_OR_NULL(dev->priv.root_ns))
+		goto cleanup;
+
+	err = init_root_tree(max_ft_level, &root_fs, &dev->priv.root_ns->ns.node);
+	if (!err)
+		return 0;
+cleanup:
+	mlx5_cleanup_fs(dev);
+	return err;	/* the real cause (e.g. -ENOTSUPP), not a blanket -ENOMEM */
+}
+
+/* Tear down a root namespace that has at most one priority (the FDB tree). */
+static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev,
+					struct mlx5_flow_root_namespace *root_ns)
+{
+	struct fs_node *prio;
+
+	if (!root_ns)
+		return;
+	if (!list_empty(&root_ns->ns.node.children)) {
+		prio = list_first_entry(&root_ns->ns.node.children,
+					struct fs_node, list);
+		if (tree_remove_node(prio))
+			mlx5_core_warn(dev,
+				       "Flow steering priority wasn't destroyed, refcount > 1\n");
+	}
+	if (tree_remove_node(&root_ns->ns.node))
+		mlx5_core_warn(dev,
+			       "Flow steering namespace wasn't destroyed, refcount > 1\n");
+	else if (dev->priv.fdb_root_ns == root_ns)
+		dev->priv.fdb_root_ns = NULL;	/* freed: don't leave it dangling */
+}
+
+static void cleanup_root_ns(struct mlx5_core_dev *dev)
+{
+	struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns;
+	struct fs_prio *iter_prio;
+
+	if (!MLX5_CAP_GEN(dev, nic_flow_table))
+		return;
+
+	if (!root_ns)
+		return;
+
+	/* stage 1: remove the prios inside every namespace child of a root prio */
+	fs_for_each_prio(iter_prio, &root_ns->ns) {
+		struct fs_node *node;
+		struct mlx5_flow_namespace *iter_ns;
+
+		fs_for_each_ns_or_ft(node, iter_prio) {
+			if (node->type == FS_TYPE_FLOW_TABLE)
+				continue;
+			fs_get_obj(iter_ns, node);
+			while (!list_empty(&iter_ns->node.children)) {
+				struct fs_prio *obj_iter_prio2;
+				struct fs_node *iter_prio2 =
+					list_first_entry(&iter_ns->node.children,
+							 struct fs_node,
+							 list);
+
+				fs_get_obj(obj_iter_prio2, iter_prio2);
+				if (tree_remove_node(iter_prio2)) {
+					mlx5_core_warn(dev,
+						       "Priority %d wasn't destroyed, refcount > 1\n",
+						       obj_iter_prio2->prio);
+					return;	/* give up: the rest of the tree stays */
+				}
+			}
+		}
+	}
+
+	/* stage 2: remove every child of each root prio */
+	fs_for_each_prio(iter_prio, &root_ns->ns) {
+		while (!list_empty(&iter_prio->node.children)) {
+			struct fs_node *iter_ns =
+				list_first_entry(&iter_prio->node.children,
+						 struct fs_node,
+						 list);
+			if (tree_remove_node(iter_ns)) {
+				mlx5_core_warn(dev,
+					       "Namespace wasn't destroyed, refcount > 1\n");
+				return;
+			}
+		}
+	}
+
+	/* stage 3: remove the root prios themselves */
+	while (!list_empty(&root_ns->ns.node.children)) {
+		struct fs_prio *obj_prio_node;
+		struct fs_node *prio_node =
+			list_first_entry(&root_ns->ns.node.children,
+					 struct fs_node,
+					 list);
+
+		fs_get_obj(obj_prio_node, prio_node);
+		if (tree_remove_node(prio_node)) {
+			mlx5_core_warn(dev,
+				       "Priority %d wasn't destroyed, refcount > 1\n",
+				       obj_prio_node->prio);
+			return;
+		}
+	}
+
+	if (tree_remove_node(&root_ns->ns.node)) {	/* finally the root namespace */
+		mlx5_core_warn(dev,
+			       "root namespace wasn't destroyed, refcount > 1\n");
+		return;
+	}
+
+	dev->priv.root_ns = NULL;	/* cleared only after a complete teardown */
+}
+
+void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
+{
+	cleanup_root_ns(dev);	/* tree rooted at dev->priv.root_ns */
+	cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns);	/* FDB tree */
+}
+
+/* Create the FDB root namespace together with its single priority. */
+static int init_fdb_root_ns(struct mlx5_core_dev *dev)
+{
+	struct fs_prio *prio;
+
+	dev->priv.fdb_root_ns = create_root_ns(dev, FS_FT_FDB);
+	if (!dev->priv.fdb_root_ns)
+		return -ENOMEM;
+
+	/* Single prio: number 0, max_ft 1, start level 0 */
+	prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1, 0);
+	if (!IS_ERR(prio))
+		return 0;
+
+	cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns);
+	return PTR_ERR(prio);
+}
+
+/* Set up the NIC and FDB flow steering trees the device has caps for. */
+int mlx5_init_fs(struct mlx5_core_dev *dev)
+{
+	int err;
+
+	if (MLX5_CAP_GEN(dev, nic_flow_table)) {
+		err = init_root_ns(dev);
+		if (err)
+			return err;
+	}
+	if (!MLX5_CAP_GEN(dev, eswitch_flow_table))
+		return 0;
+	err = init_fdb_root_ns(dev);
+	if (err)
+		cleanup_root_ns(dev);	/* undo the NIC tree set up above */
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
new file mode 100644
index 000000000000..4ebb97fd5544
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MLX5_FS_CORE_
+#define _MLX5_FS_CORE_
+
+#include <linux/mlx5/fs.h>
+
+enum fs_node_type {
+	FS_TYPE_NAMESPACE,
+	FS_TYPE_PRIO,
+	FS_TYPE_FLOW_TABLE,
+	FS_TYPE_FLOW_GROUP,
+	FS_TYPE_FLOW_ENTRY,
+	FS_TYPE_FLOW_DEST
+};
+
+enum fs_flow_table_type {
+	FS_FT_NIC_RX	 = 0x0,
+	FS_FT_FDB	 = 0X4,
+};
+
+enum fs_fte_status {
+	FS_FTE_STATUS_EXISTING = 1UL << 0,
+};
+
+struct fs_node {
+	struct list_head	list;		/* entry in the parent's children list */
+	struct list_head	children;
+	enum fs_node_type	type;
+	struct fs_node		*parent;	/* NULL for a root node */
+	struct fs_node		*root;		/* the node itself when it has no parent */
+	/* lock the node for writing and traversing */
+	struct mutex		lock;
+	atomic_t		refcount;	/* each child added holds one reference */
+	void			(*remove_func)(struct fs_node *);	/* may be NULL */
+};
+
+struct mlx5_flow_rule {
+	struct fs_node				node;
+	struct mlx5_flow_destination		dest_attr;
+};
+
+/* Type of children is mlx5_flow_group */
+struct mlx5_flow_table {
+	struct fs_node			node;
+	u32				id;
+	unsigned int			max_fte;
+	unsigned int			level;
+	enum fs_flow_table_type		type;
+};
+
+/* Type of children is mlx5_flow_rule */
+struct fs_fte {
+	struct fs_node			node;
+	u32				val[MLX5_ST_SZ_DW(fte_match_param)];
+	u32				dests_size;	/* number of rules attached */
+	u32				flow_tag;
+	u32				index;		/* flow index used in FTE commands */
+	u32				action;
+	enum fs_fte_status		status;		/* FS_FTE_STATUS_* bits */
+};
+
+/* Type of children is mlx5_flow_table/namespace */
+struct fs_prio {
+	struct fs_node			node;
+	unsigned int			max_ft;		/* table creation fails at num_ft == max_ft */
+	unsigned int			start_level;	/* level given to the first table */
+	unsigned int			prio;
+	unsigned int			num_ft;		/* tables currently in this prio */
+};
+
+/* Type of children is fs_prio */
+struct mlx5_flow_namespace {
+	/* parent == NULL => root ns */
+	struct	fs_node			node;
+};
+
+struct mlx5_flow_group_mask {
+	u8	match_criteria_enable;
+	u32	match_criteria[MLX5_ST_SZ_DW(fte_match_param)];
+};
+
+/* Type of children is fs_fte */
+struct mlx5_flow_group {
+	struct fs_node			node;
+	struct mlx5_flow_group_mask	mask;
+	u32				start_index;	/* first flow index of the group */
+	u32				max_ftes;	/* end index - start index + 1 */
+	u32				num_ftes;	/* FTEs currently in the group */
+	u32				id;
+};
+
+struct mlx5_flow_root_namespace {
+	struct mlx5_flow_namespace	ns;
+	enum   fs_flow_table_type	table_type;
+	struct mlx5_core_dev		*dev;
+};
+
+int mlx5_init_fs(struct mlx5_core_dev *dev);
+void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
+
+#define fs_get_obj(v, _node)  {v = container_of((_node), typeof(*v), node); }
+
+#define fs_list_for_each_entry(pos, root)		\
+	list_for_each_entry(pos, root, node.list)
+
+#define fs_for_each_ns_or_ft_reverse(pos, prio)				\
+	list_for_each_entry_reverse(pos, &(prio)->node.children, list)
+
+#define fs_for_each_ns_or_ft(pos, prio)					\
+	list_for_each_entry(pos, (&(prio)->node.children), list)
+
+#define fs_for_each_prio(pos, ns)			\
+	fs_list_for_each_entry(pos, &(ns)->node.children)
+
+#define fs_for_each_fg(pos, ft)			\
+	fs_list_for_each_entry(pos, &(ft)->node.children)
+
+#define fs_for_each_fte(pos, fg)			\
+	fs_list_for_each_entry(pos, &(fg)->node.children)
+
+#define fs_for_each_dst(pos, fte)			\
+	fs_list_for_each_entry(pos, &(fte)->node.children)
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 9335e5ae18cc..aa1ab4702385 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -160,6 +160,30 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 		if (err)
 			return err;
 	}
+
+	if (MLX5_CAP_GEN(dev, vport_group_manager) &&
+	    MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE,
+					 HCA_CAP_OPMOD_GET_CUR);
+		if (err)
+			return err;
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE,
+					 HCA_CAP_OPMOD_GET_MAX);
+		if (err)
+			return err;
+	}
+
+	if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH,
+					 HCA_CAP_OPMOD_GET_CUR);
+		if (err)
+			return err;
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH,
+					 HCA_CAP_OPMOD_GET_MAX);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 4ac8d4cc4973..789882b7b711 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -49,6 +49,10 @@
 #include <linux/delay.h>
 #include <linux/mlx5/mlx5_ifc.h>
 #include "mlx5_core.h"
+#include "fs_core.h"
+#ifdef CONFIG_MLX5_CORE_EN
+#include "eswitch.h"
+#endif
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
@@ -454,6 +458,9 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev)
 	struct mlx5_reg_host_endianess he_out;
 	int err;
 
+	if (!mlx5_core_is_pf(dev))
+		return 0;
+
 	memset(&he_in, 0, sizeof(he_in));
 	he_in.he = MLX5_SET_HOST_ENDIANNESS;
 	err = mlx5_core_access_reg(dev, &he_in,  sizeof(he_in),
@@ -462,42 +469,39 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev)
 	return err;
 }
 
-static int mlx5_core_enable_hca(struct mlx5_core_dev *dev)
+/* Execute the ENABLE_HCA command for the function identified by func_id. */
+int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
 {
+	u32 out[MLX5_ST_SZ_DW(enable_hca_out)];
+	u32 in[MLX5_ST_SZ_DW(enable_hca_in)];
 	int err;
-	struct mlx5_enable_hca_mbox_in in;
-	struct mlx5_enable_hca_mbox_out out;
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ENABLE_HCA);
+	memset(in, 0, sizeof(in));
+	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
+	MLX5_SET(enable_hca_in, in, function_id, func_id);
+	memset(out, 0, sizeof(out));
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (err)
 		return err;
 
-	if (out.hdr.status)
-		return mlx5_cmd_status_to_err(&out.hdr);
-
-	return 0;
+	return mlx5_cmd_status_to_err_v2(out);
 }
 
-static int mlx5_core_disable_hca(struct mlx5_core_dev *dev)
+int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
 {
+	u32 out[MLX5_ST_SZ_DW(disable_hca_out)];
+	u32 in[MLX5_ST_SZ_DW(disable_hca_in)];
 	int err;
-	struct mlx5_disable_hca_mbox_in in;
-	struct mlx5_disable_hca_mbox_out out;
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DISABLE_HCA);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	memset(in, 0, sizeof(in));
+	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
+	MLX5_SET(disable_hca_in, in, function_id, func_id);
+	memset(out, 0, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (err)
 		return err;
 
-	if (out.hdr.status)
-		return mlx5_cmd_status_to_err(&out.hdr);
-
-	return 0;
+	return mlx5_cmd_status_to_err_v2(out);
 }
 
 static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
@@ -942,7 +946,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 
 	mlx5_pagealloc_init(dev);
 
-	err = mlx5_core_enable_hca(dev);
+	err = mlx5_core_enable_hca(dev, 0);
 	if (err) {
 		dev_err(&pdev->dev, "enable hca failed\n");
 		goto err_pagealloc_cleanup;
@@ -1052,6 +1056,25 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 	mlx5_init_srq_table(dev);
 	mlx5_init_mr_table(dev);
 
+	err = mlx5_init_fs(dev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to init flow steering\n");
+		goto err_fs;
+	}
+#ifdef CONFIG_MLX5_CORE_EN
+	err = mlx5_eswitch_init(dev);
+	if (err) {
+		dev_err(&pdev->dev, "eswitch init failed %d\n", err);
+		goto err_reg_dev;
+	}
+#endif
+
+	err = mlx5_sriov_init(dev);
+	if (err) {
+		dev_err(&pdev->dev, "sriov init failed %d\n", err);
+		goto err_sriov;
+	}
+
 	err = mlx5_register_device(dev);
 	if (err) {
 		dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
@@ -1068,7 +1091,16 @@ out:
 
 	return 0;
 
+err_sriov:
+	if (mlx5_sriov_cleanup(dev))
+		dev_err(&dev->pdev->dev, "sriov cleanup failed\n");
+
+#ifdef CONFIG_MLX5_CORE_EN
+	mlx5_eswitch_cleanup(dev->priv.eswitch);
+#endif
 err_reg_dev:
+	mlx5_cleanup_fs(dev);
+err_fs:
 	mlx5_cleanup_mr_table(dev);
 	mlx5_cleanup_srq_table(dev);
 	mlx5_cleanup_qp_table(dev);
@@ -1106,7 +1138,7 @@ reclaim_boot_pages:
 	mlx5_reclaim_startup_pages(dev);
 
 err_disable_hca:
-	mlx5_core_disable_hca(dev);
+	mlx5_core_disable_hca(dev, 0);
 
 err_pagealloc_cleanup:
 	mlx5_pagealloc_cleanup(dev);
@@ -1123,6 +1155,13 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 {
 	int err = 0;
 
+	err = mlx5_sriov_cleanup(dev);
+	if (err) {
+		dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n",
+			 __func__);
+		return err;
+	}
+
 	mutex_lock(&dev->intf_state_mutex);
 	if (dev->interface_state == MLX5_INTERFACE_STATE_DOWN) {
 		dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
@@ -1130,6 +1169,11 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 		goto out;
 	}
 	mlx5_unregister_device(dev);
+#ifdef CONFIG_MLX5_CORE_EN
+	mlx5_eswitch_cleanup(dev->priv.eswitch);
+#endif
+
+	mlx5_cleanup_fs(dev);
 	mlx5_cleanup_mr_table(dev);
 	mlx5_cleanup_srq_table(dev);
 	mlx5_cleanup_qp_table(dev);
@@ -1149,7 +1193,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 	}
 	mlx5_pagealloc_stop(dev);
 	mlx5_reclaim_startup_pages(dev);
-	mlx5_core_disable_hca(dev);
+	mlx5_core_disable_hca(dev, 0);
 	mlx5_pagealloc_cleanup(dev);
 	mlx5_cmd_cleanup(dev);
 
@@ -1195,6 +1239,7 @@ static int init_one(struct pci_dev *pdev,
 		return -ENOMEM;
 	}
 	priv = &dev->priv;
+	priv->pci_dev_data = id->driver_data;
 
 	pci_set_drvdata(pdev, dev);
 
@@ -1365,12 +1410,12 @@ static const struct pci_error_handlers mlx5_err_handler = {
 };
 
 static const struct pci_device_id mlx5_core_pci_table[] = {
-	{ PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */
-	{ PCI_VDEVICE(MELLANOX, 0x1012) }, /* Connect-IB VF */
-	{ PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */
-	{ PCI_VDEVICE(MELLANOX, 0x1014) }, /* ConnectX-4 VF */
-	{ PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */
-	{ PCI_VDEVICE(MELLANOX, 0x1016) }, /* ConnectX-4LX VF */
+	{ PCI_VDEVICE(MELLANOX, 0x1011) },			/* Connect-IB */
+	{ PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF},	/* Connect-IB VF */
+	{ PCI_VDEVICE(MELLANOX, 0x1013) },			/* ConnectX-4 */
+	{ PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4 VF */
+	{ PCI_VDEVICE(MELLANOX, 0x1015) },			/* ConnectX-4LX */
+	{ PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4LX VF */
 	{ 0, }
 };
 
@@ -1381,7 +1426,8 @@ static struct pci_driver mlx5_core_driver = {
 	.id_table       = mlx5_core_pci_table,
 	.probe          = init_one,
 	.remove         = remove_one,
-	.err_handler	= &mlx5_err_handler
+	.err_handler	= &mlx5_err_handler,
+	.sriov_configure   = mlx5_core_sriov_configure,
 };
 
 static int __init init(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index cee5b7a839bc..ea6a137fd76c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -36,6 +36,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/if_link.h>
 
 #define DRIVER_NAME "mlx5_core"
 #define DRIVER_VERSION "3.0-1"
@@ -64,6 +65,9 @@ do {									\
 		(__dev)->priv.name, __func__, __LINE__, current->pid,	\
 		##__VA_ARGS__)
 
+#define mlx5_core_info(__dev, format, ...)				\
+	dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__)
+
 enum {
 	MLX5_CMD_DATA, /* print command payload only */
 	MLX5_CMD_TIME, /* print command execution time */
@@ -90,6 +94,10 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
 		     unsigned long param);
 void mlx5_enter_error_state(struct mlx5_core_dev *dev);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
+int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
+int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
+int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
+int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
 
 void mlx5e_init(void);
 void mlx5e_cleanup(void);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 4d3377b12657..9eeee0545f1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -33,6 +33,7 @@
 #include <linux/highmem.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
@@ -95,6 +96,7 @@ struct mlx5_manage_pages_outbox {
 
 enum {
 	MAX_RECLAIM_TIME_MSECS	= 5000,
+	MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60,
 };
 
 enum {
@@ -352,6 +354,10 @@ retry:
 		goto out_4k;
 	}
 
+	dev->priv.fw_pages += npages;
+	if (func_id)
+		dev->priv.vfs_pages += npages;
+
 	mlx5_core_dbg(dev, "err %d\n", err);
 
 	kvfree(in);
@@ -405,6 +411,12 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 	}
 
 	num_claimed = be32_to_cpu(out->num_entries);
+	if (num_claimed > npages) {
+		mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n",
+			       num_claimed, npages);
+		err = -EINVAL;
+		goto out_free;
+	}
 	if (nclaimed)
 		*nclaimed = num_claimed;
 
@@ -412,6 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 		addr = be64_to_cpu(out->pas[i]);
 		free_4k(dev, addr);
 	}
+	dev->priv.fw_pages -= num_claimed;
+	if (func_id)
+		dev->priv.vfs_pages -= num_claimed;
 
 out_free:
 	kvfree(out);
@@ -548,3 +563,26 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
 {
 	destroy_workqueue(dev->priv.pg_wq);
 }
+
+/* Wait for vfs_pages to reach 0; -ETIMEDOUT after 2 min without progress. */
+int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
+{
+	unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
+	int prev_vfs_pages = dev->priv.vfs_pages;
+
+	mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages,
+		      dev->priv.name);
+	while (dev->priv.vfs_pages) {
+		if (time_after(jiffies, end)) {
+			mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages);
+			return -ETIMEDOUT;
+		}
+		if (dev->priv.vfs_pages < prev_vfs_pages) { /* progress: restart timeout */
+			end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
+			prev_vfs_pages = dev->priv.vfs_pages;
+		}
+		msleep(50);
+	}
+	mlx5_core_dbg(dev, "All pages received from %s\n", dev->priv.name);
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
new file mode 100644
index 000000000000..7b24386794f9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2014, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#ifdef CONFIG_MLX5_CORE_EN
+#include "eswitch.h"
+#endif
+
+static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs)
+{
+	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+	int err;
+	int vf;
+
+	for (vf = 1; vf <= num_vfs; vf++) { /* func_id vf <-> vfs_ctx[vf - 1] */
+		err = mlx5_core_enable_hca(dev, vf);
+		if (err) { /* not fatal: warn and carry on with the next VF */
+			mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1);
+		} else {
+			sriov->vfs_ctx[vf - 1].enabled = 1;
+			mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1);
+		}
+	}
+}
+
+static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs)
+{
+	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+	int vf;
+
+	for (vf = 1; vf <= num_vfs; vf++) { /* func_id vf <-> vfs_ctx[vf - 1] */
+		if (sriov->vfs_ctx[vf - 1].enabled) { /* only VFs still marked enabled */
+			if (mlx5_core_disable_hca(dev, vf))
+				mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1);
+			else /* flag is cleared only once the command succeeded */
+				sriov->vfs_ctx[vf - 1].enabled = 0;
+		}
+	}
+}
+
+static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs)
+{
+	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
+	int err;
+
+	if (pci_num_vf(pdev))
+		pci_disable_sriov(pdev);
+
+	enable_vfs(dev, num_vfs);
+
+	err = pci_enable_sriov(pdev, num_vfs);
+	if (err) {
+		dev_warn(&pdev->dev, "enable sriov failed %d\n", err);
+		goto ex;
+	}
+
+	return 0;
+
+ex:
+	disable_vfs(dev, num_vfs);
+	return err;
+}
+
+/* Allocate vfs_ctx[] and create num_vfs VFs; may sleep, hence GFP_KERNEL. */
+static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs)
+{
+	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
+	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+	int err;
+
+	kfree(sriov->vfs_ctx);
+	sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
+	if (!sriov->vfs_ctx)
+		return -ENOMEM;
+	sriov->enabled_vfs = num_vfs;
+	err = mlx5_core_create_vfs(pdev, num_vfs);
+	if (err) {
+		kfree(sriov->vfs_ctx);
+		sriov->vfs_ctx = NULL;
+		return err;
+	}
+
+	return 0;
+}
+
+static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs)
+{
+	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+
+	sriov->num_vfs = num_vfs;
+}
+
+static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_sriov *sriov;
+
+	sriov = &dev->priv.sriov;
+	disable_vfs(dev, sriov->num_vfs);
+
+	if (mlx5_wait_for_vf_pages(dev))
+		mlx5_core_warn(dev, "timeout claiming VFs pages\n");
+
+	sriov->num_vfs = 0;
+}
+
+/* Tear down the current VFs, then enable num_vfs new ones (0 only disables). */
+int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
+	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+	int err;
+
+	mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs);
+	if (!mlx5_core_is_pf(dev))
+		return -EPERM;
+
+	mlx5_core_cleanup_vfs(dev);
+	if (!num_vfs) {
+#ifdef CONFIG_MLX5_CORE_EN
+		mlx5_eswitch_disable_sriov(dev->priv.eswitch);
+#endif
+		kfree(sriov->vfs_ctx);
+		sriov->vfs_ctx = NULL;
+		if (!pci_vfs_assigned(pdev))
+			pci_disable_sriov(pdev);
+		else
+			pr_info("unloading PF driver while leaving orphan VFs\n");
+		return 0;
+	}
+
+	err = mlx5_core_sriov_enable(pdev, num_vfs);
+	if (err) {
+		dev_warn(&pdev->dev, "mlx5_core_sriov_enable failed %d\n", err);
+		return err;
+	}
+
+	mlx5_core_init_vfs(dev, num_vfs);
+#ifdef CONFIG_MLX5_CORE_EN
+	mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs);
+#endif
+
+	return num_vfs;
+}
+
+static int sync_required(struct pci_dev *pdev)
+{
+	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
+	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+	int cur_vfs = pci_num_vf(pdev);
+
+	if (cur_vfs != sriov->num_vfs) {
+		pr_info("current VFs %d, registered %d - sync needed\n", cur_vfs, sriov->num_vfs);
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Sync the driver's VF bookkeeping with pci_num_vf(); a no-op on non-PFs. */
+int mlx5_sriov_init(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+	struct pci_dev *pdev = dev->pdev;
+	int cur_vfs;
+
+	if (!mlx5_core_is_pf(dev))
+		return 0;
+
+	if (!sync_required(dev->pdev))
+		return 0;
+	/* PCI core's VF count differs from sriov->num_vfs: adopt the PCI count */
+	cur_vfs = pci_num_vf(pdev);
+	sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
+	if (!sriov->vfs_ctx)
+		return -ENOMEM;
+
+	sriov->enabled_vfs = cur_vfs;
+	mlx5_core_init_vfs(dev, cur_vfs);
+#ifdef CONFIG_MLX5_CORE_EN
+	if (cur_vfs)
+		mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs);
+#endif
+
+	enable_vfs(dev, cur_vfs);
+
+	return 0;
+}
+
+int mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
+{
+	struct pci_dev *pdev = dev->pdev;
+	int err;
+
+	if (!mlx5_core_is_pf(dev))
+		return 0;
+
+	err = mlx5_core_sriov_configure(pdev, 0);
+	if (err)
+		return err;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index b94177ebcf3a..076197efea9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -36,54 +36,399 @@
 #include <linux/mlx5/vport.h>
 #include "mlx5_core.h"
 
-u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod)
+static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod,
+				   u16 vport, u32 *out, int outlen)
 {
-	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)];
-	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)];
 	int err;
+	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)];
 
 	memset(in, 0, sizeof(in));
 
 	MLX5_SET(query_vport_state_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_VPORT_STATE);
 	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
+	MLX5_SET(query_vport_state_in, in, vport_number, vport);
+	if (vport)
+		MLX5_SET(query_vport_state_in, in, other_vport, 1);
 
-	err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out,
-					 sizeof(out));
+	err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
 	if (err)
 		mlx5_core_warn(mdev, "MLX5_CMD_OP_QUERY_VPORT_STATE failed\n");
 
+	return err;
+}
+
+u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
+{
+	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0};
+
+	_mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out));
+
 	return MLX5_GET(query_vport_state_out, out, state);
 }
-EXPORT_SYMBOL(mlx5_query_vport_state);
+EXPORT_SYMBOL_GPL(mlx5_query_vport_state);
+
+u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
+{
+	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0};
+
+	_mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out));
+
+	return MLX5_GET(query_vport_state_out, out, admin_state);
+}
+EXPORT_SYMBOL(mlx5_query_vport_admin_state);
 
-void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr)
+int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
+				  u16 vport, u8 state)
+{
+	u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)];
+	u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)];
+	int err;
+
+	memset(in, 0, sizeof(in));
+
+	MLX5_SET(modify_vport_state_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_VPORT_STATE);
+	MLX5_SET(modify_vport_state_in, in, op_mod, opmod);
+	MLX5_SET(modify_vport_state_in, in, vport_number, vport);
+
+	if (vport)
+		MLX5_SET(modify_vport_state_in, in, other_vport, 1);
+
+	MLX5_SET(modify_vport_state_in, in, admin_state, state);
+
+	err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out,
+					 sizeof(out));
+	if (err)
+		mlx5_core_warn(mdev, "MLX5_CMD_OP_MODIFY_VPORT_STATE failed\n");
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_modify_vport_admin_state);
+
+static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
+					u32 *out, int outlen)
+{
+	u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)];
+
+	memset(in, 0, sizeof(in));
+
+	MLX5_SET(query_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+
+	MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+	if (vport)
+		MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+
+	return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
+}
+
+static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in,
+					 int inlen)
+{
+	u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+
+	MLX5_SET(modify_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+
+	memset(out, 0, sizeof(out));
+	return mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out));
+}
+
+int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
+				     u16 vport, u8 *addr)
 {
-	u32  in[MLX5_ST_SZ_DW(query_nic_vport_context_in)];
 	u32 *out;
 	int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
 	u8 *out_addr;
+	int err;
 
 	out = mlx5_vzalloc(outlen);
 	if (!out)
-		return;
+		return -ENOMEM;
 
 	out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out,
 				nic_vport_context.permanent_address);
 
+	err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
+	if (err)
+		goto out;
+
+	ether_addr_copy(addr, &out_addr[2]);
+
+out:
+	kvfree(out);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address);
+
+int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
+				      u16 vport, u8 *addr)
+{
+	void *in;
+	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+	int err;
+	void *nic_vport_ctx;
+	u8 *perm_mac;
+
+	in = mlx5_vzalloc(inlen);
+	if (!in) {
+		mlx5_core_warn(mdev, "failed to allocate inbox\n");
+		return -ENOMEM;
+	}
+
+	MLX5_SET(modify_nic_vport_context_in, in,
+		 field_select.permanent_address, 1);
+	MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+
+	if (vport)
+		MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+
+	nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+				     in, nic_vport_context);
+	perm_mac = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx,
+				permanent_address);
+
+	ether_addr_copy(&perm_mac[2], addr);
+
+	err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+	kvfree(in);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address);
+
+/* Query a vport's MAC list; *list_size is in: entries wanted, out: entries read. */
+int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
+				  u32 vport,
+				  enum mlx5_list_type list_type,
+				  u8 addr_list[][ETH_ALEN],
+				  int *list_size)
+{
+	u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)];
+	void *nic_vport_ctx;
+	int max_list_size;
+	int req_list_size;
+	int out_sz;
+	void *out;
+	int err;
+	int i;
+
+	req_list_size = *list_size;
+	max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ?
+		1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) :
+		1 << MLX5_CAP_GEN(dev, log_max_current_mc_list);
+
+	if (req_list_size > max_list_size) {
+		mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n",
+			       req_list_size, max_list_size);
+		req_list_size = max_list_size;
+	}
+
+	out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+			req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
 	memset(in, 0, sizeof(in));
+	out = kzalloc(out_sz, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
 
 	MLX5_SET(query_nic_vport_context_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+	MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type);
+	MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
 
-	memset(out, 0, outlen);
-	mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
+	if (vport)
+		MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
 
-	ether_addr_copy(addr, &out_addr[2]);
+	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz);
+	if (err)
+		goto out;
 
-	kvfree(out);
+	nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out,
+				     nic_vport_context);
+	/* out holds req_list_size entries at most: clamp the FW count */
+	req_list_size = min_t(int, req_list_size,
+			      MLX5_GET(nic_vport_context, nic_vport_ctx,
+				       allowed_list_size));
+	*list_size = req_list_size;
+	for (i = 0; i < req_list_size; i++) {
+		u8 *mac_addr = MLX5_ADDR_OF(nic_vport_context,
+					nic_vport_ctx,
+					current_uc_mac_address[i]) + 2;
+		ether_addr_copy(addr_list[i], mac_addr);
+	}
+out:
+	kfree(out);
+	return err;
 }
-EXPORT_SYMBOL(mlx5_query_nic_vport_mac_address);
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list);
+
+int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
+				   enum mlx5_list_type list_type,
+				   u8 addr_list[][ETH_ALEN],
+				   int list_size)
+{
+	u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+	void *nic_vport_ctx;
+	int max_list_size;
+	int in_sz;
+	void *in;
+	int err;
+	int i;
+
+	max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ?
+		 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) :
+		 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list);
+
+	if (list_size > max_list_size)
+		return -ENOSPC;
+
+	in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+		list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
+
+	memset(out, 0, sizeof(out));
+	in = kzalloc(in_sz, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(modify_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+	MLX5_SET(modify_nic_vport_context_in, in,
+		 field_select.addresses_list, 1);
+
+	nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in,
+				     nic_vport_context);
+
+	MLX5_SET(nic_vport_context, nic_vport_ctx,
+		 allowed_list_type, list_type);
+	MLX5_SET(nic_vport_context, nic_vport_ctx,
+		 allowed_list_size, list_size);
+
+	for (i = 0; i < list_size; i++) {
+		u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context,
+					    nic_vport_ctx,
+					    current_uc_mac_address[i]) + 2;
+		ether_addr_copy(curr_mac, addr_list[i]);
+	}
+
+	err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out));
+	kfree(in);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list);
+
+/* Query a vport's VLAN list; *size is in: entries wanted, out: entries read. */
+int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev,
+			       u32 vport,
+			       u16 vlans[],
+			       int *size)
+{
+	u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)];
+	void *nic_vport_ctx;
+	int req_list_size;
+	int max_list_size;
+	int out_sz;
+	void *out;
+	int err;
+	int i;
+
+	req_list_size = *size;
+	max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list);
+	if (req_list_size > max_list_size) {
+		mlx5_core_warn(dev, "Requested list size (%d) > (%d) max list size\n",
+			       req_list_size, max_list_size);
+		req_list_size = max_list_size;
+	}
+
+	out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+			req_list_size * MLX5_ST_SZ_BYTES(vlan_layout);
+	memset(in, 0, sizeof(in));
+	out = kzalloc(out_sz, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	MLX5_SET(query_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+	MLX5_SET(query_nic_vport_context_in, in, allowed_list_type,
+		 MLX5_NVPRT_LIST_TYPE_VLAN);
+	MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+
+	if (vport)
+		MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+
+	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz);
+	if (err)
+		goto out;
+	nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out,
+				     nic_vport_context);
+	/* out holds req_list_size entries at most: clamp the FW count */
+	req_list_size = min_t(int, req_list_size,
+			      MLX5_GET(nic_vport_context, nic_vport_ctx,
+				       allowed_list_size));
+	*size = req_list_size;
+	for (i = 0; i < req_list_size; i++) {
+		void *vlan_addr = MLX5_ADDR_OF(nic_vport_context,
+					       nic_vport_ctx,
+					       current_uc_mac_address[i]);
+		vlans[i] = MLX5_GET(vlan_layout, vlan_addr, vlan);
+	}
+out:
+	kfree(out);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_vlans);
+
+int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
+				u16 vlans[],
+				int list_size)
+{
+	u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+	void *nic_vport_ctx;
+	int max_list_size;
+	int in_sz;
+	void *in;
+	int err;
+	int i;
+
+	max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list);
+
+	if (list_size > max_list_size)
+		return -ENOSPC;
+
+	in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+		list_size * MLX5_ST_SZ_BYTES(vlan_layout);
+
+	memset(out, 0, sizeof(out));
+	in = kzalloc(in_sz, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(modify_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+	MLX5_SET(modify_nic_vport_context_in, in,
+		 field_select.addresses_list, 1);
+
+	nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in,
+				     nic_vport_context);
+
+	MLX5_SET(nic_vport_context, nic_vport_ctx,
+		 allowed_list_type, MLX5_NVPRT_LIST_TYPE_VLAN);
+	MLX5_SET(nic_vport_context, nic_vport_ctx,
+		 allowed_list_size, list_size);
+
+	for (i = 0; i < list_size; i++) {
+		void *vlan_addr = MLX5_ADDR_OF(nic_vport_context,
+					       nic_vport_ctx,
+					       current_uc_mac_address[i]);
+		MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]);
+	}
+
+	err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out));
+	kfree(in);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
 
 int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
 			     u8 port_num, u16  vf_num, u16 gid_index,
@@ -343,3 +688,65 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid);
+
+int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
+				 u32 vport,
+				 int *promisc_uc,
+				 int *promisc_mc,
+				 int *promisc_all)
+{
+	u32 *out;
+	int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+	int err;
+
+	out = kzalloc(outlen, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
+	if (err)
+		goto out;
+
+	*promisc_uc = MLX5_GET(query_nic_vport_context_out, out,
+			       nic_vport_context.promisc_uc);
+	*promisc_mc = MLX5_GET(query_nic_vport_context_out, out,
+			       nic_vport_context.promisc_mc);
+	*promisc_all = MLX5_GET(query_nic_vport_context_out, out,
+				nic_vport_context.promisc_all);
+
+out:
+	kfree(out);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc);
+
+int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
+				  int promisc_uc,
+				  int promisc_mc,
+				  int promisc_all)
+{
+	void *in;
+	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+	int err;
+
+	in = mlx5_vzalloc(inlen);
+	if (!in) {
+		mlx5_core_err(mdev, "failed to allocate inbox\n");
+		return -ENOMEM;
+	}
+
+	MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1);
+	MLX5_SET(modify_nic_vport_context_in, in,
+		 nic_vport_context.promisc_uc, promisc_uc);
+	MLX5_SET(modify_nic_vport_context_in, in,
+		 nic_vport_context.promisc_mc, promisc_mc);
+	MLX5_SET(modify_nic_vport_context_in, in,
+		 nic_vport_context.promisc_all, promisc_all);
+
+	err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+	kvfree(in);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index e36e12219c9b..ec8caf8fedc6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -10,6 +10,14 @@ config MLXSW_CORE
 	  To compile this driver as a module, choose M here: the
 	  module will be called mlxsw_core.
 
+config MLXSW_CORE_HWMON
+	bool "HWMON support for Mellanox Technologies Switch ASICs"
+	depends on MLXSW_CORE && HWMON
+	depends on !(MLXSW_CORE=y && HWMON=m)
+	default y
+	---help---
+	  Say Y here if you want to expose HWMON interface on mlxsw devices.
+
 config MLXSW_PCI
 	tristate "PCI bus implementation for Mellanox Technologies Switch ASICs"
 	depends on PCI && HAS_DMA && HAS_IOMEM && MLXSW_CORE
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index af015818fd19..584cac444852 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_MLXSW_CORE)	+= mlxsw_core.o
 mlxsw_core-objs			:= core.o
+mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o
 obj-$(CONFIG_MLXSW_PCI)		+= mlxsw_pci.o
 mlxsw_pci-objs			:= pci.o
 obj-$(CONFIG_MLXSW_SWITCHX2)	+= mlxsw_switchx2.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 97f0d93caf99..af8a48b3b3ad 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -105,6 +105,10 @@ struct mlxsw_core {
 		struct debugfs_blob_wrapper vsd_blob;
 		struct debugfs_blob_wrapper psid_blob;
 	} dbg;
+	struct {
+		u8 *mapping; /* lag_id+port_index to local_port mapping */
+	} lag;
+	struct mlxsw_hwmon *hwmon;
 	unsigned long driver_priv[0];
 	/* driver_priv has to be always the last item */
 };
@@ -814,6 +818,17 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 		goto err_alloc_stats;
 	}
 
+	if (mlxsw_driver->profile->used_max_lag &&
+	    mlxsw_driver->profile->used_max_port_per_lag) {
+		alloc_size = sizeof(u8) * mlxsw_driver->profile->max_lag *
+			     mlxsw_driver->profile->max_port_per_lag;
+		mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL);
+		if (!mlxsw_core->lag.mapping) {
+			err = -ENOMEM;
+			goto err_alloc_lag_mapping;
+		}
+	}
+
 	err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile);
 	if (err)
 		goto err_bus_init;
@@ -822,6 +837,10 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 	if (err)
 		goto err_emad_init;
 
+	err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon);
+	if (err)
+		goto err_hwmon_init;
+
 	err = mlxsw_driver->init(mlxsw_core->driver_priv, mlxsw_core,
 				 mlxsw_bus_info);
 	if (err)
@@ -836,10 +855,14 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 err_debugfs_init:
 	mlxsw_core->driver->fini(mlxsw_core->driver_priv);
 err_driver_init:
+	mlxsw_hwmon_fini(mlxsw_core->hwmon);
+err_hwmon_init:
 	mlxsw_emad_fini(mlxsw_core);
 err_emad_init:
 	mlxsw_bus->fini(bus_priv);
 err_bus_init:
+	kfree(mlxsw_core->lag.mapping);
+err_alloc_lag_mapping:
 	free_percpu(mlxsw_core->pcpu_stats);
 err_alloc_stats:
 	kfree(mlxsw_core);
@@ -855,8 +878,10 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core)
 
 	mlxsw_core_debugfs_fini(mlxsw_core);
 	mlxsw_core->driver->fini(mlxsw_core->driver_priv);
+	mlxsw_hwmon_fini(mlxsw_core->hwmon);
 	mlxsw_emad_fini(mlxsw_core);
 	mlxsw_core->bus->fini(mlxsw_core->bus_priv);
+	kfree(mlxsw_core->lag.mapping);
 	free_percpu(mlxsw_core->pcpu_stats);
 	kfree(mlxsw_core);
 	mlxsw_core_driver_put(device_kind);
@@ -1188,11 +1213,25 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
 	struct mlxsw_rx_listener_item *rxl_item;
 	const struct mlxsw_rx_listener *rxl;
 	struct mlxsw_core_pcpu_stats *pcpu_stats;
-	u8 local_port = rx_info->sys_port;
+	u8 local_port;
 	bool found = false;
 
-	dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: sys_port = %d, trap_id = 0x%x\n",
-			    __func__, rx_info->sys_port, rx_info->trap_id);
+	if (rx_info->is_lag) {
+		dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: lag_id = %d, lag_port_index = 0x%x\n",
+				    __func__, rx_info->u.lag_id,
+				    rx_info->lag_port_index);
+		/* Upper layer does not care if the skb came from LAG or not,
+		 * so just get the local_port for the lag port and push it up.
+		 */
+		local_port = mlxsw_core_lag_mapping_get(mlxsw_core,
+							rx_info->u.lag_id,
+							rx_info->lag_port_index);
+	} else {
+		local_port = rx_info->u.sys_port;
+	}
+
+	dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: local_port = %d, trap_id = 0x%x\n",
+			    __func__, local_port, rx_info->trap_id);
 
 	if ((rx_info->trap_id >= MLXSW_TRAP_ID_MAX) ||
 	    (local_port >= MLXSW_PORT_MAX_PORTS))
@@ -1236,6 +1275,48 @@ drop:
 }
 EXPORT_SYMBOL(mlxsw_core_skb_receive);
 
+static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core,
+					u16 lag_id, u8 port_index)
+{
+	return mlxsw_core->driver->profile->max_port_per_lag * lag_id +
+	       port_index;
+}
+
+void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core,
+				u16 lag_id, u8 port_index, u8 local_port)
+{
+	int index = mlxsw_core_lag_mapping_index(mlxsw_core,
+						 lag_id, port_index);
+
+	mlxsw_core->lag.mapping[index] = local_port;
+}
+EXPORT_SYMBOL(mlxsw_core_lag_mapping_set);
+
+u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core,
+			      u16 lag_id, u8 port_index)
+{
+	int index = mlxsw_core_lag_mapping_index(mlxsw_core,
+						 lag_id, port_index);
+
+	return mlxsw_core->lag.mapping[index];
+}
+EXPORT_SYMBOL(mlxsw_core_lag_mapping_get);
+
+void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core,
+				  u16 lag_id, u8 local_port)
+{
+	int i;
+
+	for (i = 0; i < mlxsw_core->driver->profile->max_port_per_lag; i++) {
+		int index = mlxsw_core_lag_mapping_index(mlxsw_core,
+							 lag_id, i);
+
+		if (mlxsw_core->lag.mapping[index] == local_port)
+			mlxsw_core->lag.mapping[index] = 0;
+	}
+}
+EXPORT_SYMBOL(mlxsw_core_lag_mapping_clear);
+
 int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod,
 		   u32 in_mod, bool out_mbox_direct,
 		   char *in_mbox, size_t in_mbox_size,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 807827350a89..4833fb33ce07 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -112,13 +112,25 @@ int mlxsw_reg_write(struct mlxsw_core *mlxsw_core,
 		    const struct mlxsw_reg_info *reg, char *payload);
 
 struct mlxsw_rx_info {
-	u16 sys_port;
+	bool is_lag;
+	union {
+		u16 sys_port;
+		u16 lag_id;
+	} u;
+	u8 lag_port_index;
 	int trap_id;
 };
 
 void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
 			    struct mlxsw_rx_info *rx_info);
 
+void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core,
+				u16 lag_id, u8 port_index, u8 local_port);
+u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core,
+			      u16 lag_id, u8 port_index);
+void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core,
+				  u16 lag_id, u8 local_port);
+
 #define MLXSW_CONFIG_PROFILE_SWID_COUNT 8
 
 struct mlxsw_swid_config {
@@ -209,4 +221,28 @@ struct mlxsw_bus_info {
 	u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN];
 };
 
+struct mlxsw_hwmon;
+
+#ifdef CONFIG_MLXSW_CORE_HWMON
+
+int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
+		     const struct mlxsw_bus_info *mlxsw_bus_info,
+		     struct mlxsw_hwmon **p_hwmon);
+void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon);
+
+#else
+
+static inline int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
+				   const struct mlxsw_bus_info *mlxsw_bus_info,
+				   struct mlxsw_hwmon **p_hwmon)
+{
+	return 0;
+}
+
+static inline void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+}
+
+#endif
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
new file mode 100644
index 000000000000..b86db967eab9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -0,0 +1,342 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/hwmon.h>
+#include <linux/err.h>
+
+#include "core.h"
+
+#define MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT 127
+#define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT * 4 + \
+				MLXSW_MFCR_TACHOS_MAX + MLXSW_MFCR_PWMS_MAX)
+
+struct mlxsw_hwmon_attr {
+	struct device_attribute dev_attr;
+	struct mlxsw_hwmon *hwmon;
+	unsigned int type_index;
+	char name[16];
+};
+
+struct mlxsw_hwmon {
+	struct mlxsw_core *core;
+	const struct mlxsw_bus_info *bus_info;
+	struct device *hwmon_dev;
+	struct attribute_group group;
+	const struct attribute_group *groups[2];
+	struct attribute *attrs[MLXSW_HWMON_ATTR_COUNT + 1];
+	struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT];
+	unsigned int attrs_count;
+};
+
+static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	unsigned int temp;
+	int err;
+
+	mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index,
+			    false, false);
+	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
+	if (err) {
+		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
+		return err;
+	}
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+	return sprintf(buf, "%u\n", temp);
+}
+
+static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	unsigned int temp_max;
+	int err;
+
+	mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index,
+			    false, false);
+	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
+	if (err) {
+		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
+		return err;
+	}
+	mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL);
+	return sprintf(buf, "%u\n", temp_max);
+}
+
+static ssize_t mlxsw_hwmon_fan_rpm_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+	char mfsm_pl[MLXSW_REG_MFSM_LEN];
+	int err;
+
+	mlxsw_reg_mfsm_pack(mfsm_pl, mlwsw_hwmon_attr->type_index);
+	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsm), mfsm_pl);
+	if (err) {
+		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n");
+		return err;
+	}
+	return sprintf(buf, "%u\n", mlxsw_reg_mfsm_rpm_get(mfsm_pl));
+}
+
+static ssize_t mlxsw_hwmon_pwm_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+	char mfsc_pl[MLXSW_REG_MFSC_LEN];
+	int err;
+
+	mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, 0);
+	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl);
+	if (err) {
+		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query PWM\n");
+		return err;
+	}
+	return sprintf(buf, "%u\n",
+		       mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl));
+}
+
+static ssize_t mlxsw_hwmon_pwm_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t len)
+{
+	struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+	struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+	char mfsc_pl[MLXSW_REG_MFSC_LEN];
+	unsigned long val;
+	int err;
+
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val > 255)
+		return -EINVAL;
+
+	mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, val);
+	err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl);
+	if (err) {
+		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to write PWM\n");
+		return err;
+	}
+	return len;
+}
+
+enum mlxsw_hwmon_attr_type {
+	MLXSW_HWMON_ATTR_TYPE_TEMP,
+	MLXSW_HWMON_ATTR_TYPE_TEMP_MAX,
+	MLXSW_HWMON_ATTR_TYPE_FAN_RPM,
+	MLXSW_HWMON_ATTR_TYPE_PWM,
+};
+
+/* Fill the next free attribute slot with a sysfs entry of the given type. */
+static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
+				 enum mlxsw_hwmon_attr_type attr_type,
+				 unsigned int type_index, unsigned int num)
+{
+	unsigned int attr_index = mlxsw_hwmon->attrs_count;
+	struct mlxsw_hwmon_attr *mlxsw_hwmon_attr;
+
+	mlxsw_hwmon_attr = &mlxsw_hwmon->hwmon_attrs[attr_index];
+	switch (attr_type) {
+	case MLXSW_HWMON_ATTR_TYPE_TEMP:
+		mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_show;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO;
+		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+			 "temp%u_input", num + 1);
+		break;
+	case MLXSW_HWMON_ATTR_TYPE_TEMP_MAX:
+		mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_max_show;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO;
+		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+			 "temp%u_highest", num + 1);
+		break;
+	case MLXSW_HWMON_ATTR_TYPE_FAN_RPM:
+		mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_fan_rpm_show;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO;
+		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+			 "fan%u_input", num + 1);
+		break;
+	case MLXSW_HWMON_ATTR_TYPE_PWM:
+		mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_pwm_show;
+		mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_pwm_store;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = S_IWUSR | S_IRUGO;
+		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+			 "pwm%u", num + 1);
+		break;
+	default:
+		WARN_ON(1);
+		return; /* never publish a nameless attribute */
+	}
+
+	mlxsw_hwmon_attr->type_index = type_index;
+	mlxsw_hwmon_attr->hwmon = mlxsw_hwmon;
+	mlxsw_hwmon_attr->dev_attr.attr.name = mlxsw_hwmon_attr->name;
+	sysfs_attr_init(&mlxsw_hwmon_attr->dev_attr.attr);
+	mlxsw_hwmon->attrs[attr_index] = &mlxsw_hwmon_attr->dev_attr.attr;
+	mlxsw_hwmon->attrs_count++;
+}
+
+static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+	char mtcap_pl[MLXSW_REG_MTCAP_LEN];
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	u8 sensor_count;
+	int i;
+	int err;
+
+	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtcap), mtcap_pl);
+	if (err) {
+		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get number of temp sensors\n");
+		return err;
+	}
+	sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl);
+	for (i = 0; i < sensor_count; i++) {
+		mlxsw_reg_mtmp_pack(mtmp_pl, i, true, true);
+		err = mlxsw_reg_write(mlxsw_hwmon->core,
+				      MLXSW_REG(mtmp), mtmp_pl);
+		if (err) {
+			dev_err(mlxsw_hwmon->bus_info->dev, "Failed to setup temp sensor number %d\n",
+				i);
+			return err;
+		}
+		mlxsw_hwmon_attr_add(mlxsw_hwmon,
+				     MLXSW_HWMON_ATTR_TYPE_TEMP, i, i);
+		mlxsw_hwmon_attr_add(mlxsw_hwmon,
+				     MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, i, i);
+	}
+	return 0;
+}
+
+static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+	char mfcr_pl[MLXSW_REG_MFCR_LEN];
+	enum mlxsw_reg_mfcr_pwm_frequency freq;
+	unsigned int type_index;
+	unsigned int num;
+	u16 tacho_active;
+	u8 pwm_active;
+	int err;
+
+	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfcr), mfcr_pl);
+	if (err) {
+		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get to probe PWMs and Tachometers\n");
+		return err;
+	}
+	mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active);
+	num = 0;
+	for (type_index = 0; type_index < MLXSW_MFCR_TACHOS_MAX; type_index++) {
+		if (tacho_active & BIT(type_index))
+			mlxsw_hwmon_attr_add(mlxsw_hwmon,
+					     MLXSW_HWMON_ATTR_TYPE_FAN_RPM,
+					     type_index, num++);
+	}
+	num = 0;
+	for (type_index = 0; type_index < MLXSW_MFCR_PWMS_MAX; type_index++) {
+		if (pwm_active & BIT(type_index))
+			mlxsw_hwmon_attr_add(mlxsw_hwmon,
+					     MLXSW_HWMON_ATTR_TYPE_PWM,
+					     type_index, num++);
+	}
+	return 0;
+}
+
+/* Probe sensors and fans and register a "mlxsw" hwmon device for them; returns
+ * 0 and sets *p_hwmon, or a negative errno. Registration is non-devm so that
+ * mlxsw_hwmon_fini() can remove the device before freeing the attrs it uses.
+ */
+int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
+		     const struct mlxsw_bus_info *mlxsw_bus_info,
+		     struct mlxsw_hwmon **p_hwmon)
+{
+	struct mlxsw_hwmon *mlxsw_hwmon;
+	struct device *hwmon_dev;
+	int err;
+
+	mlxsw_hwmon = kzalloc(sizeof(*mlxsw_hwmon), GFP_KERNEL);
+	if (!mlxsw_hwmon)
+		return -ENOMEM;
+	mlxsw_hwmon->core = mlxsw_core;
+	mlxsw_hwmon->bus_info = mlxsw_bus_info;
+
+	err = mlxsw_hwmon_temp_init(mlxsw_hwmon);
+	if (err)
+		goto err_free;
+	err = mlxsw_hwmon_fans_init(mlxsw_hwmon);
+	if (err)
+		goto err_free;
+
+	mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group;
+	mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs;
+
+	hwmon_dev = hwmon_device_register_with_groups(mlxsw_bus_info->dev,
+						      "mlxsw", mlxsw_hwmon,
+						      mlxsw_hwmon->groups);
+	if (IS_ERR(hwmon_dev)) {
+		err = PTR_ERR(hwmon_dev);
+		goto err_free;
+	}
+	mlxsw_hwmon->hwmon_dev = hwmon_dev;
+	*p_hwmon = mlxsw_hwmon;
+	return 0;
+
+err_free:
+	kfree(mlxsw_hwmon);
+	return err;
+}
+
+void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+	hwmon_device_unregister(mlxsw_hwmon->hwmon_dev);
+	kfree(mlxsw_hwmon);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index de69e719dc9d..d2102e572b1d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -686,11 +686,15 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
 	if (q->consumer_counter++ != consumer_counter_limit)
 		dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n");
 
-	/* We do not support lag now */
-	if (mlxsw_pci_cqe_lag_get(cqe))
-		goto drop;
+	if (mlxsw_pci_cqe_lag_get(cqe)) {
+		rx_info.is_lag = true;
+		rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe);
+		rx_info.lag_port_index = mlxsw_pci_cqe_lag_port_index_get(cqe);
+	} else {
+		rx_info.is_lag = false;
+		rx_info.u.sys_port = mlxsw_pci_cqe_system_port_get(cqe);
+	}
 
-	rx_info.sys_port = mlxsw_pci_cqe_system_port_get(cqe);
 	rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe);
 
 	byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
@@ -699,7 +703,6 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
 	skb_put(skb, byte_count);
 	mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);
 
-put_new_skb:
 	memset(wqe, 0, q->elem_size);
 	err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info);
 	if (err)
@@ -708,10 +711,6 @@ put_new_skb:
 	q->producer_counter++;
 	mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
 	return;
-
-drop:
-	dev_kfree_skb_any(skb);
-	goto put_new_skb;
 }
 
 static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h
index 142f33d978c5..912106054ff2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h
@@ -129,13 +129,15 @@ MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false);
  */
 MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1);
 
-/* pci_cqe_system_port
+/* pci_cqe_system_port/lag_id
  * When lag=0: System port on which the packet was received
  * When lag=1:
  * bits [15:4] LAG ID on which the packet was received
  * bits [3:0] sub_port on which the packet was received
  */
 MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16);
+MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12);
+MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4);
 
 /* pci_cqe_wqe_counter
  * WQE count of the WQEs completed on the associated dqn
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 236fb5d2ad69..af631df4603a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -286,6 +286,7 @@ MLXSW_ITEM32_INDEXED(reg, sfd, rec_swid, MLXSW_REG_SFD_BASE_LEN, 24, 8,
 
 enum mlxsw_reg_sfd_rec_type {
 	MLXSW_REG_SFD_REC_TYPE_UNICAST = 0x0,
+	MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG = 0x1,
 };
 
 /* reg_sfd_rec_type
@@ -376,24 +377,34 @@ MLXSW_ITEM32_INDEXED(reg, sfd, uc_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16,
 MLXSW_ITEM32_INDEXED(reg, sfd, uc_system_port, MLXSW_REG_SFD_BASE_LEN, 0, 16,
 		     MLXSW_REG_SFD_REC_LEN, 0x0C, false);
 
-static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index,
-					 enum mlxsw_reg_sfd_rec_policy policy,
-					 const char *mac, u16 vid,
-					 enum mlxsw_reg_sfd_rec_action action,
-					 u8 local_port)
+static inline void mlxsw_reg_sfd_rec_pack(char *payload, int rec_index,
+					  enum mlxsw_reg_sfd_rec_type rec_type,
+					  enum mlxsw_reg_sfd_rec_policy policy,
+					  const char *mac,
+					  enum mlxsw_reg_sfd_rec_action action)
 {
 	u8 num_rec = mlxsw_reg_sfd_num_rec_get(payload);
 
 	if (rec_index >= num_rec)
 		mlxsw_reg_sfd_num_rec_set(payload, rec_index + 1);
 	mlxsw_reg_sfd_rec_swid_set(payload, rec_index, 0);
-	mlxsw_reg_sfd_rec_type_set(payload, rec_index,
-				   MLXSW_REG_SFD_REC_TYPE_UNICAST);
+	mlxsw_reg_sfd_rec_type_set(payload, rec_index, rec_type);
 	mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy);
 	mlxsw_reg_sfd_rec_mac_memcpy_to(payload, rec_index, mac);
+	mlxsw_reg_sfd_rec_action_set(payload, rec_index, action);
+}
+
+static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index,
+					 enum mlxsw_reg_sfd_rec_policy policy,
+					 const char *mac, u16 vid,
+					 enum mlxsw_reg_sfd_rec_action action,
+					 u8 local_port)
+{
+	mlxsw_reg_sfd_rec_pack(payload, rec_index,
+			       MLXSW_REG_SFD_REC_TYPE_UNICAST,
+			       policy, mac, action);
 	mlxsw_reg_sfd_uc_sub_port_set(payload, rec_index, 0);
 	mlxsw_reg_sfd_uc_fid_vid_set(payload, rec_index, vid);
-	mlxsw_reg_sfd_rec_action_set(payload, rec_index, action);
 	mlxsw_reg_sfd_uc_system_port_set(payload, rec_index, local_port);
 }
 
@@ -406,6 +417,58 @@ static inline void mlxsw_reg_sfd_uc_unpack(char *payload, int rec_index,
 	*p_local_port = mlxsw_reg_sfd_uc_system_port_get(payload, rec_index);
 }
 
+/* reg_sfd_uc_lag_sub_port
+ * LAG sub port.
+ * Must be 0 if multichannel VEPA is not enabled.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8,
+		     MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
+/* reg_sfd_uc_lag_fid_vid
+ * Filtering ID or VLAN ID
+ * For SwitchX and SwitchX-2:
+ * - Dynamic entries (policy 2,3) use FID
+ * - Static entries (policy 0) use VID
+ * - When independent learning is configured, VID=FID
+ * For Spectrum: use FID for both Dynamic and Static entries.
+ * VID should not be used.
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16,
+		     MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
+/* reg_sfd_uc_lag_lag_id
+ * LAG Identifier - pointer into the LAG descriptor table.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_id, MLXSW_REG_SFD_BASE_LEN, 0, 10,
+		     MLXSW_REG_SFD_REC_LEN, 0x0C, false);
+
+static inline void
+mlxsw_reg_sfd_uc_lag_pack(char *payload, int rec_index,
+			  enum mlxsw_reg_sfd_rec_policy policy,
+			  const char *mac, u16 vid,
+			  enum mlxsw_reg_sfd_rec_action action,
+			  u16 lag_id)
+{
+	mlxsw_reg_sfd_rec_pack(payload, rec_index,
+			       MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG,
+			       policy, mac, action);
+	mlxsw_reg_sfd_uc_lag_sub_port_set(payload, rec_index, 0);
+	mlxsw_reg_sfd_uc_lag_fid_vid_set(payload, rec_index, vid);
+	mlxsw_reg_sfd_uc_lag_lag_id_set(payload, rec_index, lag_id);
+}
+
+static inline void mlxsw_reg_sfd_uc_lag_unpack(char *payload, int rec_index,
+					       char *mac, u16 *p_vid,
+					       u16 *p_lag_id)
+{
+	mlxsw_reg_sfd_rec_mac_memcpy_from(payload, rec_index, mac);
+	*p_vid = mlxsw_reg_sfd_uc_lag_fid_vid_get(payload, rec_index);
+	*p_lag_id = mlxsw_reg_sfd_uc_lag_lag_id_get(payload, rec_index);
+}
+
 /* SFN - Switch FDB Notification Register
  * -------------------------------------------
  * The switch provides notifications on newly learned FDB entries and
@@ -456,8 +519,12 @@ MLXSW_ITEM32_INDEXED(reg, sfn, rec_swid, MLXSW_REG_SFN_BASE_LEN, 24, 8,
 enum mlxsw_reg_sfn_rec_type {
 	/* MAC addresses learned on a regular port. */
 	MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC = 0x5,
-	/* Aged-out MAC address on a regular port */
+	/* MAC addresses learned on a LAG port. */
+	MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG = 0x6,
+	/* Aged-out MAC address on a regular port. */
 	MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC = 0x7,
+	/* Aged-out MAC address on a LAG port. */
+	MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG = 0x8,
 };
 
 /* reg_sfn_rec_type
@@ -505,6 +572,22 @@ static inline void mlxsw_reg_sfn_mac_unpack(char *payload, int rec_index,
 	*p_local_port = mlxsw_reg_sfn_mac_system_port_get(payload, rec_index);
 }
 
+/* reg_sfn_mac_lag_lag_id
+ * LAG ID (pointer into the LAG descriptor table).
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, sfn, mac_lag_lag_id, MLXSW_REG_SFN_BASE_LEN, 0, 10,
+		     MLXSW_REG_SFN_REC_LEN, 0x0C, false);
+
+static inline void mlxsw_reg_sfn_mac_lag_unpack(char *payload, int rec_index,
+						char *mac, u16 *p_vid,
+						u16 *p_lag_id)
+{
+	mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac);
+	*p_vid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index);
+	*p_lag_id = mlxsw_reg_sfn_mac_lag_lag_id_get(payload, rec_index);
+}
+
 /* SPMS - Switch Port MSTP/RSTP State Register
  * -------------------------------------------
  * Configures the spanning tree state of a physical port.
@@ -865,6 +948,293 @@ static inline void mlxsw_reg_sftr_pack(char *payload,
 	mlxsw_reg_sftr_port_mask_set(payload, port, 1);
 }
 
+/* SLDR - Switch LAG Descriptor Register
+ * -----------------------------------------
+ * The switch LAG descriptor register is populated by LAG descriptors.
+ * Each LAG descriptor is indexed by lag_id. The LAG ID runs from 0 to
+ * max_lag-1.
+ */
+#define MLXSW_REG_SLDR_ID 0x2014
+#define MLXSW_REG_SLDR_LEN 0x0C /* counting in only one port in list */
+
+static const struct mlxsw_reg_info mlxsw_reg_sldr = {
+	.id = MLXSW_REG_SLDR_ID,
+	.len = MLXSW_REG_SLDR_LEN,
+};
+
+enum mlxsw_reg_sldr_op {
+	/* Indicates a creation of a new LAG-ID, lag_id must be valid */
+	MLXSW_REG_SLDR_OP_LAG_CREATE,
+	MLXSW_REG_SLDR_OP_LAG_DESTROY,
+	/* Ports that appear in the list have the Distributor enabled */
+	MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST,
+	/* Removes ports from the distributor list */
+	MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST,
+};
+
+/* reg_sldr_op
+ * Operation.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sldr, op, 0x00, 29, 3);
+
+/* reg_sldr_lag_id
+ * LAG identifier. The lag_id is the index into the LAG descriptor table.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sldr, lag_id, 0x00, 0, 10);
+
+static inline void mlxsw_reg_sldr_lag_create_pack(char *payload, u16 lag_id)
+{
+	MLXSW_REG_ZERO(sldr, payload);
+	mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_CREATE);
+	mlxsw_reg_sldr_lag_id_set(payload, lag_id); /* 10-bit field */
+}
+
+static inline void mlxsw_reg_sldr_lag_destroy_pack(char *payload, u16 lag_id)
+{
+	MLXSW_REG_ZERO(sldr, payload);
+	mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_DESTROY);
+	mlxsw_reg_sldr_lag_id_set(payload, lag_id); /* 10-bit field */
+}
+
+/* reg_sldr_num_ports
+ * The number of member ports of the LAG.
+ * Reserved for Create / Destroy operations
+ * For Add / Remove operations - indicates the number of ports in the list.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sldr, num_ports, 0x04, 24, 8);
+
+/* reg_sldr_system_port
+ * System port.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sldr, system_port, 0x08, 0, 16, 4, 0, false);
+
+static inline void mlxsw_reg_sldr_lag_add_port_pack(char *payload, u16 lag_id,
+						    u8 local_port)
+{
+	MLXSW_REG_ZERO(sldr, payload);
+	mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST);
+	mlxsw_reg_sldr_lag_id_set(payload, lag_id); /* 10-bit field */
+	mlxsw_reg_sldr_num_ports_set(payload, 1); /* single-entry list */
+	mlxsw_reg_sldr_system_port_set(payload, 0, local_port);
+}
+
+static inline void
+mlxsw_reg_sldr_lag_remove_port_pack(char *payload, u16 lag_id, u8 local_port)
+{
+	MLXSW_REG_ZERO(sldr, payload);
+	mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST);
+	mlxsw_reg_sldr_lag_id_set(payload, lag_id); /* 10-bit field */
+	mlxsw_reg_sldr_num_ports_set(payload, 1); /* single-entry list */
+	mlxsw_reg_sldr_system_port_set(payload, 0, local_port);
+}
+
+/* SLCR - Switch LAG Configuration 2 Register
+ * -------------------------------------------
+ * The Switch LAG Configuration register is used for configuring the
+ * LAG properties of the switch.
+ */
+#define MLXSW_REG_SLCR_ID 0x2015
+#define MLXSW_REG_SLCR_LEN 0x10
+
+static const struct mlxsw_reg_info mlxsw_reg_slcr = {
+	.id = MLXSW_REG_SLCR_ID,
+	.len = MLXSW_REG_SLCR_LEN,
+};
+
+enum mlxsw_reg_slcr_pp {
+	/* Global Configuration (for all ports) */
+	MLXSW_REG_SLCR_PP_GLOBAL,
+	/* Per port configuration, based on local_port field */
+	MLXSW_REG_SLCR_PP_PER_PORT,
+};
+
+/* reg_slcr_pp
+ * Per Port Configuration
+ * Note: Reading at Global mode results in reading port 1 configuration.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, slcr, pp, 0x00, 24, 1);
+
+/* reg_slcr_local_port
+ * Local port number
+ * Supported from CPU port
+ * Not supported from router port
+ * Reserved when pp = Global Configuration
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, slcr, local_port, 0x00, 16, 8);
+
+enum mlxsw_reg_slcr_type {
+	MLXSW_REG_SLCR_TYPE_CRC, /* default */
+	MLXSW_REG_SLCR_TYPE_XOR,
+	MLXSW_REG_SLCR_TYPE_RANDOM,
+};
+
+/* reg_slcr_type
+ * Hash type
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, slcr, type, 0x00, 0, 4);
+
+/* Ingress port */
+#define MLXSW_REG_SLCR_LAG_HASH_IN_PORT		BIT(0)
+/* SMAC - for IPv4 and IPv6 packets */
+#define MLXSW_REG_SLCR_LAG_HASH_SMAC_IP		BIT(1)
+/* SMAC - for non-IP packets */
+#define MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP	BIT(2)
+#define MLXSW_REG_SLCR_LAG_HASH_SMAC \
+	(MLXSW_REG_SLCR_LAG_HASH_SMAC_IP | \
+	 MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP)
+/* DMAC - for IPv4 and IPv6 packets */
+#define MLXSW_REG_SLCR_LAG_HASH_DMAC_IP		BIT(3)
+/* DMAC - for non-IP packets */
+#define MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP	BIT(4)
+#define MLXSW_REG_SLCR_LAG_HASH_DMAC \
+	(MLXSW_REG_SLCR_LAG_HASH_DMAC_IP | \
+	 MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP)
+/* Ethertype - for IPv4 and IPv6 packets */
+#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP	BIT(5)
+/* Ethertype - for non-IP packets */
+#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP	BIT(6)
+#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE \
+	(MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP | \
+	 MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP)
+/* VLAN ID - for IPv4 and IPv6 packets */
+#define MLXSW_REG_SLCR_LAG_HASH_VLANID_IP	BIT(7)
+/* VLAN ID - for non-IP packets */
+#define MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP	BIT(8)
+#define MLXSW_REG_SLCR_LAG_HASH_VLANID \
+	(MLXSW_REG_SLCR_LAG_HASH_VLANID_IP | \
+	 MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP)
+/* Source IP address (can be IPv4 or IPv6) */
+#define MLXSW_REG_SLCR_LAG_HASH_SIP		BIT(9)
+/* Destination IP address (can be IPv4 or IPv6) */
+#define MLXSW_REG_SLCR_LAG_HASH_DIP		BIT(10)
+/* TCP/UDP source port */
+#define MLXSW_REG_SLCR_LAG_HASH_SPORT		BIT(11)
+/* TCP/UDP destination port */
+#define MLXSW_REG_SLCR_LAG_HASH_DPORT		BIT(12)
+/* IPv4 Protocol/IPv6 Next Header */
+#define MLXSW_REG_SLCR_LAG_HASH_IPPROTO		BIT(13)
+/* IPv6 Flow label */
+#define MLXSW_REG_SLCR_LAG_HASH_FLOWLABEL	BIT(14)
+/* SID - FCoE source ID */
+#define MLXSW_REG_SLCR_LAG_HASH_FCOE_SID	BIT(15)
+/* DID - FCoE destination ID */
+#define MLXSW_REG_SLCR_LAG_HASH_FCOE_DID	BIT(16)
+/* OXID - FCoE originator exchange ID */
+#define MLXSW_REG_SLCR_LAG_HASH_FCOE_OXID	BIT(17)
+/* Destination QP number - for RoCE packets */
+#define MLXSW_REG_SLCR_LAG_HASH_ROCE_DQP	BIT(19)
+
+/* reg_slcr_lag_hash
+ * LAG hashing configuration. This is a bitmask, in which each set
+ * bit includes the corresponding item in the LAG hash calculation.
+ * The default lag_hash contains SMAC, DMAC, VLANID and
+ * Ethertype (for all packet types).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, slcr, lag_hash, 0x04, 0, 20);
+
+static inline void mlxsw_reg_slcr_pack(char *payload, u32 lag_hash)
+{
+	MLXSW_REG_ZERO(slcr, payload);
+	mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL);
+	mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_XOR);
+	mlxsw_reg_slcr_lag_hash_set(payload, lag_hash); /* 20-bit bitmask */
+}
+
+/* SLCOR - Switch LAG Collector Register
+ * -------------------------------------
+ * The Switch LAG Collector register controls the Local Port membership
+ * in a LAG and enablement of the collector.
+ */
+#define MLXSW_REG_SLCOR_ID 0x2016
+#define MLXSW_REG_SLCOR_LEN 0x10
+
+static const struct mlxsw_reg_info mlxsw_reg_slcor = {
+	.id = MLXSW_REG_SLCOR_ID,
+	.len = MLXSW_REG_SLCOR_LEN,
+};
+
+enum mlxsw_reg_slcor_col {
+	/* Port is added with collector disabled */
+	MLXSW_REG_SLCOR_COL_LAG_ADD_PORT,
+	MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED,
+	MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_DISABLED,
+	MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT,
+};
+
+/* reg_slcor_col
+ * Collector configuration
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, slcor, col, 0x00, 30, 2);
+
+/* reg_slcor_local_port
+ * Local port number
+ * Not supported for CPU port
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, slcor, local_port, 0x00, 16, 8);
+
+/* reg_slcor_lag_id
+ * LAG Identifier. Index into the LAG descriptor table.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, slcor, lag_id, 0x00, 0, 10);
+
+/* reg_slcor_port_index
+ * Port index in the LAG list. Only valid on Add Port to LAG col.
+ * Valid range is from 0 to cap_max_lag_members-1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, slcor, port_index, 0x04, 0, 10);
+
+static inline void mlxsw_reg_slcor_pack(char *payload,
+					u8 local_port, u16 lag_id,
+					enum mlxsw_reg_slcor_col col)
+{
+	MLXSW_REG_ZERO(slcor, payload);
+	mlxsw_reg_slcor_col_set(payload, col);
+	mlxsw_reg_slcor_local_port_set(payload, local_port);
+	mlxsw_reg_slcor_lag_id_set(payload, lag_id);
+}
+
+static inline void mlxsw_reg_slcor_port_add_pack(char *payload,
+						 u8 local_port, u16 lag_id,
+						 u8 port_index)
+{
+	mlxsw_reg_slcor_pack(payload, local_port, lag_id,
+			     MLXSW_REG_SLCOR_COL_LAG_ADD_PORT);
+	mlxsw_reg_slcor_port_index_set(payload, port_index);
+}
+
+static inline void mlxsw_reg_slcor_port_remove_pack(char *payload,
+						    u8 local_port, u16 lag_id)
+{
+	mlxsw_reg_slcor_pack(payload, local_port, lag_id,
+			     MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT);
+}
+
+static inline void mlxsw_reg_slcor_col_enable_pack(char *payload,
+						   u8 local_port, u16 lag_id)
+{
+	mlxsw_reg_slcor_pack(payload, local_port, lag_id,
+			     MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED);
+}
+
+static inline void mlxsw_reg_slcor_col_disable_pack(char *payload,
+						    u8 local_port, u16 lag_id)
+{
+	mlxsw_reg_slcor_pack(payload, local_port, lag_id,
+			     MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_DISABLED);
+}
+
 /* SPMLR - Switch Port MAC Learning Register
  * -----------------------------------------
  * Controls the Switch MAC learning policy per port.
@@ -2087,6 +2457,284 @@ static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id)
 	mlxsw_reg_hpkt_ctrl_set(payload, MLXSW_REG_HPKT_CTRL_PACKET_DEFAULT);
 }
 
+/* MFCR - Management Fan Control Register
+ * --------------------------------------
+ * This register controls the settings of the Fan Speed PWM mechanism.
+ */
+#define MLXSW_REG_MFCR_ID 0x9001
+#define MLXSW_REG_MFCR_LEN 0x08
+
+static const struct mlxsw_reg_info mlxsw_reg_mfcr = {
+	.id = MLXSW_REG_MFCR_ID,
+	.len = MLXSW_REG_MFCR_LEN,
+};
+
+enum mlxsw_reg_mfcr_pwm_frequency {
+	MLXSW_REG_MFCR_PWM_FEQ_11HZ = 0x00,
+	MLXSW_REG_MFCR_PWM_FEQ_14_7HZ = 0x01,
+	MLXSW_REG_MFCR_PWM_FEQ_22_1HZ = 0x02,
+	MLXSW_REG_MFCR_PWM_FEQ_1_4KHZ = 0x40,
+	MLXSW_REG_MFCR_PWM_FEQ_5KHZ = 0x41,
+	MLXSW_REG_MFCR_PWM_FEQ_20KHZ = 0x42,
+	MLXSW_REG_MFCR_PWM_FEQ_22_5KHZ = 0x43,
+	MLXSW_REG_MFCR_PWM_FEQ_25KHZ = 0x44,
+};
+
+/* reg_mfcr_pwm_frequency
+ * Controls the frequency of the PWM signal.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mfcr, pwm_frequency, 0x00, 0, 6);
+
+#define MLXSW_MFCR_TACHOS_MAX 10
+
+/* reg_mfcr_tacho_active
+ * Indicates which of the tachometers are active (one bit per tachometer).
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mfcr, tacho_active, 0x04, 16, MLXSW_MFCR_TACHOS_MAX);
+
+#define MLXSW_MFCR_PWMS_MAX 5
+
+/* reg_mfcr_pwm_active
+ * Indicates which of the PWM controls are active (one bit per PWM).
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mfcr, pwm_active, 0x04, 0, MLXSW_MFCR_PWMS_MAX);
+
+static inline void
+mlxsw_reg_mfcr_pack(char *payload,
+		    enum mlxsw_reg_mfcr_pwm_frequency pwm_frequency)
+{
+	MLXSW_REG_ZERO(mfcr, payload);
+	mlxsw_reg_mfcr_pwm_frequency_set(payload, pwm_frequency);
+}
+
+static inline void
+mlxsw_reg_mfcr_unpack(char *payload,
+		      enum mlxsw_reg_mfcr_pwm_frequency *p_pwm_frequency,
+		      u16 *p_tacho_active, u8 *p_pwm_active)
+{
+	*p_pwm_frequency = mlxsw_reg_mfcr_pwm_frequency_get(payload);
+	*p_tacho_active = mlxsw_reg_mfcr_tacho_active_get(payload);
+	*p_pwm_active = mlxsw_reg_mfcr_pwm_active_get(payload);
+}
+
+/* MFSC - Management Fan Speed Control Register
+ * --------------------------------------------
+ * This register controls the settings of the Fan Speed PWM mechanism.
+ */
+#define MLXSW_REG_MFSC_ID 0x9002
+#define MLXSW_REG_MFSC_LEN 0x08
+
+static const struct mlxsw_reg_info mlxsw_reg_mfsc = {
+	.id = MLXSW_REG_MFSC_ID,
+	.len = MLXSW_REG_MFSC_LEN,
+};
+
+/* reg_mfsc_pwm
+ * Fan pwm to control / monitor.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mfsc, pwm, 0x00, 24, 3);
+
+/* reg_mfsc_pwm_duty_cycle
+ * Controls the duty cycle of the PWM. Value range from 0..255 to
+ * represent duty cycle of 0%...100%.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mfsc, pwm_duty_cycle, 0x04, 0, 8);
+
+static inline void mlxsw_reg_mfsc_pack(char *payload, u8 pwm,
+				       u8 pwm_duty_cycle)
+{
+	MLXSW_REG_ZERO(mfsc, payload);
+	mlxsw_reg_mfsc_pwm_set(payload, pwm);
+	mlxsw_reg_mfsc_pwm_duty_cycle_set(payload, pwm_duty_cycle);
+}
+
+/* MFSM - Management Fan Speed Measurement
+ * ---------------------------------------
+ * This register controls the settings of the Tacho measurements and
+ * enables reading the Tachometer measurements.
+ */
+#define MLXSW_REG_MFSM_ID 0x9003
+#define MLXSW_REG_MFSM_LEN 0x08
+
+static const struct mlxsw_reg_info mlxsw_reg_mfsm = {
+	.id = MLXSW_REG_MFSM_ID,
+	.len = MLXSW_REG_MFSM_LEN,
+};
+
+/* reg_mfsm_tacho
+ * Fan tachometer index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mfsm, tacho, 0x00, 24, 4);
+
+/* reg_mfsm_rpm
+ * Fan speed (revolutions per minute).
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mfsm, rpm, 0x04, 0, 16);
+
+static inline void mlxsw_reg_mfsm_pack(char *payload, u8 tacho)
+{
+	MLXSW_REG_ZERO(mfsm, payload);
+	mlxsw_reg_mfsm_tacho_set(payload, tacho);
+}
+
+/* MTCAP - Management Temperature Capabilities
+ * -------------------------------------------
+ * This register exposes the capabilities of the device and
+ * system temperature sensing.
+ */
+#define MLXSW_REG_MTCAP_ID 0x9009
+#define MLXSW_REG_MTCAP_LEN 0x08
+
+static const struct mlxsw_reg_info mlxsw_reg_mtcap = {
+	.id = MLXSW_REG_MTCAP_ID,
+	.len = MLXSW_REG_MTCAP_LEN,
+};
+
+/* reg_mtcap_sensor_count
+ * Number of sensors supported by the device.
+ * This includes the QSFP module sensors (if they exist in the QSFP module).
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7);
+
+/* MTMP - Management Temperature
+ * -----------------------------
+ * This register controls the settings of the temperature measurements
+ * and enables reading the temperature measurements. Note that temperature
+ * is in 0.125 degrees Celsius.
+ */
+#define MLXSW_REG_MTMP_ID 0x900A
+#define MLXSW_REG_MTMP_LEN 0x20
+
+static const struct mlxsw_reg_info mlxsw_reg_mtmp = {
+	.id = MLXSW_REG_MTMP_ID,
+	.len = MLXSW_REG_MTMP_LEN,
+};
+
+/* reg_mtmp_sensor_index
+ * Index of the sensor to access.
+ * 64-127 of sensor_index are mapped to the SFP+/QSFP modules sequentially
+ * (module 0 is mapped to sensor_index 64).
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 7);
+
+/* Convert a reading in 0.125 degree Celsius units to millidegrees Celsius */
+#define MLXSW_REG_MTMP_TEMP_TO_MC(val) ((val) * 125)
+
+/* reg_mtmp_temperature
+ * Temperature reading from the sensor. Reading is in 0.125 Celsius
+ * degrees units.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mtmp, temperature, 0x04, 0, 16);
+
+/* reg_mtmp_mte
+ * Max Temperature Enable - enables measuring the max temperature on a sensor.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtmp, mte, 0x08, 31, 1);
+
+/* reg_mtmp_mtr
+ * Max Temperature Reset - clears the value of the max temperature register.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, mtmp, mtr, 0x08, 30, 1);
+
+/* reg_mtmp_max_temperature
+ * The highest measured temperature from the sensor.
+ * When the bit mte is cleared, the field max_temperature is reserved.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mtmp, max_temperature, 0x08, 0, 16);
+
+#define MLXSW_REG_MTMP_SENSOR_NAME_SIZE 8
+
+/* reg_mtmp_sensor_name
+ * Sensor Name
+ * Access: RO
+ */
+MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE);
+
+static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index,
+				       bool max_temp_enable,
+				       bool max_temp_reset)
+{
+	MLXSW_REG_ZERO(mtmp, payload);
+	mlxsw_reg_mtmp_sensor_index_set(payload, sensor_index);
+	mlxsw_reg_mtmp_mte_set(payload, max_temp_enable);
+	mlxsw_reg_mtmp_mtr_set(payload, max_temp_reset);
+}
+
+static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp,
+					 unsigned int *p_max_temp,
+					 char *sensor_name)
+{
+	u16 temp;
+
+	if (p_temp) {
+		temp = mlxsw_reg_mtmp_temperature_get(payload);
+		*p_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp);
+	}
+	if (p_max_temp) {
+		temp = mlxsw_reg_mtmp_max_temperature_get(payload);
+		*p_max_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp);
+	}
+	if (sensor_name)
+		mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name);
+}
+
+/* MLCR - Management LED Control Register
+ * --------------------------------------
+ * Controls the system LEDs.
+ */
+#define MLXSW_REG_MLCR_ID 0x902B
+#define MLXSW_REG_MLCR_LEN 0x0C
+
+static const struct mlxsw_reg_info mlxsw_reg_mlcr = {
+	.id = MLXSW_REG_MLCR_ID,
+	.len = MLXSW_REG_MLCR_LEN,
+};
+
+/* reg_mlcr_local_port
+ * Local port number.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mlcr, local_port, 0x00, 16, 8);
+
+#define MLXSW_REG_MLCR_DURATION_MAX 0xFFFF
+
+/* reg_mlcr_beacon_duration
+ * Duration of the beacon to be active, in seconds.
+ * 0x0 - Will turn off the beacon.
+ * 0xFFFF - Will turn on the beacon until explicitly turned off.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mlcr, beacon_duration, 0x04, 0, 16);
+
+/* reg_mlcr_beacon_remain
+ * Remaining duration of the beacon, in seconds.
+ * 0xFFFF indicates an infinite amount of time.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mlcr, beacon_remain, 0x08, 0, 16);
+
+static inline void mlxsw_reg_mlcr_pack(char *payload, u8 local_port,
+				       bool active)
+{
+	MLXSW_REG_ZERO(mlcr, payload);
+	mlxsw_reg_mlcr_local_port_set(payload, local_port);
+	mlxsw_reg_mlcr_beacon_duration_set(payload, active ?
+					   MLXSW_REG_MLCR_DURATION_MAX : 0);
+}
+
 /* SBPR - Shared Buffer Pools Register
  * -----------------------------------
  * The SBPR configures and retrieves the shared buffer pools and configuration.
@@ -2375,6 +3023,12 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id)
 		return "SFGC";
 	case MLXSW_REG_SFTR_ID:
 		return "SFTR";
+	case MLXSW_REG_SLDR_ID:
+		return "SLDR";
+	case MLXSW_REG_SLCR_ID:
+		return "SLCR";
+	case MLXSW_REG_SLCOR_ID:
+		return "SLCOR";
 	case MLXSW_REG_SPMLR_ID:
 		return "SPMLR";
 	case MLXSW_REG_SVFA_ID:
@@ -2405,6 +3059,18 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id)
 		return "HTGT";
 	case MLXSW_REG_HPKT_ID:
 		return "HPKT";
+	case MLXSW_REG_MFCR_ID:
+		return "MFCR";
+	case MLXSW_REG_MFSC_ID:
+		return "MFSC";
+	case MLXSW_REG_MFSM_ID:
+		return "MFSM";
+	case MLXSW_REG_MTCAP_ID:
+		return "MTCAP";
+	case MLXSW_REG_MTMP_ID:
+		return "MTMP";
+	case MLXSW_REG_MLCR_ID:
+		return "MLCR";
 	case MLXSW_REG_SBPR_ID:
 		return "SBPR";
 	case MLXSW_REG_SBCM_ID:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 3be4a2355ead..322ed544348f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -417,6 +417,10 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 }
 
+static void mlxsw_sp_set_rx_mode(struct net_device *dev)
+{
+}
+
 static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
@@ -725,6 +729,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
 	.ndo_open		= mlxsw_sp_port_open,
 	.ndo_stop		= mlxsw_sp_port_stop,
 	.ndo_start_xmit		= mlxsw_sp_port_xmit,
+	.ndo_set_rx_mode	= mlxsw_sp_set_rx_mode,
 	.ndo_set_mac_address	= mlxsw_sp_port_set_mac_address,
 	.ndo_change_mtu		= mlxsw_sp_port_change_mtu,
 	.ndo_get_stats64	= mlxsw_sp_port_get_stats64,
@@ -859,6 +864,29 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev,
 	}
 }
 
+/* ethtool set_phys_id handler: switch the port's beacon on or off through
+ * the MLCR register. States other than ETHTOOL_ID_ACTIVE and
+ * ETHTOOL_ID_INACTIVE are rejected with -EOPNOTSUPP before any write.
+ */
+static int mlxsw_sp_port_set_phys_id(struct net_device *dev,
+				     enum ethtool_phys_id_state state)
+{
+	struct mlxsw_sp_port *port = netdev_priv(dev);
+	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+	char mlcr_pl[MLXSW_REG_MLCR_LEN];
+	bool beacon_on;
+
+	if (state == ETHTOOL_ID_ACTIVE)
+		beacon_on = true;
+	else if (state == ETHTOOL_ID_INACTIVE)
+		beacon_on = false;
+	else
+		return -EOPNOTSUPP;
+
+	mlxsw_reg_mlcr_pack(mlcr_pl, port->local_port, beacon_on);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mlcr), mlcr_pl);
+}
+
 static void mlxsw_sp_port_get_stats(struct net_device *dev,
 				    struct ethtool_stats *stats, u64 *data)
 {
@@ -1205,6 +1233,7 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
 	.get_drvinfo		= mlxsw_sp_port_get_drvinfo,
 	.get_link		= ethtool_op_get_link,
 	.get_strings		= mlxsw_sp_port_get_strings,
+	.set_phys_id		= mlxsw_sp_port_set_phys_id,
 	.get_ethtool_stats	= mlxsw_sp_port_get_stats,
 	.get_sset_count		= mlxsw_sp_port_get_sset_count,
 	.get_settings		= mlxsw_sp_port_get_settings,
@@ -1683,6 +1712,22 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp)
 	return 0;
 }
 
+static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
+{
+	char slcr_pl[MLXSW_REG_SLCR_LEN];
+
+	mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC |
+				     MLXSW_REG_SLCR_LAG_HASH_DMAC |
+				     MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE |
+				     MLXSW_REG_SLCR_LAG_HASH_VLANID |
+				     MLXSW_REG_SLCR_LAG_HASH_SIP |
+				     MLXSW_REG_SLCR_LAG_HASH_DIP |
+				     MLXSW_REG_SLCR_LAG_HASH_SPORT |
+				     MLXSW_REG_SLCR_LAG_HASH_DPORT |
+				     MLXSW_REG_SLCR_LAG_HASH_IPPROTO);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl);
+}
+
 static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core,
 			 const struct mlxsw_bus_info *mlxsw_bus_info)
 {
@@ -1728,6 +1773,12 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core,
 		goto err_buffers_init;
 	}
 
+	err = mlxsw_sp_lag_init(mlxsw_sp);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize LAG\n");
+		goto err_lag_init;
+	}
+
 	err = mlxsw_sp_switchdev_init(mlxsw_sp);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize switchdev\n");
@@ -1737,6 +1788,7 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core,
 	return 0;
 
 err_switchdev_init:
+err_lag_init:
 err_buffers_init:
 err_flood_init:
 	mlxsw_sp_traps_fini(mlxsw_sp);
@@ -1764,9 +1816,9 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = {
 	.used_max_vepa_channels		= 1,
 	.max_vepa_channels		= 0,
 	.used_max_lag			= 1,
-	.max_lag			= 64,
+	.max_lag			= MLXSW_SP_LAG_MAX,
 	.used_max_port_per_lag		= 1,
-	.max_port_per_lag		= 16,
+	.max_port_per_lag		= MLXSW_SP_PORT_PER_LAG_MAX,
 	.used_max_mid			= 1,
 	.max_mid			= 7000,
 	.used_max_pgt			= 1,
@@ -1865,19 +1917,245 @@ static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp,
 		mlxsw_sp->master_bridge.dev = NULL;
 }
 
-static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
-				    unsigned long event, void *ptr)
+static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
+{
+	char sldr_pl[MLXSW_REG_SLDR_LEN];
+
+	mlxsw_reg_sldr_lag_create_pack(sldr_pl, lag_id);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+}
+
+static int mlxsw_sp_lag_destroy(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
+{
+	char sldr_pl[MLXSW_REG_SLDR_LEN];
+
+	mlxsw_reg_sldr_lag_destroy_pack(sldr_pl, lag_id);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+}
+
+static int mlxsw_sp_lag_col_port_add(struct mlxsw_sp_port *mlxsw_sp_port,
+				     u16 lag_id, u8 port_index)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char slcor_pl[MLXSW_REG_SLCOR_LEN];
+
+	mlxsw_reg_slcor_port_add_pack(slcor_pl, mlxsw_sp_port->local_port,
+				      lag_id, port_index);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl);
+}
+
+static int mlxsw_sp_lag_col_port_remove(struct mlxsw_sp_port *mlxsw_sp_port,
+					u16 lag_id)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char slcor_pl[MLXSW_REG_SLCOR_LEN];
+
+	mlxsw_reg_slcor_port_remove_pack(slcor_pl, mlxsw_sp_port->local_port,
+					 lag_id);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl);
+}
+
+static int mlxsw_sp_lag_col_port_enable(struct mlxsw_sp_port *mlxsw_sp_port,
+					u16 lag_id)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char slcor_pl[MLXSW_REG_SLCOR_LEN];
+
+	mlxsw_reg_slcor_col_enable_pack(slcor_pl, mlxsw_sp_port->local_port,
+					lag_id);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl);
+}
+
+static int mlxsw_sp_lag_col_port_disable(struct mlxsw_sp_port *mlxsw_sp_port,
+					 u16 lag_id)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char slcor_pl[MLXSW_REG_SLCOR_LEN];
+
+	mlxsw_reg_slcor_col_disable_pack(slcor_pl, mlxsw_sp_port->local_port,
+					 lag_id);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl);
+}
+
+/* Look up the LAG ID bound to @lag_dev, else pick the lowest unused ID. */
+static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp,
+				  struct net_device *lag_dev,
+				  u16 *p_lag_id)
+{
+	int first_free = -1;
+	int id;
+
+	for (id = 0; id < MLXSW_SP_LAG_MAX; id++) {
+		struct mlxsw_sp_upper *lag = mlxsw_sp_lag_get(mlxsw_sp, id);
+
+		if (!lag->ref_count) {
+			if (first_free < 0)
+				first_free = id;
+		} else if (lag->dev == lag_dev) {
+			*p_lag_id = id;
+			return 0;
+		}
+	}
+	if (first_free < 0)
+		return -EBUSY;
+	*p_lag_id = first_free;
+	return 0;
+}
+
+/* Decide whether a port may be linked under @lag_dev: a LAG ID must be
+ * obtainable for it and its Tx type must be hash-based.
+ */
+static bool
+mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp,
+			  struct net_device *lag_dev,
+			  struct netdev_lag_upper_info *lag_upper_info)
+{
+	u16 unused_id;
+
+	return !mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &unused_id) &&
+	       lag_upper_info->tx_type == NETDEV_LAG_TX_TYPE_HASH;
+}
+
+static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp,
+				       u16 lag_id, u8 *p_port_index)
+{
+	int slot = 0;
+
+	while (slot < MLXSW_SP_PORT_PER_LAG_MAX &&
+	       mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, slot))
+		slot++;
+	if (slot == MLXSW_SP_PORT_PER_LAG_MAX)
+		return -EBUSY;	/* every index already holds a lagged port */
+	*p_port_index = slot;
+	return 0;
+}
+
+/* Join the port to @lag_dev's LAG; on failure, undo in reverse order. */
+static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
+				  struct net_device *lag_dev)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_upper *lag;
+	u16 lag_id;
+	u8 port_index;
+	int err;
+
+	err = mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id);
+	if (err)
+		return err;
+	lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id);
+	if (!lag->ref_count) {
+		err = mlxsw_sp_lag_create(mlxsw_sp, lag_id);
+		if (err)
+			return err;
+		lag->dev = lag_dev;
+	}
+
+	err = mlxsw_sp_port_lag_index_get(mlxsw_sp, lag_id, &port_index);
+	if (err)
+		goto err_col_port_add;
+	err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index);
+	if (err)
+		goto err_col_port_add;
+	err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, lag_id);
+	if (err)
+		goto err_col_port_enable;
+	mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index,
+				   mlxsw_sp_port->local_port);
+	mlxsw_sp_port->lag_id = lag_id;
+	mlxsw_sp_port->lagged = 1;
+	lag->ref_count++;
+	return 0;
+
+err_col_port_enable:
+	mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id);
+err_col_port_add:
+	if (!lag->ref_count)
+		mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
+	return err;
+}
+
+static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
+				   struct net_device *lag_dev)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_upper *lag;
+	u16 lag_id = mlxsw_sp_port->lag_id;
+	int err;
+
+	if (!mlxsw_sp_port->lagged)
+		return 0;
+	lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id);
+	WARN_ON(lag->ref_count == 0);
+
+	err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id);
+	if (err)
+		return err;
+	err = mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id);
+	if (err)
+		return err;
+
+	if (lag->ref_count == 1) {
+		err = mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
+		if (err)
+			return err;
+	}
+
+	mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id,
+				     mlxsw_sp_port->local_port);
+	mlxsw_sp_port->lagged = 0;
+	lag->ref_count--;
+	return 0;
+}
+
+static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port,
+				      u16 lag_id)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char sldr_pl[MLXSW_REG_SLDR_LEN];
+
+	mlxsw_reg_sldr_lag_add_port_pack(sldr_pl, lag_id,
+					 mlxsw_sp_port->local_port);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+}
+
+static int mlxsw_sp_lag_dist_port_remove(struct mlxsw_sp_port *mlxsw_sp_port,
+					 u16 lag_id)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char sldr_pl[MLXSW_REG_SLDR_LEN];
+
+	mlxsw_reg_sldr_lag_remove_port_pack(sldr_pl, lag_id,
+					    mlxsw_sp_port->local_port);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+}
+
+/* Tx enabled: add the port to its LAG in SLDR; disabled: remove it. */
+static int mlxsw_sp_port_lag_tx_en_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				       bool lag_tx_enabled)
+{
+	u16 lag_id = mlxsw_sp_port->lag_id;
+
+	if (!lag_tx_enabled)
+		return mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, lag_id);
+	return mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, lag_id);
+}
+
+static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port,
+				     struct netdev_lag_lower_state_info *info)
+{
+	return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled);
+}
+
+static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev,
+					       unsigned long event, void *ptr)
 {
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct netdev_notifier_changeupper_info *info;
 	struct mlxsw_sp_port *mlxsw_sp_port;
 	struct net_device *upper_dev;
 	struct mlxsw_sp *mlxsw_sp;
 	int err;
 
-	if (!mlxsw_sp_port_dev_check(dev))
-		return NOTIFY_DONE;
-
 	mlxsw_sp_port = netdev_priv(dev);
 	mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	info = ptr;
@@ -1885,16 +2163,22 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
 	switch (event) {
 	case NETDEV_PRECHANGEUPPER:
 		upper_dev = info->upper_dev;
+		if (!info->master || !info->linking)
+			break;
 		/* HW limitation forbids to put ports to multiple bridges. */
-		if (info->master && info->linking &&
-		    netif_is_bridge_master(upper_dev) &&
+		if (netif_is_bridge_master(upper_dev) &&
 		    !mlxsw_sp_master_bridge_check(mlxsw_sp, upper_dev))
 			return NOTIFY_BAD;
+		if (netif_is_lag_master(upper_dev) &&
+		    !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev,
+					       info->upper_info))
+			return NOTIFY_BAD;
 		break;
 	case NETDEV_CHANGEUPPER:
 		upper_dev = info->upper_dev;
-		if (info->master &&
-		    netif_is_bridge_master(upper_dev)) {
+		if (!info->master)
+			break;
+		if (netif_is_bridge_master(upper_dev)) {
 			if (info->linking) {
 				err = mlxsw_sp_port_bridge_join(mlxsw_sp_port);
 				if (err)
@@ -1908,6 +2192,46 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
 				mlxsw_sp_port->bridged = 0;
 				mlxsw_sp_master_bridge_dec(mlxsw_sp, upper_dev);
 			}
+		} else if (netif_is_lag_master(upper_dev)) {
+			if (info->linking) {
+				err = mlxsw_sp_port_lag_join(mlxsw_sp_port,
+							     upper_dev);
+				if (err) {
+					netdev_err(dev, "Failed to join link aggregation\n");
+					return NOTIFY_BAD;
+				}
+			} else {
+				err = mlxsw_sp_port_lag_leave(mlxsw_sp_port,
+							      upper_dev);
+				if (err) {
+					netdev_err(dev, "Failed to leave link aggregation\n");
+					return NOTIFY_BAD;
+				}
+			}
+		}
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev,
+					       unsigned long event, void *ptr)
+{
+	struct netdev_notifier_changelowerstate_info *info;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	int err;
+
+	mlxsw_sp_port = netdev_priv(dev);
+	info = ptr;
+
+	switch (event) {
+	case NETDEV_CHANGELOWERSTATE:
+		if (netif_is_lag_port(dev) && mlxsw_sp_port->lagged) {
+			err = mlxsw_sp_port_lag_changed(mlxsw_sp_port,
+							info->lower_state_info);
+			if (err)
+				netdev_err(dev, "Failed to reflect link aggregation lower state change\n");
 		}
 		break;
 	}
@@ -1915,6 +2239,52 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
 	return NOTIFY_DONE;
 }
 
+static int mlxsw_sp_netdevice_port_event(struct net_device *dev,
+					 unsigned long event, void *ptr)
+{
+	switch (event) {
+	case NETDEV_PRECHANGEUPPER:
+	case NETDEV_CHANGEUPPER:
+		return mlxsw_sp_netdevice_port_upper_event(dev, event, ptr);
+	case NETDEV_CHANGELOWERSTATE:
+		return mlxsw_sp_netdevice_port_lower_event(dev, event, ptr);
+	}
+
+	return NOTIFY_DONE;
+}
+
+/* Forward a LAG event to every lower dev accepted by the port check. */
+static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev,
+					unsigned long event, void *ptr)
+{
+	struct net_device *lower;
+	struct list_head *iter;
+
+	netdev_for_each_lower_dev(lag_dev, lower, iter) {
+		if (!mlxsw_sp_port_dev_check(lower))
+			continue;
+		if (mlxsw_sp_netdevice_port_event(lower, event, ptr) ==
+		    NOTIFY_BAD)
+			return NOTIFY_BAD;
+	}
+
+	return NOTIFY_DONE;
+}
+
+/* Notifier entry: route to the port handler, else to the LAG handler. */
+static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
+				    unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	int ret = NOTIFY_DONE;
+
+	if (mlxsw_sp_port_dev_check(dev))
+		ret = mlxsw_sp_netdevice_port_event(dev, event, ptr);
+	else if (netif_is_lag_master(dev))
+		ret = mlxsw_sp_netdevice_lag_event(dev, event, ptr);
+	return ret;
+}
+
 static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = {
 	.notifier_call = mlxsw_sp_netdevice_event,
 };
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 4365c8bccc6d..48be5a63b9b5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -46,9 +46,16 @@
 #include "core.h"
 
 #define MLXSW_SP_VFID_BASE VLAN_N_VID
+#define MLXSW_SP_LAG_MAX 64
+#define MLXSW_SP_PORT_PER_LAG_MAX 16
 
 struct mlxsw_sp_port;
 
+struct mlxsw_sp_upper {
+	struct net_device *dev;
+	unsigned int ref_count;
+};
+
 struct mlxsw_sp {
 	unsigned long active_vfids[BITS_TO_LONGS(VLAN_N_VID)];
 	unsigned long active_fids[BITS_TO_LONGS(VLAN_N_VID)];
@@ -63,12 +70,16 @@ struct mlxsw_sp {
 	} fdb_notify;
 #define MLXSW_SP_DEFAULT_AGEING_TIME 300
 	u32 ageing_time;
-	struct {
-		struct net_device *dev;
-		unsigned int ref_count;
-	} master_bridge;
+	struct mlxsw_sp_upper master_bridge;
+	struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX];
 };
 
+static inline struct mlxsw_sp_upper *
+mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
+{
+	return &mlxsw_sp->lags[lag_id];
+}
+
 struct mlxsw_sp_port_pcpu_stats {
 	u64			rx_packets;
 	u64			rx_bytes;
@@ -87,8 +98,10 @@ struct mlxsw_sp_port {
 	u8 learning:1,
 	   learning_sync:1,
 	   uc_flood:1,
-	   bridged:1;
+	   bridged:1,
+	   lagged:1;
 	u16 pvid;
+	u16 lag_id;
 	/* 802.1Q bridge VLANs */
 	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
 	/* VLAN interfaces */
@@ -96,6 +109,18 @@ struct mlxsw_sp_port {
 	u16 nr_vfids;
 };
 
+static inline struct mlxsw_sp_port *
+mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	u8 local_port;
+
+	local_port = mlxsw_core_lag_mapping_get(mlxsw_sp->core,
+						lag_id, port_index);
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+	return mlxsw_sp_port && mlxsw_sp_port->lagged ? mlxsw_sp_port : NULL;
+}
+
 enum mlxsw_sp_flood_table {
 	MLXSW_SP_FLOOD_TABLE_UC,
 	MLXSW_SP_FLOOD_TABLE_BM,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index f21e23983a1a..406dab2f6b17 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -490,32 +490,56 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
 					 untagged_flag, pvid_flag);
 }
 
-static int mlxsw_sp_port_fdb_op(struct mlxsw_sp_port *mlxsw_sp_port,
-				const char *mac, u16 vid, bool adding,
-				bool dynamic)
+static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic)
 {
-	enum mlxsw_reg_sfd_rec_policy policy;
-	enum mlxsw_reg_sfd_op op;
+	return dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS :
+			 MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY;
+}
+
+static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding)
+{
+	return adding ? MLXSW_REG_SFD_OP_WRITE_EDIT :
+			MLXSW_REG_SFD_OP_WRITE_REMOVE;
+}
+
+static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp_port *mlxsw_sp_port,
+				   const char *mac, u16 vid, bool adding,
+				   bool dynamic)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	char *sfd_pl;
 	int err;
 
-	if (!vid)
-		vid = mlxsw_sp_port->pvid;
-
 	sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
 	if (!sfd_pl)
 		return -ENOMEM;
 
-	policy = dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS :
-			   MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY;
-	op = adding ? MLXSW_REG_SFD_OP_WRITE_EDIT :
-		      MLXSW_REG_SFD_OP_WRITE_REMOVE;
-	mlxsw_reg_sfd_pack(sfd_pl, op, 0);
-	mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy,
+	mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
+	mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
 			      mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP,
 			      mlxsw_sp_port->local_port);
-	err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(sfd),
-			      sfd_pl);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
+	kfree(sfd_pl);
+
+	return err;
+}
+
+static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id,
+				       const char *mac, u16 vid, bool adding,
+				       bool dynamic)
+{
+	char *sfd_pl;
+	int err;
+
+	sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
+	if (!sfd_pl)
+		return -ENOMEM;
+
+	mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
+	mlxsw_reg_sfd_uc_lag_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
+				  mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP,
+				  lag_id);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
 	kfree(sfd_pl);
 
 	return err;
@@ -526,11 +550,21 @@ mlxsw_sp_port_fdb_static_add(struct mlxsw_sp_port *mlxsw_sp_port,
 			     const struct switchdev_obj_port_fdb *fdb,
 			     struct switchdev_trans *trans)
 {
+	u16 vid = fdb->vid;
+
 	if (switchdev_trans_ph_prepare(trans))
 		return 0;
 
-	return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid,
-				    true, false);
+	if (!vid)
+		vid = mlxsw_sp_port->pvid;
+
+	if (!mlxsw_sp_port->lagged)
+		return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port,
+					       fdb->addr, vid, true, false);
+	else
+		return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp,
+						   mlxsw_sp_port->lag_id,
+						   fdb->addr, vid, true, false);
 }
 
 static int mlxsw_sp_port_obj_add(struct net_device *dev,
@@ -645,8 +679,15 @@ static int
 mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port,
 			     const struct switchdev_obj_port_fdb *fdb)
 {
-	return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid,
-				    false, false);
+	/* As in the static add path, VID 0 means the port's PVID. */
+	u16 vid = fdb->vid ? fdb->vid : mlxsw_sp_port->pvid;
+
+	if (mlxsw_sp_port->lagged)
+		return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp,
+						   mlxsw_sp_port->lag_id,
+						   fdb->addr, vid, false, false);
+	return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, fdb->addr, vid,
+				       false, false);
 }
 
 static int mlxsw_sp_port_obj_del(struct net_device *dev,
@@ -672,14 +713,30 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev,
 	return err;
 }
 
+static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp,
+						   u16 lag_id)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	int i;
+
+	for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) {
+		mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i);
+		if (mlxsw_sp_port)
+			return mlxsw_sp_port;
+	}
+	return NULL;
+}
+
 static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
 				  struct switchdev_obj_port_fdb *fdb,
 				  switchdev_obj_dump_cb_t *cb)
 {
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	char *sfd_pl;
 	char mac[ETH_ALEN];
 	u16 vid;
 	u8 local_port;
+	u16 lag_id;
 	u8 num_rec;
 	int stored_err = 0;
 	int i;
@@ -692,8 +749,7 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
 	mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0);
 	do {
 		mlxsw_reg_sfd_num_rec_set(sfd_pl, MLXSW_REG_SFD_REC_MAX_COUNT);
-		err = mlxsw_reg_query(mlxsw_sp_port->mlxsw_sp->core,
-				      MLXSW_REG(sfd), sfd_pl);
+		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
 		if (err)
 			goto out;
 
@@ -718,6 +774,20 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port,
 					if (err)
 						stored_err = err;
 				}
+				break;
+			case MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG:
+				mlxsw_reg_sfd_uc_lag_unpack(sfd_pl, i,
+							    mac, &vid, &lag_id);
+				if (mlxsw_sp_port ==
+				    mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id)) {
+					ether_addr_copy(fdb->addr, mac);
+					fdb->ndm_state = NUD_REACHABLE;
+					fdb->vid = vid;
+					err = cb(&fdb->obj);
+					if (err)
+						stored_err = err;
+				}
+				break;
 			}
 		}
 	} while (num_rec == MLXSW_REG_SFD_REC_MAX_COUNT);
@@ -779,6 +849,21 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = {
 	.switchdev_port_obj_dump	= mlxsw_sp_port_obj_dump,
 };
 
+static void mlxsw_sp_fdb_call_notifiers(bool learning, bool learning_sync,
+					bool adding, char *mac, u16 vid,
+					struct net_device *dev)
+{
+	struct switchdev_notifier_fdb_info info;
+	unsigned long notifier_type;
+
+	if (learning && learning_sync) {
+		info.addr = mac;
+		info.vid = vid;
+		notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL;
+		call_switchdev_notifiers(notifier_type, dev, &info.info);
+	}
+}
+
 static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
 					    char *sfn_pl, int rec_index,
 					    bool adding)
@@ -796,24 +881,49 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
 		return;
 	}
 
-	err = mlxsw_sp_port_fdb_op(mlxsw_sp_port, mac, vid,
-				   adding && mlxsw_sp_port->learning, true);
+	err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, mac, vid,
+				      adding && mlxsw_sp_port->learning, true);
 	if (err) {
 		if (net_ratelimit())
 			netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n");
 		return;
 	}
 
-	if (mlxsw_sp_port->learning && mlxsw_sp_port->learning_sync) {
-		struct switchdev_notifier_fdb_info info;
-		unsigned long notifier_type;
+	mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning,
+				    mlxsw_sp_port->learning_sync,
+				    adding, mac, vid, mlxsw_sp_port->dev);
+}
 
-		info.addr = mac;
-		info.vid = vid;
-		notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL;
-		call_switchdev_notifiers(notifier_type, mlxsw_sp_port->dev,
-					 &info.info);
+static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp,
+						char *sfn_pl, int rec_index,
+						bool adding)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	char mac[ETH_ALEN];
+	u16 lag_id;
+	u16 vid;
+	int err;
+
+	mlxsw_reg_sfn_mac_lag_unpack(sfn_pl, rec_index, mac, &vid, &lag_id);
+	mlxsw_sp_port = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id);
+	if (!mlxsw_sp_port) {
+		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Cannot find port representor for LAG\n");
+		return;
 	}
+
+	err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, vid,
+					  adding && mlxsw_sp_port->learning,
+					  true);
+	if (err) {
+		if (net_ratelimit())
+			netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n");
+		return;
+	}
+
+	mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning,
+				    mlxsw_sp_port->learning_sync,
+				    adding, mac, vid,
+				    mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev);
 }
 
 static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp,
@@ -828,6 +938,14 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp,
 		mlxsw_sp_fdb_notify_mac_process(mlxsw_sp, sfn_pl,
 						rec_index, false);
 		break;
+	case MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG:
+		mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl,
+						    rec_index, true);
+		break;
+	case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG:
+		mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl,
+						    rec_index, false);
+		break;
 	}
 }
 
diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig
new file mode 100644
index 000000000000..9508ad782c30
--- /dev/null
+++ b/drivers/net/ethernet/netronome/Kconfig
@@ -0,0 +1,36 @@
+#
+# Netronome device configuration
+#
+
+config NET_VENDOR_NETRONOME
+	bool "Netronome(R) devices"
+	default y
+	---help---
+	  If you have a Netronome(R) network (Ethernet) card or device, say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about Netronome(R) cards. If you say Y, you will be
+	  asked for your specific card in the following questions.
+
+if NET_VENDOR_NETRONOME
+
+config NFP_NETVF
+	tristate "Netronome(R) NFP4000/NFP6000 VF NIC driver"
+	depends on PCI && PCI_MSI
+	depends on VXLAN || VXLAN=n
+	---help---
+	  This driver supports SR-IOV virtual functions of
+	  the Netronome(R) NFP4000/NFP6000 cards working as
+	  an advanced Ethernet NIC.
+
+config NFP_NET_DEBUG
+	bool "Debug support for Netronome(R) NFP3200/NFP6000 NIC drivers"
+	depends on NFP_NET || NFP_NETVF
+	---help---
+	  Enable extra sanity checks and debugfs support in
+	  Netronome(R) NFP3200/NFP6000 NIC PF and VF drivers.
+	  Note: selecting this option may adversely impact
+		performance.
+
+endif
diff --git a/drivers/net/ethernet/netronome/Makefile b/drivers/net/ethernet/netronome/Makefile
new file mode 100644
index 000000000000..dcb7b383f634
--- /dev/null
+++ b/drivers/net/ethernet/netronome/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Netronome network device drivers
+#
+
+obj-$(CONFIG_NFP_NETVF) += nfp/
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
new file mode 100644
index 000000000000..68178819ff12
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -0,0 +1,8 @@
+obj-$(CONFIG_NFP_NETVF)	+= nfp_netvf.o
+
+nfp_netvf-objs := \
+	    nfp_net_common.o \
+	    nfp_net_ethtool.o \
+	    nfp_netvf_main.o
+
+nfp_netvf-$(CONFIG_NFP_NET_DEBUG) += nfp_net_debugfs.o
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
new file mode 100644
index 000000000000..ab264e1bccd0
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -0,0 +1,748 @@
+/*
+ * Copyright (C) 2015 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * nfp_net.h
+ * Declarations for Netronome network device driver.
+ * Authors: Jakub Kicinski <jakub.kicinski@netronome.com>
+ *          Jason McMullan <jason.mcmullan@netronome.com>
+ *          Rolf Neugebauer <rolf.neugebauer@netronome.com>
+ */
+
+#ifndef _NFP_NET_H_
+#define _NFP_NET_H_
+
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <asm-generic/io-64-nonatomic-hi-lo.h>
+
+#include "nfp_net_ctrl.h"
+
+#define nn_err(nn, fmt, args...)  netdev_err((nn)->netdev, fmt, ## args)
+#define nn_warn(nn, fmt, args...) netdev_warn((nn)->netdev, fmt, ## args)
+#define nn_info(nn, fmt, args...) netdev_info((nn)->netdev, fmt, ## args)
+#define nn_dbg(nn, fmt, args...)  netdev_dbg((nn)->netdev, fmt, ## args)
+#define nn_warn_ratelimit(nn, fmt, args...)				\
+	do {								\
+		if (unlikely(net_ratelimit()))				\
+			netdev_warn((nn)->netdev, fmt, ## args);	\
+	} while (0)
+
+/* Max time to wait for NFP to respond on updates (in ms) */
+#define NFP_NET_POLL_TIMEOUT	5000
+
+/* Bar allocation */
+#define NFP_NET_CRTL_BAR	0
+#define NFP_NET_Q0_BAR		2
+#define NFP_NET_Q1_BAR		4	/* OBSOLETE */
+
+/* Max bits in DMA address */
+#define NFP_NET_MAX_DMA_BITS	40
+
+/* Default size for MTU and freelist buffer sizes */
+#define NFP_NET_DEFAULT_MTU		1500
+#define NFP_NET_DEFAULT_RX_BUFSZ	2048
+
+/* Maximum number of bytes prepended to a packet */
+#define NFP_NET_MAX_PREPEND		64
+
+/* Interrupt definitions */
+#define NFP_NET_NON_Q_VECTORS		2
+#define NFP_NET_IRQ_LSC_IDX		0
+#define NFP_NET_IRQ_EXN_IDX		1
+
+/* Queue/Ring definitions */
+#define NFP_NET_MAX_TX_RINGS	64	/* Max. # of Tx rings per device */
+#define NFP_NET_MAX_RX_RINGS	64	/* Max. # of Rx rings per device */
+
+#define NFP_NET_MIN_TX_DESCS	256	/* Min. # of Tx descs per ring */
+#define NFP_NET_MIN_RX_DESCS	256	/* Min. # of Rx descs per ring */
+#define NFP_NET_MAX_TX_DESCS	(256 * 1024) /* Max. # of Tx descs per ring */
+#define NFP_NET_MAX_RX_DESCS	(256 * 1024) /* Max. # of Rx descs per ring */
+
+#define NFP_NET_TX_DESCS_DEFAULT 4096	/* Default # of Tx descs per ring */
+#define NFP_NET_RX_DESCS_DEFAULT 4096	/* Default # of Rx descs per ring */
+
+#define NFP_NET_FL_BATCH	16	/* Add freelist in this Batch size */
+
+/* Offload definitions */
+#define NFP_NET_N_VXLAN_PORTS	(NFP_NET_CFG_VXLAN_SZ / sizeof(__be16))
+
+/* Forward declarations */
+struct nfp_net;
+struct nfp_net_r_vector;
+
+/* Convenience macro for writing dma address into RX/TX descriptors */
+#define nfp_desc_set_dma_addr(desc, dma_addr)				\
+	do {								\
+		__typeof(desc) __d = (desc);				\
+		dma_addr_t __addr = (dma_addr);				\
+									\
+		__d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr));	\
+		__d->dma_addr_hi = upper_32_bits(__addr) & 0xff;	\
+	} while (0)
+
+/* TX descriptor format */
+
+#define PCIE_DESC_TX_EOP		BIT(7)
+#define PCIE_DESC_TX_OFFSET_MASK	GENMASK(6, 0)
+#define PCIE_DESC_TX_MSS_MASK		GENMASK(13, 0)
+
+/* Flags in the host TX descriptor */
+#define PCIE_DESC_TX_CSUM		BIT(7)
+#define PCIE_DESC_TX_IP4_CSUM		BIT(6)
+#define PCIE_DESC_TX_TCP_CSUM		BIT(5)
+#define PCIE_DESC_TX_UDP_CSUM		BIT(4)
+#define PCIE_DESC_TX_VLAN		BIT(3)
+#define PCIE_DESC_TX_LSO		BIT(2)
+#define PCIE_DESC_TX_ENCAP		BIT(1)
+#define PCIE_DESC_TX_O_IP4_CSUM	BIT(0)
+
+struct nfp_net_tx_desc {
+	union {
+		struct {
+			u8 dma_addr_hi; /* High bits of host buf address */
+			__le16 dma_len;	/* Length to DMA for this desc */
+			u8 offset_eop;	/* Offset in buf where pkt starts +
+					 * highest bit is eop flag.
+					 */
+			__le32 dma_addr_lo; /* Low 32bit of host buf addr */
+
+			__le16 mss;	/* MSS to be used for LSO */
+			u8 l4_offset;	/* LSO, where the L4 data starts */
+			u8 flags;	/* TX Flags, see @PCIE_DESC_TX_* */
+
+			__le16 vlan;	/* VLAN tag to add if indicated */
+			__le16 data_len; /* Length of frame + meta data */
+		} __packed;
+		__le32 vals[4];
+	};
+};
+
+/**
+ * struct nfp_net_tx_buf - software TX buffer descriptor
+ * @skb:	sk_buff associated with this buffer
+ * @dma_addr:	DMA mapping address of the buffer
+ * @fidx:	Fragment index (-1 for the head and [0..nr_frags-1] for frags)
+ * @pkt_cnt:	Number of packets to be produced out of the skb associated
+ *		with this buffer (valid only on the head's buffer).
+ *		Will be 1 for all non-TSO packets.
+ * @real_len:	Number of bytes to be produced out of the skb (valid only
+ *		on the head's buffer). Equal to skb->len for non-TSO packets.
+ */
+struct nfp_net_tx_buf {
+	struct sk_buff *skb;
+	dma_addr_t dma_addr;
+	short int fidx;
+	u16 pkt_cnt;
+	u32 real_len;
+};
+
+/**
+ * struct nfp_net_tx_ring - TX ring structure
+ * @r_vec:      Back pointer to ring vector structure
+ * @idx:        Ring index from Linux's perspective
+ * @qcidx:      Queue Controller Peripheral (QCP) queue index for the TX queue
+ * @qcp_q:      Pointer to base of the QCP TX queue
+ * @cnt:        Size of the queue in number of descriptors
+ * @wr_p:       TX ring write pointer (free running)
+ * @rd_p:       TX ring read pointer (free running)
+ * @qcp_rd_p:   Local copy of QCP TX queue read pointer
+ * @wr_ptr_add:	Accumulated number of buffers to add to QCP write pointer
+ *		(used for .xmit_more delayed kick)
+ * @txbufs:     Array of transmitted TX buffers, to free on transmit
+ * @txds:       Virtual address of TX ring in host memory
+ * @dma:        DMA address of the TX ring
+ * @size:       Size, in bytes, of the TX ring (needed to free)
+ */
+struct nfp_net_tx_ring {
+	struct nfp_net_r_vector *r_vec;
+
+	u32 idx;
+	int qcidx;
+	u8 __iomem *qcp_q;
+
+	u32 cnt;
+	u32 wr_p;
+	u32 rd_p;
+	u32 qcp_rd_p;
+
+	u32 wr_ptr_add;
+
+	struct nfp_net_tx_buf *txbufs;
+	struct nfp_net_tx_desc *txds;
+
+	dma_addr_t dma;
+	unsigned int size;
+} ____cacheline_aligned;
+
+/* RX and freelist descriptor format */
+
+#define PCIE_DESC_RX_DD			BIT(7)
+#define PCIE_DESC_RX_META_LEN_MASK	GENMASK(6, 0)
+
+/* Flags in the RX descriptor */
+#define PCIE_DESC_RX_RSS		cpu_to_le16(BIT(15))
+#define PCIE_DESC_RX_I_IP4_CSUM		cpu_to_le16(BIT(14))
+#define PCIE_DESC_RX_I_IP4_CSUM_OK	cpu_to_le16(BIT(13))
+#define PCIE_DESC_RX_I_TCP_CSUM		cpu_to_le16(BIT(12))
+#define PCIE_DESC_RX_I_TCP_CSUM_OK	cpu_to_le16(BIT(11))
+#define PCIE_DESC_RX_I_UDP_CSUM		cpu_to_le16(BIT(10))
+#define PCIE_DESC_RX_I_UDP_CSUM_OK	cpu_to_le16(BIT(9))
+#define PCIE_DESC_RX_SPARE		cpu_to_le16(BIT(8))
+#define PCIE_DESC_RX_EOP		cpu_to_le16(BIT(7))
+#define PCIE_DESC_RX_IP4_CSUM		cpu_to_le16(BIT(6))
+#define PCIE_DESC_RX_IP4_CSUM_OK	cpu_to_le16(BIT(5))
+#define PCIE_DESC_RX_TCP_CSUM		cpu_to_le16(BIT(4))
+#define PCIE_DESC_RX_TCP_CSUM_OK	cpu_to_le16(BIT(3))
+#define PCIE_DESC_RX_UDP_CSUM		cpu_to_le16(BIT(2))
+#define PCIE_DESC_RX_UDP_CSUM_OK	cpu_to_le16(BIT(1))
+#define PCIE_DESC_RX_VLAN		cpu_to_le16(BIT(0))
+
+#define PCIE_DESC_RX_CSUM_ALL		(PCIE_DESC_RX_IP4_CSUM |	\
+					 PCIE_DESC_RX_TCP_CSUM |	\
+					 PCIE_DESC_RX_UDP_CSUM |	\
+					 PCIE_DESC_RX_I_IP4_CSUM |	\
+					 PCIE_DESC_RX_I_TCP_CSUM |	\
+					 PCIE_DESC_RX_I_UDP_CSUM)
+#define PCIE_DESC_RX_CSUM_OK_SHIFT	1
+#define __PCIE_DESC_RX_CSUM_ALL		le16_to_cpu(PCIE_DESC_RX_CSUM_ALL)
+#define __PCIE_DESC_RX_CSUM_ALL_OK	(__PCIE_DESC_RX_CSUM_ALL >>	\
+					 PCIE_DESC_RX_CSUM_OK_SHIFT)
+
+struct nfp_net_rx_desc {
+	union {
+		struct {
+			u8 dma_addr_hi;	/* High bits of the buf address */
+			__le16 reserved; /* Must be zero */
+			u8 meta_len_dd; /* Must be zero */
+
+			__le32 dma_addr_lo; /* Low bits of the buffer address */
+		} __packed fld;
+
+		struct {
+			__le16 data_len; /* Length of the frame + meta data */
+			u8 reserved;
+			u8 meta_len_dd;	/* Length of meta data prepended +
+					 * descriptor done flag.
+					 */
+
+			__le16 flags;	/* RX flags. See @PCIE_DESC_RX_* */
+			__le16 vlan;	/* VLAN if stripped */
+		} __packed rxd;
+
+		__le32 vals[2];
+	};
+};
+
+struct nfp_net_rx_hash {
+	__be32 hash_type;
+	__be32 hash;
+};
+
+/**
+ * struct nfp_net_rx_buf - software RX buffer descriptor
+ * @skb:	sk_buff associated with this buffer
+ * @dma_addr:	DMA mapping address of the buffer
+ */
+struct nfp_net_rx_buf {
+	struct sk_buff *skb;
+	dma_addr_t dma_addr;
+};
+
+/**
+ * struct nfp_net_rx_ring - RX ring structure
+ * @r_vec:      Back pointer to ring vector structure
+ * @cnt:        Size of the queue in number of descriptors
+ * @wr_p:       FL/RX ring write pointer (free running)
+ * @rd_p:       FL/RX ring read pointer (free running)
+ * @idx:        Ring index from Linux's perspective
+ * @fl_qcidx:   Queue Controller Peripheral (QCP) queue index for the freelist
+ * @rx_qcidx:   Queue Controller Peripheral (QCP) queue index for the RX queue
+ * @qcp_fl:     Pointer to base of the QCP freelist queue
+ * @qcp_rx:     Pointer to base of the QCP RX queue
+ * @wr_ptr_add: Accumulated number of buffers to add to QCP write pointer
+ *              (used for free list batching)
+ * @rxbufs:     Array of transmitted FL/RX buffers
+ * @rxds:       Virtual address of FL/RX ring in host memory
+ * @dma:        DMA address of the FL/RX ring
+ * @size:       Size, in bytes, of the FL/RX ring (needed to free)
+ */
+struct nfp_net_rx_ring {
+	struct nfp_net_r_vector *r_vec;
+
+	u32 cnt;
+	u32 wr_p;
+	u32 rd_p;
+
+	u16 idx;
+	u16 wr_ptr_add;
+
+	int fl_qcidx;
+	int rx_qcidx;
+	u8 __iomem *qcp_fl;
+	u8 __iomem *qcp_rx;
+
+	struct nfp_net_rx_buf *rxbufs;
+	struct nfp_net_rx_desc *rxds;
+
+	dma_addr_t dma;
+	unsigned int size;
+} ____cacheline_aligned;
+
+/**
+ * struct nfp_net_r_vector - Per ring interrupt vector configuration
+ * @nfp_net:        Backpointer to nfp_net structure
+ * @napi:           NAPI structure for this ring vec
+ * @tx_ring:        Pointer to TX ring
+ * @rx_ring:        Pointer to RX ring
+ * @irq_idx:        Index into MSI-X table
+ * @rx_sync:	    Seqlock for atomic updates of RX stats
+ * @rx_pkts:        Number of received packets
+ * @rx_bytes:	    Number of received bytes
+ * @rx_drops:	    Number of packets dropped on RX due to lack of resources
+ * @hw_csum_rx_ok:  Counter of packets where the HW checksum was OK
+ * @hw_csum_rx_inner_ok: Counter of packets where the inner HW checksum was OK
+ * @hw_csum_rx_error:	 Counter of packets with bad checksums
+ * @tx_sync:	    Seqlock for atomic updates of TX stats
+ * @tx_pkts:	    Number of Transmitted packets
+ * @tx_bytes:	    Number of Transmitted bytes
+ * @hw_csum_tx:	    Counter of packets with TX checksum offload requested
+ * @hw_csum_tx_inner:	 Counter of inner TX checksum offload requests
+ * @tx_gather:	    Counter of packets with Gather DMA
+ * @tx_lso:	    Counter of LSO packets sent
+ * @tx_errors:	    How many TX errors were encountered
+ * @tx_busy:        How often was TX busy (no space)?
+ * @handler:        Interrupt handler for this ring vector
+ * @name:           Name of the interrupt vector
+ * @affinity_mask:  SMP affinity mask for this vector
+ *
+ * This structure ties RX and TX rings to interrupt vectors and a NAPI
+ * context. This currently only supports one RX and TX ring per
+ * interrupt vector but might be extended in the future to allow
+ * association of multiple rings per vector.
+ */
+struct nfp_net_r_vector {
+	struct nfp_net *nfp_net;
+	struct napi_struct napi;
+
+	struct nfp_net_tx_ring *tx_ring;
+	struct nfp_net_rx_ring *rx_ring;
+
+	int irq_idx;
+
+	struct u64_stats_sync rx_sync;
+	u64 rx_pkts;
+	u64 rx_bytes;
+	u64 rx_drops;
+	u64 hw_csum_rx_ok;
+	u64 hw_csum_rx_inner_ok;
+	u64 hw_csum_rx_error;
+
+	struct u64_stats_sync tx_sync;
+	u64 tx_pkts;
+	u64 tx_bytes;
+	u64 hw_csum_tx;
+	u64 hw_csum_tx_inner;
+	u64 tx_gather;
+	u64 tx_lso;
+	u64 tx_errors;
+	u64 tx_busy;
+
+	irq_handler_t handler;
+	char name[IFNAMSIZ + 8];
+	cpumask_t affinity_mask;
+} ____cacheline_aligned;
+
+/* Firmware version as it is written in the 32bit value in the BAR */
+struct nfp_net_fw_version {
+	u8 minor;
+	u8 major;
+	u8 class;
+	u8 resv;
+} __packed;
+
+static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
+				     u8 resv, u8 class, u8 major, u8 minor)
+{
+	return fw_ver->resv == resv &&
+	       fw_ver->class == class &&
+	       fw_ver->major == major &&
+	       fw_ver->minor == minor;
+}
+
+/**
+ * struct nfp_net - NFP network device structure
+ * @pdev:               Backpointer to PCI device
+ * @netdev:             Backpointer to net_device structure
+ * @nfp_fallback:       Is the driver used in fallback mode?
+ * @is_vf:              Is the driver attached to a VF?
+ * @is_nfp3200:         Is the driver for a NFP-3200 card?
+ * @fw_loaded:          Is the firmware loaded?
+ * @ctrl:               Local copy of the control register/word.
+ * @fl_bufsz:           Currently configured size of the freelist buffers
+ * @rx_offset:		Offset in the RX buffers where packet data starts
+ * @cpp:                Pointer to the CPP handle
+ * @nfp_dev_cpp:        Pointer to the NFP Device handle
+ * @ctrl_area:          Pointer to the CPP area for the control BAR
+ * @tx_area:            Pointer to the CPP area for the TX queues
+ * @rx_area:            Pointer to the CPP area for the FL/RX queues
+ * @fw_ver:             Firmware version
+ * @cap:                Capabilities advertised by the Firmware
+ * @max_mtu:            Maximum supported MTU advertised by the Firmware
+ * @rss_cfg:            RSS configuration
+ * @rss_key:            RSS secret key
+ * @rss_itbl:           RSS indirection table
+ * @max_tx_rings:       Maximum number of TX rings supported by the Firmware
+ * @max_rx_rings:       Maximum number of RX rings supported by the Firmware
+ * @num_tx_rings:       Currently configured number of TX rings
+ * @num_rx_rings:       Currently configured number of RX rings
+ * @txd_cnt:            Size of the TX ring in number of descriptors
+ * @rxd_cnt:            Size of the RX ring in number of descriptors
+ * @tx_rings:           Array of pre-allocated TX ring structures
+ * @rx_rings:           Array of pre-allocated RX ring structures
+ * @num_irqs:	        Number of allocated interrupt vectors
+ * @num_r_vecs:         Number of used ring vectors
+ * @r_vecs:             Pre-allocated array of ring vectors
+ * @irq_entries:        Pre-allocated array of MSI-X entries
+ * @lsc_handler:        Handler for Link State Change interrupt
+ * @lsc_name:           Name for Link State Change interrupt
+ * @exn_handler:        Handler for Exception interrupt
+ * @exn_name:           Name for Exception interrupt
+ * @shared_handler:     Handler for shared interrupts
+ * @shared_name:        Name for shared interrupt
+ * @me_freq_mhz:        ME clock_freq (MHz)
+ * @reconfig_lock:	Protects HW reconfiguration request regs/machinery
+ * @link_up:            Is the link up?
+ * @link_status_lock:	Protects @link_up and ensures atomicity with BAR reading
+ * @rx_coalesce_usecs:      RX interrupt moderation usecs delay parameter
+ * @rx_coalesce_max_frames: RX interrupt moderation frame count parameter
+ * @tx_coalesce_usecs:      TX interrupt moderation usecs delay parameter
+ * @tx_coalesce_max_frames: TX interrupt moderation frame count parameter
+ * @vxlan_ports:	VXLAN ports for RX inner csum offload communicated to HW
+ * @vxlan_usecnt:	IPv4/IPv6 VXLAN port use counts
+ * @qcp_cfg:            Pointer to QCP queue used for configuration notification
+ * @ctrl_bar:           Pointer to mapped control BAR
+ * @tx_bar:             Pointer to mapped TX queues
+ * @rx_bar:             Pointer to mapped FL/RX queues
+ * @debugfs_dir:	Device directory in debugfs
+ */
+struct nfp_net {
+	struct pci_dev *pdev;
+	struct net_device *netdev;
+
+	unsigned nfp_fallback:1;
+	unsigned is_vf:1;
+	unsigned is_nfp3200:1;
+	unsigned fw_loaded:1;
+
+	u32 ctrl;
+	u32 fl_bufsz;
+
+	u32 rx_offset;
+
+#ifdef CONFIG_PCI_IOV
+	unsigned int num_vfs;
+	struct vf_data_storage *vfinfo;
+	int vf_rate_link_speed;
+#endif
+
+	struct nfp_cpp *cpp;
+	struct platform_device *nfp_dev_cpp;
+	struct nfp_cpp_area *ctrl_area;
+	struct nfp_cpp_area *tx_area;
+	struct nfp_cpp_area *rx_area;
+
+	struct nfp_net_fw_version fw_ver;
+	u32 cap;
+	u32 max_mtu;
+
+	u32 rss_cfg;
+	u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ];
+	u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ];
+
+	int max_tx_rings;
+	int max_rx_rings;
+
+	int num_tx_rings;
+	int num_rx_rings;
+
+	int stride_tx;
+	int stride_rx;
+
+	int txd_cnt;
+	int rxd_cnt;
+
+	struct nfp_net_tx_ring tx_rings[NFP_NET_MAX_TX_RINGS];
+	struct nfp_net_rx_ring rx_rings[NFP_NET_MAX_RX_RINGS];
+
+	u8 num_irqs;
+	u8 num_r_vecs;
+	struct nfp_net_r_vector r_vecs[NFP_NET_MAX_TX_RINGS];
+	struct msix_entry irq_entries[NFP_NET_NON_Q_VECTORS +
+				      NFP_NET_MAX_TX_RINGS];
+
+	irq_handler_t lsc_handler;
+	char lsc_name[IFNAMSIZ + 8];
+
+	irq_handler_t exn_handler;
+	char exn_name[IFNAMSIZ + 8];
+
+	irq_handler_t shared_handler;
+	char shared_name[IFNAMSIZ + 8];
+
+	u32 me_freq_mhz;
+
+	bool link_up;
+	spinlock_t link_status_lock;
+
+	spinlock_t reconfig_lock;
+
+	u32 rx_coalesce_usecs;
+	u32 rx_coalesce_max_frames;
+	u32 tx_coalesce_usecs;
+	u32 tx_coalesce_max_frames;
+
+	__be16 vxlan_ports[NFP_NET_N_VXLAN_PORTS];
+	u8 vxlan_usecnt[NFP_NET_N_VXLAN_PORTS];
+
+	u8 __iomem *qcp_cfg;
+
+	u8 __iomem *ctrl_bar;
+	u8 __iomem *q_bar;
+	u8 __iomem *tx_bar;
+	u8 __iomem *rx_bar;
+
+	struct dentry *debugfs_dir;
+};
+
+/* Functions to read/write from/to a BAR
+ * Performs any endian conversion necessary.
+ */
+static inline void nn_writeb(struct nfp_net *nn, int off, u8 val)
+{
+	writeb(val, nn->ctrl_bar + off);
+}
+
+/* NFP-3200 can't handle 16-bit accesses too well - hence no readw/writew */
+
+static inline u32 nn_readl(struct nfp_net *nn, int off)
+{
+	return readl(nn->ctrl_bar + off);
+}
+
+static inline void nn_writel(struct nfp_net *nn, int off, u32 val)
+{
+	writel(val, nn->ctrl_bar + off);
+}
+
+static inline u64 nn_readq(struct nfp_net *nn, int off)
+{
+	return readq(nn->ctrl_bar + off);
+}
+
+static inline void nn_writeq(struct nfp_net *nn, int off, u64 val)
+{
+	writeq(val, nn->ctrl_bar + off);
+}
+
+/* Flush posted PCI writes by reading something without side effects */
+static inline void nn_pci_flush(struct nfp_net *nn)
+{
+	nn_readl(nn, NFP_NET_CFG_VERSION);
+}
+
+/* Queue Controller Peripheral access functions and definitions.
+ *
+ * Some of the BARs of the NFP are mapped to portions of the Queue
+ * Controller Peripheral (QCP) address space on the NFP.  A QCP queue
+ * has a read and a write pointer (as well as a size and flags,
+ * indicating overflow etc).  The QCP offers a number of different
+ * operations on queue pointers, but here we only offer functions to
+ * either add to a pointer or to read the pointer value.
+ */
+#define NFP_QCP_QUEUE_ADDR_SZ			0x800
+#define NFP_QCP_QUEUE_OFF(_x)			((_x) * NFP_QCP_QUEUE_ADDR_SZ)
+#define NFP_QCP_QUEUE_ADD_RPTR			0x0000
+#define NFP_QCP_QUEUE_ADD_WPTR			0x0004
+#define NFP_QCP_QUEUE_STS_LO			0x0008
+#define NFP_QCP_QUEUE_STS_LO_READPTR_mask	0x3ffff
+#define NFP_QCP_QUEUE_STS_HI			0x000c
+#define NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask	0x3ffff
+
+/* The offset of a QCP queue in the PCIe Target (same on NFP3200 and NFP6000) */
+#define NFP_PCIE_QUEUE(_q) (0x80000 + (NFP_QCP_QUEUE_ADDR_SZ * ((_q) & 0xff)))
+
+/* nfp_qcp_ptr - Read or Write Pointer of a queue */
+enum nfp_qcp_ptr {
+	NFP_QCP_READ_PTR = 0,
+	NFP_QCP_WRITE_PTR
+};
+
+/* There appears to be an *undocumented* upper limit on the value which
+ * one can add to a queue and that value is either 0x3f or 0x7f.  We
+ * go with 0x3f as a conservative measure.
+ */
+#define NFP_QCP_MAX_ADD				0x3f
+
+static inline void _nfp_qcp_ptr_add(u8 __iomem *q,
+				    enum nfp_qcp_ptr ptr, u32 val)
+{
+	u32 off;
+
+	if (ptr == NFP_QCP_READ_PTR)
+		off = NFP_QCP_QUEUE_ADD_RPTR;
+	else
+		off = NFP_QCP_QUEUE_ADD_WPTR;
+
+	while (val > NFP_QCP_MAX_ADD) {
+		writel(NFP_QCP_MAX_ADD, q + off);
+		val -= NFP_QCP_MAX_ADD;
+	}
+
+	writel(val, q + off);
+}
+
+/**
+ * nfp_qcp_rd_ptr_add() - Add the value to the read pointer of a queue
+ *
+ * @q:   Base address for queue structure
+ * @val: Value to add to the queue pointer
+ *
+ * If @val is greater than @NFP_QCP_MAX_ADD multiple writes are performed.
+ */
+static inline void nfp_qcp_rd_ptr_add(u8 __iomem *q, u32 val)
+{
+	_nfp_qcp_ptr_add(q, NFP_QCP_READ_PTR, val);
+}
+
+/**
+ * nfp_qcp_wr_ptr_add() - Add the value to the write pointer of a queue
+ *
+ * @q:   Base address for queue structure
+ * @val: Value to add to the queue pointer
+ *
+ * If @val is greater than @NFP_QCP_MAX_ADD multiple writes are performed.
+ */
+static inline void nfp_qcp_wr_ptr_add(u8 __iomem *q, u32 val)
+{
+	_nfp_qcp_ptr_add(q, NFP_QCP_WRITE_PTR, val);
+}
+
+static inline u32 _nfp_qcp_read(u8 __iomem *q, enum nfp_qcp_ptr ptr)
+{
+	u32 off;
+	u32 val;
+
+	if (ptr == NFP_QCP_READ_PTR)
+		off = NFP_QCP_QUEUE_STS_LO;
+	else
+		off = NFP_QCP_QUEUE_STS_HI;
+
+	val = readl(q + off);
+
+	if (ptr == NFP_QCP_READ_PTR)
+		return val & NFP_QCP_QUEUE_STS_LO_READPTR_mask;
+	else
+		return val & NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask;
+}
+
+/**
+ * nfp_qcp_rd_ptr_read() - Read the current read pointer value for a queue
+ * @q:  Base address for queue structure
+ *
+ * Return: Value read.
+ */
+static inline u32 nfp_qcp_rd_ptr_read(u8 __iomem *q)
+{
+	return _nfp_qcp_read(q, NFP_QCP_READ_PTR);
+}
+
+/**
+ * nfp_qcp_wr_ptr_read() - Read the current write pointer value for a queue
+ * @q:  Base address for queue structure
+ *
+ * Return: Value read.
+ */
+static inline u32 nfp_qcp_wr_ptr_read(u8 __iomem *q)
+{
+	return _nfp_qcp_read(q, NFP_QCP_WRITE_PTR);
+}
+
+/* Globals */
+extern const char nfp_net_driver_name[];
+extern const char nfp_net_driver_version[];
+
+/* Prototypes */
+void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
+			    void __iomem *ctrl_bar);
+
+struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
+				     int max_tx_rings, int max_rx_rings);
+void nfp_net_netdev_free(struct nfp_net *nn);
+int nfp_net_netdev_init(struct net_device *netdev);
+void nfp_net_netdev_clean(struct net_device *netdev);
+void nfp_net_set_ethtool_ops(struct net_device *netdev);
+void nfp_net_info(struct nfp_net *nn);
+int nfp_net_reconfig(struct nfp_net *nn, u32 update);
+void nfp_net_rss_write_itbl(struct nfp_net *nn);
+void nfp_net_rss_write_key(struct nfp_net *nn);
+void nfp_net_coalesce_write_cfg(struct nfp_net *nn);
+int nfp_net_irqs_alloc(struct nfp_net *nn);
+void nfp_net_irqs_disable(struct nfp_net *nn);
+
+#ifdef CONFIG_NFP_NET_DEBUG
+void nfp_net_debugfs_create(void);
+void nfp_net_debugfs_destroy(void);
+void nfp_net_debugfs_adapter_add(struct nfp_net *nn);
+void nfp_net_debugfs_adapter_del(struct nfp_net *nn);
+#else
+static inline void nfp_net_debugfs_create(void)
+{
+}
+
+static inline void nfp_net_debugfs_destroy(void)
+{
+}
+
+static inline void nfp_net_debugfs_adapter_add(struct nfp_net *nn)
+{
+}
+
+static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn)
+{
+}
+#endif /* CONFIG_NFP_NET_DEBUG */
+
+#endif /* _NFP_NET_H_ */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
new file mode 100644
index 000000000000..038ac6b14a60
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -0,0 +1,2432 @@
+/*
+ * Copyright (C) 2015 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * nfp_net_common.c
+ * Netronome network device driver: Common functions between PF and VF
+ * Authors: Jakub Kicinski <jakub.kicinski@netronome.com>
+ *          Jason McMullan <jason.mcmullan@netronome.com>
+ *          Rolf Neugebauer <rolf.neugebauer@netronome.com>
+ *          Brad Petrus <brad.petrus@netronome.com>
+ *          Chris Telfer <chris.telfer@netronome.com>
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
+#include <linux/msi.h>
+#include <linux/ethtool.h>
+#include <linux/log2.h>
+#include <linux/if_vlan.h>
+#include <linux/random.h>
+
+#include <linux/ktime.h>
+
+#include <net/vxlan.h>
+
+#include "nfp_net_ctrl.h"
+#include "nfp_net.h"
+
+/**
+ * nfp_net_get_fw_version() - Read and parse the FW version
+ * @fw_ver:	Output fw_version structure to read to
+ * @ctrl_bar:	Mapped address of the control BAR
+ */
+void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
+			    void __iomem *ctrl_bar)
+{
+	u32 reg;
+
+	reg = readl(ctrl_bar + NFP_NET_CFG_VERSION);
+	put_unaligned_le32(reg, fw_ver);
+}
+
+/**
+ * nfp_net_reconfig() - Reconfigure the firmware
+ * @nn:      NFP Net device to reconfigure
+ * @update:  The value for the update field in the BAR config
+ *
+ * Write the update word to the BAR and ping the reconfig queue.  Then
+ * poll until the firmware has acknowledged the update by zeroing the
+ * update word.
+ *
+ * Return: 0 on success, -EIO on a firmware-reported error or poll timeout
+ */
+int nfp_net_reconfig(struct nfp_net *nn, u32 update)
+{
+	int cnt, ret = 0;
+	u32 new;
+
+	spin_lock_bh(&nn->reconfig_lock);
+
+	nn_writel(nn, NFP_NET_CFG_UPDATE, update);
+	/* ensure update is written before pinging HW */
+	nn_pci_flush(nn);
+	nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1);
+
+	/* Poll update field, waiting for NFP to ack the config */
+	for (cnt = 0; ; cnt++) {
+		new = nn_readl(nn, NFP_NET_CFG_UPDATE);
+		if (new == 0)
+			break;
+		if (new & NFP_NET_CFG_UPDATE_ERR) {
+			nn_err(nn, "Reconfig error: 0x%08x\n", new);
+			ret = -EIO;
+			break;
+		} else if (cnt >= NFP_NET_POLL_TIMEOUT) {
+			nn_err(nn, "Reconfig timeout for 0x%08x after %dms\n",
+			       update, cnt);
+			ret = -EIO;
+			break;
+		}
+		mdelay(1);
+	}
+
+	spin_unlock_bh(&nn->reconfig_lock);
+	return ret;
+}
+
+/* Interrupt configuration and handling
+ */
+
+/**
+ * nfp_net_irq_unmask_msix() - Unmask MSI-X after automasking
+ * @nn:       NFP Network structure
+ * @entry_nr: MSI-X table entry
+ *
+ * Clear the MSI-X table mask bit for the given entry bypassing Linux irq
+ * handling subsystem.  Use *only* to reenable automasked vectors.
+ */
+static void nfp_net_irq_unmask_msix(struct nfp_net *nn, unsigned int entry_nr)
+{
+	struct list_head *msi_head = &nn->pdev->dev.msi_list;
+	struct msi_desc *entry;
+	u32 off;
+
+	/* All MSI-Xs have the same mask_base */
+	entry = list_first_entry(msi_head, struct msi_desc, list);
+
+	off = (PCI_MSIX_ENTRY_SIZE * entry_nr) +
+		PCI_MSIX_ENTRY_VECTOR_CTRL;
+	writel(0, entry->mask_base + off);
+	readl(entry->mask_base);
+}
+
+/**
+ * nfp_net_irq_unmask() - Re-enable an interrupt after it was auto-masked
+ * @nn:       NFP Network structure
+ * @entry_nr: MSI-X table entry
+ *
+ * With NFP_NET_CFG_CTRL_MSIXAUTO set the MSI-X table mask bit is cleared,
+ * otherwise the entry's ICR is written as unmasked and the write flushed.
+ */
+static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr)
+{
+	if (nn->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) {
+		nfp_net_irq_unmask_msix(nn, entry_nr);
+	} else {
+		nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr),
+			  NFP_NET_CFG_ICR_UNMASKED);
+		nn_pci_flush(nn);
+	}
+}
+
+/**
+ * nfp_net_msix_alloc() - Try to allocate MSI-X irqs
+ * @nn:       NFP Network structure
+ * @nr_vecs:  Number of MSI-X vectors to allocate
+ *
+ * For MSI-X we want at least NFP_NET_NON_Q_VECTORS + 1 vectors.
+ *
+ * Return: Number of MSI-X vectors obtained or 0 on error.
+ */
+static int nfp_net_msix_alloc(struct nfp_net *nn, int nr_vecs)
+{
+	struct pci_dev *pdev = nn->pdev;
+	int nvecs;
+	int i;
+
+	for (i = 0; i < nr_vecs; i++)
+		nn->irq_entries[i].entry = i;
+
+	nvecs = pci_enable_msix_range(pdev, nn->irq_entries,
+				      NFP_NET_NON_Q_VECTORS + 1, nr_vecs);
+	if (nvecs < 0) {
+		nn_warn(nn, "Failed to enable MSI-X. Wanted %d-%d (err=%d)\n",
+			NFP_NET_NON_Q_VECTORS + 1, nr_vecs, nvecs);
+		return 0;
+	}
+
+	return nvecs;
+}
+
+/**
+ * nfp_net_irqs_wanted() - Compute the number of MSI-X vectors to ask for
+ * @nn:       NFP Network structure
+ *
+ * The larger of the TX/RX ring counts, capped at the number of online
+ * CPUs, plus NFP_NET_NON_Q_VECTORS for LSC etc.
+ *
+ * Return: Number of interrupts wanted
+ */
+static int nfp_net_irqs_wanted(struct nfp_net *nn)
+{
+	int nr_rings = max_t(int, nn->num_tx_rings, nn->num_rx_rings);
+	int nr_cpus = num_online_cpus();
+	int q_vecs;
+
+	/* never ask for more queue vectors than there are online CPUs */
+	q_vecs = min_t(int, nr_rings, nr_cpus);
+
+	/* the non-queue vectors come on top of the queue vectors */
+	return q_vecs + NFP_NET_NON_Q_VECTORS;
+}
+
+/**
+ * nfp_net_irqs_alloc() - allocates MSI-X irqs
+ * @nn:       NFP Network structure
+ *
+ * Return: Number of irqs obtained or 0 on error.
+ */
+int nfp_net_irqs_alloc(struct nfp_net *nn)
+{
+	int wanted_irqs;
+
+	wanted_irqs = nfp_net_irqs_wanted(nn);
+
+	nn->num_irqs = nfp_net_msix_alloc(nn, wanted_irqs);
+	if (nn->num_irqs == 0) {
+		nn_err(nn, "Failed to allocate MSI-X IRQs\n");
+		return 0;
+	}
+
+	nn->num_r_vecs = nn->num_irqs - NFP_NET_NON_Q_VECTORS;
+
+	if (nn->num_irqs < wanted_irqs)
+		nn_warn(nn, "Unable to allocate %d vectors. Got %d instead\n",
+			wanted_irqs, nn->num_irqs);
+
+	return nn->num_irqs;
+}
+
+/**
+ * nfp_net_irqs_disable() - Disable interrupts
+ * @nn:       NFP Network structure
+ *
+ * Undoes what @nfp_net_irqs_alloc() does.
+ */
+void nfp_net_irqs_disable(struct nfp_net *nn)
+{
+	pci_disable_msix(nn->pdev);
+}
+
+/**
+ * nfp_net_irq_rxtx() - Interrupt service routine for RX/TX rings.
+ * @irq:      Interrupt
+ * @data:     Opaque data structure
+ *
+ * Return: Indicate if the interrupt has been handled.
+ */
+static irqreturn_t nfp_net_irq_rxtx(int irq, void *data)
+{
+	struct nfp_net_r_vector *r_vec = data;
+
+	napi_schedule_irqoff(&r_vec->napi);
+
+	/* The FW auto-masks any interrupt, either via the MASK bit in
+	 * the MSI-X table or via the per entry ICR field.  So there
+	 * is no need to disable interrupts here.
+	 */
+	return IRQ_HANDLED;
+}
+
+/**
+ * nfp_net_read_link_status() - Reread link status from control BAR
+ * @nn:       NFP Network structure
+ */
+static void nfp_net_read_link_status(struct nfp_net *nn)
+{
+	unsigned long flags;
+	bool link_up;
+	u32 sts;
+
+	spin_lock_irqsave(&nn->link_status_lock, flags);
+
+	sts = nn_readl(nn, NFP_NET_CFG_STS);
+	link_up = !!(sts & NFP_NET_CFG_STS_LINK);
+
+	if (nn->link_up == link_up)
+		goto out;
+
+	nn->link_up = link_up;
+
+	if (nn->link_up) {
+		netif_carrier_on(nn->netdev);
+		netdev_info(nn->netdev, "NIC Link is Up\n");
+	} else {
+		netif_carrier_off(nn->netdev);
+		netdev_info(nn->netdev, "NIC Link is Down\n");
+	}
+out:
+	spin_unlock_irqrestore(&nn->link_status_lock, flags);
+}
+
+/**
+ * nfp_net_irq_lsc() - Interrupt service routine for link state changes
+ * @irq:      Interrupt
+ * @data:     Opaque data structure
+ *
+ * Return: Indicate if the interrupt has been handled.
+ */
+static irqreturn_t nfp_net_irq_lsc(int irq, void *data)
+{
+	struct nfp_net *nn = data;
+
+	nfp_net_read_link_status(nn);
+
+	nfp_net_irq_unmask(nn, NFP_NET_IRQ_LSC_IDX);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * nfp_net_irq_exn() - Interrupt service routine for exceptions
+ * @irq:      Interrupt
+ * @data:     Opaque data structure
+ *
+ * Return: Indicate if the interrupt has been handled.
+ */
+static irqreturn_t nfp_net_irq_exn(int irq, void *data)
+{
+	struct nfp_net *nn = data;
+
+	nn_err(nn, "%s: UNIMPLEMENTED.\n", __func__);
+	/* XXX TO BE IMPLEMENTED */
+	return IRQ_HANDLED;
+}
+
+/**
+ * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring
+ * @tx_ring:  TX ring structure
+ */
+static void nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring)
+{
+	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
+	struct nfp_net *nn = r_vec->nfp_net;
+
+	tx_ring->qcidx = tx_ring->idx * nn->stride_tx;
+	tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx);
+}
+
+/**
+ * nfp_net_rx_ring_init() - Fill in the boilerplate for a RX ring
+ * @rx_ring:  RX ring structure
+ */
+static void nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring)
+{
+	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
+	struct nfp_net *nn = r_vec->nfp_net;
+
+	rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx;
+	rx_ring->rx_qcidx = rx_ring->fl_qcidx + (nn->stride_rx - 1);
+
+	rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx);
+	rx_ring->qcp_rx = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->rx_qcidx);
+}
+
+/**
+ * nfp_net_irqs_assign() - Assign IRQs and setup rvecs.
+ * @netdev:   netdev structure
+ */
+static void nfp_net_irqs_assign(struct net_device *netdev)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	struct nfp_net_r_vector *r_vec;
+	int r;
+
+	/* Assumes nn->num_tx_rings == nn->num_rx_rings */
+	if (nn->num_tx_rings > nn->num_r_vecs) {
+		nn_warn(nn, "More rings (%d) than vectors (%d).\n",
+			nn->num_tx_rings, nn->num_r_vecs);
+		nn->num_tx_rings = nn->num_r_vecs;
+		nn->num_rx_rings = nn->num_r_vecs;
+	}
+
+	nn->lsc_handler = nfp_net_irq_lsc;
+	nn->exn_handler = nfp_net_irq_exn;
+
+	for (r = 0; r < nn->num_r_vecs; r++) {
+		r_vec = &nn->r_vecs[r];
+		r_vec->nfp_net = nn;
+		r_vec->handler = nfp_net_irq_rxtx;
+		r_vec->irq_idx = NFP_NET_NON_Q_VECTORS + r;
+
+		cpumask_set_cpu(r, &r_vec->affinity_mask);
+
+		r_vec->tx_ring = &nn->tx_rings[r];
+		nn->tx_rings[r].idx = r;
+		nn->tx_rings[r].r_vec = r_vec;
+		nfp_net_tx_ring_init(r_vec->tx_ring);
+
+		r_vec->rx_ring = &nn->rx_rings[r];
+		nn->rx_rings[r].idx = r;
+		nn->rx_rings[r].r_vec = r_vec;
+		nfp_net_rx_ring_init(r_vec->rx_ring);
+	}
+}
+
+/**
+ * nfp_net_aux_irq_request() - Request an auxiliary interrupt (LSC or EXN)
+ * @nn:		NFP Network structure
+ * @ctrl_offset: Control BAR offset where IRQ configuration should be written
+ * @format:	printf-style format to construct the interrupt name
+ * @name:	Pointer to allocated space for interrupt name
+ * @name_sz:	Size of space for interrupt name
+ * @vector_idx:	Index of MSI-X vector used for this interrupt
+ * @handler:	IRQ handler to register for this interrupt
+ */
+static int
+nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset,
+			const char *format, char *name, size_t name_sz,
+			unsigned int vector_idx, irq_handler_t handler)
+{
+	struct msix_entry *entry;
+	int err;
+
+	entry = &nn->irq_entries[vector_idx];
+
+	snprintf(name, name_sz, format, netdev_name(nn->netdev));
+	err = request_irq(entry->vector, handler, 0, name, nn);
+	if (err) {
+		nn_err(nn, "Failed to request IRQ %d (err=%d).\n",
+		       entry->vector, err);
+		return err;
+	}
+	nn_writeb(nn, ctrl_offset, vector_idx);
+
+	return 0;
+}
+
+/**
+ * nfp_net_aux_irq_free() - Free an auxiliary interrupt (LSC or EXN)
+ * @nn:		NFP Network structure
+ * @ctrl_offset: Control BAR offset where IRQ configuration should be written
+ * @vector_idx:	Index of MSI-X vector used for this interrupt
+ */
+static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset,
+				 unsigned int vector_idx)
+{
+	nn_writeb(nn, ctrl_offset, 0xff);
+	free_irq(nn->irq_entries[vector_idx].vector, nn);
+}
+
+/* Transmit
+ *
+ * One queue controller peripheral queue is used for transmit.  The
+ * driver en-queues packets for transmit by advancing the write
+ * pointer.  The device indicates that packets have transmitted by
+ * advancing the read pointer.  The driver maintains a local copy of
+ * the read and write pointer in @struct nfp_net_tx_ring.  The driver
+ * keeps @wr_p in sync with the queue controller write pointer and can
+ * determine how many packets have been transmitted by comparing its
+ * copy of the read pointer @rd_p with the read pointer maintained by
+ * the queue controller peripheral.
+ */
+
+/**
+ * nfp_net_tx_full() - Check if the TX ring is full
+ * @tx_ring: TX ring to check
+ * @dcnt:    Number of descriptors that need to be enqueued (must be >= 1)
+ *
+ * This function checks, based on the *host copy* of read/write
+ * pointer if a given TX ring is full.  The real TX queue may have
+ * some newly made available slots.
+ *
+ * Return: True if the ring is full.
+ */
+static inline int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt)
+{
+	return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt);
+}
+
+/* Wrappers for deciding when to stop and restart TX queues */
+static int nfp_net_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
+{
+	return !nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4);
+}
+
+static int nfp_net_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
+{
+	return nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1);
+}
+
+/**
+ * nfp_net_tx_ring_stop() - stop tx ring
+ * @nd_q:    netdev queue
+ * @tx_ring: driver tx queue structure
+ *
+ * Safely stop TX ring.  Remember that while we are running .start_xmit()
+ * someone else may be cleaning the TX ring completions so we need to be
+ * extra careful here.
+ */
+static void nfp_net_tx_ring_stop(struct netdev_queue *nd_q,
+				 struct nfp_net_tx_ring *tx_ring)
+{
+	netif_tx_stop_queue(nd_q);
+
+	/* We can race with the TX completion out of NAPI so recheck */
+	smp_mb();
+	if (unlikely(nfp_net_tx_ring_should_wake(tx_ring)))
+		netif_tx_start_queue(nd_q);
+}
+
+/**
+ * nfp_net_tx_tso() - Set up Tx descriptor for LSO
+ * @nn:  NFP Net device
+ * @r_vec: per-ring structure
+ * @txbuf: Pointer to driver soft TX descriptor
+ * @txd: Pointer to HW TX descriptor
+ * @skb: Pointer to SKB
+ *
+ * Set up Tx descriptor for LSO, do nothing for non-LSO skbs.
+ * Nothing is returned and the header length is not checked against a limit.
+ */
+static void nfp_net_tx_tso(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
+			   struct nfp_net_tx_buf *txbuf,
+			   struct nfp_net_tx_desc *txd, struct sk_buff *skb)
+{
+	u32 hdrlen;
+	u16 mss;
+
+	if (!skb_is_gso(skb))
+		return;
+
+	if (!skb->encapsulation)
+		hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	else
+		hdrlen = skb_inner_transport_header(skb) - skb->data +
+			inner_tcp_hdrlen(skb);
+
+	txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs;
+	txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1);
+
+	mss = skb_shinfo(skb)->gso_size & PCIE_DESC_TX_MSS_MASK;
+	txd->l4_offset = hdrlen;
+	txd->mss = cpu_to_le16(mss);
+	txd->flags |= PCIE_DESC_TX_LSO;
+
+	u64_stats_update_begin(&r_vec->tx_sync);
+	r_vec->tx_lso++;
+	u64_stats_update_end(&r_vec->tx_sync);
+}
+
+/**
+ * nfp_net_tx_csum() - Set TX CSUM offload flags in TX descriptor
+ * @nn:  NFP Net device
+ * @r_vec: per-ring structure
+ * @txbuf: Pointer to driver soft TX descriptor
+ * @txd: Pointer to TX descriptor
+ * @skb: Pointer to SKB
+ *
+ * This function sets the TX checksum flags in the TX descriptor based
+ * on the configuration and the protocol of the packet to be transmitted.
+ */
+static void nfp_net_tx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
+			    struct nfp_net_tx_buf *txbuf,
+			    struct nfp_net_tx_desc *txd, struct sk_buff *skb)
+{
+	struct ipv6hdr *ipv6h;
+	struct iphdr *iph;
+	u8 l4_hdr;
+
+	if (!(nn->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
+		return;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return;
+
+	txd->flags |= PCIE_DESC_TX_CSUM;
+	if (skb->encapsulation)
+		txd->flags |= PCIE_DESC_TX_ENCAP;
+
+	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
+	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
+
+	if (iph->version == 4) {
+		txd->flags |= PCIE_DESC_TX_IP4_CSUM;
+		l4_hdr = iph->protocol;
+	} else if (ipv6h->version == 6) {
+		l4_hdr = ipv6h->nexthdr;
+	} else {
+		nn_warn_ratelimit(nn, "partial checksum but ipv=%x!\n",
+				  iph->version);
+		return;
+	}
+
+	switch (l4_hdr) {
+	case IPPROTO_TCP:
+		txd->flags |= PCIE_DESC_TX_TCP_CSUM;
+		break;
+	case IPPROTO_UDP:
+		txd->flags |= PCIE_DESC_TX_UDP_CSUM;
+		break;
+	default:
+		nn_warn_ratelimit(nn, "partial checksum but l4 proto=%x!\n",
+				  l4_hdr);
+		return;
+	}
+
+	u64_stats_update_begin(&r_vec->tx_sync);
+	if (skb->encapsulation)
+		r_vec->hw_csum_tx_inner += txbuf->pkt_cnt;
+	else
+		r_vec->hw_csum_tx += txbuf->pkt_cnt;
+	u64_stats_update_end(&r_vec->tx_sync);
+}
+
+/**
+ * nfp_net_tx() - Main transmit entry point
+ * @skb:    SKB to transmit
+ * @netdev: netdev structure
+ *
+ * Return: NETDEV_TX_OK if skb was sent or dropped, NETDEV_TX_BUSY on full ring.
+ */
+static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	const struct skb_frag_struct *frag;
+	struct nfp_net_r_vector *r_vec;
+	struct nfp_net_tx_desc *txd, txdg;
+	struct nfp_net_tx_buf *txbuf;
+	struct nfp_net_tx_ring *tx_ring;
+	struct netdev_queue *nd_q;
+	dma_addr_t dma_addr;
+	unsigned int fsize;
+	int f, nr_frags;
+	int wr_idx;
+	u16 qidx;
+
+	qidx = skb_get_queue_mapping(skb);
+	tx_ring = &nn->tx_rings[qidx];
+	r_vec = tx_ring->r_vec;
+	nd_q = netdev_get_tx_queue(nn->netdev, qidx);
+
+	nr_frags = skb_shinfo(skb)->nr_frags;
+
+	if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) {
+		nn_warn_ratelimit(nn, "TX ring %d busy. wrp=%u rdp=%u\n",
+				  qidx, tx_ring->wr_p, tx_ring->rd_p);
+		netif_tx_stop_queue(nd_q);
+		u64_stats_update_begin(&r_vec->tx_sync);
+		r_vec->tx_busy++;
+		u64_stats_update_end(&r_vec->tx_sync);
+		return NETDEV_TX_BUSY;
+	}
+
+	/* Start with the head skbuf */
+	dma_addr = dma_map_single(&nn->pdev->dev, skb->data, skb_headlen(skb),
+				  DMA_TO_DEVICE);
+	if (dma_mapping_error(&nn->pdev->dev, dma_addr))
+		goto err_free;
+
+	wr_idx = tx_ring->wr_p % tx_ring->cnt;
+
+	/* Stash the soft descriptor of the head then initialize it */
+	txbuf = &tx_ring->txbufs[wr_idx];
+	txbuf->skb = skb;
+	txbuf->dma_addr = dma_addr;
+	txbuf->fidx = -1;
+	txbuf->pkt_cnt = 1;
+	txbuf->real_len = skb->len;
+
+	/* Build TX descriptor */
+	txd = &tx_ring->txds[wr_idx];
+	txd->offset_eop = (nr_frags == 0) ? PCIE_DESC_TX_EOP : 0;
+	txd->dma_len = cpu_to_le16(skb_headlen(skb));
+	nfp_desc_set_dma_addr(txd, dma_addr);
+	txd->data_len = cpu_to_le16(skb->len);
+
+	txd->flags = 0;
+	txd->mss = 0;
+	txd->l4_offset = 0;
+
+	nfp_net_tx_tso(nn, r_vec, txbuf, txd, skb);
+
+	nfp_net_tx_csum(nn, r_vec, txbuf, txd, skb);
+
+	if (skb_vlan_tag_present(skb) && nn->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
+		txd->flags |= PCIE_DESC_TX_VLAN;
+		txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
+	}
+
+	/* Gather DMA */
+	if (nr_frags > 0) {
+		/* all descs must match except for in addr, length and eop */
+		txdg = *txd;
+
+		for (f = 0; f < nr_frags; f++) {
+			frag = &skb_shinfo(skb)->frags[f];
+			fsize = skb_frag_size(frag);
+
+			dma_addr = skb_frag_dma_map(&nn->pdev->dev, frag, 0,
+						    fsize, DMA_TO_DEVICE);
+			if (dma_mapping_error(&nn->pdev->dev, dma_addr))
+				goto err_unmap;
+
+			wr_idx = (wr_idx + 1) % tx_ring->cnt;
+			tx_ring->txbufs[wr_idx].skb = skb;
+			tx_ring->txbufs[wr_idx].dma_addr = dma_addr;
+			tx_ring->txbufs[wr_idx].fidx = f;
+
+			txd = &tx_ring->txds[wr_idx];
+			*txd = txdg;
+			txd->dma_len = cpu_to_le16(fsize);
+			nfp_desc_set_dma_addr(txd, dma_addr);
+			txd->offset_eop =
+				(f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0;
+		}
+
+		u64_stats_update_begin(&r_vec->tx_sync);
+		r_vec->tx_gather++;
+		u64_stats_update_end(&r_vec->tx_sync);
+	}
+
+	netdev_tx_sent_queue(nd_q, txbuf->real_len);
+
+	tx_ring->wr_p += nr_frags + 1;
+	if (nfp_net_tx_ring_should_stop(tx_ring))
+		nfp_net_tx_ring_stop(nd_q, tx_ring);
+
+	tx_ring->wr_ptr_add += nr_frags + 1;
+	if (!skb->xmit_more || netif_xmit_stopped(nd_q)) {
+		/* force memory write before we let HW know */
+		wmb();
+		nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add);
+		tx_ring->wr_ptr_add = 0;
+	}
+
+	skb_tx_timestamp(skb);
+
+	return NETDEV_TX_OK;
+
+err_unmap:
+	/* unwind mapped frags newest first; f must shrink or this never ends */
+	while (--f >= 0) {
+		frag = &skb_shinfo(skb)->frags[f];
+		dma_unmap_page(&nn->pdev->dev,
+			       tx_ring->txbufs[wr_idx].dma_addr,
+			       skb_frag_size(frag), DMA_TO_DEVICE);
+		tx_ring->txbufs[wr_idx].skb = NULL;
+		tx_ring->txbufs[wr_idx].dma_addr = 0;
+		tx_ring->txbufs[wr_idx].fidx = -2;
+		wr_idx = wr_idx - 1;
+		if (wr_idx < 0)
+			wr_idx += tx_ring->cnt;
+	}
+	dma_unmap_single(&nn->pdev->dev, tx_ring->txbufs[wr_idx].dma_addr,
+			 skb_headlen(skb), DMA_TO_DEVICE);
+	tx_ring->txbufs[wr_idx].skb = NULL;
+	tx_ring->txbufs[wr_idx].dma_addr = 0;
+	tx_ring->txbufs[wr_idx].fidx = -2;
+err_free:
+	nn_warn_ratelimit(nn, "Failed to map DMA TX buffer\n");
+	u64_stats_update_begin(&r_vec->tx_sync);
+	r_vec->tx_errors++;
+	u64_stats_update_end(&r_vec->tx_sync);
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
+/**
+ * nfp_net_tx_complete() - Handle completed TX packets
+ * @tx_ring:   TX ring structure
+ *
+ * Returns nothing; completed packets/bytes are added to the r_vec TX stats.
+ */
+static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
+{
+	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
+	struct nfp_net *nn = r_vec->nfp_net;
+	const struct skb_frag_struct *frag;
+	struct netdev_queue *nd_q;
+	u32 done_pkts = 0, done_bytes = 0;
+	struct sk_buff *skb;
+	int todo, nr_frags;
+	u32 qcp_rd_p;
+	int fidx;
+	int idx;
+
+	/* Work out how many descriptors have been transmitted */
+	qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
+
+	if (qcp_rd_p == tx_ring->qcp_rd_p)
+		return;
+
+	if (qcp_rd_p > tx_ring->qcp_rd_p)
+		todo = qcp_rd_p - tx_ring->qcp_rd_p;
+	else
+		todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p;
+
+	while (todo--) {
+		idx = tx_ring->rd_p % tx_ring->cnt;
+		tx_ring->rd_p++;
+
+		skb = tx_ring->txbufs[idx].skb;
+		if (!skb)
+			continue;
+
+		nr_frags = skb_shinfo(skb)->nr_frags;
+		fidx = tx_ring->txbufs[idx].fidx;
+
+		if (fidx == -1) {
+			/* unmap head */
+			dma_unmap_single(&nn->pdev->dev,
+					 tx_ring->txbufs[idx].dma_addr,
+					 skb_headlen(skb), DMA_TO_DEVICE);
+
+			done_pkts += tx_ring->txbufs[idx].pkt_cnt;
+			done_bytes += tx_ring->txbufs[idx].real_len;
+		} else {
+			/* unmap fragment */
+			frag = &skb_shinfo(skb)->frags[fidx];
+			dma_unmap_page(&nn->pdev->dev,
+				       tx_ring->txbufs[idx].dma_addr,
+				       skb_frag_size(frag), DMA_TO_DEVICE);
+		}
+
+		/* check for last gather fragment */
+		if (fidx == nr_frags - 1)
+			dev_kfree_skb_any(skb);
+
+		tx_ring->txbufs[idx].dma_addr = 0;
+		tx_ring->txbufs[idx].skb = NULL;
+		tx_ring->txbufs[idx].fidx = -2;
+	}
+
+	tx_ring->qcp_rd_p = qcp_rd_p;
+
+	u64_stats_update_begin(&r_vec->tx_sync);
+	r_vec->tx_bytes += done_bytes;
+	r_vec->tx_pkts += done_pkts;
+	u64_stats_update_end(&r_vec->tx_sync);
+
+	nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx);
+	netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
+	if (nfp_net_tx_ring_should_wake(tx_ring)) {
+		/* Make sure TX thread will see updated tx_ring->rd_p */
+		smp_mb();
+
+		if (unlikely(netif_tx_queue_stopped(nd_q)))
+			netif_tx_wake_queue(nd_q);
+	}
+
+	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
+		  "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
+		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
+}
+
+/**
+ * nfp_net_tx_flush() - Free any untransmitted buffers currently on the TX ring
+ * @tx_ring:     TX ring structure
+ *
+ * Assumes that the device is stopped
+ */
+static void nfp_net_tx_flush(struct nfp_net_tx_ring *tx_ring)
+{
+	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
+	struct nfp_net *nn = r_vec->nfp_net;
+	struct pci_dev *pdev = nn->pdev;
+	const struct skb_frag_struct *frag;
+	struct netdev_queue *nd_q;
+	struct sk_buff *skb;
+	int nr_frags;
+	int fidx;
+	int idx;
+
+	while (tx_ring->rd_p != tx_ring->wr_p) {
+		idx = tx_ring->rd_p % tx_ring->cnt;
+
+		skb = tx_ring->txbufs[idx].skb;
+		if (skb) {
+			nr_frags = skb_shinfo(skb)->nr_frags;
+			fidx = tx_ring->txbufs[idx].fidx;
+
+			if (fidx == -1) {
+				/* unmap head */
+				dma_unmap_single(&pdev->dev,
+						 tx_ring->txbufs[idx].dma_addr,
+						 skb_headlen(skb),
+						 DMA_TO_DEVICE);
+			} else {
+				/* unmap fragment */
+				frag = &skb_shinfo(skb)->frags[fidx];
+				dma_unmap_page(&pdev->dev,
+					       tx_ring->txbufs[idx].dma_addr,
+					       skb_frag_size(frag),
+					       DMA_TO_DEVICE);
+			}
+
+			/* check for last gather fragment */
+			if (fidx == nr_frags - 1)
+				dev_kfree_skb_any(skb);
+
+			tx_ring->txbufs[idx].dma_addr = 0;
+			tx_ring->txbufs[idx].skb = NULL;
+			tx_ring->txbufs[idx].fidx = -2;
+		}
+
+		memset(&tx_ring->txds[idx], 0, sizeof(tx_ring->txds[idx]));
+
+		tx_ring->qcp_rd_p++;
+		tx_ring->rd_p++;
+	}
+
+	nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx);
+	netdev_tx_reset_queue(nd_q);
+}
+
+static void nfp_net_tx_timeout(struct net_device *netdev)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	int q;
+
+	/* name every TX ring whose queue is stopped, then warn generically */
+	for (q = 0; q < nn->num_tx_rings; q++) {
+		if (netif_tx_queue_stopped(netdev_get_tx_queue(netdev, q)))
+			nn_warn(nn, "TX timeout on ring: %d\n", q);
+	}
+	nn_warn(nn, "TX watchdog timeout\n");
+}
+
+/* Receive processing
+ */
+
+/**
+ * nfp_net_rx_space() - return the number of free slots on the RX ring
+ * @rx_ring:   RX ring structure
+ *
+ * Make sure we leave at least one slot free.
+ *
+ * Return: Number of free slots, with one slot of the ring held in reserve
+ */
+static inline int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring)
+{
+	return (rx_ring->cnt - 1) - (rx_ring->wr_p - rx_ring->rd_p);
+}
+
+/**
+ * nfp_net_rx_alloc_one() - Allocate and map skb for RX
+ * @rx_ring:	RX ring structure of the skb
+ * @dma_addr:	Pointer to storage for DMA address (output param)
+ *
+ * This function will allocate a new skb and map it for DMA.
+ *
+ * Return: allocated skb or NULL on failure.
+ */
+static struct sk_buff *
+nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr)
+{
+	struct nfp_net *nn = rx_ring->r_vec->nfp_net;
+	struct sk_buff *skb;
+
+	skb = netdev_alloc_skb(nn->netdev, nn->fl_bufsz);
+	if (!skb) {
+		nn_warn_ratelimit(nn, "Failed to alloc receive SKB\n");
+		return NULL;
+	}
+
+	*dma_addr = dma_map_single(&nn->pdev->dev, skb->data,
+				  nn->fl_bufsz, DMA_FROM_DEVICE);
+	if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) {
+		dev_kfree_skb_any(skb);
+		nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n");
+		return NULL;
+	}
+
+	return skb;
+}
+
+/**
+ * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings
+ * @rx_ring:	RX ring structure
+ * @skb:	Skb to put on rings
+ * @dma_addr:	DMA address of skb mapping
+ */
+static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring,
+				struct sk_buff *skb, dma_addr_t dma_addr)
+{
+	unsigned int wr_idx;
+
+	wr_idx = rx_ring->wr_p % rx_ring->cnt;
+
+	/* Stash SKB and DMA address away */
+	rx_ring->rxbufs[wr_idx].skb = skb;
+	rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;
+
+	/* Fill freelist descriptor */
+	rx_ring->rxds[wr_idx].fld.reserved = 0;
+	rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
+	nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld, dma_addr);
+
+	rx_ring->wr_p++;
+	rx_ring->wr_ptr_add++;
+	if (rx_ring->wr_ptr_add >= NFP_NET_FL_BATCH) {
+		/* Update write pointer of the freelist queue. Make
+		 * sure all writes are flushed before telling the hardware.
+		 */
+		wmb();
+		nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, rx_ring->wr_ptr_add);
+		rx_ring->wr_ptr_add = 0;
+	}
+}
+
+/**
+ * nfp_net_rx_flush() - Free any buffers currently on the RX ring
+ * @rx_ring:  RX ring to remove buffers from
+ *
+ * Assumes that the device is stopped
+ */
+static void nfp_net_rx_flush(struct nfp_net_rx_ring *rx_ring)
+{
+	struct nfp_net *nn = rx_ring->r_vec->nfp_net;
+	struct pci_dev *pdev = nn->pdev;
+	int idx;
+
+	while (rx_ring->rd_p != rx_ring->wr_p) {
+		idx = rx_ring->rd_p % rx_ring->cnt;
+
+		if (rx_ring->rxbufs[idx].skb) {
+			dma_unmap_single(&pdev->dev,
+					 rx_ring->rxbufs[idx].dma_addr,
+					 nn->fl_bufsz, DMA_FROM_DEVICE);
+			dev_kfree_skb_any(rx_ring->rxbufs[idx].skb);
+			rx_ring->rxbufs[idx].dma_addr = 0;
+			rx_ring->rxbufs[idx].skb = NULL;
+		}
+
+		memset(&rx_ring->rxds[idx], 0, sizeof(rx_ring->rxds[idx]));
+
+		rx_ring->rd_p++;
+	}
+}
+
+/**
+ * nfp_net_rx_fill_freelist() - Attempt filling freelist with RX buffers
+ * @rx_ring: RX ring to fill
+ *
+ * Fill every free slot of the freelist.  If a buffer cannot be
+ * allocated, all buffers on the ring are flushed again.
+ *
+ * Return: 0 on success, -ENOMEM if a buffer allocation failed.
+ */
+static int nfp_net_rx_fill_freelist(struct nfp_net_rx_ring *rx_ring)
+{
+	struct sk_buff *skb;
+	dma_addr_t dma_addr;
+
+	while (nfp_net_rx_space(rx_ring)) {
+		skb = nfp_net_rx_alloc_one(rx_ring, &dma_addr);
+		if (!skb) {
+			nfp_net_rx_flush(rx_ring);
+			return -ENOMEM;
+		}
+		nfp_net_rx_give_one(rx_ring, skb, dma_addr);
+	}
+
+	return 0;
+}
+
+/**
+ * nfp_net_rx_csum_has_errors() - group check if rxd has any csum errors
+ * @flags: RX descriptor flags field in CPU byte order
+ */
+static int nfp_net_rx_csum_has_errors(u16 flags)
+{
+	u16 csum_all_checked, csum_all_ok;
+
+	csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL;
+	csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK;
+
+	return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT);
+}
+
+/**
+ * nfp_net_rx_csum() - translate RX descriptor csum flags into SKB csum state
+ * @nn:  NFP Net device
+ * @r_vec: per-ring structure
+ * @rxd: Pointer to RX descriptor
+ * @skb: Pointer to SKB
+ */
+static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
+			    struct nfp_net_rx_desc *rxd, struct sk_buff *skb)
+{
+	skb_checksum_none_assert(skb);
+
+	/* Nothing to report when RX checksum offload is switched off */
+	if (!(nn->netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	if (nfp_net_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
+		u64_stats_update_begin(&r_vec->rx_sync);
+		r_vec->hw_csum_rx_error++;
+		u64_stats_update_end(&r_vec->rx_sync);
+		return;
+	}
+
+	/* Assumption: FW reports inner CSUM_OK only if the outer L4 headers
+	 * were parsed, and always reports a zero UDP checksum as CSUM_OK.
+	 */
+	if (rxd->rxd.flags &
+	    (PCIE_DESC_RX_TCP_CSUM_OK | PCIE_DESC_RX_UDP_CSUM_OK)) {
+		__skb_incr_checksum_unnecessary(skb);
+		u64_stats_update_begin(&r_vec->rx_sync);
+		r_vec->hw_csum_rx_ok++;
+		u64_stats_update_end(&r_vec->rx_sync);
+	}
+
+	if (rxd->rxd.flags &
+	    (PCIE_DESC_RX_I_TCP_CSUM_OK | PCIE_DESC_RX_I_UDP_CSUM_OK)) {
+		__skb_incr_checksum_unnecessary(skb);
+		u64_stats_update_begin(&r_vec->rx_sync);
+		r_vec->hw_csum_rx_inner_ok++;
+		u64_stats_update_end(&r_vec->rx_sync);
+	}
+}
+
+/**
+ * nfp_net_set_hash() - Set SKB hash data
+ * @netdev: adapter's net_device structure
+ * @skb:   SKB to set the hash data on
+ * @rxd:   RX descriptor
+ *
+ * The RSS hash and its type sit immediately in front of the packet data.
+ * Decode both; IPv4/IPv6/IPv6-EX types are recorded as L3, the rest as L4.
+ */
+static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb,
+			     struct nfp_net_rx_desc *rxd)
+{
+	enum pkt_hash_types type = PKT_HASH_TYPE_L4;
+	struct nfp_net_rx_hash *rx_hash;
+
+	if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
+		return;
+	if (!(netdev->features & NETIF_F_RXHASH))
+		return;
+
+	rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
+
+	switch (be32_to_cpu(rx_hash->hash_type)) {
+	case NFP_NET_RSS_IPV4:
+	case NFP_NET_RSS_IPV6:
+	case NFP_NET_RSS_IPV6_EX:
+		type = PKT_HASH_TYPE_L3;
+		break;
+	}
+	skb_set_hash(skb, be32_to_cpu(rx_hash->hash), type);
+}
+
+/**
+ * nfp_net_rx() - receive up to @budget packets on @rx_ring
+ * @rx_ring:   RX ring to receive from
+ * @budget:    NAPI budget
+ *
+ * Note, this function is separated out from the napi poll function to
+ * more cleanly separate packet receive code from other bookkeeping
+ * functions performed in the napi poll function.
+ *
+ * There are differences between the NFP-3200 firmware and the
+ * NFP-6000 firmware.  The NFP-3200 firmware uses a dedicated RX queue
+ * to indicate that new packets have arrived.  The NFP-6000 does not
+ * have this queue and uses the DD bit in the RX descriptor. This
+ * method cannot be used on the NFP-3200 as it causes a race
+ * condition: The RX ring write pointer on the NFP-3200 is updated
+ * after packets (and descriptors) have been DMAed.  If the DD bit is
+ * used and subsequently the read pointer is updated this may lead to
+ * the RX queue underflowing (if the firmware has not yet updated the
+ * write pointer).  Therefore we use slightly ugly conditional code
+ * below to handle the differences.  We may, in the future, update the
+ * NFP-3200 firmware to behave the same as the firmware on the
+ * NFP-6000.
+ *
+ * Return: Number of RX descriptors consumed, dropped packets included.
+ */
+static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
+{
+	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
+	struct nfp_net *nn = r_vec->nfp_net;
+	unsigned int data_len, meta_len;
+	int avail = 0, pkts_polled = 0;
+	struct sk_buff *skb, *new_skb;
+	struct nfp_net_rx_desc *rxd;
+	dma_addr_t new_dma_addr;
+	u32 qcp_wr_p;
+	int idx;
+
+	if (nn->is_nfp3200) {
+		/* Work out how many packets arrived */
+		qcp_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_rx);
+		idx = rx_ring->rd_p % rx_ring->cnt;
+
+		if (qcp_wr_p == idx)
+			/* No new packets */
+			return 0;
+
+		if (qcp_wr_p > idx)
+			avail = qcp_wr_p - idx;
+		else
+			avail = qcp_wr_p + rx_ring->cnt - idx;
+	} else {
+		avail = budget + 1;	/* never the limiting factor */
+	}
+
+	while (avail > 0 && pkts_polled < budget) {
+		idx = rx_ring->rd_p % rx_ring->cnt;
+
+		rxd = &rx_ring->rxds[idx];
+		if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) {
+			if (nn->is_nfp3200)
+				nn_dbg(nn, "RX descriptor not valid (DD)%d:%u rxd[0]=%#x rxd[1]=%#x\n",
+				       rx_ring->idx, idx,
+				       rxd->vals[0], rxd->vals[1]);
+			break;
+		}
+		/* Memory barrier to ensure that we won't do other reads
+		 * before the DD bit.
+		 */
+		dma_rmb();
+
+		rx_ring->rd_p++;
+		pkts_polled++;
+		avail--;
+
+		skb = rx_ring->rxbufs[idx].skb;
+
+		new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr);
+		if (!new_skb) {	/* re-give the old buffer and count a drop */
+			nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[idx].skb,
+					    rx_ring->rxbufs[idx].dma_addr);
+			u64_stats_update_begin(&r_vec->rx_sync);
+			r_vec->rx_drops++;
+			u64_stats_update_end(&r_vec->rx_sync);
+			continue;
+		}
+
+		dma_unmap_single(&nn->pdev->dev,
+				 rx_ring->rxbufs[idx].dma_addr,
+				 nn->fl_bufsz, DMA_FROM_DEVICE);
+
+		nfp_net_rx_give_one(rx_ring, new_skb, new_dma_addr);
+
+		meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
+		data_len = le16_to_cpu(rxd->rxd.data_len);
+
+		if (WARN_ON_ONCE(data_len > nn->fl_bufsz)) {
+			dev_kfree_skb_any(skb);
+			continue;
+		}
+
+		if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) {
+			/* The packet data starts after the metadata */
+			skb_reserve(skb, meta_len);
+		} else {
+			/* The packet data starts at a fixed offset */
+			skb_reserve(skb, nn->rx_offset);
+		}
+
+		/* data_len covers the prepended metadata; exclude it here */
+		skb_put(skb, data_len - meta_len);
+
+		nfp_net_set_hash(nn->netdev, skb, rxd);
+
+		/* Pad small frames to minimum */
+		if (skb_put_padto(skb, 60))
+			break;
+
+		/* Stats update */
+		u64_stats_update_begin(&r_vec->rx_sync);
+		r_vec->rx_pkts++;
+		r_vec->rx_bytes += skb->len;
+		u64_stats_update_end(&r_vec->rx_sync);
+
+		skb_record_rx_queue(skb, rx_ring->idx);
+		skb->protocol = eth_type_trans(skb, nn->netdev);
+
+		nfp_net_rx_csum(nn, r_vec, rxd, skb);
+
+		if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+					       le16_to_cpu(rxd->rxd.vlan));
+
+		napi_gro_receive(&rx_ring->r_vec->napi, skb);
+	}
+
+	if (nn->is_nfp3200)
+		nfp_qcp_rd_ptr_add(rx_ring->qcp_rx, pkts_polled);
+
+	return pkts_polled;
+}
+
+/**
+ * nfp_net_poll() - napi poll function
+ * @napi:    NAPI structure
+ * @budget:  NAPI budget
+ *
+ * Completes TX work on the TX ring that shares its index with this vector's
+ * RX ring, then receives up to @budget packets.  When less than @budget was
+ * consumed, NAPI is completed and the vector's interrupt is unmasked.
+ *
+ * Return: number of packets polled.
+ */
+static int nfp_net_poll(struct napi_struct *napi, int budget)
+{
+	struct nfp_net_r_vector *r_vec =
+		container_of(napi, struct nfp_net_r_vector, napi);
+	struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
+	struct nfp_net *nn = r_vec->nfp_net;
+	int pkts_polled;
+
+	nfp_net_tx_complete(&nn->tx_rings[rx_ring->idx]);
+
+	pkts_polled = nfp_net_rx(rx_ring, budget);
+
+	if (pkts_polled < budget) {
+		napi_complete_done(napi, pkts_polled);
+		nfp_net_irq_unmask(nn, r_vec->irq_idx);
+	}
+
+	return pkts_polled;
+}
+
+/* Setup and Configuration
+ */
+
+/**
+ * nfp_net_tx_ring_free() - Release everything held by a TX ring
+ * @tx_ring:   TX ring to free
+ */
+static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
+{
+	struct nfp_net *nn = tx_ring->r_vec->nfp_net;
+	struct device *dev = &nn->pdev->dev;
+
+	/* Clear the ring's address, size and vector in the device config */
+	nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(tx_ring->idx), 0);
+	nn_writeb(nn, NFP_NET_CFG_TXR_SZ(tx_ring->idx), 0);
+	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(tx_ring->idx), 0);
+
+	kfree(tx_ring->txbufs);
+	tx_ring->txbufs = NULL;
+
+	if (tx_ring->txds)
+		dma_free_coherent(dev, tx_ring->size,
+				  tx_ring->txds, tx_ring->dma);
+	tx_ring->txds = NULL;
+	tx_ring->dma = 0;
+	tx_ring->size = 0;
+
+	/* Reset the ring bookkeeping */
+	tx_ring->cnt = 0;
+	tx_ring->wr_p = 0;
+	tx_ring->rd_p = 0;
+	tx_ring->qcp_rd_p = 0;
+}
+
+/**
+ * nfp_net_tx_ring_alloc() - Allocate the resources backing a TX ring
+ * @tx_ring:   TX Ring structure to allocate
+ *
+ * Return: 0 on success, -ENOMEM if either allocation fails.
+ */
+static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring)
+{
+	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
+	struct nfp_net *nn = r_vec->nfp_net;
+	struct device *dev = &nn->pdev->dev;
+	int bufs_sz;
+
+	tx_ring->cnt = nn->txd_cnt;
+
+	/* The descriptor ring itself comes from coherent DMA memory */
+	tx_ring->size = sizeof(*tx_ring->txds) * tx_ring->cnt;
+	tx_ring->txds = dma_zalloc_coherent(dev, tx_ring->size,
+					    &tx_ring->dma, GFP_KERNEL);
+	if (!tx_ring->txds)
+		goto err_alloc;
+
+	bufs_sz = sizeof(*tx_ring->txbufs) * tx_ring->cnt;
+	tx_ring->txbufs = kzalloc(bufs_sz, GFP_KERNEL);
+	if (!tx_ring->txbufs)
+		goto err_alloc;
+
+	/* Write the DMA address, size and MSI-X info to the device */
+	nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(tx_ring->idx), tx_ring->dma);
+	nn_writeb(nn, NFP_NET_CFG_TXR_SZ(tx_ring->idx), ilog2(tx_ring->cnt));
+	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(tx_ring->idx), r_vec->irq_idx);
+	netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask, tx_ring->idx);
+
+	nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p\n",
+	       tx_ring->idx, tx_ring->qcidx,
+	       tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds);
+
+	return 0;
+
+err_alloc:
+	nfp_net_tx_ring_free(tx_ring);
+	return -ENOMEM;
+}
+
+/**
+ * nfp_net_rx_ring_free() - Release everything held by a RX ring
+ * @rx_ring:  RX ring to free
+ */
+static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
+{
+	struct nfp_net *nn = rx_ring->r_vec->nfp_net;
+	struct device *dev = &nn->pdev->dev;
+
+	/* Clear the ring's address, size and vector in the device config */
+	nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(rx_ring->idx), 0);
+	nn_writeb(nn, NFP_NET_CFG_RXR_SZ(rx_ring->idx), 0);
+	nn_writeb(nn, NFP_NET_CFG_RXR_VEC(rx_ring->idx), 0);
+
+	kfree(rx_ring->rxbufs);
+	rx_ring->rxbufs = NULL;
+
+	if (rx_ring->rxds)
+		dma_free_coherent(dev, rx_ring->size,
+				  rx_ring->rxds, rx_ring->dma);
+	rx_ring->rxds = NULL;
+	rx_ring->dma = 0;
+	rx_ring->size = 0;
+
+	/* Reset the ring bookkeeping */
+	rx_ring->cnt = 0;
+	rx_ring->wr_p = 0;
+	rx_ring->rd_p = 0;
+}
+
+/**
+ * nfp_net_rx_ring_alloc() - Allocate the resources backing a RX ring
+ * @rx_ring:  RX ring to allocate
+ *
+ * Return: 0 on success, -ENOMEM if either allocation fails.
+ */
+static int nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring)
+{
+	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
+	struct nfp_net *nn = r_vec->nfp_net;
+	struct device *dev = &nn->pdev->dev;
+	int bufs_sz;
+
+	rx_ring->cnt = nn->rxd_cnt;
+	rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;
+
+	rx_ring->rxds = dma_zalloc_coherent(dev, rx_ring->size,
+					    &rx_ring->dma, GFP_KERNEL);
+	if (!rx_ring->rxds)
+		goto err_alloc;
+
+	bufs_sz = sizeof(*rx_ring->rxbufs) * rx_ring->cnt;
+	rx_ring->rxbufs = kzalloc(bufs_sz, GFP_KERNEL);
+	if (!rx_ring->rxbufs)
+		goto err_alloc;
+
+	/* Write the DMA address, size and MSI-X info to the device */
+	nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(rx_ring->idx), rx_ring->dma);
+	nn_writeb(nn, NFP_NET_CFG_RXR_SZ(rx_ring->idx), ilog2(rx_ring->cnt));
+	nn_writeb(nn, NFP_NET_CFG_RXR_VEC(rx_ring->idx), r_vec->irq_idx);
+
+	nn_dbg(nn, "RxQ%02d: FlQCidx=%02d RxQCidx=%02d cnt=%d dma=%#llx host=%p\n",
+	       rx_ring->idx, rx_ring->fl_qcidx, rx_ring->rx_qcidx,
+	       rx_ring->cnt, (unsigned long long)rx_ring->dma, rx_ring->rxds);
+
+	return 0;
+
+err_alloc:
+	nfp_net_rx_ring_free(rx_ring);
+	return -ENOMEM;
+}
+
+/* Free the rings, IRQ and NAPI context of the first @n_free ring vectors,
+ * walking from the highest index down to zero.
+ */
+static void __nfp_net_free_rings(struct nfp_net *nn, unsigned int n_free)
+{
+	while (n_free--) {
+		struct nfp_net_r_vector *r_vec = &nn->r_vecs[n_free];
+		unsigned int irq = nn->irq_entries[r_vec->irq_idx].vector;
+
+		nfp_net_rx_ring_free(r_vec->rx_ring);
+		nfp_net_tx_ring_free(r_vec->tx_ring);
+
+		irq_set_affinity_hint(irq, NULL);
+		free_irq(irq, r_vec);
+
+		netif_napi_del(&r_vec->napi);
+	}
+}
+
+/**
+ * nfp_net_free_rings() - Free all ring resources
+ * @nn:      NFP Net device whose ring vectors (all num_r_vecs) are torn down
+ */
+static void nfp_net_free_rings(struct nfp_net *nn)
+{
+	__nfp_net_free_rings(nn, nn->num_r_vecs);
+}
+
+/**
+ * nfp_net_alloc_rings() - Set up NAPI, IRQ and TX/RX rings for every vector
+ * @nn:      NFP Net device to reconfigure
+ *
+ * Return: 0 on success or negative errno on error.
+ */
+static int nfp_net_alloc_rings(struct nfp_net *nn)
+{
+	struct nfp_net_r_vector *r_vec;
+	struct msix_entry *entry;
+	int err;
+	int r;
+
+	for (r = 0; r < nn->num_r_vecs; r++) {
+		r_vec = &nn->r_vecs[r];
+		entry = &nn->irq_entries[r_vec->irq_idx];
+
+		/* Setup NAPI */
+		netif_napi_add(nn->netdev, &r_vec->napi,
+			       nfp_net_poll, NAPI_POLL_WEIGHT);
+
+		snprintf(r_vec->name, sizeof(r_vec->name),
+			 "%s-rxtx-%d", nn->netdev->name, r);
+		err = request_irq(entry->vector, r_vec->handler, 0,
+				  r_vec->name, r_vec);
+		if (err) {
+			nn_dbg(nn, "Error requesting IRQ %d\n", entry->vector);
+			goto err_napi_del;
+		}
+
+		irq_set_affinity_hint(entry->vector, &r_vec->affinity_mask);
+
+		nn_dbg(nn, "RV%02d: irq=%03d/%03d\n",
+		       r, entry->vector, entry->entry);
+
+		/* Allocate TX ring resources */
+		err = nfp_net_tx_ring_alloc(r_vec->tx_ring);
+		if (err)
+			goto err_free_irq;
+
+		/* Allocate RX ring resources */
+		err = nfp_net_rx_ring_alloc(r_vec->rx_ring);
+		if (err)
+			goto err_free_tx;
+	}
+
+	return 0;
+
+err_free_tx:
+	nfp_net_tx_ring_free(r_vec->tx_ring);
+err_free_irq:
+	irq_set_affinity_hint(entry->vector, NULL);
+	free_irq(entry->vector, r_vec);
+err_napi_del:
+	netif_napi_del(&r_vec->napi);
+	__nfp_net_free_rings(nn, r);	/* vectors below r are fully set up */
+	return err;
+}
+
+/**
+ * nfp_net_rss_write_itbl() - Copy the RSS indirection table to the device
+ * @nn:      NFP Net device to reconfigure
+ */
+void nfp_net_rss_write_itbl(struct nfp_net *nn)
+{
+	int off;
+
+	for (off = 0; off < NFP_NET_CFG_RSS_ITBL_SZ; off += 4)
+		nn_writel(nn, NFP_NET_CFG_RSS_ITBL + off,
+			  get_unaligned_le32(&nn->rss_itbl[off]));
+}
+
+/**
+ * nfp_net_rss_write_key() - Copy the RSS hash key to the device
+ * @nn:      NFP Net device to reconfigure
+ */
+void nfp_net_rss_write_key(struct nfp_net *nn)
+{
+	int off;
+
+	for (off = 0; off < NFP_NET_CFG_RSS_KEY_SZ; off += 4)
+		nn_writel(nn, NFP_NET_CFG_RSS_KEY + off,
+			  get_unaligned_le32(&nn->rss_key[off]));
+}
+
+/**
+ * nfp_net_coalesce_write_cfg() - Write irq coalescence configuration to HW
+ * @nn:      NFP Net device to reconfigure
+ */
+void nfp_net_coalesce_write_cfg(struct nfp_net *nn)
+{
+	u32 factor, rx_val, tx_val;
+	u8 ring;
+
+	/* Coalesce '_usecs' parameters are handed to the device in ME
+	 * timestamp ticks.  One timestamp count is 16 ME clock cycles, so
+	 * the conversion factor is the ME frequency (MHz) divided by 16.
+	 */
+	factor = nn->me_freq_mhz / 16;
+
+	rx_val = (nn->rx_coalesce_max_frames << 16) |
+		 (factor * nn->rx_coalesce_usecs);
+	tx_val = (nn->tx_coalesce_max_frames << 16) |
+		 (factor * nn->tx_coalesce_usecs);
+
+	/* copy RX interrupt coalesce parameters */
+	for (ring = 0; ring < nn->num_r_vecs; ring++)
+		nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(ring), rx_val);
+
+	/* copy TX interrupt coalesce parameters */
+	for (ring = 0; ring < nn->num_r_vecs; ring++)
+		nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(ring), tx_val);
+}
+
+/**
+ * nfp_net_write_mac_addr() - Write mac address to device registers
+ * @nn:      NFP Net device to reconfigure
+ * @mac:     Six-byte MAC address to be written
+ *
+ * We do a bit of byte swapping dance because firmware is LE.
+ */
+static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *mac)
+{
+	nn_writel(nn, NFP_NET_CFG_MACADDR + 0,
+		  get_unaligned_be32(mac));
+	/* We can't do writew for NFP-3200 compatibility */
+	nn_writel(nn, NFP_NET_CFG_MACADDR + 4,
+		  get_unaligned_be16(mac + 4) << 16);
+}
+
+/**
+ * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP
+ * @nn:      NFP Net device to reconfigure
+ *
+ * The cached control word in @nn is only updated when the reconfiguration
+ * request succeeds; on failure an error is logged and it is left untouched.
+ */
+static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
+{
+	u32 update = NFP_NET_CFG_UPDATE_GEN | NFP_NET_CFG_UPDATE_MSIX |
+		     NFP_NET_CFG_UPDATE_RING;
+	u32 new_ctrl = nn->ctrl & ~NFP_NET_CFG_CTRL_ENABLE;
+	int err;
+
+	if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
+		new_ctrl &= ~NFP_NET_CFG_CTRL_RINGCFG;
+
+	/* Zero the TX and RX ring enable masks */
+	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
+	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
+
+	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
+	err = nfp_net_reconfig(nn, update);
+	if (!err) {
+		nn->ctrl = new_ctrl;
+		return;
+	}
+
+	nn_err(nn, "Could not disable device: %d\n", err);
+}
+
+/**
+ * nfp_net_start_vec() - Start ring vector
+ * @nn:      NFP Net device structure
+ * @r_vec:   Ring vector to be started
+ *
+ * Return: 0 on success, or the error from filling the RX freelist.
+ */
+static int nfp_net_start_vec(struct nfp_net *nn, struct nfp_net_r_vector *r_vec)
+{
+	unsigned int irq_vec = nn->irq_entries[r_vec->irq_idx].vector;
+	int err;
+
+	/* The vector's IRQ stays disabled while the freelist is filled */
+	disable_irq(irq_vec);
+
+	err = nfp_net_rx_fill_freelist(r_vec->rx_ring);
+	if (err) {
+		nn_err(nn, "RV%02d: couldn't allocate enough buffers\n",
+		       r_vec->irq_idx);
+	} else {
+		napi_enable(&r_vec->napi);
+	}
+
+	enable_irq(irq_vec);
+
+	return err;
+}
+
+static int nfp_net_netdev_open(struct net_device *netdev)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	int err, r;
+	u32 update = 0;
+	u32 new_ctrl;
+
+	if (nn->ctrl & NFP_NET_CFG_CTRL_ENABLE) {
+		nn_err(nn, "Dev is already enabled: 0x%08x\n", nn->ctrl);
+		return -EBUSY;
+	}
+
+	new_ctrl = nn->ctrl;
+
+	/* Step 1: Allocate resources for rings and the like
+	 * - Request interrupts
+	 * - Allocate RX and TX ring resources
+	 * - Setup initial RSS table
+	 */
+	err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_EXN, "%s-exn",
+				      nn->exn_name, sizeof(nn->exn_name),
+				      NFP_NET_IRQ_EXN_IDX, nn->exn_handler);
+	if (err)
+		return err;
+
+	err = nfp_net_alloc_rings(nn);
+	if (err)
+		goto err_free_exn;
+
+	err = netif_set_real_num_tx_queues(netdev, nn->num_tx_rings);
+	if (err)
+		goto err_free_rings;
+
+	err = netif_set_real_num_rx_queues(netdev, nn->num_rx_rings);
+	if (err)
+		goto err_free_rings;
+
+	if (nn->cap & NFP_NET_CFG_CTRL_RSS) {
+		nfp_net_rss_write_key(nn);
+		nfp_net_rss_write_itbl(nn);
+		nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg);
+		update |= NFP_NET_CFG_UPDATE_RSS;
+	}
+
+	if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
+		nfp_net_coalesce_write_cfg(nn);
+
+		new_ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
+		update |= NFP_NET_CFG_UPDATE_IRQMOD;
+	}
+
+	/* Step 2: Configure the NFP
+	 * - Enable rings from 0 to tx_rings/rx_rings - 1 (64 rings is special
+	 *   cased as shifting a u64 by 64 bits is undefined).
+	 * - Write MAC address (in case it changed)
+	 * - Set the MTU and the Freelist buffer size
+	 * - Enable the FW
+	 */
+	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ?
+		  0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1);
+
+	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->num_rx_rings == 64 ?
+		  0xffffffffffffffffULL : ((u64)1 << nn->num_rx_rings) - 1);
+
+	nfp_net_write_mac_addr(nn, netdev->dev_addr);
+
+	nn_writel(nn, NFP_NET_CFG_MTU, netdev->mtu);
+	nn_writel(nn, NFP_NET_CFG_FLBUFSZ, nn->fl_bufsz);
+
+	/* Enable device */
+	new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
+	update |= NFP_NET_CFG_UPDATE_GEN;
+	update |= NFP_NET_CFG_UPDATE_MSIX;
+	update |= NFP_NET_CFG_UPDATE_RING;
+	if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
+		new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
+
+	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
+	err = nfp_net_reconfig(nn, update);
+	if (err)
+		goto err_clear_config;
+
+	nn->ctrl = new_ctrl;
+
+	/* Since reconfiguration requests while NFP is down are ignored we
+	 * have to wipe the entire VXLAN configuration and reinitialize it.
+	 */
+	if (nn->ctrl & NFP_NET_CFG_CTRL_VXLAN) {
+		memset(&nn->vxlan_ports, 0, sizeof(nn->vxlan_ports));
+		memset(&nn->vxlan_usecnt, 0, sizeof(nn->vxlan_usecnt));
+		vxlan_get_rx_port(netdev);
+	}
+
+	/* Step 3: Enable for kernel
+	 * - put some freelist descriptors on each RX ring
+	 * - enable NAPI on each ring
+	 * - enable all TX queues
+	 * - set link state
+	 */
+	for (r = 0; r < nn->num_r_vecs; r++) {
+		err = nfp_net_start_vec(nn, &nn->r_vecs[r]);
+		if (err)
+			goto err_disable_napi;
+	}
+
+	netif_tx_wake_all_queues(netdev);
+
+	err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_LSC, "%s-lsc",
+				      nn->lsc_name, sizeof(nn->lsc_name),
+				      NFP_NET_IRQ_LSC_IDX, nn->lsc_handler);
+	if (err)
+		goto err_stop_tx;
+	nfp_net_read_link_status(nn);
+
+	return 0;
+
+err_stop_tx:
+	netif_tx_disable(netdev);
+	for (r = 0; r < nn->num_r_vecs; r++)
+		nfp_net_tx_flush(nn->r_vecs[r].tx_ring);
+err_disable_napi:
+	while (r--) {	/* only vectors below r had NAPI enabled */
+		napi_disable(&nn->r_vecs[r].napi);
+		nfp_net_rx_flush(nn->r_vecs[r].rx_ring);
+	}
+err_clear_config:
+	nfp_net_clear_config_and_disable(nn);
+err_free_rings:
+	nfp_net_free_rings(nn);
+err_free_exn:
+	nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
+	return err;
+}
+
+/**
+ * nfp_net_netdev_close() - Called when the device is downed
+ * @netdev:      netdev structure
+ *
+ * Return: always 0, including when the device was not enabled.
+ */
+static int nfp_net_netdev_close(struct net_device *netdev)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	int r;
+
+	if (!(nn->ctrl & NFP_NET_CFG_CTRL_ENABLE)) {
+		nn_err(nn, "Dev is not up: 0x%08x\n", nn->ctrl);
+		return 0;
+	}
+
+	/* Step 1: Disable RX and TX rings from the Linux kernel perspective
+	 */
+	nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
+	netif_carrier_off(netdev);
+	nn->link_up = false;
+
+	for (r = 0; r < nn->num_r_vecs; r++)
+		napi_disable(&nn->r_vecs[r].napi);
+
+	netif_tx_disable(netdev);
+
+	/* Step 2: Tell NFP */
+	nfp_net_clear_config_and_disable(nn);
+
+	/* Step 3: Free resources */
+	for (r = 0; r < nn->num_r_vecs; r++) {
+		nfp_net_rx_flush(nn->r_vecs[r].rx_ring);
+		nfp_net_tx_flush(nn->r_vecs[r].tx_ring);
+	}
+
+	nfp_net_free_rings(nn);
+	nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
+
+	nn_dbg(nn, "%s down\n", netdev->name);
+	return 0;
+}
+
+/* Sync the promiscuous-mode bit of the NIC control word with netdev->flags.
+ * If the firmware lacks the capability a warning is logged and the control
+ * word is left unchanged.
+ */
+static void nfp_net_set_rx_mode(struct net_device *netdev)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	u32 new_ctrl = nn->ctrl;
+
+	if (!(netdev->flags & IFF_PROMISC))
+		new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC;
+	else if (nn->cap & NFP_NET_CFG_CTRL_PROMISC)
+		new_ctrl |= NFP_NET_CFG_CTRL_PROMISC;
+	else
+		nn_warn(nn, "FW does not support promiscuous mode\n");
+
+	/* Nothing to push to the device */
+	if (new_ctrl == nn->ctrl)
+		return;
+
+	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
+	/* Cache the new control word only if the reconfig succeeded */
+	if (!nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN))
+		nn->ctrl = new_ctrl;
+}
+
+static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	u32 tmp;
+
+	nn_dbg(nn, "New MTU = %d\n", new_mtu);
+
+	if (new_mtu < 68 || new_mtu > nn->max_mtu) {
+		nn_err(nn, "New MTU (%d) is not valid\n", new_mtu);
+		return -EINVAL;
+	}
+
+	netdev->mtu = new_mtu;
+
+	/* Freelist buffer size rounded up to the nearest 1K */
+	tmp = new_mtu + ETH_HLEN + VLAN_HLEN + NFP_NET_MAX_PREPEND;
+	nn->fl_bufsz = roundup(tmp, 1024);
+
+	/* Nothing more to do unless the device is up */
+	if (!netif_running(netdev))
+		return 0;
+
+	/* Restart with the new buffer size; report a failed re-open */
+	nfp_net_netdev_close(netdev);
+	return nfp_net_netdev_open(netdev);
+}
+
+static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev,
+						struct rtnl_link_stats64 *stats)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	int r;
+
+	for (r = 0; r < nn->num_r_vecs; r++) {
+		struct nfp_net_r_vector *r_vec = &nn->r_vecs[r];
+		u64 pkts, bytes, misc;
+		unsigned int seq;
+
+		do {
+			seq = u64_stats_fetch_begin(&r_vec->rx_sync);
+			pkts = r_vec->rx_pkts;
+			bytes = r_vec->rx_bytes;
+			misc = r_vec->rx_drops;
+		} while (u64_stats_fetch_retry(&r_vec->rx_sync, seq));
+		stats->rx_packets += pkts;
+		stats->rx_bytes += bytes;
+		stats->rx_dropped += misc;
+		/* Same fetch/retry loop for the TX counters */
+		do {
+			seq = u64_stats_fetch_begin(&r_vec->tx_sync);
+			pkts = r_vec->tx_pkts;
+			bytes = r_vec->tx_bytes;
+			misc = r_vec->tx_errors;
+		} while (u64_stats_fetch_retry(&r_vec->tx_sync, seq));
+		stats->tx_packets += pkts;
+		stats->tx_bytes += bytes;
+		stats->tx_errors += misc;
+	}
+
+	return stats;
+}
+
+/* ndo_set_features handler: mirror toggled netdev feature flags into the
+ * NIC control word and ask the firmware to apply the result.
+ * Each feature bit that changed clears its control bit and sets it again
+ * only if the feature is being switched on.
+ *
+ * Return: 0 if the control word is unchanged or was applied, otherwise the
+ * error returned by nfp_net_reconfig().
+ */
+static int nfp_net_set_features(struct net_device *netdev,
+				netdev_features_t features)
+{
+	netdev_features_t changed = netdev->features ^ features;
+	struct nfp_net *nn = netdev_priv(netdev);
+	u32 new_ctrl = nn->ctrl;
+	int err;
+
+	/* Assume this is not called with features we have not advertised */
+
+	if (changed & NETIF_F_RXCSUM) {
+		new_ctrl &= ~NFP_NET_CFG_CTRL_RXCSUM;
+		if (features & NETIF_F_RXCSUM)
+			new_ctrl |= NFP_NET_CFG_CTRL_RXCSUM;
+	}
+
+	if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
+		new_ctrl &= ~NFP_NET_CFG_CTRL_TXCSUM;
+		if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))
+			new_ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
+	}
+
+	if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) {
+		new_ctrl &= ~NFP_NET_CFG_CTRL_LSO;
+		if (features & (NETIF_F_TSO | NETIF_F_TSO6))
+			new_ctrl |= NFP_NET_CFG_CTRL_LSO;
+	}
+
+	if (changed & NETIF_F_HW_VLAN_CTAG_RX) {
+		new_ctrl &= ~NFP_NET_CFG_CTRL_RXVLAN;
+		if (features & NETIF_F_HW_VLAN_CTAG_RX)
+			new_ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
+	}
+
+	if (changed & NETIF_F_HW_VLAN_CTAG_TX) {
+		new_ctrl &= ~NFP_NET_CFG_CTRL_TXVLAN;
+		if (features & NETIF_F_HW_VLAN_CTAG_TX)
+			new_ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
+	}
+
+	if (changed & NETIF_F_SG) {
+		new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
+		if (features & NETIF_F_SG)
+			new_ctrl |= NFP_NET_CFG_CTRL_GATHER;
+	}
+
+	nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
+	       netdev->features, features, changed);
+
+	if (new_ctrl == nn->ctrl)
+		return 0;
+
+	nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->ctrl, new_ctrl);
+	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
+	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
+	if (err)
+		return err;
+
+	nn->ctrl = new_ctrl;
+
+	return 0;
+}
+
+static netdev_features_t
+nfp_net_features_check(struct sk_buff *skb, struct net_device *dev,
+		       netdev_features_t features)
+{
+	u8 l4_hdr;
+
+	/* We can't do TSO over double tagged packets (802.1AD) */
+	features &= vlan_features_check(skb, features);
+
+	/* The remaining checks only apply to encapsulated packets */
+	if (!skb->encapsulation)
+		return features;
+
+	/* Ensure that inner L4 header offset fits into TX descriptor field */
+	if (skb_is_gso(skb)) {
+		u32 hdrlen;
+
+		hdrlen = skb_inner_transport_header(skb) - skb->data +
+			inner_tcp_hdrlen(skb);
+
+		if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ))
+			features &= ~NETIF_F_GSO_MASK;
+	}
+
+	/* VXLAN/GRE check: look up the outer L4 protocol first */
+	switch (vlan_get_protocol(skb)) {
+	case htons(ETH_P_IP):
+		l4_hdr = ip_hdr(skb)->protocol;
+		break;
+	case htons(ETH_P_IPV6):
+		l4_hdr = ipv6_hdr(skb)->nexthdr;
+		break;
+	default:
+		return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
+	}
+	/* Keep csum/GSO only for TEB over GRE or UDP with a VXLAN-sized hdr */
+	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
+	    skb->inner_protocol != htons(ETH_P_TEB) ||
+	    (l4_hdr != IPPROTO_UDP && l4_hdr != IPPROTO_GRE) ||
+	    (l4_hdr == IPPROTO_UDP &&
+	     (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
+	      sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
+		return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
+
+	return features;
+}
+
+/**
+ * nfp_net_set_vxlan_port() - set vxlan port in SW and reconfigure HW
+ * @nn:   NFP Net device to reconfigure
+ * @idx:  Index into the port table where new port should be written
+ * @port: UDP port to configure (pass zero to remove VXLAN port)
+ */
+static void nfp_net_set_vxlan_port(struct nfp_net *nn, int idx, __be16 port)
+{
+	int i;
+
+	nn->vxlan_ports[idx] = port;
+	if (!(nn->ctrl & NFP_NET_CFG_CTRL_VXLAN))
+		return;
+
+	/* Each 32-bit write carries two ports; the odd entry goes on top */
+	BUILD_BUG_ON(NFP_NET_N_VXLAN_PORTS & 1);
+	for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i += 2)
+		nn_writel(nn, NFP_NET_CFG_VXLAN_PORT + i * sizeof(port),
+			  be16_to_cpu(nn->vxlan_ports[i]) |
+			  be16_to_cpu(nn->vxlan_ports[i + 1]) << 16);
+
+	nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_VXLAN);
+}
+
+/**
+ * nfp_net_find_vxlan_idx() - find table entry of the port or a free one
+ * @nn:   NFP Network structure
+ * @port: UDP port to look for
+ *
+ * Return: if the port is already in the table -- its position;
+ *	   if the port is not in the table -- an unused position;
+ *	   if the table is full -- -ENOSPC.
+ */
+static int nfp_net_find_vxlan_idx(struct nfp_net *nn, __be16 port)
+{
+	int slot, unused = -ENOSPC;
+
+	for (slot = 0; slot < NFP_NET_N_VXLAN_PORTS; slot++) {
+		if (nn->vxlan_ports[slot] == port)
+			return slot;
+		if (nn->vxlan_usecnt[slot] == 0)
+			unused = slot;
+	}
+
+	return unused;
+}
+
+static void nfp_net_add_vxlan_port(struct net_device *netdev,
+				   sa_family_t sa_family, __be16 port)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	int idx = nfp_net_find_vxlan_idx(nn, port);
+
+	if (idx == -ENOSPC)
+		return;
+
+	/* Only the first user of a table entry writes the port */
+	if (nn->vxlan_usecnt[idx]++ == 0)
+		nfp_net_set_vxlan_port(nn, idx, port);
+}
+
+static void nfp_net_del_vxlan_port(struct net_device *netdev,
+				   sa_family_t sa_family, __be16 port)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	int idx = nfp_net_find_vxlan_idx(nn, port);
+
+	/* Test for -ENOSPC first: a negative idx must never index usecnt */
+	if (idx == -ENOSPC || !nn->vxlan_usecnt[idx])
+		return;
+
+	if (!--nn->vxlan_usecnt[idx])
+		nfp_net_set_vxlan_port(nn, idx, 0);
+}
+
+static const struct net_device_ops nfp_net_netdev_ops = {
+	.ndo_open		= nfp_net_netdev_open,
+	.ndo_stop		= nfp_net_netdev_close,
+	.ndo_start_xmit		= nfp_net_tx,
+	.ndo_get_stats64	= nfp_net_stat64,
+	.ndo_tx_timeout		= nfp_net_tx_timeout,
+	.ndo_set_rx_mode	= nfp_net_set_rx_mode,
+	.ndo_change_mtu		= nfp_net_change_mtu,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_set_features	= nfp_net_set_features,
+	.ndo_features_check	= nfp_net_features_check,
+	.ndo_add_vxlan_port	= nfp_net_add_vxlan_port,
+	.ndo_del_vxlan_port	= nfp_net_del_vxlan_port,
+};
+
+/**
+ * nfp_net_info() - Print general info about the NIC
+ * @nn:      NFP Net device to describe (queue counts, FW version, caps)
+ */
+void nfp_net_info(struct nfp_net *nn)
+{
+	nn_info(nn, "Netronome %s %sNetdev: TxQs=%d/%d RxQs=%d/%d\n",
+		nn->is_nfp3200 ? "NFP-32xx" : "NFP-6xxx",
+		nn->is_vf ? "VF " : "",
+		nn->num_tx_rings, nn->max_tx_rings,
+		nn->num_rx_rings, nn->max_rx_rings);
+	nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n",
+		nn->fw_ver.resv, nn->fw_ver.class,
+		nn->fw_ver.major, nn->fw_ver.minor,
+		nn->max_mtu);
+	nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+		nn->cap,
+		nn->cap & NFP_NET_CFG_CTRL_PROMISC  ? "PROMISC "  : "",
+		nn->cap & NFP_NET_CFG_CTRL_L2BC     ? "L2BCFILT " : "",
+		nn->cap & NFP_NET_CFG_CTRL_L2MC     ? "L2MCFILT " : "",
+		nn->cap & NFP_NET_CFG_CTRL_RXCSUM   ? "RXCSUM "   : "",
+		nn->cap & NFP_NET_CFG_CTRL_TXCSUM   ? "TXCSUM "   : "",
+		nn->cap & NFP_NET_CFG_CTRL_RXVLAN   ? "RXVLAN "   : "",
+		nn->cap & NFP_NET_CFG_CTRL_TXVLAN   ? "TXVLAN "   : "",
+		nn->cap & NFP_NET_CFG_CTRL_SCATTER  ? "SCATTER "  : "",
+		nn->cap & NFP_NET_CFG_CTRL_GATHER   ? "GATHER "   : "",
+		nn->cap & NFP_NET_CFG_CTRL_LSO      ? "TSO "      : "",
+		nn->cap & NFP_NET_CFG_CTRL_RSS      ? "RSS "      : "",
+		nn->cap & NFP_NET_CFG_CTRL_L2SWITCH ? "L2SWITCH " : "",
+		nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
+		nn->cap & NFP_NET_CFG_CTRL_IRQMOD   ? "IRQMOD "   : "",
+		nn->cap & NFP_NET_CFG_CTRL_VXLAN    ? "VXLAN "    : "",
+		nn->cap & NFP_NET_CFG_CTRL_NVGRE    ? "NVGRE "	  : "");
+}
+
+/**
+ * nfp_net_netdev_alloc() - Allocate netdev and related structure
+ * @pdev:         PCI device
+ * @max_tx_rings: Maximum number of TX rings supported by device
+ * @max_rx_rings: Maximum number of RX rings supported by device
+ *
+ * Allocates a multi-queue Ethernet netdev and fills in the initial part
+ * of the &struct nfp_net stored in its private area.
+ *
+ * Return: NFP Net device structure, or ERR_PTR on error.
+ */
+struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
+				     int max_tx_rings, int max_rx_rings)
+{
+	struct net_device *netdev;
+	struct nfp_net *nn;
+	int nqs;
+
+	netdev = alloc_etherdev_mqs(sizeof(*nn), max_tx_rings, max_rx_rings);
+	if (!netdev)
+		return ERR_PTR(-ENOMEM);
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	nn = netdev_priv(netdev);
+	nn->pdev = pdev;
+	nn->netdev = netdev;
+
+	spin_lock_init(&nn->reconfig_lock);
+	spin_lock_init(&nn->link_status_lock);
+
+	/* Ring limits as passed in by the caller */
+	nn->max_tx_rings = max_tx_rings;
+	nn->max_rx_rings = max_rx_rings;
+
+	/* Start from the default RSS queue count, capped by those limits */
+	nqs = netif_get_num_default_rss_queues();
+	nn->num_tx_rings = min_t(int, nqs, max_tx_rings);
+	nn->num_rx_rings = min_t(int, nqs, max_rx_rings);
+
+	nn->txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
+	nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
+
+	return nn;
+}
+
+/**
+ * nfp_net_netdev_free() - Undo what nfp_net_netdev_alloc() did
+ * @nn:      NFP Net device whose net_device (nn->netdev) is freed
+ */
+void nfp_net_netdev_free(struct nfp_net *nn)
+{
+	free_netdev(nn->netdev);
+}
+
+/**
+ * nfp_net_rss_init() - Set the initial RSS parameters
+ * @nn:	     NFP Net device to reconfigure
+ */
+static void nfp_net_rss_init(struct nfp_net *nn)
+{
+	int entry;
+
+	netdev_rss_key_fill(nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ);
+
+	/* Fill the indirection table with the ethtool default mapping */
+	for (entry = 0; entry < sizeof(nn->rss_itbl); entry++)
+		nn->rss_itbl[entry] =
+			ethtool_rxfh_indir_default(entry, nn->num_rx_rings);
+
+	/* Enable IPv4/IPv6 TCP by default */
+	nn->rss_cfg = NFP_NET_CFG_RSS_MASK |
+		      NFP_NET_CFG_RSS_TOEPLITZ |
+		      NFP_NET_CFG_RSS_IPV4_TCP | NFP_NET_CFG_RSS_IPV6_TCP;
+}
+
+/**
+ * nfp_net_irqmod_init() - Load the default IRQ moderation parameters
+ * @nn:	     NFP Net device to reconfigure
+ */
+static void nfp_net_irqmod_init(struct nfp_net *nn)
+{
+	nn->rx_coalesce_max_frames = 64;
+	nn->tx_coalesce_max_frames = 64;
+	nn->rx_coalesce_usecs = 50;
+	nn->tx_coalesce_usecs = 50;
+}
+
+/**
+ * nfp_net_netdev_init() - Read device caps, set defaults, register netdev
+ * @netdev:      netdev whose private area is the struct nfp_net to set up
+ *
+ * Return: 0 on success or negative errno on error.
+ */
+int nfp_net_netdev_init(struct net_device *netdev)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	int err;
+
+	/* Get some of the read-only fields from the BAR */
+	nn->cap = nn_readl(nn, NFP_NET_CFG_CAP);
+	nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU);
+
+	nfp_net_write_mac_addr(nn, nn->netdev->dev_addr);
+
+	/* Set default MTU and Freelist buffer size */
+	if (nn->max_mtu < NFP_NET_DEFAULT_MTU)
+		netdev->mtu = nn->max_mtu;
+	else
+		netdev->mtu = NFP_NET_DEFAULT_MTU;
+	nn->fl_bufsz = NFP_NET_DEFAULT_RX_BUFSZ;
+
+	/* Advertise/enable offloads based on capabilities.  Note that
+	 * netdev->features show the currently enabled features and
+	 * netdev->hw_features advertises which features are supported.
+	 * By default we enable most features.
+	 */
+	netdev->hw_features = NETIF_F_HIGHDMA;
+	if (nn->cap & NFP_NET_CFG_CTRL_RXCSUM) {
+		netdev->hw_features |= NETIF_F_RXCSUM;
+		nn->ctrl |= NFP_NET_CFG_CTRL_RXCSUM;
+	}
+	if (nn->cap & NFP_NET_CFG_CTRL_TXCSUM) {
+		netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+		nn->ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
+	}
+	if (nn->cap & NFP_NET_CFG_CTRL_GATHER) {
+		netdev->hw_features |= NETIF_F_SG;
+		nn->ctrl |= NFP_NET_CFG_CTRL_GATHER;
+	}
+	if ((nn->cap & NFP_NET_CFG_CTRL_LSO) && nn->fw_ver.major > 2) {
+		netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
+		nn->ctrl |= NFP_NET_CFG_CTRL_LSO;
+	}
+	if (nn->cap & NFP_NET_CFG_CTRL_RSS) {
+		netdev->hw_features |= NETIF_F_RXHASH;
+		nfp_net_rss_init(nn);
+		nn->ctrl |= NFP_NET_CFG_CTRL_RSS;
+	}
+	if (nn->cap & NFP_NET_CFG_CTRL_VXLAN &&
+	    nn->cap & NFP_NET_CFG_CTRL_NVGRE) {
+		if (nn->cap & NFP_NET_CFG_CTRL_LSO)
+			netdev->hw_features |= NETIF_F_GSO_GRE |
+					       NETIF_F_GSO_UDP_TUNNEL;
+		nn->ctrl |= NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE;
+
+		netdev->hw_enc_features = netdev->hw_features;
+	}
+
+	netdev->vlan_features = netdev->hw_features;
+
+	if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN) {
+		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+		nn->ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
+	}
+	if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN) {
+		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
+		nn->ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
+	}
+
+	netdev->features = netdev->hw_features;
+
+	/* Advertise but disable TSO by default. */
+	netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
+
+	/* Allow L2 Broadcast and Multicast through by default, if supported */
+	if (nn->cap & NFP_NET_CFG_CTRL_L2BC)
+		nn->ctrl |= NFP_NET_CFG_CTRL_L2BC;
+	if (nn->cap & NFP_NET_CFG_CTRL_L2MC)
+		nn->ctrl |= NFP_NET_CFG_CTRL_L2MC;
+
+	/* Allow IRQ moderation, if supported */
+	if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
+		nfp_net_irqmod_init(nn);
+		nn->ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
+	}
+
+	/* On NFP-3200 enable MSI-X auto-masking, if supported and the
+	 * interrupts are not shared.
+	 */
+	if (nn->is_nfp3200 && nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO)
+		nn->ctrl |= NFP_NET_CFG_CTRL_MSIXAUTO;
+
+	/* On NFP4000/NFP6000, determine RX packet/metadata boundary offset */
+	if (nn->fw_ver.major >= 2)
+		nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
+	else
+		nn->rx_offset = NFP_NET_RX_OFFSET;
+
+	/* Stash the re-configuration queue away.  First odd queue in TX Bar */
+	nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
+
+	/* Make sure the FW knows the netdev is supposed to be disabled here */
+	nn_writel(nn, NFP_NET_CFG_CTRL, 0);
+	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
+	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
+	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RING |
+				   NFP_NET_CFG_UPDATE_GEN);
+	if (err)
+		return err;
+
+	/* Finalise the netdev setup.  ether_setup() already ran when the
+	 * netdev was allocated; repeating it would reset the MTU set above.
+	 */
+	netdev->netdev_ops = &nfp_net_netdev_ops;
+	netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);
+
+	nfp_net_set_ethtool_ops(netdev);
+	nfp_net_irqs_assign(netdev);
+
+	return register_netdev(netdev);
+}
+
+/**
+ * nfp_net_netdev_clean() - Unregister what nfp_net_netdev_init() registered
+ * @netdev:      netdev to unregister
+ */
+void nfp_net_netdev_clean(struct net_device *netdev)
+{
+	unregister_netdev(netdev);
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
new file mode 100644
index 000000000000..8692003aeed8
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2015 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * nfp_net_ctrl.h
+ * Netronome network device driver: Control BAR layout
+ * Authors: Jakub Kicinski <jakub.kicinski@netronome.com>
+ *          Jason McMullan <jason.mcmullan@netronome.com>
+ *          Rolf Neugebauer <rolf.neugebauer@netronome.com>
+ *          Brad Petrus <brad.petrus@netronome.com>
+ */
+
+#ifndef _NFP_NET_CTRL_H_
+#define _NFP_NET_CTRL_H_
+
+/* IMPORTANT: This header file is shared with the FW,
+ *	      no OS specific constructs, please!
+ */
+
+/**
+ * Configuration BAR size.
+ *
+ * The configuration BAR is 8K in size, but on the NFP6000, due to
+ * THB-350, 32K needs to be reserved.
+ */
+#define NFP_NET_CFG_BAR_SZ              (32 * 1024)
+
+/**
+ * Fixed offset in the Freelist buffer where the packet starts on RX
+ */
+#define NFP_NET_RX_OFFSET               32
+
+/**
+ * Maximum header size supported for LSO frames
+ */
+#define NFP_NET_LSO_MAX_HDR_SZ		255
+
+/**
+ * Hash type prepended to the packet when an RSS hash was computed
+ */
+#define NFP_NET_RSS_NONE                0
+#define NFP_NET_RSS_IPV4                1
+#define NFP_NET_RSS_IPV6                2
+#define NFP_NET_RSS_IPV6_EX             3
+#define NFP_NET_RSS_IPV4_TCP            4
+#define NFP_NET_RSS_IPV6_TCP            5
+#define NFP_NET_RSS_IPV6_EX_TCP         6
+#define NFP_NET_RSS_IPV4_UDP            7
+#define NFP_NET_RSS_IPV6_UDP            8
+#define NFP_NET_RSS_IPV6_EX_UDP         9
+
+/**
+ * @NFP_NET_TXR_MAX:         Maximum number of TX rings
+ * @NFP_NET_TXR_MASK:        Mask for TX ring indices (NFP_NET_TXR_MAX - 1)
+ * @NFP_NET_RXR_MAX:         Maximum number of RX rings
+ * @NFP_NET_RXR_MASK:        Mask for RX ring indices (NFP_NET_RXR_MAX - 1)
+ */
+#define NFP_NET_TXR_MAX                 64
+#define NFP_NET_TXR_MASK                (NFP_NET_TXR_MAX - 1)
+#define NFP_NET_RXR_MAX                 64
+#define NFP_NET_RXR_MASK                (NFP_NET_RXR_MAX - 1)
+
+/**
+ * Read/Write config words (0x0000 - 0x002c)
+ * @NFP_NET_CFG_CTRL:        Global control
+ * @NFP_NET_CFG_UPDATE:      Indicate which fields are updated
+ * @NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings
+ * @NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings
+ * @NFP_NET_CFG_MTU:         Set MTU size
+ * @NFP_NET_CFG_FLBUFSZ:     Set freelist buffer size (must be larger than MTU)
+ * @NFP_NET_CFG_EXN:         MSI-X table entry for exceptions
+ * @NFP_NET_CFG_LSC:         MSI-X table entry for link state changes
+ * @NFP_NET_CFG_MACADDR:     MAC address
+ *
+ * TODO:
+ * - define Error details in UPDATE
+ */
+#define NFP_NET_CFG_CTRL                0x0000
+#define   NFP_NET_CFG_CTRL_ENABLE         (0x1 <<  0) /* Global enable */
+#define   NFP_NET_CFG_CTRL_PROMISC        (0x1 <<  1) /* Enable Promisc mode */
+#define   NFP_NET_CFG_CTRL_L2BC           (0x1 <<  2) /* Allow L2 Broadcast */
+#define   NFP_NET_CFG_CTRL_L2MC           (0x1 <<  3) /* Allow L2 Multicast */
+#define   NFP_NET_CFG_CTRL_RXCSUM         (0x1 <<  4) /* Enable RX Checksum */
+#define   NFP_NET_CFG_CTRL_TXCSUM         (0x1 <<  5) /* Enable TX Checksum */
+#define   NFP_NET_CFG_CTRL_RXVLAN         (0x1 <<  6) /* Enable VLAN strip */
+#define   NFP_NET_CFG_CTRL_TXVLAN         (0x1 <<  7) /* Enable VLAN insert */
+#define   NFP_NET_CFG_CTRL_SCATTER        (0x1 <<  8) /* Scatter DMA */
+#define   NFP_NET_CFG_CTRL_GATHER         (0x1 <<  9) /* Gather DMA */
+#define   NFP_NET_CFG_CTRL_LSO            (0x1 << 10) /* LSO/TSO */
+#define   NFP_NET_CFG_CTRL_RINGCFG        (0x1 << 16) /* Ring runtime changes */
+#define   NFP_NET_CFG_CTRL_RSS            (0x1 << 17) /* RSS */
+#define   NFP_NET_CFG_CTRL_IRQMOD         (0x1 << 18) /* Interrupt moderation */
+#define   NFP_NET_CFG_CTRL_RINGPRIO       (0x1 << 19) /* Ring priorities */
+#define   NFP_NET_CFG_CTRL_MSIXAUTO       (0x1 << 20) /* MSI-X auto-masking */
+#define   NFP_NET_CFG_CTRL_TXRWB          (0x1 << 21) /* Write-back of TX ring*/
+#define   NFP_NET_CFG_CTRL_L2SWITCH       (0x1 << 22) /* L2 Switch */
+#define   NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */
+#define   NFP_NET_CFG_CTRL_VXLAN	  (0x1 << 24) /* VXLAN tunnel support */
+#define   NFP_NET_CFG_CTRL_NVGRE	  (0x1 << 25) /* NVGRE tunnel support */
+#define NFP_NET_CFG_UPDATE              0x0004
+#define   NFP_NET_CFG_UPDATE_GEN          (0x1 <<  0) /* General update */
+#define   NFP_NET_CFG_UPDATE_RING         (0x1 <<  1) /* Ring config change */
+#define   NFP_NET_CFG_UPDATE_RSS          (0x1 <<  2) /* RSS config change */
+#define   NFP_NET_CFG_UPDATE_TXRPRIO      (0x1 <<  3) /* TX Ring prio change */
+#define   NFP_NET_CFG_UPDATE_RXRPRIO      (0x1 <<  4) /* RX Ring prio change */
+#define   NFP_NET_CFG_UPDATE_MSIX         (0x1 <<  5) /* MSI-X change */
+#define   NFP_NET_CFG_UPDATE_L2SWITCH     (0x1 <<  6) /* Switch changes */
+#define   NFP_NET_CFG_UPDATE_RESET        (0x1 <<  7) /* Update due to FLR */
+#define   NFP_NET_CFG_UPDATE_IRQMOD       (0x1 <<  8) /* IRQ mod change */
+#define   NFP_NET_CFG_UPDATE_VXLAN	  (0x1 <<  9) /* VXLAN port change */
+#define   NFP_NET_CFG_UPDATE_ERR          (0x1 << 31) /* A error occurred */
+#define NFP_NET_CFG_TXRS_ENABLE         0x0008
+#define NFP_NET_CFG_RXRS_ENABLE         0x0010
+#define NFP_NET_CFG_MTU                 0x0018
+#define NFP_NET_CFG_FLBUFSZ             0x001c
+#define NFP_NET_CFG_EXN                 0x001f
+#define NFP_NET_CFG_LSC                 0x0020
+#define NFP_NET_CFG_MACADDR             0x0024
+
+/**
+ * Read-only words (0x0030 - 0x0050):
+ * @NFP_NET_CFG_VERSION:     Firmware version number
+ * @NFP_NET_CFG_STS:         Status
+ * @NFP_NET_CFG_CAP:         Capabilities (same bits as @NFP_NET_CFG_CTRL)
+ * @NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings
+ * @NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings
+ * @NFP_NET_CFG_MAX_MTU:     Maximum supported MTU
+ * @NFP_NET_CFG_START_TXQ:   Start Queue Control Queue to use for TX (PF only)
+ * @NFP_NET_CFG_START_RXQ:   Start Queue Control Queue to use for RX (PF only)
+ *
+ * TODO:
+ * - define more STS bits
+ */
+#define NFP_NET_CFG_VERSION             0x0030
+#define   NFP_NET_CFG_VERSION_RESERVED_MASK	(0xffU << 24)
+#define   NFP_NET_CFG_VERSION_CLASS_MASK  (0xff << 16)
+#define   NFP_NET_CFG_VERSION_CLASS(x)    (((x) & 0xff) << 16)
+#define   NFP_NET_CFG_VERSION_CLASS_GENERIC	0
+#define   NFP_NET_CFG_VERSION_MAJOR_MASK  (0xff <<  8)
+#define   NFP_NET_CFG_VERSION_MAJOR(x)    (((x) & 0xff) <<  8)
+#define   NFP_NET_CFG_VERSION_MINOR_MASK  (0xff <<  0)
+#define   NFP_NET_CFG_VERSION_MINOR(x)    (((x) & 0xff) <<  0)
+#define NFP_NET_CFG_STS                 0x0034
+#define   NFP_NET_CFG_STS_LINK            (0x1 << 0) /* Link up or down */
+#define NFP_NET_CFG_CAP                 0x0038
+#define NFP_NET_CFG_MAX_TXRINGS         0x003c
+#define NFP_NET_CFG_MAX_RXRINGS         0x0040
+#define NFP_NET_CFG_MAX_MTU             0x0044
+/* Next two words are being used by VFs for solving THB350 issue */
+#define NFP_NET_CFG_START_TXQ           0x0048
+#define NFP_NET_CFG_START_RXQ           0x004c
+
+/**
+ * NFP-3200 workaround (0x0050 - 0x0058)
+ * @NFP_NET_CFG_SPARE_ADDR:  DMA address for ME code to use (e.g. YDS-155 fix)
+ */
+#define NFP_NET_CFG_SPARE_ADDR          0x0050
+/**
+ * NFP6000/NFP4000 - Prepend configuration (same offset as the word above)
+ */
+#define NFP_NET_CFG_RX_OFFSET		0x0050
+#define NFP_NET_CFG_RX_OFFSET_DYNAMIC		0	/* Prepend mode */
+
+/**
+ * NFP6000/NFP4000 - VXLAN/UDP encap configuration
+ * @NFP_NET_CFG_VXLAN_PORT:	Base address of table of tunnels' UDP dst ports
+ * @NFP_NET_CFG_VXLAN_SZ:	Size of the UDP port table in bytes
+ */
+#define NFP_NET_CFG_VXLAN_PORT		0x0060
+#define NFP_NET_CFG_VXLAN_SZ		  0x0008
+
+/**
+ * 64B reserved for future use (0x0080 - 0x00c0)
+ */
+#define NFP_NET_CFG_RESERVED            0x0080
+#define NFP_NET_CFG_RESERVED_SZ         0x0040
+
+/**
+ * RSS configuration (0x0100 - 0x01ac):
+ * Used only when NFP_NET_CFG_CTRL_RSS is enabled
+ * @NFP_NET_CFG_RSS_CTRL:    RSS configuration word
+ * @NFP_NET_CFG_RSS_KEY:     RSS "secret" key (NFP_NET_CFG_RSS_KEY_SZ bytes)
+ * @NFP_NET_CFG_RSS_ITBL:    RSS indirection table (NFP_NET_CFG_RSS_ITBL_SZ)
+ */
+#define NFP_NET_CFG_RSS_BASE            0x0100
+#define NFP_NET_CFG_RSS_CTRL            NFP_NET_CFG_RSS_BASE
+#define   NFP_NET_CFG_RSS_MASK            (0x7f)
+#define   NFP_NET_CFG_RSS_MASK_of(_x)     ((_x) & 0x7f)
+#define   NFP_NET_CFG_RSS_IPV4            (1 <<  8) /* RSS for IPv4 */
+#define   NFP_NET_CFG_RSS_IPV6            (1 <<  9) /* RSS for IPv6 */
+#define   NFP_NET_CFG_RSS_IPV4_TCP        (1 << 10) /* RSS for IPv4/TCP */
+#define   NFP_NET_CFG_RSS_IPV4_UDP        (1 << 11) /* RSS for IPv4/UDP */
+#define   NFP_NET_CFG_RSS_IPV6_TCP        (1 << 12) /* RSS for IPv6/TCP */
+#define   NFP_NET_CFG_RSS_IPV6_UDP        (1 << 13) /* RSS for IPv6/UDP */
+#define   NFP_NET_CFG_RSS_TOEPLITZ        (1 << 24) /* Use Toeplitz hash */
+#define NFP_NET_CFG_RSS_KEY             (NFP_NET_CFG_RSS_BASE + 0x4)
+#define NFP_NET_CFG_RSS_KEY_SZ          0x28
+#define NFP_NET_CFG_RSS_ITBL            (NFP_NET_CFG_RSS_BASE + 0x4 + \
+					 NFP_NET_CFG_RSS_KEY_SZ)
+#define NFP_NET_CFG_RSS_ITBL_SZ         0x80
+
+/**
+ * TX ring configuration (0x0200 - 0x0800)
+ * @NFP_NET_CFG_TXR_BASE:    Base offset for TX ring configuration
+ * @NFP_NET_CFG_TXR_ADDR:    Per TX ring DMA address (8B entries)
+ * @NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries)
+ * @NFP_NET_CFG_TXR_SZ:      Per TX ring size (1B entries)
+ * @NFP_NET_CFG_TXR_VEC:     Per TX ring MSI-X table entry (1B entries)
+ * @NFP_NET_CFG_TXR_PRIO:    Per TX ring priority (1B entries)
+ * @NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation (4B entries)
+ */
+#define NFP_NET_CFG_TXR_BASE            0x0200
+#define NFP_NET_CFG_TXR_ADDR(_x)        (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8))
+#define NFP_NET_CFG_TXR_WB_ADDR(_x)     (NFP_NET_CFG_TXR_BASE + 0x200 + \
+					 ((_x) * 0x8))
+#define NFP_NET_CFG_TXR_SZ(_x)          (NFP_NET_CFG_TXR_BASE + 0x400 + (_x))
+#define NFP_NET_CFG_TXR_VEC(_x)         (NFP_NET_CFG_TXR_BASE + 0x440 + (_x))
+#define NFP_NET_CFG_TXR_PRIO(_x)        (NFP_NET_CFG_TXR_BASE + 0x480 + (_x))
+#define NFP_NET_CFG_TXR_IRQ_MOD(_x)	(NFP_NET_CFG_TXR_BASE + 0x500 + \
+					 ((_x) * 0x4))
+
+/**
+ * RX ring configuration (0x0800 - 0x0c00)
+ * @NFP_NET_CFG_RXR_BASE:    Base offset for RX ring configuration
+ * @NFP_NET_CFG_RXR_ADDR:    Per RX ring DMA address (8B entries)
+ * @NFP_NET_CFG_RXR_SZ:      Per RX ring size (1B entries)
+ * @NFP_NET_CFG_RXR_VEC:     Per RX ring MSI-X table entry (1B entries)
+ * @NFP_NET_CFG_RXR_PRIO:    Per RX ring priority (1B entries)
+ * @NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries)
+ */
+#define NFP_NET_CFG_RXR_BASE            0x0800
+#define NFP_NET_CFG_RXR_ADDR(_x)        (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8))
+#define NFP_NET_CFG_RXR_SZ(_x)          (NFP_NET_CFG_RXR_BASE + 0x200 + (_x))
+#define NFP_NET_CFG_RXR_VEC(_x)         (NFP_NET_CFG_RXR_BASE + 0x240 + (_x))
+#define NFP_NET_CFG_RXR_PRIO(_x)        (NFP_NET_CFG_RXR_BASE + 0x280 + (_x))
+#define NFP_NET_CFG_RXR_IRQ_MOD(_x)	(NFP_NET_CFG_RXR_BASE + 0x300 + \
+					 ((_x) * 0x4))
+
+/**
+ * Interrupt Control/Cause registers (0x0c00 - 0x0d00)
+ * These registers are only used when MSI-X auto-masking is not
+ * enabled (@NFP_NET_CFG_CTRL_MSIXAUTO not set).  The array is indexed
+ * by MSI-X entry and each element is 1B in size.  If an entry is zero,
+ * the corresponding MSI-X entry is enabled.  If the FW generates an
+ * interrupt, it writes a cause into the corresponding field.  This also
+ * masks the MSI-X entry and the host driver must clear the register to
+ * re-enable the interrupt.
+ */
+#define NFP_NET_CFG_ICR_BASE            0x0c00
+#define NFP_NET_CFG_ICR(_x)             (NFP_NET_CFG_ICR_BASE + (_x))
+#define   NFP_NET_CFG_ICR_UNMASKED      0x0
+#define   NFP_NET_CFG_ICR_RXTX          0x1
+#define   NFP_NET_CFG_ICR_LSC           0x2
+
+/**
+ * General device stats (0x0d00 - 0x0d90)
+ * All counters are 64bit; the RX counters come first, then the TX ones.
+ */
+#define NFP_NET_CFG_STATS_BASE          0x0d00
+#define NFP_NET_CFG_STATS_RX_DISCARDS   (NFP_NET_CFG_STATS_BASE + 0x00)
+#define NFP_NET_CFG_STATS_RX_ERRORS     (NFP_NET_CFG_STATS_BASE + 0x08)
+#define NFP_NET_CFG_STATS_RX_OCTETS     (NFP_NET_CFG_STATS_BASE + 0x10)
+#define NFP_NET_CFG_STATS_RX_UC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x18)
+#define NFP_NET_CFG_STATS_RX_MC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x20)
+#define NFP_NET_CFG_STATS_RX_BC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x28)
+#define NFP_NET_CFG_STATS_RX_FRAMES     (NFP_NET_CFG_STATS_BASE + 0x30)
+#define NFP_NET_CFG_STATS_RX_MC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x38)
+#define NFP_NET_CFG_STATS_RX_BC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x40)
+
+#define NFP_NET_CFG_STATS_TX_DISCARDS   (NFP_NET_CFG_STATS_BASE + 0x48)
+#define NFP_NET_CFG_STATS_TX_ERRORS     (NFP_NET_CFG_STATS_BASE + 0x50)
+#define NFP_NET_CFG_STATS_TX_OCTETS     (NFP_NET_CFG_STATS_BASE + 0x58)
+#define NFP_NET_CFG_STATS_TX_UC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x60)
+#define NFP_NET_CFG_STATS_TX_MC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x68)
+#define NFP_NET_CFG_STATS_TX_BC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x70)
+#define NFP_NET_CFG_STATS_TX_FRAMES     (NFP_NET_CFG_STATS_BASE + 0x78)
+#define NFP_NET_CFG_STATS_TX_MC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x80)
+#define NFP_NET_CFG_STATS_TX_BC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x88)
+
+/**
+ * Per ring stats (0x1000 - 0x1800)
+ * options, 64bit per entry
+ * @NFP_NET_CFG_TXR_STATS:   TX ring statistics (Packet and Byte count, 16B/ring)
+ * @NFP_NET_CFG_RXR_STATS:   RX ring statistics (Packet and Byte count, 16B/ring)
+ */
+#define NFP_NET_CFG_TXR_STATS_BASE      0x1000
+#define NFP_NET_CFG_TXR_STATS(_x)       (NFP_NET_CFG_TXR_STATS_BASE + \
+					 ((_x) * 0x10))
+#define NFP_NET_CFG_RXR_STATS_BASE      0x1400
+#define NFP_NET_CFG_RXR_STATS(_x)       (NFP_NET_CFG_RXR_STATS_BASE + \
+					 ((_x) * 0x10))
+
+#endif /* _NFP_NET_CTRL_H_ */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
new file mode 100644
index 000000000000..4c97c713121c
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2015 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/rtnetlink.h>
+
+#include "nfp_net.h"
+
+static struct dentry *nfp_dir; /* "nfp_net" dir, parent of adapter dirs */
+
+/* seq_file show callback: dump one RX ring's pointers and descriptors */
+static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data)
+{
+	struct nfp_net_rx_ring *ring = file->private;
+	int fl_rd, fl_wr, rx_rd, rx_wr, ndescs;
+	struct sk_buff *skb;
+	int idx;
+
+	rtnl_lock();
+
+	/* Nothing to print unless the ring belongs to a running netdev */
+	if (!ring->r_vec || !ring->r_vec->nfp_net)
+		goto out;
+	if (!netif_running(ring->r_vec->nfp_net->netdev))
+		goto out;
+
+	ndescs = ring->cnt;
+
+	/* Read/write pointers of the freelist and RX queues, read via qcp */
+	fl_rd = nfp_qcp_rd_ptr_read(ring->qcp_fl);
+	fl_wr = nfp_qcp_wr_ptr_read(ring->qcp_fl);
+	rx_rd = nfp_qcp_rd_ptr_read(ring->qcp_rx);
+	rx_wr = nfp_qcp_wr_ptr_read(ring->qcp_rx);
+
+	seq_printf(file, "RX[%02d]: H_RD=%d H_WR=%d FL_RD=%d FL_WR=%d RX_RD=%d RX_WR=%d\n",
+		   ring->idx, ring->rd_p, ring->wr_p,
+		   fl_rd, fl_wr, rx_rd, rx_wr);
+
+	for (idx = 0; idx < ndescs; idx++) {
+		seq_printf(file, "%04d: 0x%08x 0x%08x", idx,
+			   ring->rxds[idx].vals[0], ring->rxds[idx].vals[1]);
+
+		skb = READ_ONCE(ring->rxbufs[idx].skb);
+		if (skb)
+			seq_printf(file, " skb->head=%p skb->data=%p",
+				   skb->head, skb->data);
+
+		if (ring->rxbufs[idx].dma_addr)
+			seq_printf(file, " dma_addr=%pad",
+				   &ring->rxbufs[idx].dma_addr);
+
+		/* Tag the slot each ring/queue pointer currently maps to */
+		if (idx == ring->rd_p % ndescs)
+			seq_puts(file, " H_RD ");
+		if (idx == ring->wr_p % ndescs)
+			seq_puts(file, " H_WR ");
+		if (idx == fl_rd % ndescs)
+			seq_puts(file, " FL_RD");
+		if (idx == fl_wr % ndescs)
+			seq_puts(file, " FL_WR");
+		if (idx == rx_rd % ndescs)
+			seq_puts(file, " RX_RD");
+		if (idx == rx_wr % ndescs)
+			seq_puts(file, " RX_WR");
+
+		seq_putc(file, '\n');
+	}
+out:
+	rtnl_unlock();
+	return 0;
+}
+
+static int nfp_net_debugfs_rx_q_open(struct inode *inode, struct file *f)
+{
+	return single_open(f, nfp_net_debugfs_rx_q_read, inode->i_private);
+}
+
+static const struct file_operations nfp_rx_q_fops = {
+	.owner = THIS_MODULE,
+	.open = nfp_net_debugfs_rx_q_open, /* passes i_private to the dump */
+	.release = single_release, /* pairs with single_open() in .open */
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
+/* seq_file show callback: dump one TX ring's pointers and descriptors */
+static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
+{
+	struct nfp_net_tx_ring *ring = file->private;
+	int dev_rd, dev_wr, ndescs;
+	struct sk_buff *skb;
+	int idx;
+
+	rtnl_lock();
+
+	/* Nothing to print unless the ring belongs to a running netdev */
+	if (!ring->r_vec || !ring->r_vec->nfp_net)
+		goto out;
+	if (!netif_running(ring->r_vec->nfp_net->netdev))
+		goto out;
+
+	ndescs = ring->cnt;
+
+	/* Read/write pointers of the TX queue, read via qcp */
+	dev_rd = nfp_qcp_rd_ptr_read(ring->qcp_q);
+	dev_wr = nfp_qcp_wr_ptr_read(ring->qcp_q);
+
+	seq_printf(file, "TX[%02d]: H_RD=%d H_WR=%d D_RD=%d D_WR=%d\n",
+		   ring->idx, ring->rd_p, ring->wr_p, dev_rd, dev_wr);
+
+	for (idx = 0; idx < ndescs; idx++) {
+		seq_printf(file, "%04d: 0x%08x 0x%08x 0x%08x 0x%08x", idx,
+			   ring->txds[idx].vals[0], ring->txds[idx].vals[1],
+			   ring->txds[idx].vals[2], ring->txds[idx].vals[3]);
+
+		skb = READ_ONCE(ring->txbufs[idx].skb);
+		if (skb)
+			seq_printf(file, " skb->head=%p skb->data=%p",
+				   skb->head, skb->data);
+		if (ring->txbufs[idx].dma_addr)
+			seq_printf(file, " dma_addr=%pad",
+				   &ring->txbufs[idx].dma_addr);
+
+		/* Tag the slot each ring/queue pointer currently maps to */
+		if (idx == ring->rd_p % ndescs)
+			seq_puts(file, " H_RD");
+		if (idx == ring->wr_p % ndescs)
+			seq_puts(file, " H_WR");
+		if (idx == dev_rd % ndescs)
+			seq_puts(file, " D_RD");
+		if (idx == dev_wr % ndescs)
+			seq_puts(file, " D_WR");
+
+		seq_putc(file, '\n');
+	}
+out:
+	rtnl_unlock();
+	return 0;
+}
+
+static int nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f)
+{
+	return single_open(f, nfp_net_debugfs_tx_q_read, inode->i_private);
+}
+
+static const struct file_operations nfp_tx_q_fops = {
+	.owner = THIS_MODULE,
+	.open = nfp_net_debugfs_tx_q_open, /* passes i_private to the dump */
+	.release = single_release, /* pairs with single_open() in .open */
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
+/* Create <nfp_dir>/<pci name>/queue/{rx,tx}/<ring idx> dump files for @nn */
+void nfp_net_debugfs_adapter_add(struct nfp_net *nn)
+{
+	struct dentry *queues, *tx, *rx;
+	char int_name[16];
+	int i;
+
+	if (IS_ERR_OR_NULL(nfp_dir))
+		return;
+
+	nn->debugfs_dir = debugfs_create_dir(pci_name(nn->pdev), nfp_dir);
+	if (IS_ERR_OR_NULL(nn->debugfs_dir))
+		return;
+
+	queues = debugfs_create_dir("queue", nn->debugfs_dir);
+	if (IS_ERR_OR_NULL(queues))
+		return;
+
+	rx = debugfs_create_dir("rx", queues);
+	tx = debugfs_create_dir("tx", queues);
+	if (IS_ERR_OR_NULL(rx) || IS_ERR_OR_NULL(tx))
+		return;
+
+	for (i = 0; i < nn->num_rx_rings; i++) {
+		sprintf(int_name, "%d", i);
+		debugfs_create_file(int_name, S_IRUSR, rx,
+				    &nn->rx_rings[i], &nfp_rx_q_fops);
+	}
+
+	for (i = 0; i < nn->num_tx_rings; i++) {
+		sprintf(int_name, "%d", i);
+		debugfs_create_file(int_name, S_IRUSR, tx,
+				    &nn->tx_rings[i], &nfp_tx_q_fops);
+	}
+}
+
+void nfp_net_debugfs_adapter_del(struct nfp_net *nn)
+{
+	debugfs_remove_recursive(nn->debugfs_dir); /* whole per-adapter tree */
+	nn->debugfs_dir = NULL; /* drop the pointer to the removed dir */
+}
+
+void nfp_net_debugfs_create(void)
+{
+	nfp_dir = debugfs_create_dir("nfp_net", NULL); /* parent of all adapters */
+}
+
+void nfp_net_debugfs_destroy(void)
+{
+	debugfs_remove_recursive(nfp_dir); /* the dir and everything below */
+	nfp_dir = NULL; /* nfp_net_debugfs_adapter_add() now returns early */
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
new file mode 100644
index 000000000000..9a4084a68db5
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -0,0 +1,640 @@
+/*
+ * Copyright (C) 2015 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * nfp_net_ethtool.c
+ * Netronome network device driver: ethtool support
+ * Authors: Jakub Kicinski <jakub.kicinski@netronome.com>
+ *          Jason McMullan <jason.mcmullan@netronome.com>
+ *          Rolf Neugebauer <rolf.neugebauer@netronome.com>
+ *          Brad Petrus <brad.petrus@netronome.com>
+ */
+
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/ethtool.h>
+
+#include "nfp_net_ctrl.h"
+#include "nfp_net.h"
+
+/* Support for stats. Returns netdev, driver, and device stats */
+enum { NETDEV_ET_STATS, NFP_NET_DRV_ET_STATS, NFP_NET_DEV_ET_STATS };
+struct _nfp_net_et_stats {
+	char name[ETH_GSTRING_LEN];
+	int type;
+	int sz;
+	int off;
+};
+
+#define NN_ET_NETDEV_STAT(m) NETDEV_ET_STATS,			\
+		FIELD_SIZEOF(struct rtnl_link_stats64, m),	\
+		offsetof(struct rtnl_link_stats64, m)
+/* For stats in the control BAR (other than Q stats) */
+#define NN_ET_DEV_STAT(m) NFP_NET_DEV_ET_STATS,			\
+		sizeof(u64),					\
+		(m)
+static const struct _nfp_net_et_stats nfp_net_et_stats[] = {
+	/* netdev stats */
+	{"rx_packets", NN_ET_NETDEV_STAT(rx_packets)},
+	{"tx_packets", NN_ET_NETDEV_STAT(tx_packets)},
+	{"rx_bytes", NN_ET_NETDEV_STAT(rx_bytes)},
+	{"tx_bytes", NN_ET_NETDEV_STAT(tx_bytes)},
+	{"rx_errors", NN_ET_NETDEV_STAT(rx_errors)},
+	{"tx_errors", NN_ET_NETDEV_STAT(tx_errors)},
+	{"rx_dropped", NN_ET_NETDEV_STAT(rx_dropped)},
+	{"tx_dropped", NN_ET_NETDEV_STAT(tx_dropped)},
+	{"multicast", NN_ET_NETDEV_STAT(multicast)},
+	{"collisions", NN_ET_NETDEV_STAT(collisions)},
+	{"rx_over_errors", NN_ET_NETDEV_STAT(rx_over_errors)},
+	{"rx_crc_errors", NN_ET_NETDEV_STAT(rx_crc_errors)},
+	{"rx_frame_errors", NN_ET_NETDEV_STAT(rx_frame_errors)},
+	{"rx_fifo_errors", NN_ET_NETDEV_STAT(rx_fifo_errors)},
+	{"rx_missed_errors", NN_ET_NETDEV_STAT(rx_missed_errors)},
+	{"tx_aborted_errors", NN_ET_NETDEV_STAT(tx_aborted_errors)},
+	{"tx_carrier_errors", NN_ET_NETDEV_STAT(tx_carrier_errors)},
+	{"tx_fifo_errors", NN_ET_NETDEV_STAT(tx_fifo_errors)},
+	/* Stats from the device */
+	{"dev_rx_discards", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_DISCARDS)},
+	{"dev_rx_errors", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_ERRORS)},
+	{"dev_rx_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_OCTETS)},
+	{"dev_rx_uc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_UC_OCTETS)},
+	{"dev_rx_mc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_MC_OCTETS)},
+	{"dev_rx_bc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_BC_OCTETS)},
+	{"dev_rx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_FRAMES)},
+	{"dev_rx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_MC_FRAMES)},
+	{"dev_rx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_BC_FRAMES)},
+
+	{"dev_tx_discards", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_DISCARDS)},
+	{"dev_tx_errors", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_ERRORS)},
+	{"dev_tx_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_OCTETS)},
+	{"dev_tx_uc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_UC_OCTETS)},
+	{"dev_tx_mc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_OCTETS)},
+	{"dev_tx_bc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_OCTETS)},
+	{"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)},
+	{"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)},
+	{"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)},
+};
+
+#define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats)
+#define NN_ET_RVEC_STATS_LEN (nn->num_r_vecs * 3)
+#define NN_ET_RVEC_GATHER_STATS 7
+#define NN_ET_QUEUE_STATS_LEN ((nn->num_tx_rings + nn->num_rx_rings) * 2)
+#define NN_ET_STATS_LEN (NN_ET_GLOBAL_STATS_LEN + NN_ET_RVEC_GATHER_STATS + \
+			 NN_ET_RVEC_STATS_LEN + NN_ET_QUEUE_STATS_LEN)
+
+static void nfp_net_get_drvinfo(struct net_device *netdev,
+				struct ethtool_drvinfo *drvinfo)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	strlcpy(drvinfo->driver, nfp_net_driver_name, sizeof(drvinfo->driver));
+	strlcpy(drvinfo->version, nfp_net_driver_version,
+		sizeof(drvinfo->version));
+
+	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+		 "%d.%d.%d.%d",
+		 nn->fw_ver.resv, nn->fw_ver.class,
+		 nn->fw_ver.major, nn->fw_ver.minor);
+	strlcpy(drvinfo->bus_info, pci_name(nn->pdev),
+		sizeof(drvinfo->bus_info));
+
+	drvinfo->n_stats = NN_ET_STATS_LEN;
+	drvinfo->regdump_len = NFP_NET_CFG_BAR_SZ;
+}
+
+static void nfp_net_get_ringparam(struct net_device *netdev,
+				  struct ethtool_ringparam *ring)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	ring->rx_max_pending = NFP_NET_MAX_RX_DESCS;
+	ring->tx_max_pending = NFP_NET_MAX_TX_DESCS;
+	ring->rx_pending = nn->rxd_cnt;
+	ring->tx_pending = nn->txd_cnt;
+}
+
+static int nfp_net_set_ringparam(struct net_device *netdev,
+				 struct ethtool_ringparam *ring)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	u32 rxd_cnt, txd_cnt;
+
+	if (netif_running(netdev)) {
+		/* Some NIC drivers allow reconfiguration on the fly,
+		 * some down the interface, change and then up it
+		 * again.  For now we don't allow changes when the
+		 * device is up.
+		 */
+		nn_warn(nn, "Can't change rings while device is up\n");
+		return -EBUSY;
+	}
+
+	/* We don't have separate queues/rings for small/large frames. */
+	if (ring->rx_mini_pending || ring->rx_jumbo_pending)
+		return -EINVAL;
+
+	/* Clamp first (assumes MIN > 0): roundup_pow_of_two(0) is undefined */
+	rxd_cnt = clamp_t(u32, ring->rx_pending,
+			  NFP_NET_MIN_RX_DESCS, NFP_NET_MAX_RX_DESCS);
+	rxd_cnt = min_t(u32, roundup_pow_of_two(rxd_cnt), NFP_NET_MAX_RX_DESCS);
+
+	txd_cnt = clamp_t(u32, ring->tx_pending,
+			  NFP_NET_MIN_TX_DESCS, NFP_NET_MAX_TX_DESCS);
+	txd_cnt = min_t(u32, roundup_pow_of_two(txd_cnt), NFP_NET_MAX_TX_DESCS);
+
+	if (nn->rxd_cnt != rxd_cnt || nn->txd_cnt != txd_cnt)
+		nn_dbg(nn, "Change ring size: RxQ %u->%u, TxQ %u->%u\n",
+		       nn->rxd_cnt, rxd_cnt, nn->txd_cnt, txd_cnt);
+
+	nn->rxd_cnt = rxd_cnt;
+	nn->txd_cnt = txd_cnt;
+
+	return 0;
+}
+
+static void nfp_net_get_strings(struct net_device *netdev,
+				u32 stringset, u8 *data)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	char *p = (char *)data;
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++) {
+			memcpy(p, nfp_net_et_stats[i].name, ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < nn->num_r_vecs; i++) {
+			snprintf(p, ETH_GSTRING_LEN, "rvec_%d_rx_pkts", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN, "rvec_%d_tx_pkts", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN, "rvec_%d_tx_busy", i);
+			p += ETH_GSTRING_LEN;
+		}
+		strncpy(p, "hw_rx_csum_ok", ETH_GSTRING_LEN);
+		p += ETH_GSTRING_LEN;
+		strncpy(p, "hw_rx_csum_inner_ok", ETH_GSTRING_LEN);
+		p += ETH_GSTRING_LEN;
+		strncpy(p, "hw_rx_csum_err", ETH_GSTRING_LEN);
+		p += ETH_GSTRING_LEN;
+		strncpy(p, "hw_tx_csum", ETH_GSTRING_LEN);
+		p += ETH_GSTRING_LEN;
+		strncpy(p, "hw_tx_inner_csum", ETH_GSTRING_LEN);
+		p += ETH_GSTRING_LEN;
+		strncpy(p, "tx_gather", ETH_GSTRING_LEN);
+		p += ETH_GSTRING_LEN;
+		strncpy(p, "tx_lso", ETH_GSTRING_LEN);
+		p += ETH_GSTRING_LEN;
+		for (i = 0; i < nn->num_tx_rings; i++) {
+			snprintf(p, ETH_GSTRING_LEN, "txq_%d_pkts", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN, "txq_%d_bytes", i);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < nn->num_rx_rings; i++) {
+			snprintf(p, ETH_GSTRING_LEN, "rxq_%d_pkts", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN, "rxq_%d_bytes", i);
+			p += ETH_GSTRING_LEN;
+		}
+		break;
+	}
+}
+
+static void nfp_net_get_stats(struct net_device *netdev,
+			      struct ethtool_stats *stats, u64 *data)
+{
+	u64 gathered_stats[NN_ET_RVEC_GATHER_STATS] = {};
+	struct nfp_net *nn = netdev_priv(netdev);
+	struct rtnl_link_stats64 *netdev_stats;
+	struct rtnl_link_stats64 temp = {};
+	u64 tmp[NN_ET_RVEC_GATHER_STATS];
+	u8 __iomem *io_p;
+	int i, j, k;
+	u8 *p;
+
+	netdev_stats = dev_get_stats(netdev, &temp);
+
+	for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++) {
+		switch (nfp_net_et_stats[i].type) {
+		case NETDEV_ET_STATS:
+			p = (u8 *)netdev_stats + nfp_net_et_stats[i].off;
+			data[i] = nfp_net_et_stats[i].sz == sizeof(u64) ?
+				*(u64 *)p : *(u32 *)p;
+			break;
+		case NFP_NET_DEV_ET_STATS:
+			io_p = nn->ctrl_bar + nfp_net_et_stats[i].off;
+			data[i] = readq(io_p);
+			break;
+		}
+	}
+	/* Three fixed slots per vector: a fetch retry must not advance 'i' */
+	for (j = 0; j < nn->num_r_vecs; j++, i += 3) {
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin(&nn->r_vecs[j].rx_sync);
+			data[i] = nn->r_vecs[j].rx_pkts;
+			tmp[0] = nn->r_vecs[j].hw_csum_rx_ok;
+			tmp[1] = nn->r_vecs[j].hw_csum_rx_inner_ok;
+			tmp[2] = nn->r_vecs[j].hw_csum_rx_error;
+		} while (u64_stats_fetch_retry(&nn->r_vecs[j].rx_sync, start));
+
+		do {
+			start = u64_stats_fetch_begin(&nn->r_vecs[j].tx_sync);
+			data[i + 1] = nn->r_vecs[j].tx_pkts;
+			data[i + 2] = nn->r_vecs[j].tx_busy;
+			tmp[3] = nn->r_vecs[j].hw_csum_tx;
+			tmp[4] = nn->r_vecs[j].hw_csum_tx_inner;
+			tmp[5] = nn->r_vecs[j].tx_gather;
+			tmp[6] = nn->r_vecs[j].tx_lso;
+		} while (u64_stats_fetch_retry(&nn->r_vecs[j].tx_sync, start));
+
+		for (k = 0; k < NN_ET_RVEC_GATHER_STATS; k++)
+			gathered_stats[k] += tmp[k];
+	}
+	for (j = 0; j < NN_ET_RVEC_GATHER_STATS; j++)
+		data[i++] = gathered_stats[j];
+	for (j = 0; j < nn->num_tx_rings; j++) {
+		io_p = nn->ctrl_bar + NFP_NET_CFG_TXR_STATS(j);
+		data[i++] = readq(io_p);
+		io_p = nn->ctrl_bar + NFP_NET_CFG_TXR_STATS(j) + 8;
+		data[i++] = readq(io_p);
+	}
+	for (j = 0; j < nn->num_rx_rings; j++) {
+		io_p = nn->ctrl_bar + NFP_NET_CFG_RXR_STATS(j);
+		data[i++] = readq(io_p);
+		io_p = nn->ctrl_bar + NFP_NET_CFG_RXR_STATS(j) + 8;
+		data[i++] = readq(io_p);
+	}
+}
+
+static int nfp_net_get_sset_count(struct net_device *netdev, int sset)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	switch (sset) {
+	case ETH_SS_STATS:
+		return NN_ET_STATS_LEN;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/* RX network flow classification (RSS, filters, etc)
+ */
+static u32 ethtool_flow_to_nfp_flag(u32 flow_type)
+{
+	static const u32 xlate_ethtool_to_nfp[IPV6_FLOW + 1] = {
+		[TCP_V4_FLOW]	= NFP_NET_CFG_RSS_IPV4_TCP,
+		[TCP_V6_FLOW]	= NFP_NET_CFG_RSS_IPV6_TCP,
+		[UDP_V4_FLOW]	= NFP_NET_CFG_RSS_IPV4_UDP,
+		[UDP_V6_FLOW]	= NFP_NET_CFG_RSS_IPV6_UDP,
+		[IPV4_FLOW]	= NFP_NET_CFG_RSS_IPV4,
+		[IPV6_FLOW]	= NFP_NET_CFG_RSS_IPV6,
+	};
+
+	if (flow_type >= ARRAY_SIZE(xlate_ethtool_to_nfp))
+		return 0;
+
+	return xlate_ethtool_to_nfp[flow_type];
+}
+
+static int nfp_net_get_rss_hash_opts(struct nfp_net *nn,
+				     struct ethtool_rxnfc *cmd)
+{
+	u32 nfp_rss_flag;
+
+	cmd->data = 0;
+
+	if (!(nn->cap & NFP_NET_CFG_CTRL_RSS))
+		return -EOPNOTSUPP;
+
+	nfp_rss_flag = ethtool_flow_to_nfp_flag(cmd->flow_type);
+	if (!nfp_rss_flag)
+		return -EINVAL;
+
+	cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+	if (nn->rss_cfg & nfp_rss_flag)
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+
+	return 0;
+}
+
+static int nfp_net_get_rxnfc(struct net_device *netdev,
+			     struct ethtool_rxnfc *cmd, u32 *rule_locs)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = nn->num_rx_rings;
+		return 0;
+	case ETHTOOL_GRXFH:
+		return nfp_net_get_rss_hash_opts(nn, cmd);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int nfp_net_set_rss_hash_opt(struct nfp_net *nn,
+				    struct ethtool_rxnfc *nfc)
+{
+	u32 new_rss_cfg = nn->rss_cfg;
+	u32 nfp_rss_flag;
+	int err;
+
+	if (!(nn->cap & NFP_NET_CFG_CTRL_RSS))
+		return -EOPNOTSUPP;
+
+	/* RSS only supports IP SA/DA and L4 src/dst ports  */
+	if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	/* We need at least the IP SA/DA fields for hashing */
+	if (!(nfc->data & RXH_IP_SRC) ||
+	    !(nfc->data & RXH_IP_DST))
+		return -EINVAL;
+
+	nfp_rss_flag = ethtool_flow_to_nfp_flag(nfc->flow_type);
+	if (!nfp_rss_flag)
+		return -EINVAL;
+
+	switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+	case 0:
+		new_rss_cfg &= ~nfp_rss_flag;
+		break;
+	case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+		new_rss_cfg |= nfp_rss_flag;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	new_rss_cfg |= NFP_NET_CFG_RSS_TOEPLITZ;
+	new_rss_cfg |= NFP_NET_CFG_RSS_MASK;
+
+	if (new_rss_cfg == nn->rss_cfg)
+		return 0;
+
+	writel(new_rss_cfg, nn->ctrl_bar + NFP_NET_CFG_RSS_CTRL);
+	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RSS);
+	if (err)
+		return err;
+
+	nn->rss_cfg = new_rss_cfg;
+
+	nn_dbg(nn, "Changed RSS config to 0x%x\n", nn->rss_cfg);
+	return 0;
+}
+
+static int nfp_net_set_rxnfc(struct net_device *netdev,
+			     struct ethtool_rxnfc *cmd)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXFH:
+		return nfp_net_set_rss_hash_opt(nn, cmd);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static u32 nfp_net_get_rxfh_indir_size(struct net_device *netdev)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	if (!(nn->cap & NFP_NET_CFG_CTRL_RSS))
+		return 0;
+
+	return ARRAY_SIZE(nn->rss_itbl);
+}
+
+static u32 nfp_net_get_rxfh_key_size(struct net_device *netdev)
+{
+	return NFP_NET_CFG_RSS_KEY_SZ;
+}
+
+static int nfp_net_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+			    u8 *hfunc)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	int i;
+
+	if (!(nn->cap & NFP_NET_CFG_CTRL_RSS))
+		return -EOPNOTSUPP;
+
+	if (indir)
+		for (i = 0; i < ARRAY_SIZE(nn->rss_itbl); i++)
+			indir[i] = nn->rss_itbl[i];
+	if (key)
+		memcpy(key, nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ);
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	return 0;
+}
+
+static int nfp_net_set_rxfh(struct net_device *netdev,
+			    const u32 *indir, const u8 *key,
+			    const u8 hfunc)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	int i;
+
+	if (!(nn->cap & NFP_NET_CFG_CTRL_RSS) ||
+	    !(hfunc == ETH_RSS_HASH_NO_CHANGE || hfunc == ETH_RSS_HASH_TOP))
+		return -EOPNOTSUPP;
+
+	if (!key && !indir)
+		return 0;
+
+	if (key) {
+		memcpy(nn->rss_key, key, NFP_NET_CFG_RSS_KEY_SZ);
+		nfp_net_rss_write_key(nn);
+	}
+	if (indir) {
+		for (i = 0; i < ARRAY_SIZE(nn->rss_itbl); i++)
+			nn->rss_itbl[i] = indir[i];
+
+		nfp_net_rss_write_itbl(nn);
+	}
+
+	return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RSS);
+}
+
+/* Dump BAR registers
+ */
+static int nfp_net_get_regs_len(struct net_device *netdev)
+{
+	return NFP_NET_CFG_BAR_SZ;
+}
+
+static void nfp_net_get_regs(struct net_device *netdev,
+			     struct ethtool_regs *regs, void *p)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	u32 *regs_buf = p;
+	int i;
+
+	regs->version = nn_readl(nn, NFP_NET_CFG_VERSION);
+
+	for (i = 0; i < NFP_NET_CFG_BAR_SZ / sizeof(u32); i++)
+		regs_buf[i] = readl(nn->ctrl_bar + (i * sizeof(u32)));
+}
+
+static int nfp_net_get_coalesce(struct net_device *netdev,
+				struct ethtool_coalesce *ec)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	if (!(nn->cap & NFP_NET_CFG_CTRL_IRQMOD))
+		return -EINVAL;
+
+	ec->rx_coalesce_usecs       = nn->rx_coalesce_usecs;
+	ec->rx_max_coalesced_frames = nn->rx_coalesce_max_frames;
+	ec->tx_coalesce_usecs       = nn->tx_coalesce_usecs;
+	ec->tx_max_coalesced_frames = nn->tx_coalesce_max_frames;
+
+	return 0;
+}
+
+static int nfp_net_set_coalesce(struct net_device *netdev,
+				struct ethtool_coalesce *ec)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	unsigned int factor;
+
+	if (ec->rx_coalesce_usecs_irq ||
+	    ec->rx_max_coalesced_frames_irq ||
+	    ec->tx_coalesce_usecs_irq ||
+	    ec->tx_max_coalesced_frames_irq ||
+	    ec->stats_block_coalesce_usecs ||
+	    ec->use_adaptive_rx_coalesce ||
+	    ec->use_adaptive_tx_coalesce ||
+	    ec->pkt_rate_low ||
+	    ec->rx_coalesce_usecs_low ||
+	    ec->rx_max_coalesced_frames_low ||
+	    ec->tx_coalesce_usecs_low ||
+	    ec->tx_max_coalesced_frames_low ||
+	    ec->pkt_rate_high ||
+	    ec->rx_coalesce_usecs_high ||
+	    ec->rx_max_coalesced_frames_high ||
+	    ec->tx_coalesce_usecs_high ||
+	    ec->tx_max_coalesced_frames_high ||
+	    ec->rate_sample_interval)
+		return -EOPNOTSUPP;
+
+	/* Compute factor used to convert coalesce '_usecs' parameters to
+	 * ME timestamp ticks.  There are 16 ME clock cycles for each timestamp
+	 * count.
+	 */
+	factor = nn->me_freq_mhz / 16;
+
+	/* Each pair of (usecs, max_frames) fields specifies that interrupts
+	 * should be coalesced until
+	 *      (usecs > 0 && time_since_first_completion >= usecs) ||
+	 *      (max_frames > 0 && completed_frames >= max_frames)
+	 *
+	 * It is illegal to set both usecs and max_frames to zero as this would
+	 * cause interrupts to never be generated.  To disable coalescing, set
+	 * usecs = 0 and max_frames = 1.
+	 *
+	 * Some implementations ignore the value of max_frames and use the
+	 * condition time_since_first_completion >= usecs
+	 */
+
+	if (!(nn->cap & NFP_NET_CFG_CTRL_IRQMOD))
+		return -EINVAL;
+
+	/* ensure valid configuration */
+	if (!ec->rx_coalesce_usecs && !ec->rx_max_coalesced_frames)
+		return -EINVAL;
+
+	if (!ec->tx_coalesce_usecs && !ec->tx_max_coalesced_frames)
+		return -EINVAL;
+	/* multiply in 64 bits so a huge usecs value cannot wrap past the limit */
+	if ((u64)ec->rx_coalesce_usecs * factor >= ((1 << 16) - 1))
+		return -EINVAL;
+
+	if ((u64)ec->tx_coalesce_usecs * factor >= ((1 << 16) - 1))
+		return -EINVAL;
+
+	if (ec->rx_max_coalesced_frames >= ((1 << 16) - 1))
+		return -EINVAL;
+
+	if (ec->tx_max_coalesced_frames >= ((1 << 16) - 1))
+		return -EINVAL;
+
+	/* configuration is valid */
+	nn->rx_coalesce_usecs      = ec->rx_coalesce_usecs;
+	nn->rx_coalesce_max_frames = ec->rx_max_coalesced_frames;
+	nn->tx_coalesce_usecs      = ec->tx_coalesce_usecs;
+	nn->tx_coalesce_max_frames = ec->tx_max_coalesced_frames;
+
+	/* write configuration to device */
+	nfp_net_coalesce_write_cfg(nn);
+	return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD);
+}
+
+static const struct ethtool_ops nfp_net_ethtool_ops = {
+	.get_drvinfo		= nfp_net_get_drvinfo,
+	.get_ringparam		= nfp_net_get_ringparam,
+	.set_ringparam		= nfp_net_set_ringparam,
+	.get_strings		= nfp_net_get_strings,
+	.get_ethtool_stats	= nfp_net_get_stats,
+	.get_sset_count		= nfp_net_get_sset_count,
+	.get_rxnfc		= nfp_net_get_rxnfc,
+	.set_rxnfc		= nfp_net_set_rxnfc,
+	.get_rxfh_indir_size	= nfp_net_get_rxfh_indir_size,
+	.get_rxfh_key_size	= nfp_net_get_rxfh_key_size,
+	.get_rxfh		= nfp_net_get_rxfh,
+	.set_rxfh		= nfp_net_set_rxfh,
+	.get_regs_len		= nfp_net_get_regs_len,
+	.get_regs		= nfp_net_get_regs,
+	.get_coalesce           = nfp_net_get_coalesce,
+	.set_coalesce           = nfp_net_set_coalesce,
+};
+
+void nfp_net_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &nfp_net_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
new file mode 100644
index 000000000000..e2b22b8a20f1
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
@@ -0,0 +1,385 @@
+/*
+ * Copyright (C) 2015 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * nfp_netvf_main.c
+ * Netronome virtual function network device driver: Main entry point
+ * Author: Jason McMullan <jason.mcmullan@netronome.com>
+ *         Rolf Neugebauer <rolf.neugebauer@netronome.com>
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/etherdevice.h>
+
+#include "nfp_net_ctrl.h"
+#include "nfp_net.h"
+
+const char nfp_net_driver_name[] = "nfp_netvf";
+const char nfp_net_driver_version[] = "0.1";
+#define PCI_DEVICE_NFP6000VF		0x6003
+static const struct pci_device_id nfp_netvf_pci_device_ids[] = {
+	{ PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_NFP6000VF,
+	  PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID,
+	  PCI_ANY_ID, 0,
+	},
+	{ 0, } /* Required last entry. */
+};
+MODULE_DEVICE_TABLE(pci, nfp_netvf_pci_device_ids);
+
+static void nfp_netvf_get_mac_addr(struct nfp_net *nn)
+{
+	u8 mac_addr[ETH_ALEN];
+
+	put_unaligned_be32(nn_readl(nn, NFP_NET_CFG_MACADDR + 0), &mac_addr[0]);
+	/* We can't do readw for NFP-3200 compatibility */
+	put_unaligned_be16(nn_readl(nn, NFP_NET_CFG_MACADDR + 4) >> 16,
+			   &mac_addr[4]);
+
+	if (!is_valid_ether_addr(mac_addr)) {
+		eth_hw_addr_random(nn->netdev);
+		return;
+	}
+
+	ether_addr_copy(nn->netdev->dev_addr, mac_addr);
+	ether_addr_copy(nn->netdev->perm_addr, mac_addr);
+}
+
+static int nfp_netvf_pci_probe(struct pci_dev *pdev,
+			       const struct pci_device_id *pci_id)
+{
+	struct nfp_net_fw_version fw_ver;
+	int max_tx_rings, max_rx_rings;
+	u32 tx_bar_off, rx_bar_off;
+	u32 tx_bar_sz, rx_bar_sz;
+	int tx_bar_no, rx_bar_no;
+	u8 __iomem *ctrl_bar;
+	struct nfp_net *nn;
+	int is_nfp3200;
+	u32 startq;
+	int stride;
+	int err;
+
+	err = pci_enable_device_mem(pdev);
+	if (err)
+		return err;
+
+	err = pci_request_regions(pdev, nfp_net_driver_name);
+	if (err) {
+		dev_err(&pdev->dev, "Unable to allocate device memory.\n");
+		goto err_pci_disable;
+	}
+
+	switch (pdev->device) {
+	case PCI_DEVICE_NFP6000VF:
+		is_nfp3200 = 0;
+		break;
+	default:
+		err = -ENODEV;
+		goto err_pci_regions;
+	}
+
+	pci_set_master(pdev);
+
+	err = dma_set_mask_and_coherent(&pdev->dev,
+					DMA_BIT_MASK(NFP_NET_MAX_DMA_BITS));
+	if (err)
+		goto err_pci_regions;
+
+	/* Map the Control BAR.
+	 *
+	 * Irrespective of the advertised BAR size we only map the
+	 * first NFP_NET_CFG_BAR_SZ of the BAR.  This keeps the code
+	 * identical for PF and VF drivers.
+	 */
+	ctrl_bar = ioremap_nocache(pci_resource_start(pdev, NFP_NET_CRTL_BAR),
+				   NFP_NET_CFG_BAR_SZ);
+	if (!ctrl_bar) {
+		dev_err(&pdev->dev,
+			"Failed to map resource %d\n", NFP_NET_CRTL_BAR);
+		err = -EIO;
+		goto err_pci_regions;
+	}
+
+	nfp_net_get_fw_version(&fw_ver, ctrl_bar);
+	if (fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) {
+		dev_err(&pdev->dev, "Unknown Firmware ABI %d.%d.%d.%d\n",
+			fw_ver.resv, fw_ver.class, fw_ver.major, fw_ver.minor);
+		err = -EINVAL;
+		goto err_ctrl_unmap;
+	}
+
+	/* Determine stride */
+	if (nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 0) ||
+	    nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 1) ||
+	    nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0x12, 0x48)) {
+		stride = 2;
+		tx_bar_no = NFP_NET_Q0_BAR;
+		rx_bar_no = NFP_NET_Q1_BAR;
+		dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n");
+	} else {
+		switch (fw_ver.major) {
+		case 1 ... 3:
+			if (is_nfp3200) {
+				stride = 2;
+				tx_bar_no = NFP_NET_Q0_BAR;
+				rx_bar_no = NFP_NET_Q1_BAR;
+			} else {
+				stride = 4;
+				tx_bar_no = NFP_NET_Q0_BAR;
+				rx_bar_no = tx_bar_no;
+			}
+			break;
+		default:
+			dev_err(&pdev->dev, "Unsupported Firmware ABI %d.%d.%d.%d\n",
+				fw_ver.resv, fw_ver.class,
+				fw_ver.major, fw_ver.minor);
+			err = -EINVAL;
+			goto err_ctrl_unmap;
+		}
+	}
+
+	/* Find out how many rings are supported */
+	max_tx_rings = readl(ctrl_bar + NFP_NET_CFG_MAX_TXRINGS);
+	max_rx_rings = readl(ctrl_bar + NFP_NET_CFG_MAX_RXRINGS);
+
+	tx_bar_sz = NFP_QCP_QUEUE_ADDR_SZ * max_tx_rings * stride;
+	rx_bar_sz = NFP_QCP_QUEUE_ADDR_SZ * max_rx_rings * stride;
+
+	/* Sanity checks: shrink ring counts to what the BAR holds at 'stride' */
+	if (tx_bar_sz > pci_resource_len(pdev, tx_bar_no)) {
+		dev_err(&pdev->dev,
+			"TX BAR too small for number of TX rings. Adjusting\n");
+		tx_bar_sz = pci_resource_len(pdev, tx_bar_no);
+		max_tx_rings = (tx_bar_sz / NFP_QCP_QUEUE_ADDR_SZ) / stride;
+	}
+	if (rx_bar_sz > pci_resource_len(pdev, rx_bar_no)) {
+		dev_err(&pdev->dev,
+			"RX BAR too small for number of RX rings. Adjusting\n");
+		rx_bar_sz = pci_resource_len(pdev, rx_bar_no);
+		max_rx_rings = (rx_bar_sz / NFP_QCP_QUEUE_ADDR_SZ) / stride;
+	}
+
+	/* XXX Implement a workaround for THB-350 here.  Ideally, we
+	 * have a different PCI ID for A rev VFs.
+	 */
+	switch (pdev->device) {
+	case PCI_DEVICE_NFP6000VF:
+		startq = readl(ctrl_bar + NFP_NET_CFG_START_TXQ);
+		tx_bar_off = NFP_PCIE_QUEUE(startq);
+		startq = readl(ctrl_bar + NFP_NET_CFG_START_RXQ);
+		rx_bar_off = NFP_PCIE_QUEUE(startq);
+		break;
+	default:
+		err = -ENODEV;
+		goto err_ctrl_unmap;
+	}
+
+	/* Allocate and initialise the netdev */
+	nn = nfp_net_netdev_alloc(pdev, max_tx_rings, max_rx_rings);
+	if (IS_ERR(nn)) {
+		err = PTR_ERR(nn);
+		goto err_ctrl_unmap;
+	}
+
+	nn->fw_ver = fw_ver;
+	nn->ctrl_bar = ctrl_bar;
+	nn->is_vf = 1;
+	nn->is_nfp3200 = is_nfp3200;
+	nn->stride_tx = stride;
+	nn->stride_rx = stride;
+
+	if (rx_bar_no == tx_bar_no) {
+		u32 bar_off, bar_sz;
+		resource_size_t map_addr;
+
+		/* Make a single overlapping BAR mapping */
+		if (tx_bar_off < rx_bar_off)
+			bar_off = tx_bar_off;
+		else
+			bar_off = rx_bar_off;
+
+		if ((tx_bar_off + tx_bar_sz) > (rx_bar_off + rx_bar_sz))
+			bar_sz = (tx_bar_off + tx_bar_sz) - bar_off;
+		else
+			bar_sz = (rx_bar_off + rx_bar_sz) - bar_off;
+
+		map_addr = pci_resource_start(pdev, tx_bar_no) + bar_off;
+		nn->q_bar = ioremap_nocache(map_addr, bar_sz);
+		if (!nn->q_bar) {
+			nn_err(nn, "Failed to map resource %d\n", tx_bar_no);
+			err = -EIO;
+			goto err_netdev_free;
+		}
+
+		/* TX queues */
+		nn->tx_bar = nn->q_bar + (tx_bar_off - bar_off);
+		/* RX queues */
+		nn->rx_bar = nn->q_bar + (rx_bar_off - bar_off);
+	} else {
+		resource_size_t map_addr;
+
+		/* TX queues */
+		map_addr = pci_resource_start(pdev, tx_bar_no) + tx_bar_off;
+		nn->tx_bar = ioremap_nocache(map_addr, tx_bar_sz);
+		if (!nn->tx_bar) {
+			nn_err(nn, "Failed to map resource %d\n", tx_bar_no);
+			err = -EIO;
+			goto err_netdev_free;
+		}
+
+		/* RX queues */
+		map_addr = pci_resource_start(pdev, rx_bar_no) + rx_bar_off;
+		nn->rx_bar = ioremap_nocache(map_addr, rx_bar_sz);
+		if (!nn->rx_bar) {
+			nn_err(nn, "Failed to map resource %d\n", rx_bar_no);
+			err = -EIO;
+			goto err_unmap_tx;
+		}
+	}
+
+	nfp_netvf_get_mac_addr(nn);
+
+	err = nfp_net_irqs_alloc(nn);
+	if (!err) {
+		nn_warn(nn, "Unable to allocate MSI-X Vectors. Exiting\n");
+		err = -EIO;
+		goto err_unmap_rx;
+	}
+
+	/* Get ME clock frequency from ctrl BAR
+	 * XXX for now frequency is hardcoded until we figure out how
+	 * to get the value from nfp-hwinfo into ctrl bar
+	 */
+	nn->me_freq_mhz = 1200;
+
+	err = nfp_net_netdev_init(nn->netdev);
+	if (err)
+		goto err_irqs_disable;
+
+	pci_set_drvdata(pdev, nn);
+
+	nfp_net_info(nn);
+	nfp_net_debugfs_adapter_add(nn);
+
+	return 0;
+
+err_irqs_disable:
+	nfp_net_irqs_disable(nn);
+err_unmap_rx:
+	if (!nn->q_bar)
+		iounmap(nn->rx_bar);
+err_unmap_tx:
+	if (!nn->q_bar)
+		iounmap(nn->tx_bar);
+	else
+		iounmap(nn->q_bar);
+err_netdev_free:
+	pci_set_drvdata(pdev, NULL);
+	nfp_net_netdev_free(nn);
+err_ctrl_unmap:
+	iounmap(ctrl_bar);
+err_pci_regions:
+	pci_release_regions(pdev);
+err_pci_disable:
+	pci_disable_device(pdev);
+	return err;
+}
+
+static void nfp_netvf_pci_remove(struct pci_dev *pdev)
+{
+	struct nfp_net *nn = pci_get_drvdata(pdev);
+
+	/* Note, the order is slightly different from above as we need
+	 * to keep the nn pointer around till we have freed everything.
+	 */
+	nfp_net_debugfs_adapter_del(nn);
+
+	nfp_net_netdev_clean(nn->netdev);
+
+	nfp_net_irqs_disable(nn);
+
+	if (!nn->q_bar) {
+		iounmap(nn->rx_bar);
+		iounmap(nn->tx_bar);
+	} else {
+		iounmap(nn->q_bar);
+	}
+	iounmap(nn->ctrl_bar);
+
+	pci_set_drvdata(pdev, NULL);
+
+	nfp_net_netdev_free(nn);
+
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static struct pci_driver nfp_netvf_pci_driver = {
+	.name        = nfp_net_driver_name,
+	.id_table    = nfp_netvf_pci_device_ids,
+	.probe       = nfp_netvf_pci_probe,
+	.remove      = nfp_netvf_pci_remove,
+};
+
+static int __init nfp_netvf_init(void)
+{
+	int err;
+
+	pr_info("%s: NFP VF Network driver, Copyright (C) 2014-2015 Netronome Systems\n",
+		nfp_net_driver_name);
+
+	nfp_net_debugfs_create();
+	err = pci_register_driver(&nfp_netvf_pci_driver);
+	if (err) {
+		nfp_net_debugfs_destroy();
+		return err;
+	}
+
+	return 0;
+}
+
+static void __exit nfp_netvf_exit(void)
+{
+	pci_unregister_driver(&nfp_netvf_pci_driver);
+	nfp_net_debugfs_destroy();
+}
+
+module_init(nfp_netvf_init);
+module_exit(nfp_netvf_exit);
+
+MODULE_AUTHOR("Netronome Systems <oss-drivers@netronome.com>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("NFP VF network device driver");
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index b159ef8303cc..057665180f13 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -1326,7 +1326,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
 	/* Get platform resources */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq = platform_get_irq(pdev, 0);
-	if ((!res) || (irq < 0) || (irq >= NR_IRQS)) {
+	if (!res || irq < 0) {
 		dev_err(&pdev->dev, "error getting resources.\n");
 		ret = -ENXIO;
 		goto err_exit;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index b2f8e854dfd1..264e954675d1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -3993,6 +3993,8 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_PHY_CORE_WRITE             0x000e0000
 #define DRV_MSG_CODE_SET_VERSION                0x000f0000
 
+#define DRV_MSG_CODE_SET_LED_MODE               0x00200000
+
 #define DRV_MSG_SEQ_NUMBER_MASK                 0x0000ffff
 
 	u32 drv_mb_param;
@@ -4044,6 +4046,10 @@ struct public_drv_mb {
 #define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_SHIFT   8
 #define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_MASK    0x0000FF00
 
+#define DRV_MB_PARAM_SET_LED_MODE_OPER          0x0
+#define DRV_MB_PARAM_SET_LED_MODE_ON            0x1
+#define DRV_MB_PARAM_SET_LED_MODE_OFF           0x2
+
 	u32 fw_mb_header;
 #define FW_MSG_CODE_MASK                        0xffff0000
 #define FW_MSG_CODE_DRV_LOAD_ENGINE             0x10100000
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 947c7af72b25..6b02e1134360 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1135,6 +1135,23 @@ static int qed_drain(struct qed_dev *cdev)
 	return 0;
 }
 
+/* Set the LED mode via the leading hwfn; -EAGAIN if no PTT can be acquired */
+static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	int rc;
+
+	if (!p_ptt)
+		return -EAGAIN;
+
+	/* The PTT is held only for the duration of the LED command */
+	rc = qed_mcp_set_led(p_hwfn, p_ptt, mode);
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
 const struct qed_common_ops qed_common_ops_pass = {
 	.probe = &qed_probe,
 	.remove = &qed_remove,
@@ -1155,6 +1172,7 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.update_msglvl = &qed_init_dp,
 	.chain_alloc = &qed_chain_alloc,
 	.chain_free = &qed_chain_free,
+	.set_led = &qed_set_led,
 };
 
 u32 qed_get_protocol_version(enum qed_protocol protocol)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 20d048cdcb88..ba1b1f1ef789 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -858,3 +858,30 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
 
 	return 0;
 }
+
+/* Map @mode to its mailbox parameter and send DRV_MSG_CODE_SET_LED_MODE */
+int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+		    enum qed_led_mode mode)
+{
+	u32 mcp_resp = 0, mcp_param = 0, led_param;
+
+	switch (mode) {
+	case QED_LED_MODE_ON:
+		led_param = DRV_MB_PARAM_SET_LED_MODE_ON;
+		break;
+	case QED_LED_MODE_OFF:
+		led_param = DRV_MB_PARAM_SET_LED_MODE_OFF;
+		break;
+	case QED_LED_MODE_RESTORE:
+		/* RESTORE is sent as the OPER mailbox parameter */
+		led_param = DRV_MB_PARAM_SET_LED_MODE_OPER;
+		break;
+	default:
+		DP_NOTICE(p_hwfn, "Invalid LED mode %d\n", mode);
+		return -EINVAL;
+	}
+
+	/* The response/param outputs are not inspected; only rc is reported */
+	return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_SET_LED_MODE,
+			   led_param, &mcp_resp, &mcp_param);
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index dbaae586b4a7..506197d5c3dd 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -224,6 +224,19 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
 			 struct qed_ptt *p_ptt,
 			 struct qed_mcp_drv_version *p_ver);
 
+/**
+ * @brief Set LED status
+ *
+ *  @param p_hwfn - HW function used to issue the command
+ *  @param p_ptt - PTT handle handed on to qed_mcp_cmd()
+ *  @param mode - requested LED mode (on, off or restore)
+ *
+ * @return int - qed_mcp_cmd() status, or -EINVAL for an unknown mode.
+ */
+int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
+		    struct qed_ptt *p_ptt,
+		    enum qed_led_mode mode);
+
 /* Using hwfn number (and not pf_num) is required since in CMT mode,
  * same pf_num may be used by two different hwfn
  * TODO - this shouldn't really be in .h file, but until all fields
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index ea00d5f3bab4..7c6caf7f6612 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -116,6 +116,7 @@ struct qede_dev {
 				 (edev)->dev_info.num_tc)
 
 	struct qede_fastpath		*fp_array;
+	u16				req_rss;
 	u16				num_rss;
 	u8				num_tc;
 #define QEDE_RSS_CNT(edev)		((edev)->num_rss)
@@ -269,13 +270,13 @@ int qede_change_mtu(struct net_device *dev, int new_mtu);
 void qede_fill_by_demand_stats(struct qede_dev *edev);
 
 #define RX_RING_SIZE_POW	13
-#define RX_RING_SIZE		BIT(RX_RING_SIZE_POW)
+#define RX_RING_SIZE		((u16)BIT(RX_RING_SIZE_POW))
 #define NUM_RX_BDS_MAX		(RX_RING_SIZE - 1)
 #define NUM_RX_BDS_MIN		128
 #define NUM_RX_BDS_DEF		NUM_RX_BDS_MAX
 
 #define TX_RING_SIZE_POW	13
-#define TX_RING_SIZE		BIT(TX_RING_SIZE_POW)
+#define TX_RING_SIZE		((u16)BIT(TX_RING_SIZE_POW))
 #define NUM_TX_BDS_MAX		(TX_RING_SIZE - 1)
 #define NUM_TX_BDS_MIN		128
 #define NUM_TX_BDS_DEF		NUM_TX_BDS_MAX
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 3a362476a22c..e442b85c9a5e 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -322,6 +322,30 @@ static void qede_set_msglevel(struct net_device *ndev, u32 level)
 					 dp_module, dp_level);
 }
 
+/* ethtool nway_reset: bounce the link (down, then up) when it is up */
+static int qede_nway_reset(struct net_device *dev)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+	struct qed_dev *cdev = edev->cdev;
+	struct qed_link_params toggle;
+	struct qed_link_output link;
+
+	if (!netif_running(dev))
+		return 0;
+
+	memset(&link, 0, sizeof(link));
+	edev->ops->common->get_link(cdev, &link);
+	if (!link.link_up)
+		return 0;
+
+	memset(&toggle, 0, sizeof(toggle));
+	toggle.link_up = false;
+	edev->ops->common->set_link(cdev, &toggle);
+	toggle.link_up = true;
+	edev->ops->common->set_link(cdev, &toggle);
+	return 0;
+}
+
 static u32 qede_get_link(struct net_device *dev)
 {
 	struct qede_dev *edev = netdev_priv(dev);
@@ -333,6 +357,106 @@ static u32 qede_get_link(struct net_device *dev)
 	return current_link.link_up;
 }
 
+static void qede_get_ringparam(struct net_device *dev,
+			       struct ethtool_ringparam *ering)
+{
+	const struct qede_dev *qdev = netdev_priv(dev);
+
+	ering->rx_max_pending = NUM_RX_BDS_MAX;
+	ering->tx_max_pending = NUM_TX_BDS_MAX;
+	ering->rx_pending = qdev->q_num_rx_buffers;
+	ering->tx_pending = qdev->q_num_tx_buffers;
+}
+
+/* ethtool set_ringparam: validate the requested Rx/Tx sizes, store, reload */
+static int qede_set_ringparam(struct net_device *dev,
+			      struct ethtool_ringparam *ering)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+
+	DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
+		   "Set ring params command parameters: rx_pending = %u, tx_pending = %u\n",
+		   ering->rx_pending, ering->tx_pending);
+
+	/* Reject sizes outside [NUM_*_BDS_MIN, NUM_*_BDS_MAX] */
+	if (ering->rx_pending > NUM_RX_BDS_MAX ||
+	    ering->rx_pending < NUM_RX_BDS_MIN ||
+	    ering->tx_pending > NUM_TX_BDS_MAX ||
+	    ering->tx_pending < NUM_TX_BDS_MIN) {
+		DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
+			   "Can only support Rx Buffer size [0x%08x,...,0x%08x] and Tx Buffer size [0x%08x,...,0x%08x]\n",
+			   NUM_RX_BDS_MIN, NUM_RX_BDS_MAX,
+			   NUM_TX_BDS_MIN, NUM_TX_BDS_MAX);
+		return -EINVAL;
+	}
+
+	/* Change ring size and re-load */
+	edev->q_num_rx_buffers = ering->rx_pending;
+	edev->q_num_tx_buffers = ering->tx_pending;
+
+	if (netif_running(edev->ndev))
+		qede_reload(edev, NULL, NULL);
+	return 0;
+}
+
+/* ethtool get_pauseparam: set each field whose flag is in the link config */
+static void qede_get_pauseparam(struct net_device *dev,
+				struct ethtool_pauseparam *epause)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+	struct qed_link_output link;
+
+	memset(&link, 0, sizeof(link));
+	edev->ops->common->get_link(edev->cdev, &link);
+	if (link.pause_config & QED_LINK_PAUSE_AUTONEG_ENABLE)
+		epause->autoneg = true;
+	if (link.pause_config & QED_LINK_PAUSE_RX_ENABLE)
+		epause->rx_pause = true;
+	if (link.pause_config & QED_LINK_PAUSE_TX_ENABLE)
+		epause->tx_pause = true;
+
+	DP_VERBOSE(edev, QED_MSG_DEBUG,
+		   "ethtool_pauseparam: cmd %d  autoneg %d  rx_pause %d  tx_pause %d\n",
+		   epause->cmd, epause->autoneg, epause->rx_pause,
+		   epause->tx_pause);
+}
+
+/* ethtool set_pauseparam: push the requested pause config through set_link */
+static int qede_set_pauseparam(struct net_device *dev,
+			       struct ethtool_pauseparam *epause)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+	struct qed_link_output link;
+	struct qed_link_params req;
+
+	if (!edev->dev_info.common.is_mf) {
+		DP_INFO(edev,
+			"Pause parameters can not be updated in non-default mode\n");
+		return -EOPNOTSUPP;
+	}
+
+	memset(&link, 0, sizeof(link));
+	edev->ops->common->get_link(edev->cdev, &link);
+
+	/* Autoneg needs SUPPORTED_Autoneg in the link's supported_caps */
+	if (epause->autoneg && !(link.supported_caps & SUPPORTED_Autoneg)) {
+		DP_INFO(edev, "autoneg not supported\n");
+		return -EINVAL;
+	}
+
+	memset(&req, 0, sizeof(req));
+	req.link_up = true;
+	req.override_flags |= QED_LINK_OVERRIDE_PAUSE_CONFIG;
+	if (epause->autoneg)
+		req.pause_config |= QED_LINK_PAUSE_AUTONEG_ENABLE;
+	if (epause->rx_pause)
+		req.pause_config |= QED_LINK_PAUSE_RX_ENABLE;
+	if (epause->tx_pause)
+		req.pause_config |= QED_LINK_PAUSE_TX_ENABLE;
+	edev->ops->common->set_link(edev->cdev, &req);
+	return 0;
+}
+
 static void qede_update_mtu(struct qede_dev *edev, union qede_reload_args *args)
 {
 	edev->ndev->mtu = args->mtu;
@@ -366,17 +490,104 @@ int qede_change_mtu(struct net_device *ndev, int new_mtu)
 	return 0;
 }
 
+static void qede_get_channels(struct net_device *dev,
+			      struct ethtool_channels *channels)
+{
+	struct qede_dev *qdev = netdev_priv(dev);
+
+	channels->combined_count = QEDE_RSS_CNT(qdev);
+	channels->max_combined = QEDE_MAX_RSS_CNT(qdev);
+}
+
+/* ethtool set_channels: accept a combined-only queue count and reload */
+static int qede_set_channels(struct net_device *dev,
+			     struct ethtool_channels *channels)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+
+	DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
+		   "set-channels command parameters: rx = %u, tx = %u, other = %u, combined = %u\n",
+		   channels->rx_count, channels->tx_count,
+		   channels->other_count, channels->combined_count);
+
+	/* We don't support separate rx / tx, nor `other' channels. */
+	if (channels->rx_count || channels->tx_count ||
+	    channels->other_count || (channels->combined_count == 0) ||
+	    (channels->combined_count > QEDE_MAX_RSS_CNT(edev))) {
+		DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
+			   "command parameters not supported\n");
+		return -EINVAL;
+	}
+
+	/* Check if there was a change in the active parameters */
+	if (channels->combined_count == QEDE_RSS_CNT(edev)) {
+		DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
+			   "No change in active parameters\n");
+		return 0;
+	}
+
+	/* We need the number of queues to be divisible between the hwfns */
+	if (channels->combined_count % edev->dev_info.common.num_hwfns) {
+		DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
+			   "Number of channels must be divisible by %04x\n",
+			   edev->dev_info.common.num_hwfns);
+		return -EINVAL;
+	}
+
+	/* Set number of queues and reload if necessary */
+	edev->req_rss = channels->combined_count;
+	if (netif_running(dev))
+		qede_reload(edev, NULL, NULL);
+	return 0;
+}
+
+/* ethtool set_phys_id: drive the identify LED; returns the set_led status */
+static int qede_set_phys_id(struct net_device *dev,
+			    enum ethtool_phys_id_state state)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+	enum qed_led_mode led_state;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		return 1;	/* cycle on/off once per second */
+
+	case ETHTOOL_ID_ON:
+		led_state = QED_LED_MODE_ON;
+		break;
+	case ETHTOOL_ID_OFF:
+		led_state = QED_LED_MODE_OFF;
+		break;
+	case ETHTOOL_ID_INACTIVE:
+		led_state = QED_LED_MODE_RESTORE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Hand a failed LED command back to ethtool instead of hiding it */
+	return edev->ops->common->set_led(edev->cdev, led_state);
+}
+
 static const struct ethtool_ops qede_ethtool_ops = {
 	.get_settings = qede_get_settings,
 	.set_settings = qede_set_settings,
 	.get_drvinfo = qede_get_drvinfo,
 	.get_msglevel = qede_get_msglevel,
 	.set_msglevel = qede_set_msglevel,
+	.nway_reset = qede_nway_reset,
 	.get_link = qede_get_link,
+	.get_ringparam = qede_get_ringparam,
+	.set_ringparam = qede_set_ringparam,
+	.get_pauseparam = qede_get_pauseparam,
+	.set_pauseparam = qede_set_pauseparam,
 	.get_strings = qede_get_strings,
+	.set_phys_id = qede_set_phys_id,
 	.get_ethtool_stats = qede_get_ethtool_stats,
 	.get_sset_count = qede_get_sset_count,
 
+	.get_channels = qede_get_channels,
+	.set_channels = qede_set_channels,
 };
 
 void qede_set_ethtool_ops(struct net_device *dev)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index f4657a2e730a..6237f10b5119 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1502,8 +1502,11 @@ static int qede_set_num_queues(struct qede_dev *edev)
 	u16 rss_num;
 
 	/* Setup queues according to possible resources*/
-	rss_num = netif_get_num_default_rss_queues() *
-		  edev->dev_info.common.num_hwfns;
+	if (edev->req_rss)
+		rss_num = edev->req_rss;
+	else
+		rss_num = netif_get_num_default_rss_queues() *
+			  edev->dev_info.common.num_hwfns;
 
 	rss_num = min_t(u16, QEDE_MAX_RSS_CNT(edev), rss_num);
 
diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index f9dee7436e81..9fbe92ac225b 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -206,6 +206,7 @@ enum CCC_BIT {
 	CCC_OPC_RESET	= 0x00000000,
 	CCC_OPC_CONFIG	= 0x00000001,
 	CCC_OPC_OPERATION = 0x00000002,
+	CCC_GAC		= 0x00000080,
 	CCC_DTSR	= 0x00000100,
 	CCC_CSEL	= 0x00030000,
 	CCC_CSEL_HPB	= 0x00010000,
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 990dc55cdada..1cf12264861c 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -115,12 +115,15 @@ static void ravb_read_mac_address(struct net_device *ndev, const u8 *mac)
 	if (mac) {
 		ether_addr_copy(ndev->dev_addr, mac);
 	} else {
-		ndev->dev_addr[0] = (ravb_read(ndev, MAHR) >> 24);
-		ndev->dev_addr[1] = (ravb_read(ndev, MAHR) >> 16) & 0xFF;
-		ndev->dev_addr[2] = (ravb_read(ndev, MAHR) >> 8) & 0xFF;
-		ndev->dev_addr[3] = (ravb_read(ndev, MAHR) >> 0) & 0xFF;
-		ndev->dev_addr[4] = (ravb_read(ndev, MALR) >> 8) & 0xFF;
-		ndev->dev_addr[5] = (ravb_read(ndev, MALR) >> 0) & 0xFF;
+		u32 mahr = ravb_read(ndev, MAHR);
+		u32 malr = ravb_read(ndev, MALR);
+
+		ndev->dev_addr[0] = (mahr >> 24) & 0xFF;
+		ndev->dev_addr[1] = (mahr >> 16) & 0xFF;
+		ndev->dev_addr[2] = (mahr >>  8) & 0xFF;
+		ndev->dev_addr[3] = (mahr >>  0) & 0xFF;
+		ndev->dev_addr[4] = (malr >>  8) & 0xFF;
+		ndev->dev_addr[5] = (malr >>  0) & 0xFF;
 	}
 }
 
@@ -1227,11 +1230,12 @@ static int ravb_open(struct net_device *ndev)
 	/* Device init */
 	error = ravb_dmac_init(ndev);
 	if (error)
-		goto out_free_irq;
+		goto out_free_irq2;
 	ravb_emac_init(ndev);
 
 	/* Initialise PTP Clock driver */
-	ravb_ptp_init(ndev, priv->pdev);
+	if (priv->chip_id == RCAR_GEN2)
+		ravb_ptp_init(ndev, priv->pdev);
 
 	netif_tx_start_all_queues(ndev);
 
@@ -1244,10 +1248,13 @@ static int ravb_open(struct net_device *ndev)
 
 out_ptp_stop:
 	/* Stop PTP Clock driver */
-	ravb_ptp_stop(ndev);
+	if (priv->chip_id == RCAR_GEN2)
+		ravb_ptp_stop(ndev);
+out_free_irq2:
+	if (priv->chip_id == RCAR_GEN3)
+		free_irq(priv->emac_irq, ndev);
 out_free_irq:
 	free_irq(ndev->irq, ndev);
-	free_irq(priv->emac_irq, ndev);
 out_napi_off:
 	napi_disable(&priv->napi[RAVB_NC]);
 	napi_disable(&priv->napi[RAVB_BE]);
@@ -1476,7 +1483,8 @@ static int ravb_close(struct net_device *ndev)
 	ravb_write(ndev, 0, TIC);
 
 	/* Stop PTP Clock driver */
-	ravb_ptp_stop(ndev);
+	if (priv->chip_id == RCAR_GEN2)
+		ravb_ptp_stop(ndev);
 
 	/* Set the config mode to stop the AVB-DMAC's processes */
 	if (ravb_stop_dma(ndev) < 0)
@@ -1656,7 +1664,9 @@ static int ravb_mdio_release(struct ravb_private *priv)
 static const struct of_device_id ravb_match_table[] = {
 	{ .compatible = "renesas,etheravb-r8a7790", .data = (void *)RCAR_GEN2 },
 	{ .compatible = "renesas,etheravb-r8a7794", .data = (void *)RCAR_GEN2 },
+	{ .compatible = "renesas,etheravb-rcar-gen2", .data = (void *)RCAR_GEN2 },
 	{ .compatible = "renesas,etheravb-r8a7795", .data = (void *)RCAR_GEN3 },
+	{ .compatible = "renesas,etheravb-rcar-gen3", .data = (void *)RCAR_GEN3 },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, ravb_match_table);
@@ -1781,8 +1791,16 @@ static int ravb_probe(struct platform_device *pdev)
 	ndev->ethtool_ops = &ravb_ethtool_ops;
 
 	/* Set AVB config mode */
-	ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | CCC_OPC_CONFIG,
-		   CCC);
+	if (chip_id == RCAR_GEN2) {
+		ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) |
+			   CCC_OPC_CONFIG, CCC);
+		/* Set CSEL value */
+		ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) |
+			   CCC_CSEL_HPB, CCC);
+	} else {
+		ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) |
+			   CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB, CCC);
+	}
 
 	/* Set CSEL value */
 	ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) | CCC_CSEL_HPB,
@@ -1814,6 +1832,10 @@ static int ravb_probe(struct platform_device *pdev)
 	/* Initialise HW timestamp list */
 	INIT_LIST_HEAD(&priv->ts_skb_list);
 
+	/* Initialise PTP Clock driver */
+	if (chip_id != RCAR_GEN2)
+		ravb_ptp_init(ndev, pdev);
+
 	/* Debug message level */
 	priv->msg_enable = RAVB_DEF_MSG_ENABLE;
 
@@ -1855,6 +1877,10 @@ out_napi_del:
 out_dma_free:
 	dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat,
 			  priv->desc_bat_dma);
+
+	/* Stop PTP Clock driver */
+	if (chip_id != RCAR_GEN2)
+		ravb_ptp_stop(ndev);
 out_release:
 	if (ndev)
 		free_netdev(ndev);
@@ -1869,6 +1895,10 @@ static int ravb_remove(struct platform_device *pdev)
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct ravb_private *priv = netdev_priv(ndev);
 
+	/* Stop PTP Clock driver */
+	if (priv->chip_id != RCAR_GEN2)
+		ravb_ptp_stop(ndev);
+
 	dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat,
 			  priv->desc_bat_dma);
 	/* Set reset mode */
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index e7bab7909ed9..67cd24312c11 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -449,6 +449,109 @@ static void sh_eth_set_duplex(struct net_device *ndev)
 		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_DM, ECMR);
 }
 
+static void sh_eth_chip_reset(struct net_device *ndev)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+
+	/* reset device: write ARSTR_ARSTR to the TSU ARSTR register, wait 1 ms */
+	sh_eth_tsu_write(mdp, ARSTR_ARSTR, ARSTR);
+	mdelay(1);
+}
+
+/* Program GECMR to match mdp->speed (10, 100 or 1000) */
+static void sh_eth_set_rate_gether(struct net_device *ndev)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+	u32 gecmr;
+
+	/* Any other speed leaves the register untouched */
+	if (mdp->speed == 10)
+		gecmr = GECMR_10;
+	else if (mdp->speed == 100)
+		gecmr = GECMR_100;
+	else if (mdp->speed == 1000)
+		gecmr = GECMR_1000;
+	else
+		return;
+
+	sh_eth_write(ndev, gecmr, GECMR);
+}
+
+#ifdef CONFIG_OF
+/* R7S72100: SH_ETH_REG_FAST_RZ register layout, sh_eth_chip_reset() reset */
+static struct sh_eth_cpu_data r7s72100_data = {
+	.chip_reset	= sh_eth_chip_reset,
+	.set_duplex	= sh_eth_set_duplex,
+
+	.register_type	= SH_ETH_REG_FAST_RZ,
+
+	.ecsr_value	= ECSR_ICD,
+	.ecsipr_value	= ECSIPR_ICDIP,
+	.eesipr_value	= 0xff7f009f,
+
+	.tx_check	= EESR_TC1 | EESR_FTC,
+	.eesr_err_check	= EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT |
+			  EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE |
+			  EESR_TDE | EESR_ECI,
+	.fdr_value	= 0x0000070f,
+
+	.no_psr		= 1,
+	.apr		= 1,
+	.mpr		= 1,
+	.tpauser	= 1,
+	.hw_swap	= 1,
+	.rpadir		= 1,
+	.rpadir_value   = 2 << 16,
+	.no_trimd	= 1,
+	.no_ade		= 1,
+	.hw_crc		= 1,
+	.tsu		= 1,
+	.shift_rd0	= 1,
+};
+
+static void sh_eth_chip_reset_r8a7740(struct net_device *ndev)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+
+	/* reset device (same ARSTR write + 1 ms wait), then re-select MII */
+	sh_eth_tsu_write(mdp, ARSTR_ARSTR, ARSTR);
+	mdelay(1);
+
+	sh_eth_select_mii(ndev);
+}
+
+/* R8A7740: SH_ETH_REG_GIGABIT register layout; reset also re-selects MII */
+static struct sh_eth_cpu_data r8a7740_data = {
+	.chip_reset	= sh_eth_chip_reset_r8a7740,
+	.set_duplex	= sh_eth_set_duplex,
+	.set_rate	= sh_eth_set_rate_gether,
+
+	.register_type	= SH_ETH_REG_GIGABIT,
+
+	.ecsr_value	= ECSR_ICD | ECSR_MPD,
+	.ecsipr_value	= ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
+	.eesipr_value	= DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff,
+
+	.tx_check	= EESR_TC1 | EESR_FTC,
+	.eesr_err_check	= EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT |
+			  EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE |
+			  EESR_TDE | EESR_ECI,
+	.fdr_value	= 0x0000070f,
+
+	.apr		= 1,
+	.mpr		= 1,
+	.tpauser	= 1,
+	.bculr		= 1,
+	.hw_swap	= 1,
+	.rpadir		= 1,
+	.rpadir_value   = 2 << 16,
+	.no_trimd	= 1,
+	.no_ade		= 1,
+	.tsu		= 1,
+	.select_mii	= 1,
+	.shift_rd0	= 1,
+};
+
 /* There is CPU dependent code */
 static void sh_eth_set_rate_r8a777x(struct net_device *ndev)
 {
@@ -514,6 +617,7 @@ static struct sh_eth_cpu_data r8a779x_data = {
 	.hw_swap	= 1,
 	.rmiimode	= 1,
 };
+#endif /* CONFIG_OF */
 
 static void sh_eth_set_rate_sh7724(struct net_device *ndev)
 {
@@ -671,34 +775,6 @@ static struct sh_eth_cpu_data sh7757_data_giga = {
 	.tsu		= 1,
 };
 
-static void sh_eth_chip_reset(struct net_device *ndev)
-{
-	struct sh_eth_private *mdp = netdev_priv(ndev);
-
-	/* reset device */
-	sh_eth_tsu_write(mdp, ARSTR_ARSTR, ARSTR);
-	mdelay(1);
-}
-
-static void sh_eth_set_rate_gether(struct net_device *ndev)
-{
-	struct sh_eth_private *mdp = netdev_priv(ndev);
-
-	switch (mdp->speed) {
-	case 10: /* 10BASE */
-		sh_eth_write(ndev, GECMR_10, GECMR);
-		break;
-	case 100:/* 100BASE */
-		sh_eth_write(ndev, GECMR_100, GECMR);
-		break;
-	case 1000: /* 1000BASE */
-		sh_eth_write(ndev, GECMR_1000, GECMR);
-		break;
-	default:
-		break;
-	}
-}
-
 /* SH7734 */
 static struct sh_eth_cpu_data sh7734_data = {
 	.chip_reset	= sh_eth_chip_reset,
@@ -756,80 +832,6 @@ static struct sh_eth_cpu_data sh7763_data = {
 	.irq_flags	= IRQF_SHARED,
 };
 
-static void sh_eth_chip_reset_r8a7740(struct net_device *ndev)
-{
-	struct sh_eth_private *mdp = netdev_priv(ndev);
-
-	/* reset device */
-	sh_eth_tsu_write(mdp, ARSTR_ARSTR, ARSTR);
-	mdelay(1);
-
-	sh_eth_select_mii(ndev);
-}
-
-/* R8A7740 */
-static struct sh_eth_cpu_data r8a7740_data = {
-	.chip_reset	= sh_eth_chip_reset_r8a7740,
-	.set_duplex	= sh_eth_set_duplex,
-	.set_rate	= sh_eth_set_rate_gether,
-
-	.register_type	= SH_ETH_REG_GIGABIT,
-
-	.ecsr_value	= ECSR_ICD | ECSR_MPD,
-	.ecsipr_value	= ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
-	.eesipr_value	= DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff,
-
-	.tx_check	= EESR_TC1 | EESR_FTC,
-	.eesr_err_check	= EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT |
-			  EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE |
-			  EESR_TDE | EESR_ECI,
-	.fdr_value	= 0x0000070f,
-
-	.apr		= 1,
-	.mpr		= 1,
-	.tpauser	= 1,
-	.bculr		= 1,
-	.hw_swap	= 1,
-	.rpadir		= 1,
-	.rpadir_value   = 2 << 16,
-	.no_trimd	= 1,
-	.no_ade		= 1,
-	.tsu		= 1,
-	.select_mii	= 1,
-	.shift_rd0	= 1,
-};
-
-/* R7S72100 */
-static struct sh_eth_cpu_data r7s72100_data = {
-	.chip_reset	= sh_eth_chip_reset,
-	.set_duplex	= sh_eth_set_duplex,
-
-	.register_type	= SH_ETH_REG_FAST_RZ,
-
-	.ecsr_value	= ECSR_ICD,
-	.ecsipr_value	= ECSIPR_ICDIP,
-	.eesipr_value	= 0xff7f009f,
-
-	.tx_check	= EESR_TC1 | EESR_FTC,
-	.eesr_err_check	= EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT |
-			  EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE |
-			  EESR_TDE | EESR_ECI,
-	.fdr_value	= 0x0000070f,
-
-	.no_psr		= 1,
-	.apr		= 1,
-	.mpr		= 1,
-	.tpauser	= 1,
-	.hw_swap	= 1,
-	.rpadir		= 1,
-	.rpadir_value   = 2 << 16,
-	.no_trimd	= 1,
-	.no_ade		= 1,
-	.hw_crc		= 1,
-	.tsu		= 1,
-	.shift_rd0	= 1,
-};
-
 static struct sh_eth_cpu_data sh7619_data = {
 	.register_type	= SH_ETH_REG_FAST_SH3_SH2,
 
@@ -987,12 +989,15 @@ static void read_mac_address(struct net_device *ndev, unsigned char *mac)
 	if (mac[0] || mac[1] || mac[2] || mac[3] || mac[4] || mac[5]) {
 		memcpy(ndev->dev_addr, mac, ETH_ALEN);
 	} else {
-		ndev->dev_addr[0] = (sh_eth_read(ndev, MAHR) >> 24);
-		ndev->dev_addr[1] = (sh_eth_read(ndev, MAHR) >> 16) & 0xFF;
-		ndev->dev_addr[2] = (sh_eth_read(ndev, MAHR) >> 8) & 0xFF;
-		ndev->dev_addr[3] = (sh_eth_read(ndev, MAHR) & 0xFF);
-		ndev->dev_addr[4] = (sh_eth_read(ndev, MALR) >> 8) & 0xFF;
-		ndev->dev_addr[5] = (sh_eth_read(ndev, MALR) & 0xFF);
+		u32 mahr = sh_eth_read(ndev, MAHR);
+		u32 malr = sh_eth_read(ndev, MALR);
+
+		ndev->dev_addr[0] = (mahr >> 24) & 0xFF;
+		ndev->dev_addr[1] = (mahr >> 16) & 0xFF;
+		ndev->dev_addr[2] = (mahr >>  8) & 0xFF;
+		ndev->dev_addr[3] = (mahr >>  0) & 0xFF;
+		ndev->dev_addr[4] = (malr >>  8) & 0xFF;
+		ndev->dev_addr[5] = (malr >>  0) & 0xFF;
 	}
 }
 
@@ -1008,56 +1013,34 @@ struct bb_info {
 	void (*set_gate)(void *addr);
 	struct mdiobb_ctrl ctrl;
 	void *addr;
-	u32 mmd_msk;/* MMD */
-	u32 mdo_msk;
-	u32 mdi_msk;
-	u32 mdc_msk;
 };
 
-/* PHY bit set */
-static void bb_set(void *addr, u32 msk)
+static void sh_mdio_ctrl(struct mdiobb_ctrl *ctrl, u32 mask, int set)
 {
-	iowrite32(ioread32(addr) | msk, addr);
-}
+	struct bb_info *bitbang = container_of(ctrl, struct bb_info, ctrl);
+	u32 pir;
 
-/* PHY bit clear */
-static void bb_clr(void *addr, u32 msk)
-{
-	iowrite32((ioread32(addr) & ~msk), addr);
-}
+	if (bitbang->set_gate)
+		bitbang->set_gate(bitbang->addr);
 
-/* PHY bit read */
-static int bb_read(void *addr, u32 msk)
-{
-	return (ioread32(addr) & msk) != 0;
+	pir = ioread32(bitbang->addr);
+	if (set)
+		pir |=  mask;
+	else
+		pir &= ~mask;
+	iowrite32(pir, bitbang->addr);
 }
 
 /* Data I/O pin control */
 static void sh_mmd_ctrl(struct mdiobb_ctrl *ctrl, int bit)
 {
-	struct bb_info *bitbang = container_of(ctrl, struct bb_info, ctrl);
-
-	if (bitbang->set_gate)
-		bitbang->set_gate(bitbang->addr);
-
-	if (bit)
-		bb_set(bitbang->addr, bitbang->mmd_msk);
-	else
-		bb_clr(bitbang->addr, bitbang->mmd_msk);
+	sh_mdio_ctrl(ctrl, PIR_MMD, bit);
 }
 
 /* Set bit data*/
 static void sh_set_mdio(struct mdiobb_ctrl *ctrl, int bit)
 {
-	struct bb_info *bitbang = container_of(ctrl, struct bb_info, ctrl);
-
-	if (bitbang->set_gate)
-		bitbang->set_gate(bitbang->addr);
-
-	if (bit)
-		bb_set(bitbang->addr, bitbang->mdo_msk);
-	else
-		bb_clr(bitbang->addr, bitbang->mdo_msk);
+	sh_mdio_ctrl(ctrl, PIR_MDO, bit);
 }
 
 /* Get bit data*/
@@ -1068,21 +1051,13 @@ static int sh_get_mdio(struct mdiobb_ctrl *ctrl)
 	if (bitbang->set_gate)
 		bitbang->set_gate(bitbang->addr);
 
-	return bb_read(bitbang->addr, bitbang->mdi_msk);
+	return (ioread32(bitbang->addr) & PIR_MDI) != 0;
 }
 
 /* MDC pin control */
 static void sh_mdc_ctrl(struct mdiobb_ctrl *ctrl, int bit)
 {
-	struct bb_info *bitbang = container_of(ctrl, struct bb_info, ctrl);
-
-	if (bitbang->set_gate)
-		bitbang->set_gate(bitbang->addr);
-
-	if (bit)
-		bb_set(bitbang->addr, bitbang->mdc_msk);
-	else
-		bb_clr(bitbang->addr, bitbang->mdc_msk);
+	sh_mdio_ctrl(ctrl, PIR_MDC, bit);
 }
 
 /* mdio bus control struct */
@@ -2894,10 +2869,6 @@ static int sh_mdio_init(struct sh_eth_private *mdp,
 	/* bitbang init */
 	bitbang->addr = mdp->addr + mdp->reg_offset[PIR];
 	bitbang->set_gate = pd->set_mdio_gate;
-	bitbang->mdi_msk = PIR_MDI;
-	bitbang->mdo_msk = PIR_MDO;
-	bitbang->mmd_msk = PIR_MMD;
-	bitbang->mdc_msk = PIR_MDC;
 	bitbang->ctrl.ops = &bb_ops;
 
 	/* MII controller setting */
@@ -3277,13 +3248,6 @@ static struct platform_device_id sh_eth_id_table[] = {
 	{ "sh7757-ether", (kernel_ulong_t)&sh7757_data },
 	{ "sh7757-gether", (kernel_ulong_t)&sh7757_data_giga },
 	{ "sh7763-gether", (kernel_ulong_t)&sh7763_data },
-	{ "r7s72100-ether", (kernel_ulong_t)&r7s72100_data },
-	{ "r8a7740-gether", (kernel_ulong_t)&r8a7740_data },
-	{ "r8a777x-ether", (kernel_ulong_t)&r8a777x_data },
-	{ "r8a7790-ether", (kernel_ulong_t)&r8a779x_data },
-	{ "r8a7791-ether", (kernel_ulong_t)&r8a779x_data },
-	{ "r8a7793-ether", (kernel_ulong_t)&r8a779x_data },
-	{ "r8a7794-ether", (kernel_ulong_t)&r8a779x_data },
 	{ }
 };
 MODULE_DEVICE_TABLE(platform, sh_eth_id_table);
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index bc6d21b471be..c4a0e8a967dd 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -181,13 +181,6 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx)
 		MCDI_WORD(outbuf, GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID);
 
 	if (!(nic_data->datapath_caps &
-	      (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN))) {
-		netif_err(efx, drv, efx->net_dev,
-			  "current firmware does not support TSO\n");
-		return -ENODEV;
-	}
-
-	if (!(nic_data->datapath_caps &
 	      (1 << MC_CMD_GET_CAPABILITIES_OUT_RX_PREFIX_LEN_14_LBN))) {
 		netif_err(efx, probe, efx->net_dev,
 			  "current firmware does not support an RX prefix\n");
@@ -1797,6 +1790,12 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 			     ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload,
 			     ESF_DZ_TX_OPTION_IP_CSUM, csum_offload);
 	tx_queue->write_count = 1;
+
+	if (nic_data->datapath_caps &
+	    (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) {
+		tx_queue->tso_version = 1;
+	}
+
 	wmb();
 	efx_ef10_push_tx_desc(tx_queue, txd);
 
@@ -2375,8 +2374,19 @@ static int efx_ef10_ev_init(struct efx_channel *channel)
 				    1 << MC_CMD_WORKAROUND_EXT_OUT_FLR_DONE_LBN) {
 					netif_info(efx, drv, efx->net_dev,
 						   "other functions on NIC have been reset\n");
-					/* MC's boot count has incremented */
-					++nic_data->warm_boot_count;
+
+					/* With MCFW v4.6.x and earlier, the
+					 * boot count will have incremented,
+					 * so re-read the warm_boot_count
+					 * value now to ensure this function
+					 * doesn't think it has changed next
+					 * time it checks.
+					 */
+					rc = efx_ef10_get_warm_boot_count(efx);
+					if (rc >= 0) {
+						nic_data->warm_boot_count = rc;
+						rc = 0;
+					}
 				}
 				nic_data->workaround_26807 = true;
 			} else if (rc == -EPERM) {
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 4e82bcfbe3e0..b405349a570c 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -2784,6 +2784,12 @@ static const struct pci_device_id efx_pci_table[] = {
 	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
 	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923),  /* SFC9140 PF */
 	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
+	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1923),  /* SFC9140 VF */
+	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
+	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0a03),  /* SFC9220 PF */
+	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
+	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1a03),  /* SFC9220 VF */
+	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
 	{0}			/* end of list */
 };
 
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index a8ddd122f685..38c422321cda 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -182,6 +182,7 @@ struct efx_tx_buffer {
  *
  * @efx: The associated Efx NIC
  * @queue: DMA queue number
+ * @tso_version: Version of TSO in use for this queue.
  * @channel: The associated channel
  * @core_txq: The networking core TX queue structure
  * @buffer: The software buffer ring
@@ -228,6 +229,7 @@ struct efx_tx_queue {
 	/* Members which don't change on the fast path */
 	struct efx_nic *efx ____cacheline_aligned_in_smp;
 	unsigned queue;
+	unsigned int tso_version;
 	struct efx_channel *channel;
 	struct netdev_queue *core_txq;
 	struct efx_tx_buffer *buffer;
@@ -1502,8 +1504,9 @@ static inline struct efx_rx_buffer *efx_rx_buffer(struct efx_rx_queue *rx_queue,
  * same cycle, the XMAC can miss the IPG altogether.  We work around
  * this by adding a further 16 bytes.
  */
+#define EFX_FRAME_PAD	16
 #define EFX_MAX_FRAME_LEN(mtu) \
-	((((mtu) + ETH_HLEN + VLAN_HLEN + 4/* FCS */ + 7) & ~7) + 16)
+	(ALIGN(((mtu) + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN + EFX_FRAME_PAD), 8))
 
 static inline bool efx_xmit_with_hwtstamp(struct sk_buff *skb)
 {
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 67f6afaa022f..f7a0ec1bca97 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -1010,13 +1010,17 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
 
 /* Parse the SKB header and initialise state. */
 static int tso_start(struct tso_state *st, struct efx_nic *efx,
+		     struct efx_tx_queue *tx_queue,
 		     const struct sk_buff *skb)
 {
-	bool use_opt_desc = efx_nic_rev(efx) >= EFX_REV_HUNT_A0;
 	struct device *dma_dev = &efx->pci_dev->dev;
 	unsigned int header_len, in_len;
+	bool use_opt_desc = false;
 	dma_addr_t dma_addr;
 
+	if (tx_queue->tso_version == 1)
+		use_opt_desc = true;
+
 	st->ip_off = skb_network_header(skb) - skb->data;
 	st->tcp_off = skb_transport_header(skb) - skb->data;
 	header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u);
@@ -1271,7 +1275,7 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 	/* Find the packet protocol and sanity-check it */
 	state.protocol = efx_tso_check_protocol(skb);
 
-	rc = tso_start(&state, efx, skb);
+	rc = tso_start(&state, efx, tx_queue, skb);
 	if (rc)
 		goto mem_err;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 623c6ed8764a..f4518bc2cd28 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -137,6 +137,31 @@ struct stmmac_extra_stats {
 	unsigned long pcs_link;
 	unsigned long pcs_duplex;
 	unsigned long pcs_speed;
+	/* debug register */
+	unsigned long mtl_tx_status_fifo_full;
+	unsigned long mtl_tx_fifo_not_empty;
+	unsigned long mmtl_fifo_ctrl;
+	unsigned long mtl_tx_fifo_read_ctrl_write;
+	unsigned long mtl_tx_fifo_read_ctrl_wait;
+	unsigned long mtl_tx_fifo_read_ctrl_read;
+	unsigned long mtl_tx_fifo_read_ctrl_idle;
+	unsigned long mac_tx_in_pause;
+	unsigned long mac_tx_frame_ctrl_xfer;
+	unsigned long mac_tx_frame_ctrl_idle;
+	unsigned long mac_tx_frame_ctrl_wait;
+	unsigned long mac_tx_frame_ctrl_pause;
+	unsigned long mac_gmii_tx_proto_engine;
+	unsigned long mtl_rx_fifo_fill_level_full;
+	unsigned long mtl_rx_fifo_fill_above_thresh;
+	unsigned long mtl_rx_fifo_fill_below_thresh;
+	unsigned long mtl_rx_fifo_fill_level_empty;
+	unsigned long mtl_rx_fifo_read_ctrl_flush;
+	unsigned long mtl_rx_fifo_read_ctrl_read_data;
+	unsigned long mtl_rx_fifo_read_ctrl_status;
+	unsigned long mtl_rx_fifo_read_ctrl_idle;
+	unsigned long mtl_rx_fifo_ctrl_active;
+	unsigned long mac_rx_frame_ctrl_fifo;
+	unsigned long mac_gmii_rx_proto_engine;
 };
 
 /* CSR Frequency Access Defines*/
@@ -408,6 +433,7 @@ struct stmmac_ops {
 	void (*set_eee_pls)(struct mac_device_info *hw, int link);
 	void (*ctrl_ane)(struct mac_device_info *hw, bool restart);
 	void (*get_adv)(struct mac_device_info *hw, struct rgmii_adv *adv);
+	void (*debug)(void __iomem *ioaddr, struct stmmac_extra_stats *x);
 };
 
 /* PTP and HW Timer helpers */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 82de68b1a452..36d3355f2fb0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -198,19 +198,19 @@ static int ipq806x_gmac_set_speed(struct ipq806x_gmac *gmac, unsigned int speed)
 	return 0;
 }
 
-static void *ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac)
+static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac)
 {
 	struct device *dev = &gmac->pdev->dev;
 
 	gmac->phy_mode = of_get_phy_mode(dev->of_node);
 	if (gmac->phy_mode < 0) {
 		dev_err(dev, "missing phy mode property\n");
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	if (of_property_read_u32(dev->of_node, "qcom,id", &gmac->id) < 0) {
 		dev_err(dev, "missing qcom id property\n");
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	/* The GMACs are called 1 to 4 in the documentation, but to simplify the
@@ -219,13 +219,13 @@ static void *ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac)
 	 */
 	if (gmac->id < 0 || gmac->id > 3) {
 		dev_err(dev, "invalid gmac id\n");
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	gmac->core_clk = devm_clk_get(dev, "stmmaceth");
 	if (IS_ERR(gmac->core_clk)) {
 		dev_err(dev, "missing stmmaceth clk property\n");
-		return gmac->core_clk;
+		return PTR_ERR(gmac->core_clk);
 	}
 	clk_set_rate(gmac->core_clk, 266000000);
 
@@ -234,18 +234,16 @@ static void *ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac)
 							   "qcom,nss-common");
 	if (IS_ERR(gmac->nss_common)) {
 		dev_err(dev, "missing nss-common node\n");
-		return gmac->nss_common;
+		return PTR_ERR(gmac->nss_common);
 	}
 
 	/* Setup the register map for the qsgmii csr registers */
 	gmac->qsgmii_csr = syscon_regmap_lookup_by_phandle(dev->of_node,
 							   "qcom,qsgmii-csr");
-	if (IS_ERR(gmac->qsgmii_csr)) {
+	if (IS_ERR(gmac->qsgmii_csr))
 		dev_err(dev, "missing qsgmii-csr node\n");
-		return gmac->qsgmii_csr;
-	}
 
-	return NULL;
+	return PTR_ERR_OR_ZERO(gmac->qsgmii_csr);
 }
 
 static void ipq806x_gmac_fix_mac_speed(void *priv, unsigned int speed)
@@ -262,7 +260,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct ipq806x_gmac *gmac;
 	int val;
-	void *err;
+	int err;
 
 	val = stmmac_get_platform_resources(pdev, &stmmac_res);
 	if (val)
@@ -279,9 +277,9 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 	gmac->pdev = pdev;
 
 	err = ipq806x_gmac_of_parse(gmac);
-	if (IS_ERR(err)) {
+	if (err) {
 		dev_err(dev, "device tree parsing error\n");
-		return PTR_ERR(err);
+		return err;
 	}
 
 	regmap_write(gmac->qsgmii_csr, QSGMII_PCS_CAL_LCKDT_CTL,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index 7f6f4a4fcc70..58c05acc2aab 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -299,16 +299,17 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac,
 	if (IS_PHY_IF_MODE_GBIT(dwmac->interface)) {
 		const char *rs;
 
+		dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN;
+
 		err = of_property_read_string(np, "st,tx-retime-src", &rs);
 		if (err < 0) {
 			dev_warn(dev, "Use internal clock source\n");
-			dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN;
-		} else if (!strcasecmp(rs, "clk_125")) {
-			dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125;
-		} else if (!strcasecmp(rs, "txclk")) {
-			dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK;
+		} else {
+			if (!strcasecmp(rs, "clk_125"))
+				dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125;
+			else if (!strcasecmp(rs, "txclk"))
+				dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK;
 		}
-
 		dwmac->speed = SPEED_1000;
 	}
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index b3fe0575ff6b..8831a053ac13 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -34,6 +34,7 @@
 #define GMAC_FLOW_CTRL		0x00000018	/* Flow Control */
 #define GMAC_VLAN_TAG		0x0000001c	/* VLAN Tag */
 #define GMAC_VERSION		0x00000020	/* GMAC CORE Version */
+#define GMAC_DEBUG		0x00000024	/* GMAC debug register */
 #define GMAC_WAKEUP_FILTER	0x00000028	/* Wake-up Frame Filter */
 
 #define GMAC_INT_STATUS		0x00000038	/* interrupt status register */
@@ -177,6 +178,47 @@ enum inter_frame_gap {
 #define GMAC_FLOW_CTRL_TFE	0x00000002	/* Tx Flow Control Enable */
 #define GMAC_FLOW_CTRL_FCB_BPA	0x00000001	/* Flow Control Busy ... */
 
+/* DEBUG Register defines */
+/* MTL TxStatus FIFO */
+#define GMAC_DEBUG_TXSTSFSTS	BIT(25)	/* MTL TxStatus FIFO Full Status */
+#define GMAC_DEBUG_TXFSTS	BIT(24) /* MTL Tx FIFO Not Empty Status */
+#define GMAC_DEBUG_TWCSTS	BIT(22) /* MTL Tx FIFO Write Controller */
+/* MTL Tx FIFO Read Controller Status */
+#define GMAC_DEBUG_TRCSTS_MASK	GENMASK(21, 20)
+#define GMAC_DEBUG_TRCSTS_SHIFT	20
+#define GMAC_DEBUG_TRCSTS_IDLE	0
+#define GMAC_DEBUG_TRCSTS_READ	1
+#define GMAC_DEBUG_TRCSTS_TXW	2
+#define GMAC_DEBUG_TRCSTS_WRITE	3
+#define GMAC_DEBUG_TXPAUSED	BIT(19) /* MAC Transmitter in PAUSE */
+/* MAC Transmit Frame Controller Status */
+#define GMAC_DEBUG_TFCSTS_MASK	GENMASK(18, 17)
+#define GMAC_DEBUG_TFCSTS_SHIFT	17
+#define GMAC_DEBUG_TFCSTS_IDLE	0
+#define GMAC_DEBUG_TFCSTS_WAIT	1
+#define GMAC_DEBUG_TFCSTS_GEN_PAUSE	2
+#define GMAC_DEBUG_TFCSTS_XFER	3
+/* MAC GMII or MII Transmit Protocol Engine Status */
+#define GMAC_DEBUG_TPESTS	BIT(16)
+#define GMAC_DEBUG_RXFSTS_MASK	GENMASK(9, 8) /* MTL Rx FIFO Fill-level */
+#define GMAC_DEBUG_RXFSTS_SHIFT	8
+#define GMAC_DEBUG_RXFSTS_EMPTY	0
+#define GMAC_DEBUG_RXFSTS_BT	1
+#define GMAC_DEBUG_RXFSTS_AT	2
+#define GMAC_DEBUG_RXFSTS_FULL	3
+#define GMAC_DEBUG_RRCSTS_MASK	GENMASK(6, 5) /* MTL Rx FIFO Read Controller */
+#define GMAC_DEBUG_RRCSTS_SHIFT	5
+#define GMAC_DEBUG_RRCSTS_IDLE	0
+#define GMAC_DEBUG_RRCSTS_RDATA	1
+#define GMAC_DEBUG_RRCSTS_RSTAT	2
+#define GMAC_DEBUG_RRCSTS_FLUSH	3
+#define GMAC_DEBUG_RWCSTS	BIT(4) /* MTL Rx FIFO Write Controller Active */
+/* MAC Receive Frame Controller FIFO Status */
+#define GMAC_DEBUG_RFCFCSTS_MASK	GENMASK(2, 1)
+#define GMAC_DEBUG_RFCFCSTS_SHIFT	1
+/* MAC GMII or MII Receive Protocol Engine Status */
+#define GMAC_DEBUG_RPESTS	BIT(0)
+
 /*--- DMA BLOCK defines ---*/
 /* DMA Bus Mode register defines */
 #define DMA_BUS_MODE_SFT_RESET	0x00000001	/* Software Reset */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index 371a669d69fd..c2941172f6d1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -397,6 +397,80 @@ static void dwmac1000_get_adv(struct mac_device_info *hw, struct rgmii_adv *adv)
 	adv->lp_pause = (value & GMAC_ANE_PSE) >> GMAC_ANE_PSE_SHIFT;
 }
 
+/* Sample the GMAC debug register and fold its state into @x. Flag bits bump
+ * their counter when set; two-bit state fields are decoded even when zero
+ * so the idle/empty states are counted. mac_rx_frame_ctrl_fifo holds the
+ * last non-zero value of its field rather than a count.
+ */
+static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x)
+{
+	u32 value = readl(ioaddr + GMAC_DEBUG);
+	u32 state;
+
+	if (value & GMAC_DEBUG_TXSTSFSTS)
+		x->mtl_tx_status_fifo_full++;
+	if (value & GMAC_DEBUG_TXFSTS)
+		x->mtl_tx_fifo_not_empty++;
+	if (value & GMAC_DEBUG_TWCSTS)
+		x->mmtl_fifo_ctrl++;
+
+	/* MTL Tx FIFO read controller */
+	state = (value & GMAC_DEBUG_TRCSTS_MASK) >> GMAC_DEBUG_TRCSTS_SHIFT;
+	if (state == GMAC_DEBUG_TRCSTS_WRITE)
+		x->mtl_tx_fifo_read_ctrl_write++;
+	else if (state == GMAC_DEBUG_TRCSTS_TXW)
+		x->mtl_tx_fifo_read_ctrl_wait++;
+	else if (state == GMAC_DEBUG_TRCSTS_READ)
+		x->mtl_tx_fifo_read_ctrl_read++;
+	else
+		x->mtl_tx_fifo_read_ctrl_idle++;
+
+	if (value & GMAC_DEBUG_TXPAUSED)
+		x->mac_tx_in_pause++;
+	/* MAC transmit frame controller */
+	state = (value & GMAC_DEBUG_TFCSTS_MASK) >> GMAC_DEBUG_TFCSTS_SHIFT;
+	if (state == GMAC_DEBUG_TFCSTS_XFER)
+		x->mac_tx_frame_ctrl_xfer++;
+	else if (state == GMAC_DEBUG_TFCSTS_GEN_PAUSE)
+		x->mac_tx_frame_ctrl_pause++;
+	else if (state == GMAC_DEBUG_TFCSTS_WAIT)
+		x->mac_tx_frame_ctrl_wait++;
+	else
+		x->mac_tx_frame_ctrl_idle++;
+
+	if (value & GMAC_DEBUG_TPESTS)
+		x->mac_gmii_tx_proto_engine++;
+	/* MTL Rx FIFO fill level: shifted by its own RXFSTS field offset */
+	state = (value & GMAC_DEBUG_RXFSTS_MASK) >> GMAC_DEBUG_RXFSTS_SHIFT;
+	if (state == GMAC_DEBUG_RXFSTS_FULL)
+		x->mtl_rx_fifo_fill_level_full++;
+	else if (state == GMAC_DEBUG_RXFSTS_AT)
+		x->mtl_rx_fifo_fill_above_thresh++;
+	else if (state == GMAC_DEBUG_RXFSTS_BT)
+		x->mtl_rx_fifo_fill_below_thresh++;
+	else
+		x->mtl_rx_fifo_fill_level_empty++;
+
+	/* MTL Rx FIFO read controller: RSTAT -> status, RDATA -> read_data */
+	state = (value & GMAC_DEBUG_RRCSTS_MASK) >> GMAC_DEBUG_RRCSTS_SHIFT;
+	if (state == GMAC_DEBUG_RRCSTS_FLUSH)
+		x->mtl_rx_fifo_read_ctrl_flush++;
+	else if (state == GMAC_DEBUG_RRCSTS_RSTAT)
+		x->mtl_rx_fifo_read_ctrl_status++;
+	else if (state == GMAC_DEBUG_RRCSTS_RDATA)
+		x->mtl_rx_fifo_read_ctrl_read_data++;
+	else
+		x->mtl_rx_fifo_read_ctrl_idle++;
+
+	if (value & GMAC_DEBUG_RWCSTS)
+		x->mtl_rx_fifo_ctrl_active++;
+	if (value & GMAC_DEBUG_RFCFCSTS_MASK)
+		x->mac_rx_frame_ctrl_fifo = (value & GMAC_DEBUG_RFCFCSTS_MASK)
+					    >> GMAC_DEBUG_RFCFCSTS_SHIFT;
+	if (value & GMAC_DEBUG_RPESTS)
+		x->mac_gmii_rx_proto_engine++;
+}
+
 static const struct stmmac_ops dwmac1000_ops = {
 	.core_init = dwmac1000_core_init,
 	.rx_ipc = dwmac1000_rx_ipc_enable,
@@ -413,6 +487,7 @@ static const struct stmmac_ops dwmac1000_ops = {
 	.set_eee_pls = dwmac1000_set_eee_pls,
 	.ctrl_ane = dwmac1000_ctrl_ane,
 	.get_adv = dwmac1000_get_adv,
+	.debug = dwmac1000_debug,
 };
 
 struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 2e51b816a7e8..4c6486cc80fb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -136,6 +136,31 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 	STMMAC_STAT(irq_pcs_ane_n),
 	STMMAC_STAT(irq_pcs_link_n),
 	STMMAC_STAT(irq_rgmii_n),
+	/* DEBUG */
+	STMMAC_STAT(mtl_tx_status_fifo_full),
+	STMMAC_STAT(mtl_tx_fifo_not_empty),
+	STMMAC_STAT(mmtl_fifo_ctrl),
+	STMMAC_STAT(mtl_tx_fifo_read_ctrl_write),
+	STMMAC_STAT(mtl_tx_fifo_read_ctrl_wait),
+	STMMAC_STAT(mtl_tx_fifo_read_ctrl_read),
+	STMMAC_STAT(mtl_tx_fifo_read_ctrl_idle),
+	STMMAC_STAT(mac_tx_in_pause),
+	STMMAC_STAT(mac_tx_frame_ctrl_xfer),
+	STMMAC_STAT(mac_tx_frame_ctrl_idle),
+	STMMAC_STAT(mac_tx_frame_ctrl_wait),
+	STMMAC_STAT(mac_tx_frame_ctrl_pause),
+	STMMAC_STAT(mac_gmii_tx_proto_engine),
+	STMMAC_STAT(mtl_rx_fifo_fill_level_full),
+	STMMAC_STAT(mtl_rx_fifo_fill_above_thresh),
+	STMMAC_STAT(mtl_rx_fifo_fill_below_thresh),
+	STMMAC_STAT(mtl_rx_fifo_fill_level_empty),
+	STMMAC_STAT(mtl_rx_fifo_read_ctrl_flush),
+	STMMAC_STAT(mtl_rx_fifo_read_ctrl_read_data),
+	STMMAC_STAT(mtl_rx_fifo_read_ctrl_status),
+	STMMAC_STAT(mtl_rx_fifo_read_ctrl_idle),
+	STMMAC_STAT(mtl_rx_fifo_ctrl_active),
+	STMMAC_STAT(mac_rx_frame_ctrl_fifo),
+	STMMAC_STAT(mac_gmii_rx_proto_engine),
 };
 #define STMMAC_STATS_LEN ARRAY_SIZE(stmmac_gstrings_stats)
 
@@ -497,6 +522,11 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
 			if (val)
 				priv->xstats.phy_eee_wakeup_error_n = val;
 		}
+
+		if (priv->hw->mac->debug &&
+		    priv->synopsys_id >= DWMAC_CORE_3_50)
+			priv->hw->mac->debug(priv->ioaddr,
+					     &priv->xstats);
 	}
 	for (i = 0; i < STMMAC_STATS_LEN; i++) {
 		char *p = (char *)priv + stmmac_gstrings_stats[i].stat_offset;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 64d8aa4e0cad..3c6549aee11d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -185,7 +185,7 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv)
 			priv->clk_csr = STMMAC_CSR_100_150M;
 		else if ((clk_rate >= CSR_F_150M) && (clk_rate < CSR_F_250M))
 			priv->clk_csr = STMMAC_CSR_150_250M;
-		else if ((clk_rate >= CSR_F_250M) && (clk_rate < CSR_F_300M))
+		else if ((clk_rate >= CSR_F_250M) && (clk_rate <= CSR_F_300M))
 			priv->clk_csr = STMMAC_CSR_250_300M;
 	}
 }
@@ -2232,6 +2232,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 
 			frame_len = priv->hw->desc->get_rx_frame_len(p, coe);
 
+			/*  check if frame_len fits the preallocated memory */
+			if (frame_len > priv->dma_buf_sz) {
+				priv->dev->stats.rx_length_errors++;
+				break;
+			}
+
 			/* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3
 			 * Type frames (LLC/LLC-SNAP)
 			 */
@@ -3102,6 +3108,7 @@ int stmmac_resume(struct net_device *ndev)
 	init_dma_desc_rings(ndev, GFP_ATOMIC);
 	stmmac_hw_setup(ndev, false);
 	stmmac_init_tx_coalesce(priv);
+	stmmac_set_rx_mode(ndev);
 
 	napi_enable(&priv->napi);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index ebf6abc4853f..bba670c42e37 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -138,7 +138,6 @@ int stmmac_mdio_reset(struct mii_bus *bus)
 
 #ifdef CONFIG_OF
 	if (priv->device->of_node) {
-		int reset_gpio, active_low;
 
 		if (data->reset_gpio < 0) {
 			struct device_node *np = priv->device->of_node;
@@ -154,24 +153,23 @@ int stmmac_mdio_reset(struct mii_bus *bus)
 						"snps,reset-active-low");
 			of_property_read_u32_array(np,
 				"snps,reset-delays-us", data->delays, 3);
-		}
 
-		reset_gpio = data->reset_gpio;
-		active_low = data->active_low;
+			if (gpio_request(data->reset_gpio, "mdio-reset"))
+				return 0;
+		}
 
-		if (!gpio_request(reset_gpio, "mdio-reset")) {
-			gpio_direction_output(reset_gpio, active_low ? 1 : 0);
-			if (data->delays[0])
-				msleep(DIV_ROUND_UP(data->delays[0], 1000));
+		gpio_direction_output(data->reset_gpio,
+				      data->active_low ? 1 : 0);
+		if (data->delays[0])
+			msleep(DIV_ROUND_UP(data->delays[0], 1000));
 
-			gpio_set_value(reset_gpio, active_low ? 0 : 1);
-			if (data->delays[1])
-				msleep(DIV_ROUND_UP(data->delays[1], 1000));
+		gpio_set_value(data->reset_gpio, data->active_low ? 0 : 1);
+		if (data->delays[1])
+			msleep(DIV_ROUND_UP(data->delays[1], 1000));
 
-			gpio_set_value(reset_gpio, active_low ? 1 : 0);
-			if (data->delays[2])
-				msleep(DIV_ROUND_UP(data->delays[2], 1000));
-		}
+		gpio_set_value(data->reset_gpio, data->active_low ? 1 : 0);
+		if (data->delays[2])
+			msleep(DIV_ROUND_UP(data->delays[2], 1000));
 	}
 #endif
 
diff --git a/drivers/net/ethernet/ti/cpsw-common.c b/drivers/net/ethernet/ti/cpsw-common.c
index c08be62bceba..1562ab4151e1 100644
--- a/drivers/net/ethernet/ti/cpsw-common.c
+++ b/drivers/net/ethernet/ti/cpsw-common.c
@@ -78,6 +78,9 @@ static int cpsw_am33xx_cm_get_macid(struct device *dev, u16 offset, int slave,
 
 int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr)
 {
+	if (of_machine_is_compatible("ti,dm8148"))
+		return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr);
+
 	if (of_machine_is_compatible("ti,am33xx"))
 		return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr);
 
diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h
index bb1bb72121c0..17a26a429b71 100644
--- a/drivers/net/ethernet/ti/netcp.h
+++ b/drivers/net/ethernet/ti/netcp.h
@@ -113,7 +113,7 @@ struct netcp_intf {
 #define	NETCP_PSDATA_LEN		KNAV_DMA_NUM_PS_WORDS
 struct netcp_packet {
 	struct sk_buff		*skb;
-	u32			*epib;
+	__le32			*epib;
 	u32			*psdata;
 	unsigned int		psdata_len;
 	struct netcp_intf	*netcp;
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index e5e20e734f21..92d08eb262c2 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -109,69 +109,80 @@ module_param(netcp_debug_level, int, 0);
 MODULE_PARM_DESC(netcp_debug_level, "Netcp debug level (NETIF_MSG bits) (0=none,...,16=all)");
 
 /* Helper functions - Get/Set */
-static void get_pkt_info(u32 *buff, u32 *buff_len, u32 *ndesc,
+static void get_pkt_info(dma_addr_t *buff, u32 *buff_len, dma_addr_t *ndesc,
 			 struct knav_dma_desc *desc)
 {
-	*buff_len = desc->buff_len;
-	*buff = desc->buff;
-	*ndesc = desc->next_desc;
+	*buff_len = le32_to_cpu(desc->buff_len);
+	*buff = le32_to_cpu(desc->buff);
+	*ndesc = le32_to_cpu(desc->next_desc);
 }
 
-static void get_pad_info(u32 *pad0, u32 *pad1, struct knav_dma_desc *desc)
+static void get_pad_info(u32 *pad0, u32 *pad1, u32 *pad2, struct knav_dma_desc *desc)
 {
-	*pad0 = desc->pad[0];
-	*pad1 = desc->pad[1];
+	*pad0 = le32_to_cpu(desc->pad[0]);
+	*pad1 = le32_to_cpu(desc->pad[1]);
+	*pad2 = le32_to_cpu(desc->pad[2]);
 }
 
-static void get_org_pkt_info(u32 *buff, u32 *buff_len,
+static void get_pad_ptr(void **padptr, struct knav_dma_desc *desc)
+{
+	u64 pad64;
+
+	pad64 = le32_to_cpu(desc->pad[0]) +
+		((u64)le32_to_cpu(desc->pad[1]) << 32);
+	*padptr = (void *)(uintptr_t)pad64;
+}
+
+static void get_org_pkt_info(dma_addr_t *buff, u32 *buff_len,
 			     struct knav_dma_desc *desc)
 {
-	*buff = desc->orig_buff;
-	*buff_len = desc->orig_len;
+	*buff = le32_to_cpu(desc->orig_buff);
+	*buff_len = le32_to_cpu(desc->orig_len);
 }
 
-static void get_words(u32 *words, int num_words, u32 *desc)
+static void get_words(dma_addr_t *words, int num_words, __le32 *desc)
 {
 	int i;
 
 	for (i = 0; i < num_words; i++)
-		words[i] = desc[i];
+		words[i] = le32_to_cpu(desc[i]);
 }
 
-static void set_pkt_info(u32 buff, u32 buff_len, u32 ndesc,
+static void set_pkt_info(dma_addr_t buff, u32 buff_len, u32 ndesc,
 			 struct knav_dma_desc *desc)
 {
-	desc->buff_len = buff_len;
-	desc->buff = buff;
-	desc->next_desc = ndesc;
+	desc->buff_len = cpu_to_le32(buff_len);
+	desc->buff = cpu_to_le32(buff);
+	desc->next_desc = cpu_to_le32(ndesc);
 }
 
 static void set_desc_info(u32 desc_info, u32 pkt_info,
 			  struct knav_dma_desc *desc)
 {
-	desc->desc_info = desc_info;
-	desc->packet_info = pkt_info;
+	desc->desc_info = cpu_to_le32(desc_info);
+	desc->packet_info = cpu_to_le32(pkt_info);
 }
 
-static void set_pad_info(u32 pad0, u32 pad1, struct knav_dma_desc *desc)
+static void set_pad_info(u32 pad0, u32 pad1, u32 pad2, struct knav_dma_desc *desc)
 {
-	desc->pad[0] = pad0;
-	desc->pad[1] = pad1;
+	desc->pad[0] = cpu_to_le32(pad0);
+	desc->pad[1] = cpu_to_le32(pad1);
+	desc->pad[2] = cpu_to_le32(pad2);
 }
 
-static void set_org_pkt_info(u32 buff, u32 buff_len,
+static void set_org_pkt_info(dma_addr_t buff, u32 buff_len,
 			     struct knav_dma_desc *desc)
 {
-	desc->orig_buff = buff;
-	desc->orig_len = buff_len;
+	desc->orig_buff = cpu_to_le32(buff);
+	desc->orig_len = cpu_to_le32(buff_len);
 }
 
-static void set_words(u32 *words, int num_words, u32 *desc)
+static void set_words(u32 *words, int num_words, __le32 *desc)
 {
 	int i;
 
 	for (i = 0; i < num_words; i++)
-		desc[i] = words[i];
+		desc[i] = cpu_to_le32(words[i]);
 }
 
 /* Read the e-fuse value as 32 bit values to be endian independent */
@@ -570,7 +581,7 @@ static void netcp_free_rx_desc_chain(struct netcp_intf *netcp,
 	dma_addr_t dma_desc, dma_buf;
 	unsigned int buf_len, dma_sz = sizeof(*ndesc);
 	void *buf_ptr;
-	u32 tmp;
+	u32 pad[2];
 
 	get_words(&dma_desc, 1, &desc->next_desc);
 
@@ -580,14 +591,15 @@ static void netcp_free_rx_desc_chain(struct netcp_intf *netcp,
 			dev_err(netcp->ndev_dev, "failed to unmap Rx desc\n");
 			break;
 		}
-		get_pkt_info(&dma_buf, &tmp, &dma_desc, ndesc);
-		get_pad_info((u32 *)&buf_ptr, &tmp, ndesc);
+		get_pkt_info(&dma_buf, &buf_len, &dma_desc, ndesc);
+		get_pad_ptr(&buf_ptr, ndesc);
 		dma_unmap_page(netcp->dev, dma_buf, PAGE_SIZE, DMA_FROM_DEVICE);
 		__free_page(buf_ptr);
 		knav_pool_desc_put(netcp->rx_pool, desc);
 	}
 
-	get_pad_info((u32 *)&buf_ptr, &buf_len, desc);
+	get_pad_info(&pad[0], &pad[1], &buf_len, desc);
+	buf_ptr = (void *)(uintptr_t)(pad[0] + ((u64)pad[1] << 32));
 	if (buf_ptr)
 		netcp_frag_free(buf_len <= PAGE_SIZE, buf_ptr);
 	knav_pool_desc_put(netcp->rx_pool, desc);
@@ -626,7 +638,6 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
 	struct netcp_packet p_info;
 	struct sk_buff *skb;
 	void *org_buf_ptr;
-	u32 tmp;
 
 	dma_desc = knav_queue_pop(netcp->rx_queue, &dma_sz);
 	if (!dma_desc)
@@ -639,7 +650,8 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
 	}
 
 	get_pkt_info(&dma_buff, &buf_len, &dma_desc, desc);
-	get_pad_info((u32 *)&org_buf_ptr, &org_buf_len, desc);
+	get_pad_ptr(&org_buf_ptr, desc);
+	org_buf_len = le32_to_cpu(desc->pad[2]);
 
 	if (unlikely(!org_buf_ptr)) {
 		dev_err(netcp->ndev_dev, "NULL bufptr in desc\n");
@@ -664,6 +675,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
 	/* Fill in the page fragment list */
 	while (dma_desc) {
 		struct page *page;
+		void *ptr;
 
 		ndesc = knav_pool_desc_unmap(netcp->rx_pool, dma_desc, dma_sz);
 		if (unlikely(!ndesc)) {
@@ -672,14 +684,15 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
 		}
 
 		get_pkt_info(&dma_buff, &buf_len, &dma_desc, ndesc);
-		get_pad_info((u32 *)&page, &tmp, ndesc);
+		get_pad_ptr(&ptr, ndesc);
+		page = ptr;
 
 		if (likely(dma_buff && buf_len && page)) {
 			dma_unmap_page(netcp->dev, dma_buff, PAGE_SIZE,
 				       DMA_FROM_DEVICE);
 		} else {
-			dev_err(netcp->ndev_dev, "Bad Rx desc dma_buff(%p), len(%d), page(%p)\n",
-				(void *)dma_buff, buf_len, page);
+			dev_err(netcp->ndev_dev, "Bad Rx desc dma_buff(%pad), len(%d), page(%p)\n",
+				&dma_buff, buf_len, page);
 			goto free_desc;
 		}
 
@@ -750,7 +763,6 @@ static void netcp_free_rx_buf(struct netcp_intf *netcp, int fdq)
 	unsigned int buf_len, dma_sz;
 	dma_addr_t dma;
 	void *buf_ptr;
-	u32 tmp;
 
 	/* Allocate descriptor */
 	while ((dma = knav_queue_pop(netcp->rx_fdq[fdq], &dma_sz))) {
@@ -761,7 +773,7 @@ static void netcp_free_rx_buf(struct netcp_intf *netcp, int fdq)
 		}
 
 		get_org_pkt_info(&dma, &buf_len, desc);
-		get_pad_info((u32 *)&buf_ptr, &tmp, desc);
+		get_pad_ptr(&buf_ptr, desc);
 
 		if (unlikely(!dma)) {
 			dev_err(netcp->ndev_dev, "NULL orig_buff in desc\n");
@@ -813,7 +825,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq)
 	struct page *page;
 	dma_addr_t dma;
 	void *bufptr;
-	u32 pad[2];
+	u32 pad[3];
 
 	/* Allocate descriptor */
 	hwdesc = knav_pool_desc_get(netcp->rx_pool);
@@ -830,7 +842,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq)
 				SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 		bufptr = netdev_alloc_frag(primary_buf_len);
-		pad[1] = primary_buf_len;
+		pad[2] = primary_buf_len;
 
 		if (unlikely(!bufptr)) {
 			dev_warn_ratelimited(netcp->ndev_dev,
@@ -842,7 +854,8 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq)
 		if (unlikely(dma_mapping_error(netcp->dev, dma)))
 			goto fail;
 
-		pad[0] = (u32)bufptr;
+		pad[0] = lower_32_bits((uintptr_t)bufptr);
+		pad[1] = upper_32_bits((uintptr_t)bufptr);
 
 	} else {
 		/* Allocate a secondary receive queue entry */
@@ -853,8 +866,9 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq)
 		}
 		buf_len = PAGE_SIZE;
 		dma = dma_map_page(netcp->dev, page, 0, buf_len, DMA_TO_DEVICE);
-		pad[0] = (u32)page;
-		pad[1] = 0;
+		pad[0] = lower_32_bits((uintptr_t)page);
+		pad[1] = upper_32_bits((uintptr_t)page);
+		pad[2] = 0;
 	}
 
 	desc_info =  KNAV_DMA_DESC_PS_INFO_IN_DESC;
@@ -864,7 +878,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq)
 	pkt_info |= (netcp->rx_queue_id & KNAV_DMA_DESC_RETQ_MASK) <<
 		    KNAV_DMA_DESC_RETQ_SHIFT;
 	set_org_pkt_info(dma, buf_len, hwdesc);
-	set_pad_info(pad[0], pad[1], hwdesc);
+	set_pad_info(pad[0], pad[1], pad[2], hwdesc);
 	set_desc_info(desc_info, pkt_info, hwdesc);
 
 	/* Push to FDQs */
@@ -935,8 +949,8 @@ static void netcp_free_tx_desc_chain(struct netcp_intf *netcp,
 			dma_unmap_single(netcp->dev, dma_buf, buf_len,
 					 DMA_TO_DEVICE);
 		else
-			dev_warn(netcp->ndev_dev, "bad Tx desc buf(%p), len(%d)\n",
-				 (void *)dma_buf, buf_len);
+			dev_warn(netcp->ndev_dev, "bad Tx desc buf(%pad), len(%d)\n",
+				 &dma_buf, buf_len);
 
 		knav_pool_desc_put(netcp->tx_pool, ndesc);
 		ndesc = NULL;
@@ -953,11 +967,11 @@ static int netcp_process_tx_compl_packets(struct netcp_intf *netcp,
 					  unsigned int budget)
 {
 	struct knav_dma_desc *desc;
+	void *ptr;
 	struct sk_buff *skb;
 	unsigned int dma_sz;
 	dma_addr_t dma;
 	int pkts = 0;
-	u32 tmp;
 
 	while (budget--) {
 		dma = knav_queue_pop(netcp->tx_compl_q, &dma_sz);
@@ -970,7 +984,8 @@ static int netcp_process_tx_compl_packets(struct netcp_intf *netcp,
 			continue;
 		}
 
-		get_pad_info((u32 *)&skb, &tmp, desc);
+		get_pad_ptr(&ptr, desc);
+		skb = ptr;
 		netcp_free_tx_desc_chain(netcp, desc, dma_sz);
 		if (!skb) {
 			dev_err(netcp->ndev_dev, "No skb in Tx desc\n");
@@ -1059,6 +1074,7 @@ netcp_tx_map_skb(struct sk_buff *skb, struct netcp_intf *netcp)
 		u32 page_offset = frag->page_offset;
 		u32 buf_len = skb_frag_size(frag);
 		dma_addr_t desc_dma;
+		u32 desc_dma_32;
 		u32 pkt_info;
 
 		dma_addr = dma_map_page(dev, page, page_offset, buf_len,
@@ -1075,13 +1091,13 @@ netcp_tx_map_skb(struct sk_buff *skb, struct netcp_intf *netcp)
 			goto free_descs;
 		}
 
-		desc_dma = knav_pool_desc_virt_to_dma(netcp->tx_pool,
-						      (void *)ndesc);
+		desc_dma = knav_pool_desc_virt_to_dma(netcp->tx_pool, ndesc);
 		pkt_info =
 			(netcp->tx_compl_qid & KNAV_DMA_DESC_RETQ_MASK) <<
 				KNAV_DMA_DESC_RETQ_SHIFT;
 		set_pkt_info(dma_addr, buf_len, 0, ndesc);
-		set_words(&desc_dma, 1, &pdesc->next_desc);
+		desc_dma_32 = (u32)desc_dma;
+		set_words(&desc_dma_32, 1, &pdesc->next_desc);
 		pkt_len += buf_len;
 		if (pdesc != desc)
 			knav_pool_desc_map(netcp->tx_pool, pdesc,
@@ -1129,8 +1145,8 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp,
 	p_info.ts_context = NULL;
 	p_info.txtstamp_complete = NULL;
 	p_info.epib = desc->epib;
-	p_info.psdata = desc->psdata;
-	memset(p_info.epib, 0, KNAV_DMA_NUM_EPIB_WORDS * sizeof(u32));
+	p_info.psdata = (u32 __force *)desc->psdata;
+	memset(p_info.epib, 0, KNAV_DMA_NUM_EPIB_WORDS * sizeof(__le32));
 
 	/* Find out where to inject the packet for transmission */
 	list_for_each_entry(tx_hook, &netcp->txhook_list_head, list) {
@@ -1154,11 +1170,12 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp,
 
 	/* update descriptor */
 	if (p_info.psdata_len) {
-		u32 *psdata = p_info.psdata;
+		/* psdata points to both native-endian and device-endian data */
+		__le32 *psdata = (void __force *)p_info.psdata;
 
 		memmove(p_info.psdata, p_info.psdata + p_info.psdata_len,
 			p_info.psdata_len);
-		set_words(psdata, p_info.psdata_len, psdata);
+		set_words(p_info.psdata, p_info.psdata_len, psdata);
 		tmp |= (p_info.psdata_len & KNAV_DMA_DESC_PSLEN_MASK) <<
 			KNAV_DMA_DESC_PSLEN_SHIFT;
 	}
@@ -1173,11 +1190,14 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp,
 	}
 
 	set_words(&tmp, 1, &desc->packet_info);
-	set_words((u32 *)&skb, 1, &desc->pad[0]);
+	tmp = lower_32_bits((uintptr_t)skb);
+	set_words(&tmp, 1, &desc->pad[0]);
+	tmp = upper_32_bits((uintptr_t)skb);
+	set_words(&tmp, 1, &desc->pad[1]);
 
 	if (tx_pipe->flags & SWITCH_TO_PORT_IN_TAGINFO) {
 		tmp = tx_pipe->switch_to_port;
-		set_words((u32 *)&tmp, 1, &desc->tag_info);
+		set_words(&tmp, 1, &desc->tag_info);
 	}
 
 	/* submit packet descriptor */