summary | refs | log | tree | commit | diff
path: root/drivers/net/ethernet/sfc/efx_common.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2020-10-15 18:42:13 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2020-10-15 18:42:13 -0700
commit: 9ff9b0d392ea08090cd1780fb196f36dbb586529 (patch)
tree: 276a3a5c4525b84dee64eda30b423fc31bf94850 /drivers/net/ethernet/sfc/efx_common.c
parent: 840e5bb326bbcb16ce82dd2416d2769de4839aea (diff)
parent: 105faa8742437c28815b2a3eb8314ebc5fd9288c (diff)
Merge tag 'net-next-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: - Add redirect_neigh() BPF packet redirect helper, allowing to limit stack traversal in common container configs and improving TCP back-pressure. Daniel reports ~10Gbps => ~15Gbps single stream TCP performance gain. - Expand netlink policy support and improve policy export to user space. (Ge)netlink core performs request validation according to declared policies. Expand the expressiveness of those policies (min/max length and bitmasks). Allow dumping policies for particular commands. This is used for feature discovery by user space (instead of kernel version parsing or trial and error). - Support IGMPv3/MLDv2 multicast listener discovery protocols in bridge. - Allow more than 255 IPv4 multicast interfaces. - Add support for Type of Service (ToS) reflection in SYN/SYN-ACK packets of TCPv6. - In Multipath TCP (MPTCP) support concurrent transmission of data on multiple subflows in a load balancing scenario. Enhance advertising addresses via the RM_ADDR/ADD_ADDR options. - Support SMC-Dv2 version of SMC, which enables multi-subnet deployments. - Allow more calls to same peer in RxRPC. - Support two new Controller Area Network (CAN) protocols - CAN-FD and ISO 15765-2:2016. - Add xfrm/IPsec compat layer, solving the 32bit user space on 64bit kernel problem. - Add TC actions for implementing MPLS L2 VPNs. - Improve nexthop code - e.g. handle various corner cases when nexthop objects are removed from groups better, skip unnecessary notifications and make it easier to offload nexthops into HW by converting to a blocking notifier. - Support adding and consuming TCP header options by BPF programs, opening the doors for easy experimental and deployment-specific TCP option use. - Reorganize TCP congestion control (CC) initialization to simplify life of TCP CC implemented in BPF. 
- Add support for shipping BPF programs with the kernel and loading them early on boot via the User Mode Driver mechanism, hence reusing all the user space infra we have. - Support sleepable BPF programs, initially targeting LSM and tracing. - Add bpf_d_path() helper for returning full path for given 'struct path'. - Make bpf_tail_call compatible with bpf-to-bpf calls. - Allow BPF programs to call map_update_elem on sockmaps. - Add BPF Type Format (BTF) support for type and enum discovery, as well as support for using BTF within the kernel itself (current use is for pretty printing structures). - Support listing and getting information about bpf_links via the bpf syscall. - Enhance kernel interfaces around NIC firmware update. Allow specifying overwrite mask to control if settings etc. are reset during update; report expected max time operation may take to users; support firmware activation without machine reboot incl. limits of how much impact reset may have (e.g. dropping link or not). - Extend ethtool configuration interface to report IEEE-standard counters, to limit the need for per-vendor logic in user space. - Adopt or extend devlink use for debug, monitoring, fw update in many drivers (dsa loop, ice, ionic, sja1105, qed, mlxsw, mv88e6xxx, dpaa2-eth). - In mlxsw expose critical and emergency SFP module temperature alarms. Refactor port buffer handling to make the defaults more suitable and support setting these values explicitly via the DCBNL interface. - Add XDP support for Intel's igb driver. - Support offloading TC flower classification and filtering rules to mscc_ocelot switches. - Add PTP support for Marvell Octeontx2 and PP2.2 hardware, as well as fixed interval period pulse generator and one-step timestamping in dpaa-eth. - Add support for various auth offloads in WiFi APs, e.g. SAE (WPA3) offload. - Add Lynx PHY/PCS MDIO module, and convert various drivers which have this HW to use it. Convert mvpp2 to split PCS. 
- Support Marvell Prestera 98DX3255 24-port switch ASICs, as well as 7-port Mediatek MT7531 IP. - Add initial support for QCA6390 and IPQ6018 in ath11k WiFi driver, and wcn3680 support in wcn36xx. - Improve performance for packets which don't require much offloads on recent Mellanox NICs by 20% by making multiple packets share a descriptor entry. - Move chelsio inline crypto drivers (for TLS and IPsec) from the crypto subtree to drivers/net. Move MDIO drivers out of the phy directory. - Clean up a lot of W=1 warnings, reportedly the actively developed subsections of networking drivers should now build W=1 warning free. - Make sure drivers don't use in_interrupt() to dynamically adapt their code. Convert tasklets to use new tasklet_setup API (sadly this conversion is not yet complete). * tag 'net-next-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2583 commits) Revert "bpfilter: Fix build error with CONFIG_BPFILTER_UMH" net, sockmap: Don't call bpf_prog_put() on NULL pointer bpf, selftest: Fix flaky tcp_hdr_options test when adding addr to lo bpf, sockmap: Add locking annotations to iterator netfilter: nftables: allow re-computing sctp CRC-32C in 'payload' statements net: fix pos incrementment in ipv6_route_seq_next net/smc: fix invalid return code in smcd_new_buf_create() net/smc: fix valid DMBE buffer sizes net/smc: fix use-after-free of delayed events bpfilter: Fix build error with CONFIG_BPFILTER_UMH cxgb4/ch_ipsec: Replace the module name to ch_ipsec from chcr net: sched: Fix suspicious RCU usage while accessing tcf_tunnel_info bpf: Fix register equivalence tracking. rxrpc: Fix loss of final ack on shutdown rxrpc: Fix bundle counting for exclusive connections netfilter: restore NF_INET_NUMHOOKS ibmveth: Identify ingress large send packets. ibmveth: Switch order of ibmveth_helper calls. cxgb4: handle 4-tuple PEDIT to NAT mode translation selftests: Add VRF route leaking tests ...
Diffstat (limited to 'drivers/net/ethernet/sfc/efx_common.c')
-rw-r--r-- drivers/net/ethernet/sfc/efx_common.c 124
1 files changed, 90 insertions, 34 deletions
diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c
index dfc6032e75f4..72a3f0e09f52 100644
--- a/drivers/net/ethernet/sfc/efx_common.c
+++ b/drivers/net/ethernet/sfc/efx_common.c
@@ -11,6 +11,7 @@
#include "net_driver.h"
#include <linux/module.h>
#include <linux/netdevice.h>
+#include <net/gre.h>
#include "efx_common.h"
#include "efx_channels.h"
#include "efx.h"
@@ -19,6 +20,7 @@
#include "rx_common.h"
#include "tx_common.h"
#include "nic.h"
+#include "mcdi_port_common.h"
#include "io.h"
#include "mcdi_pcol.h"
@@ -544,7 +546,7 @@ void efx_start_all(struct efx_nic *efx)
* to poll now because we could have missed a change
*/
mutex_lock(&efx->mac_lock);
- if (efx->phy_op->poll(efx))
+ if (efx_mcdi_phy_poll(efx))
efx_link_status_changed(efx);
mutex_unlock(&efx->mac_lock);
@@ -600,7 +602,7 @@ void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats)
struct efx_nic *efx = netdev_priv(net_dev);
spin_lock_bh(&efx->stats_lock);
- efx->type->update_stats(efx, NULL, stats);
+ efx_nic_update_stats_atomic(efx, NULL, stats);
spin_unlock_bh(&efx->stats_lock);
}
@@ -714,9 +716,6 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method)
mutex_lock(&efx->mac_lock);
down_write(&efx->filter_sem);
mutex_lock(&efx->rss_lock);
- if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
- method != RESET_TYPE_DATAPATH)
- efx->phy_op->fini(efx);
efx->type->fini(efx);
}
@@ -759,10 +758,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
method != RESET_TYPE_DATAPATH) {
- rc = efx->phy_op->init(efx);
- if (rc)
- goto fail;
- rc = efx->phy_op->reconfigure(efx);
+ rc = efx_mcdi_port_reconfigure(efx);
if (rc && rc != -EPERM)
netif_err(efx, drv, efx->net_dev,
"could not restore PHY settings\n");
@@ -959,7 +955,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
/**************************************************************************
*
- * Dummy PHY/MAC operations
+ * Dummy NIC operations
*
* Can be used for some unimplemented operations
* Needed so all function pointers are valid and do not have to be tested
@@ -972,18 +968,6 @@ int efx_port_dummy_op_int(struct efx_nic *efx)
}
/* Shared no-op for void operations that a NIC type leaves unimplemented */
void efx_port_dummy_op_void(struct efx_nic *efx)
{
}
-static bool efx_port_dummy_op_poll(struct efx_nic *efx)
-{
- return false;
-}
-
-static const struct efx_phy_operations efx_dummy_phy_operations = {
- .init = efx_port_dummy_op_int,
- .reconfigure = efx_port_dummy_op_int,
- .poll = efx_port_dummy_op_poll,
- .fini = efx_port_dummy_op_void,
-};
-
/**************************************************************************
*
* Data housekeeping
@@ -1037,7 +1021,6 @@ int efx_init_struct(struct efx_nic *efx,
efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE,
sizeof(*efx->rps_hash_table), GFP_KERNEL);
#endif
- efx->phy_op = &efx_dummy_phy_operations;
efx->mdio.dev = net_dev;
INIT_WORK(&efx->mac_work, efx_mac_work);
init_waitqueue_head(&efx->flush_wq);
@@ -1104,17 +1087,7 @@ int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
pci_set_master(pci_dev);
- /* Set the PCI DMA mask. Try all possibilities from our
- * genuine mask down to 32 bits, because some architectures
- * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
- * masks event though they reject 46 bit masks.
- */
- while (dma_mask > 0x7fffffffUL) {
- rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
- if (rc == 0)
- break;
- dma_mask >>= 1;
- }
+ rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
if (rc) {
netif_err(efx, probe, efx->net_dev,
"could not find a suitable DMA mask\n");
@@ -1315,6 +1288,89 @@ const struct pci_error_handlers efx_err_handlers = {
.resume = efx_io_resume,
};
+/* Report whether the NIC can apply TX offloads to this encapsulated packet.
+ *
+ * Returns true only for GRE frames that match the NVGRE shape described
+ * below, or for UDP frames whose destination port is registered as a
+ * tunnel port; everything else is reported as not offloadable.
+ */
+static bool efx_can_encap_offloads(struct efx_nic *efx, struct sk_buff *skb)
+{
+	const struct gre_base_hdr *gre;
+	u8 l4_proto;
+
+	/* A NIC type without udp_tnl_has_port should never have advertised
+	 * encap offload feature flags, so we do not expect to get here
+	 * for one; warn once and refuse the offload.
+	 */
+	if (WARN_ON_ONCE(!efx->type->udp_tnl_has_port))
+		return false;
+
+	/* Find out which protocol the outer IP header is carrying */
+	if (skb->protocol == htons(ETH_P_IP)) {
+		l4_proto = ip_hdr(skb)->protocol;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		/* Extension headers are not walked here, so a packet that
+		 * has them may be judged non-offloadable even though the
+		 * hardware could perhaps have coped with it.
+		 */
+		l4_proto = ipv6_hdr(skb)->nexthdr;
+	} else {
+		/* Outer protocol is not IP: nothing we can offload */
+		return false;
+	}
+
+	if (l4_proto == IPPROTO_UDP) {
+		/* A destination port registered for a UDP tunnel is assumed
+		 * to carry that tunnel, and the NIC will treat it as such.
+		 * Any other port is something the NIC cannot interpret.
+		 */
+		return efx->type->udp_tnl_has_port(efx, udp_hdr(skb)->dest);
+	}
+
+	if (l4_proto != IPPROTO_GRE)
+		return false;
+
+	/* NVGRE is supported; IP over GRE and arbitrary gretaps are not.
+	 * The NIC accepts GRE as encapsulation when the inner protocol is
+	 * Ethernet, but only handles it correctly when the GRE header is
+	 * exactly 8 bytes long.  (That length also rules out the Routing
+	 * Present flag, GRE_ROUTING, so it needs no separate check.)
+	 */
+	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
+	    skb->inner_protocol != htons(ETH_P_TEB) ||
+	    skb_inner_mac_header(skb) - skb_transport_header(skb) != 8)
+		return false;
+
+	/* The NIC does not update the Checksum or Sequence Number fields,
+	 * so a header that carries either of them cannot be offloaded.
+	 */
+	gre = (const struct gre_base_hdr *)skb_transport_header(skb);
+	if (gre->flags & (GRE_CSUM | GRE_SEQ))
+		return false;
+
+	return true;
+}
+
+/* Trim the offered feature set down to what the NIC can do for this skb.
+ *
+ * Non-encapsulated packets are returned unchanged.  For encapsulated
+ * packets, GSO is dropped when the headers are too long for hardware TSO,
+ * and both GSO and checksum offload are dropped when
+ * efx_can_encap_offloads() rejects the encapsulation.
+ */
+netdev_features_t efx_features_check(struct sk_buff *skb, struct net_device *dev,
+				     netdev_features_t features)
+{
+	struct efx_nic *efx = netdev_priv(dev);
+
+	if (!skb->encapsulation)
+		return features;
+
+	/* Hardware can only do TSO with at most 208 bytes of headers. */
+	if ((features & NETIF_F_GSO_MASK) &&
+	    skb_inner_transport_offset(skb) > EFX_TSO2_MAX_HDRLEN) {
+		features &= ~(NETIF_F_GSO_MASK);
+	}
+
+	/* Only ask about the encapsulation if an offload is still wanted */
+	if ((features & (NETIF_F_GSO_MASK | NETIF_F_CSUM_MASK)) &&
+	    !efx_can_encap_offloads(efx, skb)) {
+		features &= ~(NETIF_F_GSO_MASK | NETIF_F_CSUM_MASK);
+	}
+
+	return features;
+}
+
int efx_get_phys_port_id(struct net_device *net_dev,
struct netdev_phys_item_id *ppid)
{