summaryrefslogtreecommitdiff
path: root/include/linux/netdevice.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-10-15 18:42:13 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-10-15 18:42:13 -0700
commit9ff9b0d392ea08090cd1780fb196f36dbb586529 (patch)
tree276a3a5c4525b84dee64eda30b423fc31bf94850 /include/linux/netdevice.h
parent840e5bb326bbcb16ce82dd2416d2769de4839aea (diff)
parent105faa8742437c28815b2a3eb8314ebc5fd9288c (diff)
Merge tag 'net-next-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: - Add redirect_neigh() BPF packet redirect helper, allowing to limit stack traversal in common container configs and improving TCP back-pressure. Daniel reports ~10Gbps => ~15Gbps single stream TCP performance gain. - Expand netlink policy support and improve policy export to user space. (Ge)netlink core performs request validation according to declared policies. Expand the expressiveness of those policies (min/max length and bitmasks). Allow dumping policies for particular commands. This is used for feature discovery by user space (instead of kernel version parsing or trial and error). - Support IGMPv3/MLDv2 multicast listener discovery protocols in bridge. - Allow more than 255 IPv4 multicast interfaces. - Add support for Type of Service (ToS) reflection in SYN/SYN-ACK packets of TCPv6. - In Multi-patch TCP (MPTCP) support concurrent transmission of data on multiple subflows in a load balancing scenario. Enhance advertising addresses via the RM_ADDR/ADD_ADDR options. - Support SMC-Dv2 version of SMC, which enables multi-subnet deployments. - Allow more calls to same peer in RxRPC. - Support two new Controller Area Network (CAN) protocols - CAN-FD and ISO 15765-2:2016. - Add xfrm/IPsec compat layer, solving the 32bit user space on 64bit kernel problem. - Add TC actions for implementing MPLS L2 VPNs. - Improve nexthop code - e.g. handle various corner cases when nexthop objects are removed from groups better, skip unnecessary notifications and make it easier to offload nexthops into HW by converting to a blocking notifier. - Support adding and consuming TCP header options by BPF programs, opening the doors for easy experimental and deployment-specific TCP option use. - Reorganize TCP congestion control (CC) initialization to simplify life of TCP CC implemented in BPF. - Add support for shipping BPF programs with the kernel and loading them early on boot via the User Mode Driver mechanism, hence reusing all the user space infra we have. - Support sleepable BPF programs, initially targeting LSM and tracing. - Add bpf_d_path() helper for returning full path for given 'struct path'. - Make bpf_tail_call compatible with bpf-to-bpf calls. - Allow BPF programs to call map_update_elem on sockmaps. - Add BPF Type Format (BTF) support for type and enum discovery, as well as support for using BTF within the kernel itself (current use is for pretty printing structures). - Support listing and getting information about bpf_links via the bpf syscall. - Enhance kernel interfaces around NIC firmware update. Allow specifying overwrite mask to control if settings etc. are reset during update; report expected max time operation may take to users; support firmware activation without machine reboot incl. limits of how much impact reset may have (e.g. dropping link or not). - Extend ethtool configuration interface to report IEEE-standard counters, to limit the need for per-vendor logic in user space. - Adopt or extend devlink use for debug, monitoring, fw update in many drivers (dsa loop, ice, ionic, sja1105, qed, mlxsw, mv88e6xxx, dpaa2-eth). - In mlxsw expose critical and emergency SFP module temperature alarms. Refactor port buffer handling to make the defaults more suitable and support setting these values explicitly via the DCBNL interface. - Add XDP support for Intel's igb driver. - Support offloading TC flower classification and filtering rules to mscc_ocelot switches. - Add PTP support for Marvell Octeontx2 and PP2.2 hardware, as well as fixed interval period pulse generator and one-step timestamping in dpaa-eth. - Add support for various auth offloads in WiFi APs, e.g. SAE (WPA3) offload. - Add Lynx PHY/PCS MDIO module, and convert various drivers which have this HW to use it. Convert mvpp2 to split PCS. - Support Marvell Prestera 98DX3255 24-port switch ASICs, as well as 7-port Mediatek MT7531 IP. - Add initial support for QCA6390 and IPQ6018 in ath11k WiFi driver, and wcn3680 support in wcn36xx. - Improve performance for packets which don't require much offloads on recent Mellanox NICs by 20% by making multiple packets share a descriptor entry. - Move chelsio inline crypto drivers (for TLS and IPsec) from the crypto subtree to drivers/net. Move MDIO drivers out of the phy directory. - Clean up a lot of W=1 warnings, reportedly the actively developed subsections of networking drivers should now build W=1 warning free. - Make sure drivers don't use in_interrupt() to dynamically adapt their code. Convert tasklets to use new tasklet_setup API (sadly this conversion is not yet complete). * tag 'net-next-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2583 commits) Revert "bpfilter: Fix build error with CONFIG_BPFILTER_UMH" net, sockmap: Don't call bpf_prog_put() on NULL pointer bpf, selftest: Fix flaky tcp_hdr_options test when adding addr to lo bpf, sockmap: Add locking annotations to iterator netfilter: nftables: allow re-computing sctp CRC-32C in 'payload' statements net: fix pos incrementment in ipv6_route_seq_next net/smc: fix invalid return code in smcd_new_buf_create() net/smc: fix valid DMBE buffer sizes net/smc: fix use-after-free of delayed events bpfilter: Fix build error with CONFIG_BPFILTER_UMH cxgb4/ch_ipsec: Replace the module name to ch_ipsec from chcr net: sched: Fix suspicious RCU usage while accessing tcf_tunnel_info bpf: Fix register equivalence tracking. rxrpc: Fix loss of final ack on shutdown rxrpc: Fix bundle counting for exclusive connections netfilter: restore NF_INET_NUMHOOKS ibmveth: Identify ingress large send packets. ibmveth: Switch order of ibmveth_helper calls. cxgb4: handle 4-tuple PEDIT to NAT mode translation selftests: Add VRF route leaking tests ...
Diffstat (limited to 'include/linux/netdevice.h')
-rw-r--r--include/linux/netdevice.h105
1 files changed, 66 insertions, 39 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 18dec08439f9..964b494b0e8d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -70,6 +70,7 @@ struct udp_tunnel_nic;
struct bpf_prog;
struct xdp_buff;
+void synchronize_net(void);
void netdev_set_default_ethtool_ops(struct net_device *dev,
const struct ethtool_ops *ops);
@@ -211,9 +212,8 @@ struct netdev_hw_addr {
unsigned char type;
#define NETDEV_HW_ADDR_T_LAN 1
#define NETDEV_HW_ADDR_T_SAN 2
-#define NETDEV_HW_ADDR_T_SLAVE 3
-#define NETDEV_HW_ADDR_T_UNICAST 4
-#define NETDEV_HW_ADDR_T_MULTICAST 5
+#define NETDEV_HW_ADDR_T_UNICAST 3
+#define NETDEV_HW_ADDR_T_MULTICAST 4
bool global_use;
int sync_cnt;
int refcount;
@@ -354,7 +354,7 @@ enum {
NAPI_STATE_MISSED, /* reschedule a napi */
NAPI_STATE_DISABLE, /* Disable pending */
NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
- NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */
+ NAPI_STATE_LISTED, /* NAPI added to system lists */
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
};
@@ -364,7 +364,7 @@ enum {
NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED),
NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE),
NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC),
- NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED),
+ NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED),
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
};
@@ -489,20 +489,6 @@ static inline bool napi_complete(struct napi_struct *n)
}
/**
- * napi_hash_del - remove a NAPI from global table
- * @napi: NAPI context
- *
- * Warning: caller must observe RCU grace period
- * before freeing memory containing @napi, if
- * this function returns true.
- * Note: core networking stack automatically calls it
- * from netif_napi_del().
- * Drivers might want to call this helper to combine all
- * the needed RCU grace periods into a single one.
- */
-bool napi_hash_del(struct napi_struct *napi);
-
-/**
* napi_disable - prevent NAPI from scheduling
* @n: NAPI context
*
@@ -618,7 +604,7 @@ struct netdev_queue {
/* Subordinate device that the queue has been assigned to */
struct net_device *sb_dev;
#ifdef CONFIG_XDP_SOCKETS
- struct xdp_umem *umem;
+ struct xsk_buff_pool *pool;
#endif
/*
* write-mostly part
@@ -640,11 +626,16 @@ struct netdev_queue {
extern int sysctl_fb_tunnels_only_for_init_net;
extern int sysctl_devconf_inherit_init_net;
+/*
+ * sysctl_fb_tunnels_only_for_init_net == 0 : For all netns
+ * == 1 : For initns only
+ * == 2 : For none.
+ */
static inline bool net_has_fallback_tunnels(const struct net *net)
{
- return net == &init_net ||
- !IS_ENABLED(CONFIG_SYSCTL) ||
- !sysctl_fb_tunnels_only_for_init_net;
+ return !IS_ENABLED(CONFIG_SYSCTL) ||
+ !sysctl_fb_tunnels_only_for_init_net ||
+ (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1);
}
static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
@@ -751,7 +742,7 @@ struct netdev_rx_queue {
struct net_device *dev;
struct xdp_rxq_info xdp_rxq;
#ifdef CONFIG_XDP_SOCKETS
- struct xdp_umem *umem;
+ struct xsk_buff_pool *pool;
#endif
} ____cacheline_aligned_in_smp;
@@ -879,7 +870,7 @@ enum bpf_netdev_command {
/* BPF program for offload callbacks, invoked at program load time. */
BPF_OFFLOAD_MAP_ALLOC,
BPF_OFFLOAD_MAP_FREE,
- XDP_SETUP_XSK_UMEM,
+ XDP_SETUP_XSK_POOL,
};
struct bpf_prog_offload_ops;
@@ -913,9 +904,9 @@ struct netdev_bpf {
struct {
struct bpf_offloaded_map *offmap;
};
- /* XDP_SETUP_XSK_UMEM */
+ /* XDP_SETUP_XSK_POOL */
struct {
- struct xdp_umem *umem;
+ struct xsk_buff_pool *pool;
u16 queue_id;
} xsk;
};
@@ -1285,6 +1276,9 @@ struct netdev_net_notifier {
* int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p,
* int cmd);
* Add, change, delete or get information on an IPv4 tunnel.
+ * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
+ * If a device is paired with a peer device, return the peer instance.
+ * The caller must be under RCU read context.
*/
struct net_device_ops {
int (*ndo_init)(struct net_device *dev);
@@ -1492,6 +1486,7 @@ struct net_device_ops {
struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
int (*ndo_tunnel_ctl)(struct net_device *dev,
struct ip_tunnel_parm *p, int cmd);
+ struct net_device * (*ndo_get_peer_dev)(struct net_device *dev);
};
/**
@@ -2208,6 +2203,22 @@ int netdev_get_num_tc(struct net_device *dev)
return dev->num_tc;
}
+static inline void net_prefetch(void *p)
+{
+ prefetch(p);
+#if L1_CACHE_BYTES < 128
+ prefetch((u8 *)p + L1_CACHE_BYTES);
+#endif
+}
+
+static inline void net_prefetchw(void *p)
+{
+ prefetchw(p);
+#if L1_CACHE_BYTES < 128
+ prefetchw((u8 *)p + L1_CACHE_BYTES);
+#endif
+}
+
void netdev_unbind_sb_channel(struct net_device *dev,
struct net_device *sb_dev);
int netdev_bind_sb_channel_queue(struct net_device *dev,
@@ -2363,12 +2374,26 @@ static inline void netif_tx_napi_add(struct net_device *dev,
}
/**
+ * __netif_napi_del - remove a NAPI context
+ * @napi: NAPI context
+ *
+ * Warning: caller must observe RCU grace period before freeing memory
+ * containing @napi. Drivers might want to call this helper to combine
+ * all the needed RCU grace periods into a single one.
+ */
+void __netif_napi_del(struct napi_struct *napi);
+
+/**
* netif_napi_del - remove a NAPI context
* @napi: NAPI context
*
* netif_napi_del() removes a NAPI context from the network device NAPI list
*/
-void netif_napi_del(struct napi_struct *napi);
+static inline void netif_napi_del(struct napi_struct *napi)
+{
+ __netif_napi_del(napi);
+ synchronize_net();
+}
struct napi_gro_cb {
/* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
@@ -2522,6 +2547,16 @@ struct pcpu_lstats {
void dev_lstats_read(struct net_device *dev, u64 *packets, u64 *bytes);
+static inline void dev_sw_netstats_rx_add(struct net_device *dev, unsigned int len)
+{
+ struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_bytes += len;
+ tstats->rx_packets++;
+ u64_stats_update_end(&tstats->syncp);
+}
+
static inline void dev_lstats_add(struct net_device *dev, unsigned int len)
{
struct pcpu_lstats *lstats = this_cpu_ptr(dev->lstats);
@@ -2792,7 +2827,6 @@ static inline void unregister_netdevice(struct net_device *dev)
int netdev_refcnt_read(const struct net_device *dev);
void free_netdev(struct net_device *dev);
void netdev_freemem(struct net_device *dev);
-void synchronize_net(void);
int init_dummy_netdev(struct net_device *dev);
struct net_device *netdev_get_xmit_slave(struct net_device *dev,
@@ -3777,6 +3811,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
int netif_rx(struct sk_buff *skb);
int netif_rx_ni(struct sk_buff *skb);
+int netif_rx_any_context(struct sk_buff *skb);
int netif_receive_skb(struct sk_buff *skb);
int netif_receive_skb_core(struct sk_buff *skb);
void netif_receive_skb_list(struct list_head *head);
@@ -4464,6 +4499,8 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
struct rtnl_link_stats64 *storage);
void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
const struct net_device_stats *netdev_stats);
+void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
+ const struct pcpu_sw_netstats __percpu *netstats);
extern int netdev_max_backlog;
extern int netdev_tstamp_prequeue;
@@ -4704,16 +4741,6 @@ int netdev_class_create_file_ns(const struct class_attribute *class_attr,
void netdev_class_remove_file_ns(const struct class_attribute *class_attr,
const void *ns);
-static inline int netdev_class_create_file(const struct class_attribute *class_attr)
-{
- return netdev_class_create_file_ns(class_attr, NULL);
-}
-
-static inline void netdev_class_remove_file(const struct class_attribute *class_attr)
-{
- netdev_class_remove_file_ns(class_attr, NULL);
-}
-
extern const struct kobj_ns_type_operations net_ns_type_operations;
const char *netdev_drivername(const struct net_device *dev);