diff options
Diffstat (limited to 'include/linux/netdevice.h')
-rw-r--r-- | include/linux/netdevice.h | 673 |
1 files changed, 472 insertions, 201 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 66f9a04ec270..838407aea705 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -30,6 +30,7 @@ #include <linux/bug.h> #include <linux/delay.h> #include <linux/atomic.h> +#include <linux/prefetch.h> #include <asm/cache.h> #include <asm/byteorder.h> @@ -543,7 +544,7 @@ struct netdev_queue { * read mostly part */ struct net_device *dev; - struct Qdisc *qdisc; + struct Qdisc __rcu *qdisc; struct Qdisc *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; @@ -943,7 +944,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * const unsigned char *addr) * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, - * struct net_device *dev, int idx) + * struct net_device *dev, struct net_device *filter_dev, + * int idx) * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. * @@ -1114,6 +1116,7 @@ struct net_device_ops { int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx); int (*ndo_bridge_setlink)(struct net_device *dev, @@ -1204,6 +1207,7 @@ enum netdev_priv_flags { IFF_SUPP_NOFCS = 1<<19, IFF_LIVE_ADDR_CHANGE = 1<<20, IFF_MACVLAN = 1<<21, + IFF_XMIT_DST_RELEASE_PERM = 1<<22, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1228,43 +1232,232 @@ enum netdev_priv_flags { #define IFF_SUPP_NOFCS IFF_SUPP_NOFCS #define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE #define IFF_MACVLAN IFF_MACVLAN +#define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM -/* - * The DEVICE structure. - * Actually, this whole structure is a big mistake. It mixes I/O - * data with strictly "high-level" data, and it has to know about - * almost every data structure used in the INET module. +/** + * struct net_device - The DEVICE structure. + * Actually, this whole structure is a big mistake. It mixes I/O + * data with strictly "high-level" data, and it has to know about + * almost every data structure used in the INET module. + * + * @name: This is the first field of the "visible" part of this structure + * (i.e. as seen by users in the "Space.c" file). It is the name + * of the interface. + * + * @name_hlist: Device name hash chain, please keep it close to name[] + * @ifalias: SNMP alias + * @mem_end: Shared memory end + * @mem_start: Shared memory start + * @base_addr: Device I/O address + * @irq: Device IRQ number + * + * @state: Generic network queuing layer state, see netdev_state_t + * @dev_list: The global list of network devices + * @napi_list: List entry, that is used for polling napi devices + * @unreg_list: List entry, that is used, when we are unregistering the + * device, see the function unregister_netdev + * @close_list: List entry, that is used, when we are closing the device + * + * @adj_list: Directly linked devices, like slaves for bonding + * @all_adj_list: All linked devices, *including* neighbours + * @features: Currently active device features + * @hw_features: User-changeable features + * + * @wanted_features: User-requested features + * @vlan_features: Mask of features inheritable by VLAN devices + * + * @hw_enc_features: Mask of features inherited by encapsulating devices + * This field indicates what encapsulation + * offloads the hardware is capable of doing, + * and drivers will need to set them appropriately. + * + * @mpls_features: Mask of features inheritable by MPLS + * + * @ifindex: interface index + * @iflink: unique device identifier + * + * @stats: Statistics struct, which was left as a legacy, use + * rtnl_link_stats64 instead + * + * @rx_dropped: Dropped packets by core network, + * do not use this in drivers + * @tx_dropped: Dropped packets by core network, + * do not use this in drivers + * + * @carrier_changes: Stats to monitor carrier on<->off transitions + * + * @wireless_handlers: List of functions to handle Wireless Extensions, + * instead of ioctl, + * see <net/iw_handler.h> for details. + * @wireless_data: Instance data managed by the core of wireless extensions + * + * @netdev_ops: Includes several pointers to callbacks, + * if one wants to override the ndo_*() functions + * @ethtool_ops: Management operations + * @fwd_ops: Management operations + * @header_ops: Includes callbacks for creating,parsing,rebuilding,etc + * of Layer 2 headers. + * + * @flags: Interface flags (a la BSD) + * @priv_flags: Like 'flags' but invisible to userspace, + * see if.h for the definitions + * @gflags: Global flags ( kept as legacy ) + * @padded: How much padding added by alloc_netdev() + * @operstate: RFC2863 operstate + * @link_mode: Mapping policy to operstate + * @if_port: Selectable AUI, TP, ... + * @dma: DMA channel + * @mtu: Interface MTU value + * @type: Interface hardware type + * @hard_header_len: Hardware header length + * + * @needed_headroom: Extra headroom the hardware may need, but not in all + * cases can this be guaranteed + * @needed_tailroom: Extra tailroom the hardware may need, but not in all + * cases can this be guaranteed. Some cases also use + * LL_MAX_HEADER instead to allocate the skb + * + * interface address info: + * + * @perm_addr: Permanent hw address + * @addr_assign_type: Hw address assignment type + * @addr_len: Hardware address length + * @neigh_priv_len; Used in neigh_alloc(), + * initialized only in atm/clip.c + * @dev_id: Used to differentiate devices that share + * the same link layer address + * @dev_port: Used to differentiate devices that share + * the same function + * @addr_list_lock: XXX: need comments on this one + * @uc: unicast mac addresses + * @mc: multicast mac addresses + * @dev_addrs: list of device hw addresses + * @queues_kset: Group of all Kobjects in the Tx and RX queues + * @uc_promisc: Counter, that indicates, that promiscuous mode + * has been enabled due to the need to listen to + * additional unicast addresses in a device that + * does not implement ndo_set_rx_mode() + * @promiscuity: Number of times, the NIC is told to work in + * Promiscuous mode, if it becomes 0 the NIC will + * exit from working in Promiscuous mode + * @allmulti: Counter, enables or disables allmulticast mode + * + * @vlan_info: VLAN info + * @dsa_ptr: dsa specific data + * @tipc_ptr: TIPC specific data + * @atalk_ptr: AppleTalk link + * @ip_ptr: IPv4 specific data + * @dn_ptr: DECnet specific data + * @ip6_ptr: IPv6 specific data + * @ax25_ptr: AX.25 specific data + * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering + * + * @last_rx: Time of last Rx + * @dev_addr: Hw address (before bcast, + * because most packets are unicast) + * + * @_rx: Array of RX queues + * @num_rx_queues: Number of RX queues + * allocated at register_netdev() time + * @real_num_rx_queues: Number of RX queues currently active in device + * + * @rx_handler: handler for received packets + * @rx_handler_data: XXX: need comments on this one + * @ingress_queue: XXX: need comments on this one + * @broadcast: hw bcast address + * + * @_tx: Array of TX queues + * @num_tx_queues: Number of TX queues allocated at alloc_netdev_mq() time + * @real_num_tx_queues: Number of TX queues currently active in device + * @qdisc: Root qdisc from userspace point of view + * @tx_queue_len: Max frames per queue allowed + * @tx_global_lock: XXX: need comments on this one + * + * @xps_maps: XXX: need comments on this one + * + * @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts, + * indexed by RX queue number. Assigned by driver. + * This must only be set if the ndo_rx_flow_steer + * operation is defined + * + * @trans_start: Time (in jiffies) of last Tx + * @watchdog_timeo: Represents the timeout that is used by + * the watchdog ( see dev_watchdog() ) + * @watchdog_timer: List of timers + * + * @pcpu_refcnt: Number of references to this device + * @todo_list: Delayed register/unregister + * @index_hlist: Device index hash chain + * @link_watch_list: XXX: need comments on this one + * + * @reg_state: Register/unregister state machine + * @dismantle: Device is going to be freed + * @rtnl_link_state: This enum represents the phases of creating + * a new link + * + * @destructor: Called from unregister, + * can be used to call free_netdev + * @npinfo: XXX: need comments on this one + * @nd_net: Network namespace this network device is inside + * + * @ml_priv: Mid-layer private + * @lstats: Loopback statistics + * @tstats: Tunnel statistics + * @dstats: Dummy statistics + * @vstats: Virtual ethernet statistics + * + * @garp_port: GARP + * @mrp_port: MRP + * + * @dev: Class/net/name entry + * @sysfs_groups: Space for optional device, statistics and wireless + * sysfs groups + * + * @sysfs_rx_queue_group: Space for optional per-rx queue attributes + * @rtnl_link_ops: Rtnl_link_ops + * + * @gso_max_size: Maximum size of generic segmentation offload + * @gso_max_segs: Maximum number of segments that can be passed to the + * NIC for GSO + * @gso_min_segs: Minimum number of segments that can be passed to the + * NIC for GSO + * + * @dcbnl_ops: Data Center Bridging netlink ops + * @num_tc: Number of traffic classes in the net device + * @tc_to_txq: XXX: need comments on this one + * @prio_tc_map XXX: need comments on this one + * + * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp + * + * @priomap: XXX: need comments on this one + * @phydev: Physical device may attach itself + * for hardware timestamping + * + * @qdisc_tx_busylock: XXX: need comments on this one + * + * @group: The group, that the device belongs to + * @pm_qos_req: Power Management QoS object * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ struct net_device { - - /* - * This is the first field of the "visible" part of this structure - * (i.e. as seen by users in the "Space.c" file). It is the name - * of the interface. - */ char name[IFNAMSIZ]; - - /* device name hash chain, please keep it close to name[] */ struct hlist_node name_hlist; - - /* snmp alias */ char *ifalias; - /* * I/O specific fields * FIXME: Merge these and struct ifmap into one */ - unsigned long mem_end; /* shared mem end */ - unsigned long mem_start; /* shared mem start */ - unsigned long base_addr; /* device I/O address */ - int irq; /* device IRQ number */ + unsigned long mem_end; + unsigned long mem_start; + unsigned long base_addr; + int irq; /* - * Some hardware also needs these fields, but they are not + * Some hardware also needs these fields (state,dev_list, + * napi_list,unreg_list,close_list) but they are not * part of the usual set specified in Space.c. */ @@ -1275,110 +1468,80 @@ struct net_device { struct list_head unreg_list; struct list_head close_list; - /* directly linked devices, like slaves for bonding */ struct { struct list_head upper; struct list_head lower; } adj_list; - /* all linked devices, *including* neighbours */ struct { struct list_head upper; struct list_head lower; } all_adj_list; - - /* currently active device features */ netdev_features_t features; - /* user-changeable features */ netdev_features_t hw_features; - /* user-requested features */ netdev_features_t wanted_features; - /* mask of features inheritable by VLAN devices */ netdev_features_t vlan_features; - /* mask of features inherited by encapsulating devices - * This field indicates what encapsulation offloads - * the hardware is capable of doing, and drivers will - * need to set them appropriately. - */ netdev_features_t hw_enc_features; - /* mask of fetures inheritable by MPLS */ netdev_features_t mpls_features; - /* Interface index. Unique device identifier */ int ifindex; int iflink; struct net_device_stats stats; - /* dropped packets by core network, Do not use this in drivers */ atomic_long_t rx_dropped; atomic_long_t tx_dropped; - /* Stats to monitor carrier on<->off transitions */ atomic_t carrier_changes; #ifdef CONFIG_WIRELESS_EXT - /* List of functions to handle Wireless Extensions (instead of ioctl). - * See <net/iw_handler.h> for details. Jean II */ const struct iw_handler_def * wireless_handlers; - /* Instance data managed by the core of Wireless Extensions. */ struct iw_public_data * wireless_data; #endif - /* Management operations */ const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; const struct forwarding_accel_ops *fwd_ops; - /* Hardware header description */ const struct header_ops *header_ops; - unsigned int flags; /* interface flags (a la BSD) */ - unsigned int priv_flags; /* Like 'flags' but invisible to userspace. - * See if.h for definitions. */ + unsigned int flags; + unsigned int priv_flags; + unsigned short gflags; - unsigned short padded; /* How much padding added by alloc_netdev() */ + unsigned short padded; - unsigned char operstate; /* RFC2863 operstate */ - unsigned char link_mode; /* mapping policy to operstate */ + unsigned char operstate; + unsigned char link_mode; - unsigned char if_port; /* Selectable AUI, TP,..*/ - unsigned char dma; /* DMA channel */ + unsigned char if_port; + unsigned char dma; - unsigned int mtu; /* interface MTU value */ - unsigned short type; /* interface hardware type */ - unsigned short hard_header_len; /* hardware hdr length */ + unsigned int mtu; + unsigned short type; + unsigned short hard_header_len; - /* extra head- and tailroom the hardware may need, but not in all cases - * can this be guaranteed, especially tailroom. Some cases also use - * LL_MAX_HEADER instead to allocate the skb. - */ unsigned short needed_headroom; unsigned short needed_tailroom; /* Interface address info. */ - unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ - unsigned char addr_assign_type; /* hw address assignment type */ - unsigned char addr_len; /* hardware address length */ + unsigned char perm_addr[MAX_ADDR_LEN]; + unsigned char addr_assign_type; + unsigned char addr_len; unsigned short neigh_priv_len; - unsigned short dev_id; /* Used to differentiate devices - * that share the same link - * layer address - */ - unsigned short dev_port; /* Used to differentiate - * devices that share the same - * function - */ + unsigned short dev_id; + unsigned short dev_port; spinlock_t addr_list_lock; - struct netdev_hw_addr_list uc; /* Unicast mac addresses */ - struct netdev_hw_addr_list mc; /* Multicast mac addresses */ - struct netdev_hw_addr_list dev_addrs; /* list of device - * hw addresses - */ + struct netdev_hw_addr_list uc; + struct netdev_hw_addr_list mc; + struct netdev_hw_addr_list dev_addrs; + #ifdef CONFIG_SYSFS struct kset *queues_kset; #endif + unsigned char name_assign_type; + bool uc_promisc; unsigned int promiscuity; unsigned int allmulti; @@ -1387,40 +1550,34 @@ struct net_device { /* Protocol specific pointers */ #if IS_ENABLED(CONFIG_VLAN_8021Q) - struct vlan_info __rcu *vlan_info; /* VLAN info */ + struct vlan_info __rcu *vlan_info; #endif #if IS_ENABLED(CONFIG_NET_DSA) - struct dsa_switch_tree *dsa_ptr; /* dsa specific data */ + struct dsa_switch_tree *dsa_ptr; #endif #if IS_ENABLED(CONFIG_TIPC) - struct tipc_bearer __rcu *tipc_ptr; /* TIPC specific data */ + struct tipc_bearer __rcu *tipc_ptr; #endif - void *atalk_ptr; /* AppleTalk link */ - struct in_device __rcu *ip_ptr; /* IPv4 specific data */ - struct dn_dev __rcu *dn_ptr; /* DECnet specific data */ - struct inet6_dev __rcu *ip6_ptr; /* IPv6 specific data */ - void *ax25_ptr; /* AX.25 specific data */ - struct wireless_dev *ieee80211_ptr; /* IEEE 802.11 specific data, - assign before registering */ + void *atalk_ptr; + struct in_device __rcu *ip_ptr; + struct dn_dev __rcu *dn_ptr; + struct inet6_dev __rcu *ip6_ptr; + void *ax25_ptr; + struct wireless_dev *ieee80211_ptr; /* * Cache lines mostly used on receive path (including eth_type_trans()) */ - unsigned long last_rx; /* Time of last Rx */ + unsigned long last_rx; /* Interface address info used in eth_type_trans() */ - unsigned char *dev_addr; /* hw address, (before bcast - because most packets are - unicast) */ + unsigned char *dev_addr; #ifdef CONFIG_SYSFS struct netdev_rx_queue *_rx; - /* Number of RX queues allocated at register_netdev() time */ unsigned int num_rx_queues; - - /* Number of RX queues currently active in device */ unsigned int real_num_rx_queues; #endif @@ -1429,33 +1586,23 @@ struct net_device { void __rcu *rx_handler_data; struct netdev_queue __rcu *ingress_queue; - unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ + unsigned char broadcast[MAX_ADDR_LEN]; /* * Cache lines mostly used on transmit path */ struct netdev_queue *_tx ____cacheline_aligned_in_smp; - - /* Number of TX queues allocated at alloc_netdev_mq() time */ unsigned int num_tx_queues; - - /* Number of TX queues currently active in device */ unsigned int real_num_tx_queues; - - /* root qdisc from userspace point of view */ struct Qdisc *qdisc; - - unsigned long tx_queue_len; /* Max frames per queue allowed */ + unsigned long tx_queue_len; spinlock_t tx_global_lock; #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_maps; #endif #ifdef CONFIG_RFS_ACCEL - /* CPU reverse-mapping for RX completion interrupts, indexed - * by RX queue number. Assigned by driver. This must only be - * set if the ndo_rx_flow_steer operation is defined. */ struct cpu_rmap *rx_cpu_rmap; #endif @@ -1465,22 +1612,17 @@ struct net_device { * trans_start here is expensive for high speed devices on SMP, * please use netdev_queue->trans_start instead. */ - unsigned long trans_start; /* Time (in jiffies) of last Tx */ + unsigned long trans_start; - int watchdog_timeo; /* used by dev_watchdog() */ + int watchdog_timeo; struct timer_list watchdog_timer; - /* Number of references to this device */ int __percpu *pcpu_refcnt; - - /* delayed register/unregister */ struct list_head todo_list; - /* device index hash chain */ - struct hlist_node index_hlist; + struct hlist_node index_hlist; struct list_head link_watch_list; - /* register/unregister state machine */ enum { NETREG_UNINITIALIZED=0, NETREG_REGISTERED, /* completed register_netdevice */ NETREG_UNREGISTERING, /* called unregister_netdevice */ @@ -1489,14 +1631,13 @@ struct net_device { NETREG_DUMMY, /* dummy device for NAPI poll */ } reg_state:8; - bool dismantle; /* device is going do be freed */ + bool dismantle; enum { RTNL_LINK_INITIALIZED, RTNL_LINK_INITIALIZING, } rtnl_link_state:16; - /* Called from unregister, can be used to call free_netdev */ void (*destructor)(struct net_device *dev); #ifdef CONFIG_NETPOLL @@ -1504,31 +1645,25 @@ struct net_device { #endif #ifdef CONFIG_NET_NS - /* Network namespace this network device is inside */ struct net *nd_net; #endif /* mid-layer private */ union { - void *ml_priv; - struct pcpu_lstats __percpu *lstats; /* loopback stats */ + void *ml_priv; + struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; - struct pcpu_dstats __percpu *dstats; /* dummy stats */ - struct pcpu_vstats __percpu *vstats; /* veth stats */ + struct pcpu_dstats __percpu *dstats; + struct pcpu_vstats __percpu *vstats; }; - /* GARP */ + struct garp_port __rcu *garp_port; - /* MRP */ struct mrp_port __rcu *mrp_port; - /* class/net/name entry */ - struct device dev; - /* space for optional device, statistics, and wireless sysfs groups */ + struct device dev; const struct attribute_group *sysfs_groups[4]; - /* space for optional per-rx queue attributes */ const struct attribute_group *sysfs_rx_queue_group; - /* rtnetlink link ops */ const struct rtnl_link_ops *rtnl_link_ops; /* for setting kernel sock attribute on TCP connection setup */ @@ -1536,9 +1671,8 @@ struct net_device { unsigned int gso_max_size; #define GSO_MAX_SEGS 65535 u16 gso_max_segs; - + u16 gso_min_segs; #ifdef CONFIG_DCB - /* Data Center Bridging netlink ops */ const struct dcbnl_rtnl_ops *dcbnl_ops; #endif u8 num_tc; @@ -1546,20 +1680,14 @@ struct net_device { u8 prio_tc_map[TC_BITMASK + 1]; #if IS_ENABLED(CONFIG_FCOE) - /* max exchange id for FCoE LRO by ddp */ unsigned int fcoe_ddp_xid; #endif #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif - /* phy device may attach itself for hardware timestamping */ struct phy_device *phydev; - struct lock_class_key *qdisc_tx_busylock; - - /* group the device belongs to */ int group; - struct pm_qos_request pm_qos_req; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -1624,6 +1752,12 @@ struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, return &dev->_tx[index]; } +static inline struct netdev_queue *skb_get_tx_queue(const struct net_device *dev, + const struct sk_buff *skb) +{ + return netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); +} + static inline void netdev_for_each_tx_queue(struct net_device *dev, void (*f)(struct net_device *, struct netdev_queue *, @@ -1658,24 +1792,13 @@ void dev_net_set(struct net_device *dev, struct net *net) #endif } -static inline bool netdev_uses_dsa_tags(struct net_device *dev) -{ -#ifdef CONFIG_NET_DSA_TAG_DSA - if (dev->dsa_ptr != NULL) - return dsa_uses_dsa_tags(dev->dsa_ptr); -#endif - - return 0; -} - -static inline bool netdev_uses_trailer_tags(struct net_device *dev) +static inline bool netdev_uses_dsa(struct net_device *dev) { -#ifdef CONFIG_NET_DSA_TAG_TRAILER +#if IS_ENABLED(CONFIG_NET_DSA) if (dev->dsa_ptr != NULL) - return dsa_uses_trailer_tags(dev->dsa_ptr); + return dsa_uses_tagged_protocol(dev->dsa_ptr); #endif - - return 0; + return false; } /** @@ -1756,11 +1879,20 @@ struct napi_gro_cb { /* jiffies when first packet was created/queued */ unsigned long age; - /* Used in ipv6_gro_receive() */ + /* Used in ipv6_gro_receive() and foo-over-udp */ u16 proto; /* Used in udp_gro_receive */ - u16 udp_mark; + u8 udp_mark:1; + + /* GRO checksum is valid */ + u8 csum_valid:1; + + /* Number of checksums via CHECKSUM_UNNECESSARY */ + u8 csum_cnt:3; + + /* Used in foo-over-udp, set in udp[46]_gro_receive */ + u8 is_ipv6:1; /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -1787,7 +1919,6 @@ struct packet_type { struct offload_callbacks { struct sk_buff *(*gso_segment)(struct sk_buff *skb, netdev_features_t features); - int (*gso_send_check)(struct sk_buff *skb); struct sk_buff **(*gro_receive)(struct sk_buff **head, struct sk_buff *skb); int (*gro_complete)(struct sk_buff *skb, int nhoff); @@ -1801,6 +1932,7 @@ struct packet_offload { struct udp_offload { __be16 port; + u8 ipproto; struct offload_callbacks callbacks; }; @@ -1859,6 +1991,7 @@ struct pcpu_sw_netstats { #define NETDEV_CHANGEUPPER 0x0015 #define NETDEV_RESEND_IGMP 0x0016 #define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */ +#define NETDEV_CHANGEINFODATA 0x0018 int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); @@ -1951,8 +2084,8 @@ void __dev_remove_pack(struct packet_type *pt); void dev_add_offload(struct packet_offload *po); void dev_remove_offload(struct packet_offload *po); -struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, - unsigned short mask); +struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, + unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); @@ -2030,11 +2163,97 @@ static inline void *skb_gro_network_header(struct sk_buff *skb) static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - if (skb->ip_summed == CHECKSUM_COMPLETE) + if (NAPI_GRO_CB(skb)->csum_valid) NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum, csum_partial(start, len, 0)); } +/* GRO checksum functions. These are logical equivalents of the normal + * checksum functions (in skbuff.h) except that they operate on the GRO + * offsets and fields in sk_buff. + */ + +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb); + +static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, + bool zero_okay, + __sum16 check) +{ + return (skb->ip_summed != CHECKSUM_PARTIAL && + NAPI_GRO_CB(skb)->csum_cnt == 0 && + (!zero_okay || check)); +} + +static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, + __wsum psum) +{ + if (NAPI_GRO_CB(skb)->csum_valid && + !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) + return 0; + + NAPI_GRO_CB(skb)->csum = psum; + + return __skb_gro_checksum_complete(skb); +} + +static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->csum_cnt > 0) { + /* Consume a checksum from CHECKSUM_UNNECESSARY */ + NAPI_GRO_CB(skb)->csum_cnt--; + } else { + /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we + * verified a new top level checksum or an encapsulated one + * during GRO. This saves work if we fallback to normal path. + */ + __skb_incr_checksum_unnecessary(skb); + } +} + +#define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ + compute_pseudo) \ +({ \ + __sum16 __ret = 0; \ + if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ + __ret = __skb_gro_checksum_validate_complete(skb, \ + compute_pseudo(skb, proto)); \ + if (__ret) \ + __skb_mark_checksum_bad(skb); \ + else \ + skb_gro_incr_csum_unnecessary(skb); \ + __ret; \ +}) + +#define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) + +#define skb_gro_checksum_validate_zero_check(skb, proto, check, \ + compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) + +#define skb_gro_checksum_simple_validate(skb) \ + __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) + +static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->csum_cnt == 0 && + !NAPI_GRO_CB(skb)->csum_valid); +} + +static inline void __skb_gro_checksum_convert(struct sk_buff *skb, + __sum16 check, __wsum pseudo) +{ + NAPI_GRO_CB(skb)->csum = ~pseudo; + NAPI_GRO_CB(skb)->csum_valid = 1; +} + +#define skb_gro_checksum_try_convert(skb, proto, check, compute_pseudo) \ +do { \ + if (__skb_gro_checksum_convert_check(skb)) \ + __skb_gro_checksum_convert(skb, check, \ + compute_pseudo(skb, proto)); \ +} while (0) + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, @@ -2138,12 +2357,7 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd, DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); void __netif_schedule(struct Qdisc *q); - -static inline void netif_schedule_queue(struct netdev_queue *txq) -{ - if (!(txq->state & QUEUE_STATE_ANY_XOFF)) - __netif_schedule(txq->qdisc); -} +void netif_schedule_queue(struct netdev_queue *txq); static inline void netif_tx_schedule_all(struct net_device *dev) { @@ -2179,11 +2393,7 @@ static inline void netif_tx_start_all_queues(struct net_device *dev) } } -static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) -{ - if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) - __netif_schedule(dev_queue->qdisc); -} +void netif_tx_wake_queue(struct netdev_queue *dev_queue); /** * netif_wake_queue - restart transmit @@ -2271,6 +2481,34 @@ netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue) return dev_queue->state & QUEUE_STATE_DRV_XOFF_OR_FROZEN; } +/** + * netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write + * @dev_queue: pointer to transmit queue + * + * BQL enabled drivers might use this helper in their ndo_start_xmit(), + * to give appropriate hint to the cpu. + */ +static inline void netdev_txq_bql_enqueue_prefetchw(struct netdev_queue *dev_queue) +{ +#ifdef CONFIG_BQL + prefetchw(&dev_queue->dql.num_queued); +#endif +} + +/** + * netdev_txq_bql_complete_prefetchw - prefetch bql data for write + * @dev_queue: pointer to transmit queue + * + * BQL enabled drivers might use this helper in their TX completion path, + * to give appropriate hint to the cpu. + */ +static inline void netdev_txq_bql_complete_prefetchw(struct netdev_queue *dev_queue) +{ +#ifdef CONFIG_BQL + prefetchw(&dev_queue->dql.limit); +#endif +} + static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, unsigned int bytes) { @@ -2455,19 +2693,7 @@ static inline bool netif_subqueue_stopped(const struct net_device *dev, return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb)); } -/** - * netif_wake_subqueue - allow sending packets on subqueue - * @dev: network device - * @queue_index: sub queue index - * - * Resume individual transmit queue of a device with multiple transmit queues. - */ -static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) -{ - struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); - if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) - __netif_schedule(txq->qdisc); -} +void netif_wake_subqueue(struct net_device *dev, u16 queue_index); #ifdef CONFIG_XPS int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, @@ -2486,7 +2712,7 @@ static inline int netif_set_xps_queue(struct net_device *dev, * as a distribution range limit for the returned value. */ static inline u16 skb_tx_hash(const struct net_device *dev, - const struct sk_buff *skb) + struct sk_buff *skb) { return __skb_tx_hash(dev, skb, dev->real_num_tx_queues); } @@ -2631,8 +2857,9 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *); int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_port_id *ppid); -int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq); +struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); +struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq, int *ret); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb); @@ -2987,13 +3214,15 @@ void ether_setup(struct net_device *dev); /* Support for loadable net-drivers */ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, + unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs); -#define alloc_netdev(sizeof_priv, name, setup) \ - alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1) +#define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ + alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1) -#define alloc_netdev_mq(sizeof_priv, name, setup, count) \ - alloc_netdev_mqs(sizeof_priv, name, setup, count, count) +#define alloc_netdev_mq(sizeof_priv, name, name_assign_type, setup, count) \ + alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, count, \ + count) int register_netdev(struct net_device *dev); void unregister_netdev(struct net_device *dev); @@ -3051,7 +3280,7 @@ static inline int __dev_uc_sync(struct net_device *dev, } /** - * __dev_uc_unsync - Remove synchonized addresses from device + * __dev_uc_unsync - Remove synchronized addresses from device * @dev: device to sync * @unsync: function to call if address should be removed * @@ -3095,7 +3324,7 @@ static inline int __dev_mc_sync(struct net_device *dev, } /** - * __dev_mc_unsync - Remove synchonized addresses from device + * __dev_mc_unsync - Remove synchronized addresses from device * @dev: device to sync * @unsync: function to call if address should be removed * @@ -3232,6 +3461,27 @@ int __init dev_proc_init(void); #define dev_proc_init() 0 #endif +static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, + struct sk_buff *skb, struct net_device *dev, + bool more) +{ + skb->xmit_more = more ? 1 : 0; + return ops->ndo_start_xmit(skb, dev); +} + +static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq, bool more) +{ + const struct net_device_ops *ops = dev->netdev_ops; + int rc; + + rc = __netdev_start_xmit(ops, skb, dev, more); + if (rc == NETDEV_TX_OK) + txq_trans_update(txq); + + return rc; +} + int netdev_class_create_file_ns(struct class_attribute *class_attr, const void *ns); void netdev_class_remove_file_ns(struct class_attribute *class_attr, @@ -3369,6 +3619,12 @@ static inline bool netif_supports_nofcs(struct net_device *dev) return dev->priv_flags & IFF_SUPP_NOFCS; } +/* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ +static inline void netif_keep_dst(struct net_device *dev) +{ + dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM); +} + extern struct pernet_operations __net_initdata loopback_net_ops; /* Logging, debugging and troubleshooting/diagnostic helpers. */ @@ -3377,28 +3633,43 @@ extern struct pernet_operations __net_initdata loopback_net_ops; static inline const char *netdev_name(const struct net_device *dev) { - if (dev->reg_state != NETREG_REGISTERED) - return "(unregistered net_device)"; + if (!dev->name[0] || strchr(dev->name, '%')) + return "(unnamed net_device)"; return dev->name; } +static inline const char *netdev_reg_state(const struct net_device *dev) +{ + switch (dev->reg_state) { + case NETREG_UNINITIALIZED: return " (uninitialized)"; + case NETREG_REGISTERED: return ""; + case NETREG_UNREGISTERING: return " (unregistering)"; + case NETREG_UNREGISTERED: return " (unregistered)"; + case NETREG_RELEASED: return " (released)"; + case NETREG_DUMMY: return " (dummy)"; + } + + WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, dev->reg_state); + return " (unknown)"; +} + __printf(3, 4) -int netdev_printk(const char *level, const struct net_device *dev, - const char *format, ...); +void netdev_printk(const char *level, const struct net_device *dev, + const char *format, ...); __printf(2, 3) -int netdev_emerg(const struct net_device *dev, const char *format, ...); +void netdev_emerg(const struct net_device *dev, const char *format, ...); __printf(2, 3) -int netdev_alert(const struct net_device *dev, const char *format, ...); +void netdev_alert(const struct net_device *dev, const char *format, ...); __printf(2, 3) -int netdev_crit(const struct net_device *dev, const char *format, ...); +void netdev_crit(const struct net_device *dev, const char *format, ...); __printf(2, 3) -int netdev_err(const struct net_device *dev, const char *format, ...); +void netdev_err(const struct net_device *dev, const char *format, ...); __printf(2, 3) -int netdev_warn(const struct net_device *dev, const char *format, ...); +void netdev_warn(const struct net_device *dev, const char *format, ...); __printf(2, 3) -int netdev_notice(const struct net_device *dev, const char *format, ...); +void netdev_notice(const struct net_device *dev, const char *format, ...); __printf(2, 3) -int netdev_info(const struct net_device *dev, const char *format, ...); +void netdev_info(const struct net_device *dev, const char *format, ...); #define MODULE_ALIAS_NETDEV(device) \ MODULE_ALIAS("netdev-" device) @@ -3416,7 +3687,6 @@ do { \ ({ \ if (0) \ netdev_printk(KERN_DEBUG, __dev, format, ##args); \ - 0; \ }) #endif @@ -3438,7 +3708,8 @@ do { \ * file/line information and a backtrace. */ #define netdev_WARN(dev, format, args...) \ - WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args) + WARN(1, "netdevice: %s%s\n" format, netdev_name(dev), \ + netdev_reg_state(dev), ##args) /* netif printk helpers, similar to netdev_printk */ |