From 6e7333d315a768170a59ac771297ee0551bdddbf Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Mon, 1 Feb 2016 18:51:05 -0500 Subject: net: add rx_nohandler stat counter This adds an rx_nohandler stat counter, along with a sysfs statistics node, and copies the counter out via netlink as well. CC: "David S. Miller" CC: Eric Dumazet CC: Jiri Pirko CC: Daniel Borkmann CC: Tom Herbert CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 289c2314d766..78a20cec2a0a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1397,6 +1397,8 @@ enum netdev_priv_flags { * do not use this in drivers * @tx_dropped: Dropped packets by core network, * do not use this in drivers + * @rx_nohandler: nohandler dropped packets by core network on + * inactive devices, do not use this in drivers * * @wireless_handlers: List of functions to handle Wireless Extensions, * instead of ioctl, @@ -1611,6 +1613,7 @@ struct net_device { atomic_long_t rx_dropped; atomic_long_t tx_dropped; + atomic_long_t rx_nohandler; #ifdef CONFIG_WIRELESS_EXT const struct iw_handler_def * wireless_handlers; -- cgit v1.2.3 From ba905f5e2f63d86ed4cfbd3d9096fb28d156f1ee Mon Sep 17 00:00:00 2001 From: Kim Jones Date: Tue, 2 Feb 2016 03:51:16 +0000 Subject: ethtool: Declare netdev_rss_key as __read_mostly. netdev_rss_key is written to once and thereafter is read by drivers when they are initialising. The fact that it is mostly read and not written to makes it a candidate for a __read_mostly declaration. Signed-off-by: Kim Jones Signed-off-by: Alan Carey Acked-by: Rami Rosen Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 78a20cec2a0a..219f53c30cb3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3744,7 +3744,7 @@ void netdev_lower_state_changed(struct net_device *lower_dev, /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 -extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; +extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; void netdev_rss_key_fill(void *buffer, size_t len); int dev_get_nest_level(struct net_device *dev, -- cgit v1.2.3 From d4ab4286276fcd6c155bafdf4422b712068d2516 Mon Sep 17 00:00:00 2001 From: "Keller, Jacob E" Date: Mon, 8 Feb 2016 16:05:03 -0800 Subject: ethtool: correctly ensure {GS}CHANNELS doesn't conflict with GS{RXFH} Ethernet drivers implementing both {GS}RXFH and {GS}CHANNELS ethtool ops incorrectly allow SCHANNELS when it would conflict with the settings from SRXFH. This occurs because it is not possible for drivers to understand whether their Rx flow indirection table has been configured or is in the default state. In addition, drivers currently behave in various ways when increasing the number of Rx channels. Some drivers will always destroy the Rx flow indirection table when this occurs, whether it has been set by the user or not. Other drivers will attempt to preserve the table even if the user has never modified it from the default driver settings. Neither of these situation is desirable because it leads to unexpected behavior or loss of user configuration. The correct behavior is to simply return -EINVAL when SCHANNELS would conflict with the current Rx flow table settings. However, it should only do so if the current settings were modified by the user. If we required that the new settings never conflict with the current (default) Rx flow settings, we would force users to first reduce their Rx flow settings and then reduce the number of Rx channels. This patch proposes a solution implemented in net/core/ethtool.c which ensures that all drivers behave correctly. It checks whether the RXFH table has been configured to non-default settings, and stores this information in a private netdev flag. When the number of channels is requested to change, it first ensures that the current Rx flow table is not going to assign flows to now disabled channels. Signed-off-by: Jacob Keller Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 219f53c30cb3..0499569c256d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1291,6 +1291,7 @@ struct net_device_ops { * @IFF_OPENVSWITCH: device is a Open vSwitch master * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device * @IFF_TEAM: device is a team device + * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1318,6 +1319,7 @@ enum netdev_priv_flags { IFF_OPENVSWITCH = 1<<22, IFF_L3MDEV_SLAVE = 1<<23, IFF_TEAM = 1<<24, + IFF_RXFH_CONFIGURED = 1<<25, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1345,6 +1347,7 @@ enum netdev_priv_flags { #define IFF_OPENVSWITCH IFF_OPENVSWITCH #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE #define IFF_TEAM IFF_TEAM +#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED /** * struct net_device - The DEVICE structure. @@ -4048,6 +4051,11 @@ static inline bool netif_is_lag_port(const struct net_device *dev) return netif_is_bond_slave(dev) || netif_is_team_port(dev); } +static inline bool netif_is_rxfh_configured(const struct net_device *dev) +{ + return dev->priv_flags & IFF_RXFH_CONFIGURED; +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { -- cgit v1.2.3 From e4c6734eaab90695db0ea8456307790cb0c1ccb5 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Feb 2016 21:16:15 -0800 Subject: net: rework ndo tc op to consume additional qdisc handle parameter The ndo_setup_tc() op was added to support drivers offloading tx qdiscs however only support for mqprio was ever added. So we only ever added support for passing the number of traffic classes to the driver. This patch generalizes the ndo_setup_tc op so that a handle can be provided to indicate if the offload is for ingress or egress or potentially even child qdiscs. CC: Murali Karicheri CC: Shradha Shah CC: Or Gerlitz CC: Ariel Elior CC: Jeff Kirsher CC: Bruce Allan CC: Jesse Brandeburg CC: Don Skidmore Signed-off-by: John Fastabend Acked-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0499569c256d..48928b6f9cb6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -51,6 +51,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -1150,7 +1151,7 @@ struct net_device_ops { int (*ndo_set_vf_rss_query_en)( struct net_device *dev, int vf, bool setting); - int (*ndo_setup_tc)(struct net_device *dev, u8 tc); + int (*ndo_setup_tc)(struct net_device *dev, u32 handle, u8 tc); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev); -- cgit v1.2.3 From 16e5cc647173a97e33b3e3ba81f73eb455561794 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Feb 2016 21:16:43 -0800 Subject: net: rework setup_tc ndo op to consume general tc operand This patch updates setup_tc so we can pass additional parameters into the ndo op in a generic way. To do this we provide structured union and type flag. This lets each classifier and qdisc provide its own set of attributes without having to add new ndo ops or grow the signature of the callback. Signed-off-by: John Fastabend Acked-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netdevice.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 48928b6f9cb6..e396060f815f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -779,6 +779,21 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); +/* This structure holds attributes of qdisc and classifiers + * that are being passed to the netdevice through the setup_tc op. + */ +enum { + TC_SETUP_MQPRIO, +}; + +struct tc_to_netdev { + unsigned int type; + union { + u8 tc; + }; +}; + + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1151,7 +1166,10 @@ struct net_device_ops { int (*ndo_set_vf_rss_query_en)( struct net_device *dev, int vf, bool setting); - int (*ndo_setup_tc)(struct net_device *dev, u32 handle, u8 tc); + int (*ndo_setup_tc)(struct net_device *dev, + u32 handle, + __be16 protocol, + struct tc_to_netdev *tc); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev); -- cgit v1.2.3 From a1b7c5fd7fe98f51fbbc393ee1fc4c1cdb2f0119 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Feb 2016 21:17:09 -0800 Subject: net: sched: add cls_u32 offload hooks for netdevs This patch allows netdev drivers to consume cls_u32 offloads via the ndo_setup_tc ndo op. This works aligns with how network drivers have been doing qdisc offloads for mqprio. Signed-off-by: John Fastabend Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e396060f815f..47671ce04ac4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -779,17 +779,21 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); -/* This structure holds attributes of qdisc and classifiers +/* These structures hold the attributes of qdisc and classifiers * that are being passed to the netdevice through the setup_tc op. */ enum { TC_SETUP_MQPRIO, + TC_SETUP_CLSU32, }; +struct tc_cls_u32_offload; + struct tc_to_netdev { unsigned int type; union { u8 tc; + struct tc_cls_u32_offload *cls_u32; }; }; -- cgit v1.2.3 From 871b642adebe300be2e50aa5f65a418510f636ec Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 26 Feb 2016 10:45:37 +0100 Subject: netdev: introduce ndo_set_rx_headroom This method allows the controlling device (i.e. the bridge) to specify additional headroom to be allocated for skb head on frame reception. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/netdevice.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e52077ffe5ed..efe7cec111fa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1093,6 +1093,12 @@ struct tc_to_netdev { * This function is used to get egress tunnel information for given skb. * This is useful for retrieving outer tunnel header parameters while * sampling packet. + * void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); + * This function is used to specify the headroom that the skb must + * consider when allocation skb during packet reception. Setting + * appropriate rx headroom value allows avoiding skb head copy on + * forward. Setting a negative value reset the rx headroom to the + * default value. * */ struct net_device_ops { @@ -1278,6 +1284,8 @@ struct net_device_ops { bool proto_down); int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); + void (*ndo_set_rx_headroom)(struct net_device *dev, + int needed_headroom); }; /** @@ -1315,6 +1323,8 @@ struct net_device_ops { * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device * @IFF_TEAM: device is a team device * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured + * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external + * entity (i.e. the master device for bridged veth) */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1343,6 +1353,7 @@ enum netdev_priv_flags { IFF_L3MDEV_SLAVE = 1<<23, IFF_TEAM = 1<<24, IFF_RXFH_CONFIGURED = 1<<25, + IFF_PHONY_HEADROOM = 1<<26, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1937,6 +1948,26 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, void *accel_priv); +/* returns the headroom that the master device needs to take in account + * when forwarding to this dev + */ +static inline unsigned netdev_get_fwd_headroom(struct net_device *dev) +{ + return dev->priv_flags & IFF_PHONY_HEADROOM ? 0 : dev->needed_headroom; +} + +static inline void netdev_set_rx_headroom(struct net_device *dev, int new_hr) +{ + if (dev->netdev_ops->ndo_set_rx_headroom) + dev->netdev_ops->ndo_set_rx_headroom(dev, new_hr); +} + +/* set the device rx headroom to the dev's default */ +static inline void netdev_reset_rx_headroom(struct net_device *dev) +{ + netdev_set_rx_headroom(dev, -1); +} + /* * Net namespace inlines */ -- cgit v1.2.3 From 2793a23aacbd754dbbb5cb75093deb7e4103bace Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 9 Mar 2016 21:58:32 -0500 Subject: net: validate variable length ll headers Netdevice parameter hard_header_len is variously interpreted both as an upper and lower bound on link layer header length. The field is used as upper bound when reserving room at allocation, as lower bound when validating user input in PF_PACKET. Clarify the definition to be maximum header length. For validation of untrusted headers, add an optional validate member to header_ops. Allow bypassing of validation by passing CAP_SYS_RAWIO, for instance for deliberate testing of corrupt input. In this case, pad trailing bytes, as some device drivers expect completely initialized headers. See also http://comments.gmane.org/gmane.linux.network/401064 Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/netdevice.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index efe7cec111fa..fd30cb545c45 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -268,6 +268,7 @@ struct header_ops { void (*cache_update)(struct hh_cache *hh, const struct net_device *dev, const unsigned char *haddr); + bool (*validate)(const char *ll_header, unsigned int len); }; /* These flag bits are private to the generic network queueing @@ -1459,8 +1460,7 @@ enum netdev_priv_flags { * @dma: DMA channel * @mtu: Interface MTU value * @type: Interface hardware type - * @hard_header_len: Hardware header length, which means that this is the - * minimum size of a packet. + * @hard_header_len: Maximum hardware header length. * * @needed_headroom: Extra headroom the hardware may need, but not in all * cases can this be guaranteed @@ -2687,6 +2687,24 @@ static inline int dev_parse_header(const struct sk_buff *skb, return dev->header_ops->parse(skb, haddr); } +/* ll_header must have at least hard_header_len allocated */ +static inline bool dev_validate_header(const struct net_device *dev, + char *ll_header, int len) +{ + if (likely(len >= dev->hard_header_len)) + return true; + + if (capable(CAP_SYS_RAWIO)) { + memset(ll_header + len, 0, dev->hard_header_len - len); + return true; + } + + if (dev->header_ops && dev->header_ops->validate) + return dev->header_ops->validate(ll_header, len); + + return false; +} + typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); int register_gifconf(unsigned int family, gifconf_func_t *gifconf); static inline int unregister_gifconf(unsigned int family) -- cgit v1.2.3 From 5b33f48842fa1e13e9c0ea8cc59c1d0df19042db Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Tue, 8 Mar 2016 12:42:29 +0200 Subject: net/flower: Introduce hardware offload support This patch is based on a patch made by John Fastabend. It adds support for offloading cls_flower. when NETIF_F_HW_TC is on: flags = 0 => Rule will be processed twice - by hardware, and if still relevant, by software. flags = SKIP_HW => Rull will be processed by software only If hardware fail/not capabale to apply the rule, operation will NOT fail. Filter will be processed by SW only. Acked-by: Jiri Pirko Suggested-by: John Fastabend Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fd30cb545c45..41df0b450757 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -786,6 +786,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, enum { TC_SETUP_MQPRIO, TC_SETUP_CLSU32, + TC_SETUP_CLSFLOWER, }; struct tc_cls_u32_offload; @@ -795,6 +796,7 @@ struct tc_to_netdev { union { u8 tc; struct tc_cls_u32_offload *cls_u32; + struct tc_cls_flower_offload *cls_flower; }; }; -- cgit v1.2.3 From 3c17578473b9be5a6e7680a45ea97e1d56e13249 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 11 Mar 2016 18:07:32 +0100 Subject: net: add MACsec netdevice priv_flags and helper Signed-off-by: Sabrina Dubroca Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 41df0b450757..be693b34662f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1328,6 +1328,7 @@ struct net_device_ops { * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external * entity (i.e. the master device for bridged veth) + * @IFF_MACSEC: device is a MACsec device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1357,6 +1358,7 @@ enum netdev_priv_flags { IFF_TEAM = 1<<24, IFF_RXFH_CONFIGURED = 1<<25, IFF_PHONY_HEADROOM = 1<<26, + IFF_MACSEC = 1<<27, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1385,6 +1387,7 @@ enum netdev_priv_flags { #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE #define IFF_TEAM IFF_TEAM #define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED +#define IFF_MACSEC IFF_MACSEC /** * struct net_device - The DEVICE structure. @@ -4045,6 +4048,11 @@ static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, skb->mac_len = mac_len; } +static inline bool netif_is_macsec(const struct net_device *dev) +{ + return dev->priv_flags & IFF_MACSEC; +} + static inline bool netif_is_macvlan(const struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN; -- cgit v1.2.3