From 61e84623ace35ce48975e8f90bbbac7557c43d61 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 7 Oct 2016 22:04:33 -0400 Subject: net: centralize net_device min/max MTU checking While looking into an MTU issue with sfc, I started noticing that almost every NIC driver with an ndo_change_mtu function implemented almost exactly the same range checks, and in many cases, that was the only practical thing their ndo_change_mtu function was doing. Quite a few drivers have either 68, 64, 60 or 46 as their minimum MTU value checked, and then various sizes from 1500 to 65535 for their maximum MTU value. We can remove a whole lot of redundant code here if we simply store min_mtu and max_mtu in net_device, and check against those in net/core/dev.c's dev_set_mtu(). In theory, there should be zero functional change with this patch; it just puts the infrastructure in place. Subsequent patches will attempt to start using said infrastructure, with theoretically zero change in functionality. CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 136ae6bbe81e..fbdf923af4d3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1506,6 +1506,8 @@ enum netdev_priv_flags { * @if_port: Selectable AUI, TP, ... * @dma: DMA channel * @mtu: Interface MTU value + * @min_mtu: Interface Minimum MTU value + * @max_mtu: Interface Maximum MTU value * @type: Interface hardware type * @hard_header_len: Maximum hardware header length. * @@ -1726,6 +1728,8 @@ struct net_device { unsigned char dma; unsigned int mtu; + unsigned int min_mtu; + unsigned int max_mtu; unsigned short type; unsigned short hard_header_len; -- cgit v1.2.3 From cf53b1da73bdf940f1523ec5a7d375d7056c759c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 11 Oct 2016 13:04:09 -0700 Subject: Revert "net: Add driver helper functions to determine checksum offloadability" This reverts commit 6ae23ad36253a8033c5714c52b691b84456487c5. The code has been in the kernel since 4.4, but there is no in-tree code that uses it. Unused code is broken code; remove it. Signed-off-by: Stephen Hemminger Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 78 ----------------------------------------------- 1 file changed, 78 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fbdf923af4d3..bf341b65ca5e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2653,71 +2653,6 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, remcsum_unadjust((__sum16 *)ptr, grc->delta); } -struct skb_csum_offl_spec { - __u16 ipv4_okay:1, - ipv6_okay:1, - encap_okay:1, - ip_options_okay:1, - ext_hdrs_okay:1, - tcp_okay:1, - udp_okay:1, - sctp_okay:1, - vlan_okay:1, - no_encapped_ipv6:1, - no_not_encapped:1; -}; - -bool __skb_csum_offload_chk(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec, - bool *csum_encapped, - bool csum_help); - -static inline bool skb_csum_offload_chk(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec, - bool *csum_encapped, - bool csum_help) -{ - if (skb->ip_summed != CHECKSUM_PARTIAL) - return false; - - return __skb_csum_offload_chk(skb, spec, csum_encapped, csum_help); -} - -static inline bool skb_csum_offload_chk_help(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec) -{ - bool csum_encapped; - - return skb_csum_offload_chk(skb, spec, &csum_encapped, true); -} - -static inline bool skb_csum_off_chk_help_cmn(struct sk_buff *skb) -{ - static const struct skb_csum_offl_spec csum_offl_spec = { - .ipv4_okay = 1, - .ip_options_okay = 1, - .ipv6_okay = 1, - .vlan_okay = 1, - .tcp_okay = 1, - .udp_okay = 1, - }; - - return skb_csum_offload_chk_help(skb, &csum_offl_spec); -} - -static inline bool skb_csum_off_chk_help_cmn_v4_only(struct sk_buff *skb) -{ - static const struct skb_csum_offl_spec csum_offl_spec = { - .ipv4_okay = 1, - .ip_options_okay = 1, - .tcp_okay = 1, - .udp_okay = 1, - .vlan_okay = 1, - }; - - return skb_csum_offload_chk_help(skb, &csum_offl_spec); -} - static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, @@ -3961,19 +3896,6 @@ static inline bool can_checksum_protocol(netdev_features_t features, } } -/* Map an ethertype into IP protocol if possible */ -static inline int eproto_to_ipproto(int eproto) -{ - switch (eproto) { - case htons(ETH_P_IP): - return IPPROTO_IP; - case htons(ETH_P_IPV6): - return IPPROTO_IPV6; - default: - return -1; - } -} - #ifdef CONFIG_BUG void netdev_rx_csum_fault(struct net_device *dev); #else -- cgit v1.2.3 From c3aaa403840a5ccd305fb5e73f3cbfac6453b5e5 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 14 Oct 2016 05:19:17 -0400 Subject: qed: Pass MAC hints to VFs Some hypervisors can support MAC hints to their VFs. Even though we don't have such a hypervisor API in linux, we add sufficient logic for the VF to be able to receive such hints and set the mac accordingly - as long as the VF has not been set with a MAC already. Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_eth_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 33c24ebc9b7f..1c779486c30d 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -129,7 +129,7 @@ struct qed_tunn_params { struct qed_eth_cb_ops { struct qed_common_cb_ops common; - void (*force_mac) (void *dev, u8 *mac); + void (*force_mac) (void *dev, u8 *mac, bool forced); }; #ifdef CONFIG_DCB -- cgit v1.2.3 From 7b7e70f979e34ed84d725eab8ea42921ab6f42e3 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 14 Oct 2016 05:19:20 -0400 Subject: qed*: Allow unicast filtering Apparently qede fails to set IFF_UNICAST_FLT, and as a result is not actually performing unicast MAC filtering. While we're at it - relax a hard-coded limitation that limits each interface into using at most 15 unicast MAC addresses before turning promiscuous. Instead utilize the HW resources to their limit. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 1c779486c30d..15130805d792 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -23,6 +23,7 @@ struct qed_dev_eth_info { u8 port_mac[ETH_ALEN]; u8 num_vlan_filters; + u16 num_mac_filters; /* Legacy VF - this affects the datapath, so qede has to know */ bool is_legacy; -- cgit v1.2.3 From 664fcf123a30edf16b47d2ce1f610d654ba917b2 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 16 Oct 2016 19:56:51 +0200 Subject: net: phy: Threaded interrupts allow some simplification The PHY interrupts are now handled in a threaded interrupt handler, which can sleep. The work queue is no longer needed, phy_change() can be called directly. phy_mac_interrupt() still needs to be safe to call in interrupt context, so keep the work queue, and use a helper to call phy_change(). Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index e25f1830fbcf..c47378c93607 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -343,7 +343,7 @@ struct phy_c45_device_ids { * giving up on the current attempt at acquiring a link * irq: IRQ number of the PHY's interrupt (-1 if none) * phy_timer: The timer for handling the state machine - * phy_queue: A work_queue for the interrupt + * phy_queue: A work_queue for the phy_mac_interrupt * attached_dev: The attached enet driver's device instance ptr * adjust_link: Callback for the enet controller to respond to * changes in the link state. 
@@ -802,7 +802,8 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); void phy_state_machine(struct work_struct *work); -void phy_change(struct work_struct *work); +void phy_change(struct phy_device *phydev); +void phy_change_work(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev, int new_link); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); -- cgit v1.2.3 From 1a3f060c1a47dba4e12ac21ce62b57666b9c4e95 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 17 Oct 2016 19:15:44 -0700 Subject: net: Introduce new api for walking upper and lower devices This patch introduces netdev_walk_all_upper_dev_rcu, netdev_walk_all_lower_dev and netdev_walk_all_lower_dev_rcu. These functions recursively walk the adj_list of devices to determine all upper and lower devices. The functions take a callback function that is invoked for each device in the list. If the callback returns non-0, the walk is terminated and the functions return that code back to callers. v3 - simplified netdev_has_upper_dev_all_rcu and __netdev_has_upper_dev and removed typecast as suggested by Stephen v2 - fixed definition of netdev_next_lower_dev_rcu to mirror the upper_dev version. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bf341b65ca5e..a5902d995907 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3778,6 +3778,14 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, updev; \ updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter))) +int netdev_walk_all_upper_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *upper_dev, + void *data), + void *data); + +bool netdev_has_upper_dev_all_rcu(struct net_device *dev, + struct net_device *upper_dev); + void *netdev_lower_get_next_private(struct net_device *dev, struct list_head **iter); void *netdev_lower_get_next_private_rcu(struct net_device *dev, @@ -3821,6 +3829,15 @@ struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, ldev; \ ldev = netdev_all_lower_get_next_rcu(dev, &(iter))) +int netdev_walk_all_lower_dev(struct net_device *dev, + int (*fn)(struct net_device *lower_dev, + void *data), + void *data); +int netdev_walk_all_lower_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *lower_dev, + void *data), + void *data); + void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); -- cgit v1.2.3 From f1170fd462c67c4ae2f20734566d94e0f8f62f69 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 17 Oct 2016 19:15:51 -0700 Subject: net: Remove all_adj_list and its references Only direct adjacencies are maintained. All upper or lower devices can be learned via the new walk API which recursively walks the adj_list for upper devices or lower devices. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a5902d995907..458c87631e7f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1456,7 +1456,6 @@ enum netdev_priv_flags { * @ptype_specific: Device-specific, protocol-specific packet handlers * * @adj_list: Directly linked devices, like slaves for bonding - * @all_adj_list: All linked devices, *including* neighbours * @features: Currently active device features * @hw_features: User-changeable features * @@ -1675,11 +1674,6 @@ struct net_device { struct list_head lower; } adj_list; - struct { - struct list_head upper; - struct list_head lower; - } all_adj_list; - netdev_features_t features; netdev_features_t hw_features; netdev_features_t wanted_features; @@ -3771,13 +3765,6 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, updev; \ updev = netdev_upper_get_next_dev_rcu(dev, &(iter))) -/* iterate through upper list, must be called under RCU read lock */ -#define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \ - for (iter = &(dev)->all_adj_list.upper, \ - updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)); \ - updev; \ - updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter))) - int netdev_walk_all_upper_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *upper_dev, void *data), @@ -3817,18 +3804,6 @@ struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, struct list_head **iter); -#define netdev_for_each_all_lower_dev(dev, ldev, iter) \ - for (iter = (dev)->all_adj_list.lower.next, \ - ldev = netdev_all_lower_get_next(dev, &(iter)); \ - ldev; \ - ldev = netdev_all_lower_get_next(dev, &(iter))) - -#define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \ - for (iter = (dev)->all_adj_list.lower.next, \ - ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \ - ldev; \ - ldev = netdev_all_lower_get_next_rcu(dev, &(iter))) - int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *lower_dev, void *data), -- cgit v1.2.3 From 1f9127caece42514a47011326b83ad93d95cd5d7 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 17 Oct 2016 10:49:54 -0500 Subject: net: phy: Create phy_supported_speeds function which lists speeds currently supported by a phydevice phy_supported_speeds provides a means to get a list of all the speeds a phy device currently supports. Signed-off-by: Zach Brown Signed-off-by: David S. Miller --- include/linux/phy.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index c47378c93607..4b6c246c63bb 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -84,6 +84,21 @@ typedef enum { PHY_INTERFACE_MODE_MAX, } phy_interface_t; +/** + * phy_supported_speeds - return all speeds currently supported by a phy device + * @phy: The phy device to return supported speeds of. + * @speeds: buffer to store supported speeds in. + * @size: size of speeds buffer. + * + * Description: Returns the number of supported speeds, and + * fills the speeds * buffer with the supported speeds. If speeds buffer is + * too small to contain * all currently supported speeds, will return as + * many speeds as can fit. 
+ */ +unsigned int phy_supported_speeds(struct phy_device *phy, + unsigned int *speeds, + unsigned int size); + /** * It maps 'enum phy_interface_t' found in include/linux/phy.h * into the device tree binding of 'phy-mode', so that Ethernet -- cgit v1.2.3 From 2e0bc452f4721520502575362a9cd3c1248d2337 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 17 Oct 2016 10:49:55 -0500 Subject: net: phy: leds: add support for led triggers on phy link state change Create an option CONFIG_LED_TRIGGER_PHY (default n), which will create a set of led triggers for each instantiated PHY device. There is one LED trigger per link-speed, per-phy. The triggers are registered during phy_attach and unregistered during phy_detach. This allows for a user to configure their system to allow a set of LEDs not controlled by the phy to represent link state changes on the phy. LEDS controlled by the phy are unaffected. For example, we have a board where some of the leds in the RJ45 socket are controlled by the phy, but others are not. Using the triggers provided by this patch the leds not controlled by the phy can be configured to show the current speed of the ethernet connection. The leds controlled by the phy are unaffected. Signed-off-by: Josh Cartwright Signed-off-by: Nathan Sullivan Signed-off-by: Zach Brown Signed-off-by: David S. Miller --- include/linux/phy.h | 7 ++++++ include/linux/phy_led_triggers.h | 51 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 include/linux/phy_led_triggers.h (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 4b6c246c63bb..e7e1fd382564 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -420,6 +421,12 @@ struct phy_device { int link_timeout; +#ifdef CONFIG_LED_TRIGGER_PHY + struct phy_led_trigger *phy_led_triggers; + unsigned int phy_num_led_triggers; + struct phy_led_trigger *last_triggered; +#endif + /* * Interrupt number for this PHY * -1 means no interrupt diff --git a/include/linux/phy_led_triggers.h b/include/linux/phy_led_triggers.h new file mode 100644 index 000000000000..a2daea0a37d2 --- /dev/null +++ b/include/linux/phy_led_triggers.h @@ -0,0 +1,51 @@ +/* Copyright (C) 2016 National Instruments Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#ifndef __PHY_LED_TRIGGERS +#define __PHY_LED_TRIGGERS + +struct phy_device; + +#ifdef CONFIG_LED_TRIGGER_PHY + +#include + +#define PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE 10 +#define PHY_MII_BUS_ID_SIZE (20 - 3) + +#define PHY_LINK_LED_TRIGGER_NAME_SIZE (PHY_MII_BUS_ID_SIZE + \ + FIELD_SIZEOF(struct mdio_device, addr)+\ + PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE) + +struct phy_led_trigger { + struct led_trigger trigger; + char name[PHY_LINK_LED_TRIGGER_NAME_SIZE]; + unsigned int speed; +}; + + +extern int phy_led_triggers_register(struct phy_device *phy); +extern void phy_led_triggers_unregister(struct phy_device *phy); +extern void phy_led_trigger_change_speed(struct phy_device *phy); + +#else + +static inline int phy_led_triggers_register(struct phy_device *phy) +{ + return 0; +} +static inline void phy_led_triggers_unregister(struct phy_device *phy) { } +static inline void phy_led_trigger_change_speed(struct phy_device *phy) { } + +#endif + +#endif -- cgit v1.2.3 From 9a97434215819872b054c3d0c067e5e4fa768b0e Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Mon, 17 Oct 2016 21:45:29 +0200 Subject: ARM: pxa: enhance smc91x platform data Instead of having the smc91x driver relying on machine_is_*() calls, provide this data through platform data, ie. idp, mainstone and stargate. This way, the driver doesn't need anymore machine_is_*() calls, which wouldn't work anymore with a device-tree build. Signed-off-by: Robert Jarzmik Signed-off-by: David S. Miller --- include/linux/smc91x.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h index e302c447e057..129bc674dcf5 100644 --- a/include/linux/smc91x.h +++ b/include/linux/smc91x.h @@ -39,6 +39,7 @@ struct smc91x_platdata { unsigned long flags; unsigned char leda; unsigned char ledb; + bool pxa_u16_align4; /* PXA buggy u16 writes on 4*n+2 addresses */ }; #endif /* __SMC91X_H__ */ -- cgit v1.2.3 From 57a09bf0a416700676e77102c28f9cfcb48267e0 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 18 Oct 2016 19:51:19 +0200 Subject: bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers A BPF program is required to check the return register of a map_elem_lookup() call before accessing memory. The verifier keeps track of this by converting the type of the result register from PTR_TO_MAP_VALUE_OR_NULL to PTR_TO_MAP_VALUE after a conditional jump ensures safety. This check is currently exclusively performed for the result register 0. In the event the compiler reorders instructions, BPF_MOV64_REG instructions may be moved before the conditional jump which causes them to keep their type PTR_TO_MAP_VALUE_OR_NULL to which the verifier objects when the register is accessed: 0: (b7) r1 = 10 1: (7b) *(u64 *)(r10 -8) = r1 2: (bf) r2 = r10 3: (07) r2 += -8 4: (18) r1 = 0x59c00000 6: (85) call 1 7: (bf) r4 = r0 8: (15) if r0 == 0x0 goto pc+1 R0=map_value(ks=8,vs=8) R4=map_value_or_null(ks=8,vs=8) R10=fp 9: (7a) *(u64 *)(r4 +0) = 0 R4 invalid mem access 'map_value_or_null' This commit extends the verifier to keep track of all identical PTR_TO_MAP_VALUE_OR_NULL registers after a map_elem_lookup() by assigning them an ID and then marking them all when the conditional jump is observed. Signed-off-by: Thomas Graf Reviewed-by: Josef Bacik Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7035b997aaa5..ac5b393ee6b2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -23,13 +23,13 @@ struct bpf_reg_state { * result in a bad access. */ u64 min_value, max_value; + u32 id; union { /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ s64 imm; /* valid when type == PTR_TO_PACKET* */ struct { - u32 id; u16 off; u16 range; }; -- cgit v1.2.3 From 8b6b4135e4fb2b537f33b811c13f77bee25ca8d3 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Thu, 20 Oct 2016 13:55:19 -0400 Subject: net: use core MTU range checking in WAN drivers - set min/max_mtu in all hdlc drivers, remove hdlc_change_mtu - sent max_mtu in lec driver, remove lec_change_mtu - set min/max_mtu in x25_asy driver CC: netdev@vger.kernel.org CC: Krzysztof Halasa CC: Krzysztof Halasa CC: Jan "Yenya" Kasprzak CC: Francois Romieu CC: Kevin Curtis CC: Zhao Qiang Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/linux/hdlc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index e31bcd4c7859..97585d9679f3 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -93,8 +93,6 @@ static __inline__ void debug_frame(const struct sk_buff *skb) int hdlc_open(struct net_device *dev); /* Must be called by hardware driver when HDLC device is being closed */ void hdlc_close(struct net_device *dev); -/* May be used by hardware driver */ -int hdlc_change_mtu(struct net_device *dev, int new_mtu); /* Must be pointed to by hw driver's dev->netdev_ops->ndo_start_xmit */ netdev_tx_t hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev); -- cgit v1.2.3 From b3e3893e1253692c3d2b8e8ebd5a26183ed30443 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Thu, 20 Oct 2016 13:55:22 -0400 Subject: net: use core MTU range checking in misc drivers firewire-net: - set min/max_mtu - remove fwnet_change_mtu nes: - set max_mtu - clean up nes_netdev_change_mtu xpnet: - set min/max_mtu - remove xpnet_dev_change_mtu hippi: - set min/max_mtu - remove hippi_change_mtu batman-adv: - set max_mtu - remove batadv_interface_change_mtu - initialization is a little async, not 100% certain that max_mtu is set in the optimal place, don't have hardware to test with rionet: - set min/max_mtu - remove rionet_change_mtu slip: - set min/max_mtu - streamline sl_change_mtu um/net_kern: - remove pointless ndo_change_mtu hsi/clients/ssi_protocol: - use core MTU range checking - remove now redundant ssip_pn_set_mtu ipoib: - set a default max MTU value - Note: ipoib's actual max MTU can vary, depending on if the device is in connected mode or not, so we'll just set the max_mtu value to the max possible, and let the ndo_change_mtu function continue to validate any new MTU change requests with checks for CM or not. Note that ipoib has no min_mtu set, and thus, the network core's mtu > 0 check is the only lower bounds here. 
mptlan: - use net core MTU range checking - remove now redundant mpt_lan_change_mtu fddi: - min_mtu = 21, max_mtu = 4470 - remove now redundant fddi_change_mtu (including export) fjes: - min_mtu = 8192, max_mtu = 65536 - The max_mtu value is actually one over IP_MAX_MTU here, but the idea is to get past the core net MTU range checks so fjes_change_mtu can validate a new MTU against what it supports (see fjes_support_mtu in fjes_hw.c) hsr: - min_mtu = 0 (calls ether_setup, max_mtu is 1500) f_phonet: - min_mtu = 6, max_mtu = 65541 u_ether: - min_mtu = 14, max_mtu = 15412 phonet/pep-gprs: - min_mtu = 576, max_mtu = 65530 - remove redundant gprs_set_mtu CC: netdev@vger.kernel.org CC: linux-rdma@vger.kernel.org CC: Stefan Richter CC: Faisal Latif CC: linux-rdma@vger.kernel.org CC: Cliff Whickman CC: Robin Holt CC: Jes Sorensen CC: Marek Lindner CC: Simon Wunderlich CC: Antonio Quartulli CC: Sathya Prakash CC: Chaitra P B CC: Suganath Prabu Subramani CC: MPT-FusionLinux.pdl@broadcom.com CC: Sebastian Reichel CC: Felipe Balbi CC: Arvid Brodin CC: Remi Denis-Courmont Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/linux/fddidevice.h | 1 - include/linux/hippidevice.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h index 9a79f0106da1..32c22cfb238b 100644 --- a/include/linux/fddidevice.h +++ b/include/linux/fddidevice.h @@ -26,7 +26,6 @@ #ifdef __KERNEL__ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev); -int fddi_change_mtu(struct net_device *dev, int new_mtu); struct net_device *alloc_fddidev(int sizeof_priv); #endif diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index 8ec23fb0b412..402f99e328d4 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -32,7 +32,6 @@ struct hippi_cb { }; __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev); -int hippi_change_mtu(struct net_device *dev, int new_mtu); int hippi_mac_addr(struct net_device *dev, void *p); int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p); struct net_device *alloc_hippi_dev(int sizeof_priv); -- cgit v1.2.3 From 2d0e30c30f84d08dc16f0f2af41f1b8a85f0755e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Oct 2016 12:46:33 +0200 Subject: bpf: add helper for retrieving current numa node id Use case is mainly for soreuseport to select sockets for the local numa node, but since generic, lets also add this for other networking and tracing program types. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c201017b5730..edcd96ded8aa 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -319,6 +319,7 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; +extern const struct bpf_func_proto bpf_get_numa_node_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; extern const struct bpf_func_proto bpf_ktime_get_ns_proto; extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; -- cgit v1.2.3 From 3cf25904fe467aebeaa77d402b6cf3c6c5d6303b Mon Sep 17 00:00:00 2001 From: Xo Wang Date: Fri, 21 Oct 2016 10:20:12 -0700 Subject: net: phy: broadcom: Update Auxiliary Control Register macros Add the RXD-to-RXC skew (delay) time bit in the Miscellaneous Control shadow register and a mask for the shadow selector field. Remove a re-definition of MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL. Signed-off-by: Xo Wang Reviewed-by: Florian Fainelli Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index e3354b74286c..22c4421c916c 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -105,11 +105,12 @@ #define MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA 0x0800 #define MII_BCM54XX_AUXCTL_MISC_WREN 0x8000 +#define MII_BCM54XX_AUXCTL_MISC_RXD_RXC_SKEW 0x0100 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 #define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC 0x7000 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x0007 -#define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL 0x0000 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MASK 0x0007 /* * Broadcom LED source encodings. These are used in BCM5461, BCM5481, -- cgit v1.2.3 From d92ead16be405b6d52ff7b366d1c9865ccc684bd Mon Sep 17 00:00:00 2001 From: Xo Wang Date: Fri, 21 Oct 2016 10:20:13 -0700 Subject: net: phy: broadcom: Add support for BCM54612E This PHY has internal delays enabled after reset. This clears the internal delay enables unless the interface specifically requests them. Signed-off-by: Xo Wang Reviewed-by: Florian Fainelli Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 22c4421c916c..60def78c4e12 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -18,6 +18,7 @@ #define PHY_ID_BCM5421 0x002060e0 #define PHY_ID_BCM5464 0x002060b0 #define PHY_ID_BCM5461 0x002060c0 +#define PHY_ID_BCM54612E 0x03625e60 #define PHY_ID_BCM54616S 0x03625d10 #define PHY_ID_BCM57780 0x03625d90 -- cgit v1.2.3 From 3f817fe718c6cb3ddcc2ab04ba86faecc20ef8fe Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 27 Oct 2016 00:42:01 +0300 Subject: cfg80211: Define IEEE P802.11ai (FILS) information elements Define the Element IDs and Element ID Extensions from IEEE P802.11ai/D11.0. In addition, add a new cfg80211_find_ext_ie() wrapper to make it easier to find information elements that used the Element ID Extension field. 
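As a hedged illustration of the new lookup: a minimal sketch of finding a FILS element via the wrapper mentioned above, assuming the usual cfg80211_find_ext_ie(ext_eid, ies, len) prototype (not shown in this hunk) and the WLAN_EID_EXT_FILS_SESSION value added in the ieee80211.h hunk that follows; the helper name here is made up.

#include <linux/ieee80211.h>
#include <net/cfg80211.h>

/* Return a pointer to the FILS Session extension element inside an IE
 * buffer (starting at its Element ID octet), or NULL if it is absent.
 */
static const u8 *find_fils_session_elem(const u8 *ies, int ies_len)
{
	return cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION, ies, ies_len);
}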
Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a80516fd65c8..d428adf51446 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1960,6 +1960,26 @@ enum ieee80211_eid { WLAN_EID_VENDOR_SPECIFIC = 221, WLAN_EID_QOS_PARAMETER = 222, + WLAN_EID_CAG_NUMBER = 237, + WLAN_EID_AP_CSN = 239, + WLAN_EID_FILS_INDICATION = 240, + WLAN_EID_DILS = 241, + WLAN_EID_FRAGMENT = 242, + WLAN_EID_EXTENSION = 255 +}; + +/* Element ID Extensions for Element ID 255 */ +enum ieee80211_eid_ext { + WLAN_EID_EXT_ASSOC_DELAY_INFO = 1, + WLAN_EID_EXT_FILS_REQ_PARAMS = 2, + WLAN_EID_EXT_FILS_KEY_CONFIRM = 3, + WLAN_EID_EXT_FILS_SESSION = 4, + WLAN_EID_EXT_FILS_HLP_CONTAINER = 5, + WLAN_EID_EXT_FILS_IP_ADDR_ASSIGN = 6, + WLAN_EID_EXT_KEY_DELIVERY = 7, + WLAN_EID_EXT_FILS_WRAPPED_DATA = 8, + WLAN_EID_EXT_FILS_PUBLIC_KEY = 12, + WLAN_EID_EXT_FILS_NONCE = 13, }; /* Action category code */ -- cgit v1.2.3 From 631810603a20874554b2f17adf42b72d0f15eda5 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 27 Oct 2016 00:42:02 +0300 Subject: cfg80211: Add Fast Initial Link Setup (FILS) auth algs This defines authentication algorithms for FILS (IEEE 802.11ai). Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d428adf51446..793a0174ba29 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1576,6 +1576,9 @@ struct ieee80211_vht_operation { #define WLAN_AUTH_SHARED_KEY 1 #define WLAN_AUTH_FT 2 #define WLAN_AUTH_SAE 3 +#define WLAN_AUTH_FILS_SK 4 +#define WLAN_AUTH_FILS_SK_PFS 5 +#define WLAN_AUTH_FILS_PK 6 #define WLAN_AUTH_LEAP 128 #define WLAN_AUTH_CHALLENGE_LEN 128 -- cgit v1.2.3 From 348bd456699801920a309c66e382380809fbdf41 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 27 Oct 2016 00:42:03 +0300 Subject: cfg80211: Add KEK/nonces for FILS association frames The new nl80211 attributes can be used to provide KEK and nonces to allow the driver to encrypt and decrypt FILS (Re)Association Request/Response frames in station mode. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 793a0174ba29..fe849329511a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2096,6 +2096,9 @@ enum ieee80211_key_len { #define IEEE80211_GCMP_MIC_LEN 16 #define IEEE80211_GCMP_PN_LEN 6 +#define FILS_NONCE_LEN 16 +#define FILS_MAX_KEK_LEN 64 + /* Public action codes */ enum ieee80211_pub_actioncode { WLAN_PUB_ACTION_EXT_CHANSW_ANN = 4, -- cgit v1.2.3 From a07ea4d9941af5a0c6f0be2a71b51ac9c083c5e5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Oct 2016 14:40:02 +0200 Subject: genetlink: no longer support using static family IDs Static family IDs have never really been used, the only use case was the workaround I introduced for those users that assumed their family ID was also their multicast group ID. 
Additionally, because static family IDs would never be reserved by the generic netlink code, using a relatively low ID would only work for built-in families that can be registered immediately after generic netlink is started, which is basically only the control family (apart from the workaround code, which I also had to add code for so it would reserve those IDs) Thus, anything other than GENL_ID_GENERATE is flawed and luckily not used except in the cases I mentioned. Move those workarounds into a few lines of code, and then get rid of GENL_ID_GENERATE entirely, making it more robust. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/genl_magic_func.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 667c31101b8b..7c070c1fe457 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -260,7 +260,6 @@ static struct genl_ops ZZZ_genl_ops[] __read_mostly = { */ #define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) static struct genl_family ZZZ_genl_family __read_mostly = { - .id = GENL_ID_GENERATE, .name = __stringify(GENL_MAGIC_FAMILY), .version = GENL_MAGIC_VERSION, #ifdef GENL_MAGIC_FAMILY_HDRSZ -- cgit v1.2.3 From 489111e5c25b93be80340c3113d71903d7c82136 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Oct 2016 14:40:03 +0200 Subject: genetlink: statically initialize families Instead of providing macros/inline functions to initialize the families, make all users initialize them statically and get rid of the macros. This reduces the kernel code size by about 1.6k on x86-64 (with allyesconfig). Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/drbd_genl.h | 2 +- include/linux/genl_magic_func.h | 28 ++++++++++++++++------------ 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index c934d3a96b5e..2896f93808ae 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -67,7 +67,7 @@ * genl_magic_func.h * generates an entry in the static genl_ops array, * and static register/unregister functions to - * genl_register_family_with_ops(). + * genl_register_family(). 
* * flags and handler: * GENL_op_init( .doit = x, .dumpit = y, .flags = something) diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 7c070c1fe457..40c2e39362c8 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -259,15 +259,7 @@ static struct genl_ops ZZZ_genl_ops[] __read_mostly = { * {{{2 */ #define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) -static struct genl_family ZZZ_genl_family __read_mostly = { - .name = __stringify(GENL_MAGIC_FAMILY), - .version = GENL_MAGIC_VERSION, -#ifdef GENL_MAGIC_FAMILY_HDRSZ - .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), -#endif - .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, -}; - +static struct genl_family ZZZ_genl_family; /* * Magic: define multicast groups * Magic: define multicast group registration helper @@ -301,11 +293,23 @@ static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ #undef GENL_mc_group #define GENL_mc_group(group) +static struct genl_family ZZZ_genl_family __read_mostly = { + .name = __stringify(GENL_MAGIC_FAMILY), + .version = GENL_MAGIC_VERSION, +#ifdef GENL_MAGIC_FAMILY_HDRSZ + .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), +#endif + .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, + .ops = ZZZ_genl_ops, + .n_ops = ARRAY_SIZE(ZZZ_genl_ops), + .mcgrps = ZZZ_genl_mcgrps, + .n_mcgrps = ARRAY_SIZE(ZZZ_genl_mcgrps), + .module = THIS_MODULE, +}; + int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void) { - return genl_register_family_with_ops_groups(&ZZZ_genl_family, \ - ZZZ_genl_ops, \ - ZZZ_genl_mcgrps); + return genl_register_family(&ZZZ_genl_family); } void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void) -- cgit v1.2.3 From 56989f6d8568c21257dcec0f5e644d5570ba3281 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Oct 2016 14:40:05 +0200 Subject: genetlink: mark families as __ro_after_init Now genl_register_family() is the only thing (other than the users themselves, perhaps, but I didn't find any doing that) writing to the family struct. In all families that I found, genl_register_family() is only called from __init functions (some indirectly, in which case I've add __init annotations to clarifly things), so all can actually be marked __ro_after_init. This protects the data structure from accidental corruption. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/genl_magic_func.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 40c2e39362c8..377257d8f7e3 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -293,7 +293,7 @@ static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ #undef GENL_mc_group #define GENL_mc_group(group) -static struct genl_family ZZZ_genl_family __read_mostly = { +static struct genl_family ZZZ_genl_family __ro_after_init = { .name = __stringify(GENL_MAGIC_FAMILY), .version = GENL_MAGIC_VERSION, #ifdef GENL_MAGIC_FAMILY_HDRSZ -- cgit v1.2.3 From b917783c7b350518f8c5d88bb5848aa8064408a6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Oct 2016 18:49:46 +0200 Subject: flow_dissector: __skb_get_hash_symmetric arg can be const Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 601258f6e621..663fda2887f7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1086,7 +1086,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) } void __skb_get_hash(struct sk_buff *skb); -u32 __skb_get_hash_symmetric(struct sk_buff *skb); +u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen); -- cgit v1.2.3 From 5579e1519bad43b874922dbe87c74fdcbd97a7db Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 31 Aug 2016 05:17:54 +0000 Subject: net/mlx5: Update struct mlx5_ifc_xrqc_bits Update struct mlx5_ifc_xrqc_bits according to the latest specification Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6045d4d58065..12f72e45a3f0 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2844,7 +2844,7 @@ struct mlx5_ifc_xrqc_bits { struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context; - u8 reserved_at_180[0x200]; + u8 reserved_at_180[0x880]; struct mlx5_ifc_wq_bits wq; }; -- cgit v1.2.3 From dd257efb1e0f8875ed7e42b88837a8dada0d0e41 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 31 Aug 2016 05:29:58 +0000 Subject: net/mlx5: Ensure SRQ physical address structure endianness SRQ physical address structure field should be in big-endian format. Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky --- include/linux/mlx5/srq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index 33c97dc900f8..1cde0fd53f90 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -55,7 +55,7 @@ struct mlx5_srq_attr { u32 lwm; u32 user_index; u64 db_record; - u64 *pas; + __be64 *pas; }; struct mlx5_core_dev; -- cgit v1.2.3 From 813f854053c26204e2723c498def4c7870dcc7f4 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 11 Aug 2016 11:21:39 +0300 Subject: net/mlx5: Introduce TSAR manipulation firmware commands TSAR (stands for Transmit Scheduling ARbiter) is a hardware component that is responsible for selecting the next entity to serve on the transmit path. The arbitration defines the QoS policy between the agents connected to the TSAR. The TSAR consists of two main features: 1) BW Allocation between agents: The TSAR implements a deficit weighted round robin between the agents. Each agent attached to the TSAR is assigned a weight and it is awarded transmission tokens according to this weight. 2) Rate limiter per agent: Each agent attached to the TSAR is (optionally) assigned a rate limit. TSAR will not allow scheduling for an agent exceeding its defined rate limit. In this patch we implement the API of manipulating the TSAR.
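To make the new firmware commands concrete, here is a minimal sketch of issuing CREATE_SCHEDULING_ELEMENT for an E-Switch vport. It is not the driver's actual helper: it only assumes the mlx5_ifc structures and enums added in the hunk below, together with the generic MLX5_SET/MLX5_GET/MLX5_ADDR_OF macros and mlx5_cmd_exec(); the function name and parameters are illustrative.

#include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h>

static int create_vport_sched_element(struct mlx5_core_dev *dev, u16 vport,
				      u32 parent_id, u32 bw_share,
				      u32 *element_id)
{
	u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {0};
	u32 out[MLX5_ST_SZ_DW(create_scheduling_element_out)] = {0};
	void *sched_ctx, *attr;
	int err;

	MLX5_SET(create_scheduling_element_in, in, opcode,
		 MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT);
	MLX5_SET(create_scheduling_element_in, in, scheduling_hierarchy,
		 SCHEDULING_HIERARCHY_E_SWITCH);

	/* Describe the agent: a vport hanging off an existing TSAR. */
	sched_ctx = MLX5_ADDR_OF(create_scheduling_element_in, in,
				 scheduling_context);
	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
	attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, attr, vport_number, vport);

	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (!err)
		*element_id = MLX5_GET(create_scheduling_element_out, out,
				       scheduling_element_id);
	return err;
}

The returned scheduling_element_id is what the MODIFY/DESTROY commands below take in order to later adjust bw_share/max_average_bw or tear the element down.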
Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 199 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 195 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 12f72e45a3f0..2632cb2caf10 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -145,6 +145,12 @@ enum { MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, + MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782, + MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783, + MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT = 0x784, + MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT = 0x785, + MLX5_CMD_OP_CREATE_QOS_PARA_VPORT = 0x786, + MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT = 0x787, MLX5_CMD_OP_ALLOC_PD = 0x800, MLX5_CMD_OP_DEALLOC_PD = 0x801, MLX5_CMD_OP_ALLOC_UAR = 0x802, @@ -537,13 +543,27 @@ struct mlx5_ifc_e_switch_cap_bits { struct mlx5_ifc_qos_cap_bits { u8 packet_pacing[0x1]; - u8 reserved_0[0x1f]; - u8 reserved_1[0x20]; + u8 esw_scheduling[0x1]; + u8 reserved_at_2[0x1e]; + + u8 reserved_at_20[0x20]; + u8 packet_pacing_max_rate[0x20]; + u8 packet_pacing_min_rate[0x20]; - u8 reserved_2[0x10]; + + u8 reserved_at_80[0x10]; u8 packet_pacing_rate_table_size[0x10]; - u8 reserved_3[0x760]; + + u8 esw_element_type[0x10]; + u8 esw_tsar_type[0x10]; + + u8 reserved_at_c0[0x10]; + u8 max_qos_para_vport[0x10]; + + u8 max_tsar_bw_share[0x20]; + + u8 reserved_at_100[0x700]; }; struct mlx5_ifc_per_protocol_networking_offload_caps_bits { @@ -2333,6 +2353,30 @@ struct mlx5_ifc_sqc_bits { struct mlx5_ifc_wq_bits wq; }; +enum { + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR = 0x0, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2, + SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, +}; + +struct mlx5_ifc_scheduling_context_bits { + u8 element_type[0x8]; + u8 reserved_at_8[0x18]; + + u8 element_attributes[0x20]; + + u8 parent_element_id[0x20]; + + u8 reserved_at_60[0x40]; + + u8 bw_share[0x20]; + + u8 max_average_bw[0x20]; + + u8 reserved_at_e0[0x120]; +}; + struct mlx5_ifc_rqtc_bits { u8 reserved_at_0[0xa0]; @@ -2920,6 +2964,29 @@ struct mlx5_ifc_register_loopback_control_bits { u8 reserved_at_20[0x60]; }; +struct mlx5_ifc_vport_tc_element_bits { + u8 traffic_class[0x4]; + u8 reserved_at_4[0xc]; + u8 vport_number[0x10]; +}; + +struct mlx5_ifc_vport_element_bits { + u8 reserved_at_0[0x10]; + u8 vport_number[0x10]; +}; + +enum { + TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, + TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, + TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, +}; + +struct mlx5_ifc_tsar_element_bits { + u8 reserved_at_0[0x8]; + u8 tsar_type[0x8]; + u8 reserved_at_10[0x10]; +}; + struct mlx5_ifc_teardown_hca_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -3540,6 +3607,39 @@ struct mlx5_ifc_query_special_contexts_in_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_query_scheduling_element_out_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0xc0]; + + struct mlx5_ifc_scheduling_context_bits scheduling_context; + + u8 reserved_at_300[0x100]; +}; + +enum { + SCHEDULING_HIERARCHY_E_SWITCH = 0x2, +}; + +struct mlx5_ifc_query_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 
scheduling_element_id[0x20]; + + u8 reserved_at_80[0x180]; +}; + struct mlx5_ifc_query_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -4725,6 +4825,43 @@ struct mlx5_ifc_modify_sq_in_bits { struct mlx5_ifc_sqc_bits ctx; }; +struct mlx5_ifc_modify_scheduling_element_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x1c0]; +}; + +enum { + MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE = 0x1, + MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW = 0x2, +}; + +struct mlx5_ifc_modify_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 scheduling_element_id[0x20]; + + u8 reserved_at_80[0x20]; + + u8 modify_bitmask[0x20]; + + u8 reserved_at_c0[0x40]; + + struct mlx5_ifc_scheduling_context_bits scheduling_context; + + u8 reserved_at_300[0x100]; +}; + struct mlx5_ifc_modify_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -5390,6 +5527,30 @@ struct mlx5_ifc_destroy_sq_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_destroy_scheduling_element_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x1c0]; +}; + +struct mlx5_ifc_destroy_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 scheduling_element_id[0x20]; + + u8 reserved_at_80[0x180]; +}; + struct mlx5_ifc_destroy_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6017,6 +6178,36 @@ struct mlx5_ifc_create_sq_in_bits { struct mlx5_ifc_sqc_bits ctx; }; +struct mlx5_ifc_create_scheduling_element_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + u8 scheduling_element_id[0x20]; + + u8 reserved_at_a0[0x160]; +}; + +struct mlx5_ifc_create_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 reserved_at_60[0xa0]; + + struct mlx5_ifc_scheduling_context_bits scheduling_context; + + u8 reserved_at_300[0x100]; +}; + struct mlx5_ifc_create_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; -- cgit v1.2.3 From 74491de937125d0c98c9b9c9208b4105717a3caa Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 31 Aug 2016 11:24:25 +0000 Subject: net/mlx5: Add multi dest support Currently when calling mlx5_add_flow_rule we accept only one flow destination, this commit allows to pass multiple destinations. This change forces us to change the return structure to a more flexible one. We introduce a flow handle (struct mlx5_flow_handle), it holds internally the number for rules created and holds an array where each cell points the to a flow rule. From the consumers (of mlx5_add_flow_rule) point of view this change is only cosmetic and requires only to change the type of the returned value they store. From the core point of view, we now need to use a loop when allocating and deleting rules (e.g given to us a flow handler). 
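A hedged usage sketch of the reworked API: it relies on the mlx5_add_flow_rules()/mlx5_del_flow_rules() prototypes in the fs.h hunk that follows, while the destination-type and action constants and the mlx5_flow_destination members are recalled from the mlx5 headers of this period and should be treated as illustrative rather than authoritative.

#include <linux/mlx5/fs.h>

/* Forward matching traffic to another flow table and count it: two
 * destinations in a single call, covered by one returned handle.
 */
static struct mlx5_flow_handle *
add_fwd_and_count_rule(struct mlx5_flow_table *ft,
		       struct mlx5_flow_spec *spec,
		       struct mlx5_flow_table *next_ft,
		       struct mlx5_fc *counter)
{
	struct mlx5_flow_destination dest[2] = {};

	dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	dest[0].ft = next_ft;
	dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	dest[1].counter = counter;

	return mlx5_add_flow_rules(ft, spec,
				   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
				   MLX5_FLOW_CONTEXT_ACTION_COUNT,
				   MLX5_FS_DEFAULT_FLOW_TAG, dest, 2);
}

However many firmware rules this expands to internally, a single mlx5_del_flow_rules() call on the returned handle removes them all.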
Signed-off-by: Mark Bloch Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 93ebc5e21334..0dcd287f4bd0 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -69,8 +69,8 @@ enum mlx5_flow_namespace_type { struct mlx5_flow_table; struct mlx5_flow_group; -struct mlx5_flow_rule; struct mlx5_flow_namespace; +struct mlx5_flow_handle; struct mlx5_flow_spec { u8 match_criteria_enable; @@ -127,18 +127,20 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); /* Single destination per rule. * Group ID is implied by the match criteria. */ -struct mlx5_flow_rule * -mlx5_add_flow_rule(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, - u32 action, - u32 flow_tag, - struct mlx5_flow_destination *dest); -void mlx5_del_flow_rule(struct mlx5_flow_rule *fr); - -int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, - struct mlx5_flow_destination *dest); - -struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule); +struct mlx5_flow_handle * +mlx5_add_flow_rules(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest, + int dest_num); +void mlx5_del_flow_rules(struct mlx5_flow_handle *fr); + +int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, + struct mlx5_flow_destination *new_dest, + struct mlx5_flow_destination *old_dest); + +struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handler); struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, -- cgit v1.2.3 From c62cce2caee558e18aa05c01c2fd3b40f07174f2 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 24 Oct 2016 18:29:13 -0700 Subject: net: add an ioctl to get a socket network namespace Each socket operates in a network namespace where it has been created, so if we want to dump and restore a socket, we have to know its network namespace. We have a socket_diag to get information about sockets, it doesn't report sockets which are not bound or connected. This patch introduces a new socket ioctl, which is called SIOCGSKNS and used to get a file descriptor for a socket network namespace. A task must have CAP_NET_ADMIN in a target network namespace to use this ioctl. Cc: "David S. Miller" Cc: Eric W. Biederman Signed-off-by: Andrei Vagin Signed-off-by: David S. Miller --- include/linux/proc_fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index b97bf2ef996e..368c7ad06ae5 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -82,4 +82,8 @@ static inline struct proc_dir_entry *proc_net_mkdir( return proc_mkdir_data(name, 0, parent, net); } +struct ns_common; +int open_related_ns(struct ns_common *ns, + struct ns_common *(*get_ns)(struct ns_common *ns)); + #endif /* _LINUX_PROC_FS_H */ -- cgit v1.2.3 From 9cf1f6a8c4cbb7836b838b51b3b02ddf32c6c6a0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 28 Oct 2016 11:43:20 -0400 Subject: net: Move functions for configuring traffic classes out of inline headers The functions for configuring the traffic class to queue mappings have other effects that need to be addressed. 
Instead of trying to export a bunch of new functions just relocate the functions so that we can instrument them directly with the functionality they will need. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 20ce8df115ac..e05ab3bd48d2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1920,34 +1920,9 @@ int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) return 0; } -static inline -void netdev_reset_tc(struct net_device *dev) -{ - dev->num_tc = 0; - memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); - memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); -} - -static inline -int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) -{ - if (tc >= dev->num_tc) - return -EINVAL; - - dev->tc_to_txq[tc].count = count; - dev->tc_to_txq[tc].offset = offset; - return 0; -} - -static inline -int netdev_set_num_tc(struct net_device *dev, u8 num_tc) -{ - if (num_tc > TC_MAX_QUEUE) - return -EINVAL; - - dev->num_tc = num_tc; - return 0; -} +void netdev_reset_tc(struct net_device *dev); +int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset); +int netdev_set_num_tc(struct net_device *dev, u8 num_tc); static inline int netdev_get_num_tc(struct net_device *dev) -- cgit v1.2.3 From 8d059b0f6f5b1d3acf829454e1087818ad660058 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 28 Oct 2016 11:43:49 -0400 Subject: net: Add sysfs value to determine queue traffic class Add a sysfs attribute for a Tx queue that allows us to determine the traffic class for a given queue. This will allow us to more easily determine this in the future. It is needed as XPS will take the traffic class for a group of queues into account in order to avoid pulling traffic from one traffic class into another. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e05ab3bd48d2..d91a41860614 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1920,6 +1920,7 @@ int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) return 0; } +int netdev_txq_to_tc(struct net_device *dev, unsigned int txq); void netdev_reset_tc(struct net_device *dev); int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset); int netdev_set_num_tc(struct net_device *dev, u8 num_tc); -- cgit v1.2.3 From 184c449f91fef521042970cca46bd5cdfc0e3a37 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 28 Oct 2016 11:50:13 -0400 Subject: net: Add support for XPS with QoS via traffic classes This patch adds support for setting and using XPS when QoS via traffic classes is enabled. With this change we will factor in the priority and traffic class mapping of the packet and use that information to correctly select the queue. This allows us to define a set of queues for a given traffic class via mqprio and then configure the XPS mapping for those queues so that the traffic flows can avoid head-of-line blocking between the individual CPUs if so desired. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d91a41860614..66fd61c681d9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -732,8 +732,8 @@ struct xps_dev_maps { struct rcu_head rcu; struct xps_map __rcu *cpu_map[0]; }; -#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ - (nr_cpu_ids * sizeof(struct xps_map *))) +#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \ + (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *))) #endif /* CONFIG_XPS */ #define TC_MAX_QUEUE 16 -- cgit v1.2.3 From 0fefbfbaad298162737d5418eb85065879f99b3e Mon Sep 17 00:00:00 2001 From: Sudarsana Kalluru Date: Mon, 31 Oct 2016 07:14:21 +0200 Subject: qed*: Management firmware - notifications and defaults Management firmware is interested in various tidbits about the driver - including the driver state & several configuration-related fields [MTU, primary MAC, etc.]. This adds the necessary logic to update MFW with such configurations, some of which are passed directly via qed while for others APIs are provided so that qede would be able to configure them later if needed. This also introduces a new default configuration for MTU which would replace the default inherited by being an Ethernet device. Signed-off-by: Sudarsana Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 8978a60371f4..5c909cd02764 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -267,6 +267,7 @@ struct qed_dev_info { u8 mf_mode; bool tx_switching; bool rdma_supported; + u16 mtu; }; enum qed_sb_type { @@ -554,6 +555,33 @@ struct qed_common_ops { */ int (*set_led)(struct qed_dev *cdev, enum qed_led_mode mode); + +/** + * @brief update_drv_state - API to inform the change in the driver state. + * + * @param cdev + * @param active + * + */ + int (*update_drv_state)(struct qed_dev *cdev, bool active); + +/** + * @brief update_mac - API to inform the change in the mac address + * + * @param cdev + * @param mac + * + */ + int (*update_mac)(struct qed_dev *cdev, u8 *mac); + +/** + * @brief update_mtu - API to inform the change in the mtu + * + * @param cdev + * @param mtu + * + */ + int (*update_mtu)(struct qed_dev *cdev, u16 mtu); }; #define MASK_FIELD(_name, _value) \ -- cgit v1.2.3 From 7a4b21b7d1f0644456501e33d3917c9aaee76a75 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Mon, 31 Oct 2016 07:14:22 +0200 Subject: qed: Add nvram selftest Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 5c909cd02764..ffc2d2f5e88f 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -402,6 +402,15 @@ struct qed_selftest_ops { * @return 0 on success, error otherwise. */ int (*selftest_clock)(struct qed_dev *cdev); + +/** + * @brief selftest_nvram - Perform nvram test + * + * @param cdev + * + * @return 0 on success, error otherwise.
From 14d39648cbfc6289e3f873d30f282b9517ebe860 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Mon, 31 Oct 2016 07:14:23 +0200 Subject: qed*: Add support for WoL Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index ffc2d2f5e88f..ea095b4893aa 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -268,6 +268,8 @@ struct qed_dev_info { bool tx_switching; bool rdma_supported; u16 mtu; + + bool wol_support; }; enum qed_sb_type { @@ -591,6 +593,14 @@ struct qed_common_ops { * */ int (*update_mtu)(struct qed_dev *cdev, u16 mtu); + +/** + * @brief update_wol - update of changes in the WoL configuration + * + * @param cdev + * @param enabled - true iff WoL should be enabled. + */ + int (*update_wol) (struct qed_dev *cdev, bool enabled); }; #define MASK_FIELD(_name, _value) \ -- cgit v1.2.3 From 2edbff8dcb5da324fd4c4fe953629e4f6ca73c99 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 31 Oct 2016 07:14:27 +0200 Subject: qed: Learn resources from management firmware Currently, each interface assumes it receives an equal portion of HW/FW resources, but this is wasteful - different partitions [and specifically, partitions exposing different protocol support] might require different resources. Implement a new resource learning scheme where the information is received directly from the management firmware [which has knowledge of all of the functions and can serve as arbiter]. Signed-off-by: Tomer Tayar Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 15130805d792..9755a3feb52e 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -22,7 +22,7 @@ struct qed_dev_eth_info { u8 num_tc; u8 port_mac[ETH_ALEN]; - u8 num_vlan_filters; + u16 num_vlan_filters; u16 num_mac_filters; /* Legacy VF - this affects the datapath, so qede has to know */ -- cgit v1.2.3 From 556d299fcb4af8f2e8eacf311c4eee352c746788 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 31 Oct 2016 13:21:02 +0100 Subject: net: pim: add common pimhdr struct and helpers Add the common pimhdr structure and helpers to access it, and also clean up the format of the header file. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S.
Miller --- include/linux/pim.h | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pim.h b/include/linux/pim.h index e1d756f81348..354235a2691b 100644 --- a/include/linux/pim.h +++ b/include/linux/pim.h @@ -1,6 +1,7 @@ #ifndef __LINUX_PIM_H #define __LINUX_PIM_H +#include #include /* Message types - V1 */ @@ -13,20 +14,47 @@ #define PIM_NULL_REGISTER cpu_to_be32(0x40000000) -static inline bool ipmr_pimsm_enabled(void) -{ - return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); -} +/* RFC7761, sec 4.9: + * The PIM header common to all PIM messages is: + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |PIM Ver| Type | Reserved | Checksum | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct pimhdr { + __u8 type; + __u8 reserved; + __be16 csum; +}; /* PIMv2 register message header layout (ietf-draft-idmr-pimvsm-v2-00.ps */ -struct pimreghdr -{ +struct pimreghdr { __u8 type; __u8 reserved; __be16 csum; __be32 flags; }; -struct sk_buff; -extern int pim_rcv_v1(struct sk_buff *); +int pim_rcv_v1(struct sk_buff *skb); + +static inline bool ipmr_pimsm_enabled(void) +{ + return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); +} + +static inline struct pimhdr *pim_hdr(const struct sk_buff *skb) +{ + return (struct pimhdr *)skb_transport_header(skb); +} + +static inline u8 pim_hdr_version(const struct pimhdr *pimhdr) +{ + return pimhdr->type >> 4; +} + +static inline u8 pim_hdr_type(const struct pimhdr *pimhdr) +{ + return pimhdr->type & 0xf; +} #endif -- cgit v1.2.3 From 20bb6ce9879e19eee7539329eaa2408d12b00306 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 31 Oct 2016 13:21:03 +0100 Subject: net: pim: add a helper to check for IPv4 all pim routers address Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/pim.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pim.h b/include/linux/pim.h index 354235a2691b..1b6c0dbba94e 100644 --- a/include/linux/pim.h +++ b/include/linux/pim.h @@ -57,4 +57,10 @@ static inline u8 pim_hdr_type(const struct pimhdr *pimhdr) { return pimhdr->type & 0xf; } + +/* check if the address is 224.0.0.13, RFC7761 sec 4.3.1 */ +static inline bool pim_ipv4_all_pim_routers(__be32 addr) +{ + return addr == htonl(0xE000000D); +} #endif -- cgit v1.2.3 From 56245cae19f5ccb371fa63b09bb6b9ce7c0f1266 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 31 Oct 2016 13:21:04 +0100 Subject: net: pim: add all RFC7761 message types Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/pim.h | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pim.h b/include/linux/pim.h index 1b6c0dbba94e..0e81b2778ae0 100644 --- a/include/linux/pim.h +++ b/include/linux/pim.h @@ -10,7 +10,36 @@ /* Message types - V2 */ #define PIM_VERSION 2 -#define PIM_REGISTER 1 + +/* RFC7761, sec 4.9: + * Type + * Types for specific PIM messages. 
PIM Types are: + * + * Message Type Destination + * --------------------------------------------------------------------- + * 0 = Hello Multicast to ALL-PIM-ROUTERS + * 1 = Register Unicast to RP + * 2 = Register-Stop Unicast to source of Register + * packet + * 3 = Join/Prune Multicast to ALL-PIM-ROUTERS + * 4 = Bootstrap Multicast to ALL-PIM-ROUTERS + * 5 = Assert Multicast to ALL-PIM-ROUTERS + * 6 = Graft (used in PIM-DM only) Unicast to RPF'(S) + * 7 = Graft-Ack (used in PIM-DM only) Unicast to source of Graft + * packet + * 8 = Candidate-RP-Advertisement Unicast to Domain's BSR + */ +enum { + PIM_TYPE_HELLO, + PIM_TYPE_REGISTER, + PIM_TYPE_REGISTER_STOP, + PIM_TYPE_JOIN_PRUNE, + PIM_TYPE_BOOTSTRAP, + PIM_TYPE_ASSERT, + PIM_TYPE_GRAFT, + PIM_TYPE_GRAFT_ACK, + PIM_TYPE_CANDIDATE_RP_ADV +}; #define PIM_NULL_REGISTER cpu_to_be32(0x40000000) -- cgit v1.2.3 From bc8ee596afe8f35b379f87575c46d800dd8e7e68 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Tue, 1 Nov 2016 16:32:25 +0100 Subject: net: mii: add generic functions to support ksettings The old ethtool api (get_settings and set_settings) has generic mii functions mii_ethtool_sset and mii_ethtool_gset. To support the new ethtool api ({get|set}_link_ksettings), we add two generic mii functions, mii_ethtool_{get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- include/linux/mii.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 47492c9631b3..1629a0c32679 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -31,7 +31,11 @@ struct mii_if_info { extern int mii_link_ok (struct mii_if_info *mii); extern int mii_nway_restart (struct mii_if_info *mii); extern int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd); +extern int mii_ethtool_get_link_ksettings( + struct mii_if_info *mii, struct ethtool_link_ksettings *cmd); extern int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd); +extern int mii_ethtool_set_link_ksettings( + struct mii_if_info *mii, const struct ethtool_link_ksettings *cmd); extern int mii_check_gmii_support(struct mii_if_info *mii); extern void mii_check_link (struct mii_if_info *mii); extern unsigned int mii_check_media (struct mii_if_info *mii, -- cgit v1.2.3 From 1610a73c4175e7d63985316b52ac932b65a4dc90 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 3 Nov 2016 10:56:12 +0100 Subject: netfilter: kill NF_HOOK_THRESH() and state->thresh Patch c5136b15ea36 ("netfilter: bridge: add and use br_nf_hook_thresh") introduced br_nf_hook_thresh(). Replace NF_HOOK_THRESH() by br_nf_hook_thresh from br_nf_forward_finish(), so we have no more callers for this macro. As a result, state->thresh and the explicit thresh parameter in the hook state structure are not required anymore. And we can get rid of the skip-hook-under-thresh loop in nf_iterate() in the core path that is only used by br_netfilter to search for the filter hook.
Suggested-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 50 ++++++++++----------------------------- include/linux/netfilter_ingress.h | 2 +- 2 files changed, 14 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index abc7fdcb9eb1..e0d000f6c9bf 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -49,7 +49,6 @@ struct sock; struct nf_hook_state { unsigned int hook; - int thresh; u_int8_t pf; struct net_device *in; struct net_device *out; @@ -84,7 +83,7 @@ struct nf_hook_entry { static inline void nf_hook_state_init(struct nf_hook_state *p, struct nf_hook_entry *hook_entry, unsigned int hook, - int thresh, u_int8_t pf, + u_int8_t pf, struct net_device *indev, struct net_device *outdev, struct sock *sk, @@ -92,7 +91,6 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { p->hook = hook; - p->thresh = thresh; p->pf = pf; p->in = indev; p->out = outdev; @@ -155,20 +153,16 @@ extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); /** - * nf_hook_thresh - call a netfilter hook + * nf_hook - call a netfilter hook * * Returns 1 if the hook has allowed the packet to pass. The function * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. */ -static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, - struct net *net, - struct sock *sk, - struct sk_buff *skb, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct net *, struct sock *, struct sk_buff *), - int thresh) +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, + struct sock *sk, struct sk_buff *skb, + struct net_device *indev, struct net_device *outdev, + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { struct nf_hook_entry *hook_head; int ret = 1; @@ -185,8 +179,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, if (hook_head) { struct nf_hook_state state; - nf_hook_state_init(&state, hook_head, hook, thresh, - pf, indev, outdev, sk, net, okfn); + nf_hook_state_init(&state, hook_head, hook, pf, indev, outdev, + sk, net, okfn); ret = nf_hook_slow(skb, &state); } @@ -195,14 +189,6 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, return ret; } -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, - struct sock *sk, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct net *, struct sock *, struct sk_buff *)) -{ - return nf_hook_thresh(pf, hook, net, sk, skb, indev, outdev, okfn, INT_MIN); -} - /* Activate hook; either okfn or kfree_skb called, unless a hook returns NF_STOLEN (in which case, it's up to the hook to deal with the consequences). 
@@ -220,19 +206,6 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, coders :) */ -static inline int -NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, - struct sk_buff *skb, struct net_device *in, - struct net_device *out, - int (*okfn)(struct net *, struct sock *, struct sk_buff *), - int thresh) -{ - int ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, thresh); - if (ret == 1) - ret = okfn(net, sk, skb); - return ret; -} - static inline int NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, @@ -242,7 +215,7 @@ NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, int ret; if (!cond || - ((ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, INT_MIN)) == 1)) + ((ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn)) == 1)) ret = okfn(net, sk, skb); return ret; } @@ -252,7 +225,10 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { - return NF_HOOK_THRESH(pf, hook, net, sk, skb, in, out, okfn, INT_MIN); + int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn); + if (ret == 1) + ret = okfn(net, sk, skb); + return ret; } /* Call setsockopt() */ diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 33e37fb41d5d..fd44e4131710 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -26,7 +26,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb) if (unlikely(!e)) return 0; - nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN, + nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); return nf_hook_slow(skb, &state); -- cgit v1.2.3 From 613dbd95723aee7abd16860745691b6c7bda20dc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 3 Nov 2016 10:56:21 +0100 Subject: netfilter: x_tables: move hook state into xt_action_param structure Place pointer to hook state in xt_action_param structure instead of copying the fields that we need. After this change xt_action_param fits into one cacheline. This patch also adds a set of new wrapper functions to fetch relevant hook state structure fields. 
Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 48 ++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 2ad1a2b289b5..cd4eaf8df445 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -4,6 +4,7 @@ #include #include +#include #include /* Test a struct->invflags and a boolean for inequality */ @@ -17,14 +18,9 @@ * @target: the target extension * @matchinfo: per-match data * @targetinfo: per-target data - * @net network namespace through which the action was invoked - * @in: input netdevice - * @out: output netdevice + * @state: pointer to hook state this packet came from * @fragoff: packet is a fragment, this is the data offset * @thoff: position of transport header relative to skb->data - * @hook: hook number given packet came from - * @family: Actual NFPROTO_* through which the function is invoked - * (helpful when match->family == NFPROTO_UNSPEC) * * Fields written to by extensions: * @@ -38,15 +34,47 @@ struct xt_action_param { union { const void *matchinfo, *targinfo; }; - struct net *net; - const struct net_device *in, *out; + const struct nf_hook_state *state; int fragoff; unsigned int thoff; - unsigned int hooknum; - u_int8_t family; bool hotdrop; }; +static inline struct net *xt_net(const struct xt_action_param *par) +{ + return par->state->net; +} + +static inline struct net_device *xt_in(const struct xt_action_param *par) +{ + return par->state->in; +} + +static inline const char *xt_inname(const struct xt_action_param *par) +{ + return par->state->in->name; +} + +static inline struct net_device *xt_out(const struct xt_action_param *par) +{ + return par->state->out; +} + +static inline const char *xt_outname(const struct xt_action_param *par) +{ + return par->state->out->name; +} + +static inline unsigned int xt_hooknum(const struct xt_action_param *par) +{ + return par->state->hook; +} + +static inline u_int8_t xt_family(const struct xt_action_param *par) +{ + return par->state->pf; +} + /** * struct xt_mtchk_param - parameters for match extensions' * checkentry functions -- cgit v1.2.3 From 01886bd91f1ba418ce669dfe97a06ca9504e482a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 3 Nov 2016 10:56:35 +0100 Subject: netfilter: remove hook_entries field from nf_hook_state This field is only useful for nf_queue, so store it in the nf_queue_entry structure instead, away from the core path. Pass hook_head to nf_hook_slow(). Since we always have a valid entry on the first iteration in nf_iterate(), we can use 'do { ... } while (entry)' loop instead. 
Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 10 ++++------ include/linux/netfilter_ingress.h | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index e0d000f6c9bf..69230140215b 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,7 +54,6 @@ struct nf_hook_state { struct net_device *out; struct sock *sk; struct net *net; - struct nf_hook_entry __rcu *hook_entries; int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; @@ -81,7 +80,6 @@ struct nf_hook_entry { }; static inline void nf_hook_state_init(struct nf_hook_state *p, - struct nf_hook_entry *hook_entry, unsigned int hook, u_int8_t pf, struct net_device *indev, @@ -96,7 +94,6 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->out = outdev; p->sk = sk; p->net = net; - RCU_INIT_POINTER(p->hook_entries, hook_entry); p->okfn = okfn; } @@ -150,7 +147,8 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #endif -int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); +int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry *entry); /** * nf_hook - call a netfilter hook @@ -179,10 +177,10 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, if (hook_head) { struct nf_hook_state state; - nf_hook_state_init(&state, hook_head, hook, pf, indev, outdev, + nf_hook_state_init(&state, hook, pf, indev, outdev, sk, net, okfn); - ret = nf_hook_slow(skb, &state); + ret = nf_hook_slow(skb, &state, hook_head); } rcu_read_unlock(); diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index fd44e4131710..2dc3b49b804a 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -26,10 +26,10 @@ static inline int nf_hook_ingress(struct sk_buff *skb) if (unlikely(!e)) return 0; - nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, + nf_hook_state_init(&state, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); - return nf_hook_slow(skb, &state); + return nf_hook_slow(skb, &state, e); } static inline void nf_hook_ingress_init(struct net_device *dev) -- cgit v1.2.3 From 0cc0aa614b4c24b21b2492c0a1753035ee8c6edb Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 2 Nov 2016 11:02:17 -0400 Subject: ipv6: add IPV6_RECVFRAGSIZE cmsg When reading a datagram or raw packet that arrived fragmented, expose the maximum fragment size if recorded to allow applications to estimate receive path MTU. At this point, the field is only recorded when ipv6 connection tracking is enabled. A follow-up patch will record this field also in the ipv6 input path. Tested using the test for IP_RECVFRAGSIZE plus ip netns exec to ip addr add dev veth1 fc07::1/64 ip netns exec from ip addr add dev veth0 fc07::2/64 ip netns exec to ./recv_cmsg_recvfragsize -6 -u -p 6000 & ip netns exec from nc -q 1 -u fc07::1 6000 < payload Both with and without enabling connection tracking ip6tables -A INPUT -m state --state NEW -p udp -j LOG Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ca1ad9ebbc92..1afb6e8d35c3 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -229,8 +229,9 @@ struct ipv6_pinfo { rxflow:1, rxtclass:1, rxpmtu:1, - rxorigdstaddr:1; - /* 2 bits hole */ + rxorigdstaddr:1, + recvfragsize:1; + /* 1 bits hole */ } bits; __u16 all; } rxopt; -- cgit v1.2.3 From 68f929ff2654bced015ccb9b5555667f46f88dfa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 3 Nov 2016 17:12:06 +0000 Subject: debugfs: constify argument to debugfs_real_fops() seq_file users can only access const version of file pointer, because the ->file member of struct seq_operations is marked as such. Make parameter to debugfs_real_fops() const. CC: Greg Kroah-Hartman CC: Nicolai Stange CC: Christian Lamparter CC: LKML Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/debugfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 4d3f0d1aec73..bf1907d96097 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -52,7 +52,8 @@ extern struct srcu_struct debugfs_srcu; * Must only be called under the protection established by * debugfs_use_file_start(). */ -static inline const struct file_operations *debugfs_real_fops(struct file *filp) +static inline const struct file_operations * +debugfs_real_fops(const struct file *filp) __must_hold(&debugfs_srcu) { /* -- cgit v1.2.3 From 5b4e2900512321435a5cd7dd77f58f23f3109950 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Fri, 4 Nov 2016 01:10:56 -0400 Subject: net: phy: broadcom: add bcm54xx_auxctl_read Add a helper function to read the AUXCTL register for the BCM54xx. This mirrors the bcm54xx_auxctl_write function already present in the code. Signed-off-by: Jon Mason Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 60def78c4e12..0ed66914b61c 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -110,6 +110,7 @@ #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 #define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC 0x7000 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x0007 +#define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT 12 #define MII_BCM54XX_AUXCTL_SHDWSEL_MASK 0x0007 -- cgit v1.2.3 From b14995ac2527b43a75c9190fbd4efd43fb1f4562 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Fri, 4 Nov 2016 01:10:58 -0400 Subject: net: phy: broadcom: Add BCM54810 PHY entry The BCM54810 PHY requires some semi-unique configuration, which results in some additional configuration in addition to the standard config. Also, some users of the BCM54810 require the PHY lanes to be swapped. Since there is no way to detect this, add a device tree query to see if it is applicable. Inspired-by: Vikas Soni Signed-off-by: Jon Mason Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- include/linux/brcmphy.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 0ed66914b61c..848dc508ef57 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -13,6 +13,7 @@ #define PHY_ID_BCM5241 0x0143bc30 #define PHY_ID_BCMAC131 0x0143bc70 #define PHY_ID_BCM5481 0x0143bca0 +#define PHY_ID_BCM54810 0x03625d00 #define PHY_ID_BCM5482 0x0143bcb0 #define PHY_ID_BCM5411 0x00206070 #define PHY_ID_BCM5421 0x002060e0 @@ -56,6 +57,7 @@ #define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000 #define PHY_BRCM_CLEAR_RGMII_MODE 0x00004000 #define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00008000 + /* Broadcom BCM7xxx specific workarounds */ #define PHY_BRCM_7XXX_REV(x) (((x) >> 8) & 0xff) #define PHY_BRCM_7XXX_PATCH(x) ((x) & 0xff) @@ -111,6 +113,7 @@ #define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC 0x7000 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x0007 #define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT 12 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN (1 << 8) #define MII_BCM54XX_AUXCTL_SHDWSEL_MASK 0x0007 @@ -192,6 +195,12 @@ #define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */ #define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */ +/* BCM54810 Registers */ +#define BCM54810_EXP_BROADREACH_LRE_MISC_CTL (MII_BCM54XX_EXP_SEL_ER + 0x90) +#define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN (1 << 0) +#define BCM54810_SHD_CLK_CTL 0x3 +#define BCM54810_SHD_CLK_CTL_GTXCLK_EN (1 << 9) + /*****************************************************************************/ /* Fast Ethernet Transceiver definitions. */ -- cgit v1.2.3 From 7c13f97ffde63cc792c49ec1513f3974f2f05229 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 4 Nov 2016 11:28:59 +0100 Subject: udp: do fwd memory scheduling on dequeue A new argument is added to __skb_recv_datagram to provide an explicit skb destructor, invoked under the receive queue lock. The UDP protocol uses this argument to perform memory reclaiming on dequeue, so that the UDP protocol no longer sets skb->destructor. Instead, explicit memory reclaiming is performed at close() time and when skbs are removed from the receive queue. The in-kernel UDP protocol users now need to call a skb_recv_udp() variant instead of skb_recv_datagram() to properly perform memory accounting on dequeue. Overall, this allows acquiring the receive queue lock only once on dequeue. Tested using pktgen with random src port, 64 byte packets, wire-speed on a 10G link as sender and udp_sink as the receiver, using an l4 tuple rxhash to stress the contention, and one or more udp_sink instances with reuseport.

nr sinks	vanilla		patched
1		440		560
3		2150		2300
6		3650		3800
9		4450		4600
12		6250		6450

v1 -> v2:
- do rmem and allocated memory scheduling under the receive lock
- do bulk scheduling in first_packet_length() and in udp_destruct_sock()
- avoid the typedef for the dequeue callback

Suggested-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cc6e23eaac91..a4aeeca7e805 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3033,9 +3033,13 @@ static inline void skb_frag_list_init(struct sk_buff *skb) int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, const struct sk_buff *skb); struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); -- cgit v1.2.3
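As a usage illustration (not part of the patch): a minimal sketch of how an in-kernel datagram protocol might pass the new destructor argument shown in the skbuff.h hunk above, so that per-skb memory bookkeeping runs under the receive queue lock. The example_skb_dtor() and example_recv_skb() names are illustrative assumptions; only the __skb_recv_datagram() prototype comes from the header change, and the UDP-specific implementation lives in the parts of the patch outside include/linux.

#include <linux/skbuff.h>
#include <linux/socket.h>
#include <net/sock.h>

/* Invoked under the receive queue lock: this is where a protocol would
 * release the receive memory it scheduled for this skb, instead of
 * relying on skb->destructor being run later.
 */
static void example_skb_dtor(struct sock *sk, struct sk_buff *skb)
{
	/* protocol-specific forward-alloc bookkeeping would go here */
}

static struct sk_buff *example_recv_skb(struct sock *sk, unsigned int flags,
					int noblock, int *err)
{
	int peeked, off = 0;

	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				   example_skb_dtor, &peeked, &off, err);
}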
From 67db3e4bfbc90657c7be840aad5585be46240d6f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Nov 2016 11:54:32 -0700 Subject: tcp: no longer hold ehash lock while calling tcp_get_info() We had various problems in the past in tcp_get_info() and used specific synchronization to avoid deadlocks. We would like to add more instrumentation points for TCP, and avoiding grabbing the socket lock in tcp_get_info() was too costly. Being able to lock the socket allows us to provide a consistent set of fields. inet_diag_dump_icsk() can make sure ehash locks are not held any more when tcp_get_info() is called. We can remove syncp added in commit d654976cbf85 ("tcp: fix a potential deadlock in tcp_get_info()"), but we need to use lock_sock_fast() instead of spin_lock_bh() since TCP input path can now be run from process context. Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a17ae7b85218..32a7c7e35b71 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -176,8 +176,6 @@ struct tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. */ - struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */ - u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ -- cgit v1.2.3 From 5a3c7805c444d9d55f302a4b3930e8758be13fab Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Sat, 5 Nov 2016 14:04:52 +0100 Subject: Revert "net: stmmac: allow to split suspend/resume from init/exit callbacks" Instead of adding hooks inside stmmac_platform it is better to just use the standard PM callbacks within the specific dwmac-driver. This was only used by the dwmac-rk driver. This reverts commit cecbc5563a02 ("stmmac: allow to split suspend/resume from init/exit callbacks"). Signed-off-by: Joachim Eastwood Signed-off-by: David S.
Miller --- include/linux/stmmac.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 705840e0438f..3537fb33cc90 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -135,8 +135,6 @@ struct plat_stmmacenet_data { void (*bus_setup)(void __iomem *ioaddr); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); - void (*suspend)(struct platform_device *pdev, void *priv); - void (*resume)(struct platform_device *pdev, void *priv); void *bsp_priv; struct stmmac_axi *axi; int has_gmac4; -- cgit v1.2.3 From c9f1b073d0d750ccf8b30b272d1d76479f4cccbc Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Mon, 7 Nov 2016 15:14:44 +0200 Subject: net/mlx5: Add creation flags when adding new flow table When creating flow tables, allow the caller to specify creation flags. Currently no flags are used and as such this patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/fs.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 0dcd287f4bd0..ab1a5fd2e995 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -42,6 +42,10 @@ enum { MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, }; +enum { + MLX5_FLOW_TABLE_TUNNEL_EN = BIT(0), +}; + #define LEFTOVERS_RULE_NUM 2 static inline void build_leftovers_ft_param(int *priority, int *n_ent, @@ -97,13 +101,15 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, int max_num_groups, - u32 level); + u32 level, + u32 flags); struct mlx5_flow_table * mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, - u32 level); + u32 level, + u32 flags); struct mlx5_flow_table * mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, -- cgit v1.2.3 From 66958ed906b87816314c0517f05fe0b5766ec7fe Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Mon, 7 Nov 2016 15:14:45 +0200 Subject: net/mlx5: Support encap id when setting new steering entry In order to support steering rules which add encapsulation headers, encap_id parameter is needed. Add new mlx5_flow_act struct which holds action related parameter: action, flow_tag and encap_id. Use mlx5_flow_act struct when adding a new steering rule. This patch doesn't change any functionality. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/fs.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index ab1a5fd2e995..949b24b6c479 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -130,14 +130,19 @@ struct mlx5_flow_group * mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in); void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); +struct mlx5_flow_act { + u32 action; + u32 flow_tag; + u32 encap_id; +}; + /* Single destination per rule. * Group ID is implied by the match criteria. 
*/ struct mlx5_flow_handle * mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec, - u32 action, - u32 flow_tag, + struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num); void mlx5_del_flow_rules(struct mlx5_flow_handle *fr); -- cgit v1.2.3 From 1ababeba4a21f3dba3da3523c670b207fb2feb62 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:39 +0100 Subject: ipv6: implement dataplane support for rthdr type 4 (Segment Routing Header) Implement minimal support for processing of SR-enabled packets as described in https://tools.ietf.org/html/draft-ietf-6man-segment-routing-header-02. This patch implements the following operations: - Intermediate segment endpoint: incrementation of active segment and rerouting. - Egress for SR-encapsulated packets: decapsulation of outer IPv6 header + SRH and routing of inner packet. - Cleanup flag support for SR-inlined packets: removal of SRH if we are the penultimate segment endpoint. A per-interface sysctl seg6_enabled is provided, to accept/deny SR-enabled packets. Default is deny. This patch does not provide support for HMAC-signed packets. Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + include/linux/seg6.h | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 include/linux/seg6.h (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 1afb6e8d35c3..68d3f71f0abf 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -64,6 +64,7 @@ struct ipv6_devconf { } stable_secret; __s32 use_oif_addrs_only; __s32 keep_addr_on_down; + __s32 seg6_enabled; struct ctl_table_header *sysctl_header; }; diff --git a/include/linux/seg6.h b/include/linux/seg6.h new file mode 100644 index 000000000000..7a66d2b4c5a6 --- /dev/null +++ b/include/linux/seg6.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_H +#define _LINUX_SEG6_H + +#include + +#endif -- cgit v1.2.3 From 915d7e5e5930b4f01d0971d93b9b25ed17d221aa Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:40 +0100 Subject: ipv6: sr: add code base for control plane support of SR-IPv6 This patch adds the necessary hooks and structures to provide support for SR-IPv6 control plane, essentially the Generic Netlink commands that will be used for userspace control over the Segment Routing kernel structures. The genetlink commands provide control over two different structures: tunnel source and HMAC data. The tunnel source is the source address that will be used by default when encapsulating packets into an outer IPv6 header + SRH. If the tunnel source is set to :: then an address of the outgoing interface will be selected as the source. The HMAC commands currently just return ENOTSUPP and will be implemented in a future patch. Signed-off-by: David Lebrun Signed-off-by: David S. 
Miller --- include/linux/seg6_genl.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/seg6_genl.h (limited to 'include/linux') diff --git a/include/linux/seg6_genl.h b/include/linux/seg6_genl.h new file mode 100644 index 000000000000..d6c3fb4f3734 --- /dev/null +++ b/include/linux/seg6_genl.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_GENL_H +#define _LINUX_SEG6_GENL_H + +#include + +#endif -- cgit v1.2.3 From 6c8702c60b88651072460f3f4026c7dfe2521d12 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:41 +0100 Subject: ipv6: sr: add support for SRH encapsulation and injection with lwtunnels This patch creates a new type of interfaceless lightweight tunnel (SEG6), enabling the encapsulation and injection of SRH within locally emitted packets and forwarded packets. From a configuration viewpoint, a seg6 tunnel would be configured as follows: ip -6 ro ad fc00::1/128 encap seg6 mode encap segs fc42::1,fc42::2,fc42::3 dev eth0 Any packet whose destination address is fc00::1 would thus be encapsulated within an outer IPv6 header containing the SRH with three segments, and would actually be routed to the first segment of the list. If `mode inline' was specified instead of `mode encap', then the SRH would be directly inserted after the IPv6 header without outer encapsulation. The inline mode is only available if CONFIG_IPV6_SEG6_INLINE is enabled. This feature was made configurable because direct header insertion may break several mechanisms such as PMTUD or IPSec AH. Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/linux/seg6_iptunnel.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/seg6_iptunnel.h (limited to 'include/linux') diff --git a/include/linux/seg6_iptunnel.h b/include/linux/seg6_iptunnel.h new file mode 100644 index 000000000000..5377cf6a5a02 --- /dev/null +++ b/include/linux/seg6_iptunnel.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_IPTUNNEL_H +#define _LINUX_SEG6_IPTUNNEL_H + +#include + +#endif -- cgit v1.2.3 From bf355b8d2c30a289232042cacc1cfaea4923936c Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:42 +0100 Subject: ipv6: sr: add core files for SR HMAC support This patch adds the necessary functions to compute and check the HMAC signature of an SR-enabled packet. Two HMAC algorithms are supported: hmac(sha1) and hmac(sha256). In order to avoid dynamic memory allocation for each HMAC computation, a per-cpu ring buffer is allocated for this purpose. A new per-interface sysctl called seg6_require_hmac is added, allowing a user-defined policy for processing HMAC-signed SR-enabled packets. A value of -1 means that the HMAC field will always be ignored. A value of 0 means that if an HMAC field is present, its validity will be enforced (the packet is dropped if the signature is incorrect). Finally, a value of 1 means that any SR-enabled packet that does not contain an HMAC signature or whose signature is incorrect will be dropped. Signed-off-by: David Lebrun Signed-off-by: David S.
Miller --- include/linux/ipv6.h | 3 +++ include/linux/seg6_hmac.h | 6 ++++++ 2 files changed, 9 insertions(+) create mode 100644 include/linux/seg6_hmac.h (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 68d3f71f0abf..93756585521f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -65,6 +65,9 @@ struct ipv6_devconf { __s32 use_oif_addrs_only; __s32 keep_addr_on_down; __s32 seg6_enabled; +#ifdef CONFIG_IPV6_SEG6_HMAC + __s32 seg6_require_hmac; +#endif struct ctl_table_header *sysctl_header; }; diff --git a/include/linux/seg6_hmac.h b/include/linux/seg6_hmac.h new file mode 100644 index 000000000000..da437ebdc6cd --- /dev/null +++ b/include/linux/seg6_hmac.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_HMAC_H +#define _LINUX_SEG6_HMAC_H + +#include + +#endif -- cgit v1.2.3 From 149d6ad83663b4820ca09c9d40b1eea7f5c22c2b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Nov 2016 11:07:28 -0800 Subject: net: napi_hash_add() is no longer exported There are no more users except from net/core/dev.c napi_hash_add() can now be static. Signed-off-by: Eric Dumazet Cc: Michael Chan Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 66fd61c681d9..d64135a0ab71 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -467,17 +467,6 @@ static inline void napi_complete(struct napi_struct *n) return napi_complete_done(n, 0); } -/** - * napi_hash_add - add a NAPI to global hashtable - * @napi: NAPI context - * - * Generate a new napi_id and store a @napi under it in napi_hash. - * Used for busy polling (CONFIG_NET_RX_BUSY_POLL). - * Note: This is normally automatically done from netif_napi_add(), - * so might disappear in a future Linux version. - */ -void napi_hash_add(struct napi_struct *napi); - /** * napi_hash_del - remove a NAPI from global table * @napi: NAPI context -- cgit v1.2.3 From d8d26354191399627bac9cf0da0667b0f5178686 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Tue, 8 Nov 2016 22:49:16 +0100 Subject: ptp: Introduce a high resolution frequency adjustment method. The internal PTP Hardware Clock (PHC) interface limits the resolution for frequency adjustments to one part per billion. However, some hardware devices allow finer adjustment, and making use of the increased resolution improves synchronization measurably on such devices. This patch adds an alternative method that allows finer frequency tuning by passing the scaled ppm value to PHC drivers. This value comes from user space, and it has a resolution of about 0.015 ppb. We also deprecate the older method, anticipating its removal once existing drivers have been converted over. Signed-off-by: Richard Cochran Suggested-by: Ulrik De Bie Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 5ad54fc66cf0..b76d47aba564 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -58,7 +58,14 @@ struct system_device_crosststamp; * * clock operations * + * @adjfine: Adjusts the frequency of the hardware clock. + * parameter scaled_ppm: Desired frequency offset from + * nominal frequency in parts per million, but with a + * 16 bit binary fractional field. + * * @adjfreq: Adjusts the frequency of the hardware clock. 
+ * This method is deprecated. New drivers should implement + * the @adjfine method instead. * parameter delta: Desired frequency offset from nominal frequency * in parts per billion * @@ -108,6 +115,7 @@ struct ptp_clock_info { int n_pins; int pps; struct ptp_pin_desc *pin_config; + int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm); int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); -- cgit v1.2.3 From 2da16a6948ca8f025e2c226ea4fc32baa6b90f27 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 10 Nov 2016 11:17:25 +0100 Subject: netfilter: ipset: Remove extra whitespaces in ip_set.h Remove unnecessary whitespaces. Ported from a patch proposed by Sergey Popovich . Suggested-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 83b9a2e0d8d4..5b1fd090f34b 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -336,14 +336,15 @@ ip_set_update_counter(struct ip_set_counter *counter, static inline void ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, - const struct ip_set_ext *ext, - struct ip_set_ext *mext, u32 flags) + const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) { - mext->skbmark = skbinfo->skbmark; - mext->skbmarkmask = skbinfo->skbmarkmask; - mext->skbprio = skbinfo->skbprio; - mext->skbqueue = skbinfo->skbqueue; + mext->skbmark = skbinfo->skbmark; + mext->skbmarkmask = skbinfo->skbmarkmask; + mext->skbprio = skbinfo->skbprio; + mext->skbqueue = skbinfo->skbqueue; } + static inline bool ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) { -- cgit v1.2.3 From da9fbfa76f32a031cb70b11e9fa650e30c85d040 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 10 Nov 2016 11:24:15 +0100 Subject: netfilter: ipset: Mark some helper args as const. Mark some of the helpers arguments as const. Ported from a patch proposed by Sergey Popovich . 
Suggested-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 4 ++-- include/linux/netfilter/ipset/ip_set_comment.h | 2 +- include/linux/netfilter/ipset/ip_set_timeout.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 5b1fd090f34b..524467f933bf 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -346,7 +346,7 @@ ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, } static inline bool -ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) +ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo) { /* Send nonzero parameters only */ return ((skbinfo->skbmark || skbinfo->skbmarkmask) && @@ -373,7 +373,7 @@ ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, } static inline bool -ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter) +ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter) { return nla_put_net64(skb, IPSET_ATTR_BYTES, cpu_to_be64(ip_set_get_bytes(counter)), diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index 8d0248525957..bae5c7609be2 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -43,7 +43,7 @@ ip_set_init_comment(struct ip_set_comment *comment, /* Used only when dumping a set, protected by rcu_read_lock_bh() */ static inline int -ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment) +ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment) { struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c); diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 1d6a935c1ac5..bfb3531fd88a 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -40,7 +40,7 @@ ip_set_timeout_uget(struct nlattr *tb) } static inline bool -ip_set_timeout_expired(unsigned long *t) +ip_set_timeout_expired(const unsigned long *t) { return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t); } @@ -63,7 +63,7 @@ ip_set_timeout_set(unsigned long *timeout, u32 value) } static inline u32 -ip_set_timeout_get(unsigned long *timeout) +ip_set_timeout_get(const unsigned long *timeout) { return *timeout == IPSET_ELEM_PERMANENT ? 0 : jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC; -- cgit v1.2.3 From 7ffea37957b900422ce8b82e9651f7a0a6fac733 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 10 Nov 2016 11:31:03 +0100 Subject: netfilter: ipset: Headers file cleanup Group counter helper functions together. Ported from a patch proposed by Sergey Popovich . 
Suggested-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 524467f933bf..1ea28e30a6dd 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -334,6 +334,27 @@ ip_set_update_counter(struct ip_set_counter *counter, } } +static inline bool +ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter) +{ + return nla_put_net64(skb, IPSET_ATTR_BYTES, + cpu_to_be64(ip_set_get_bytes(counter)), + IPSET_ATTR_PAD) || + nla_put_net64(skb, IPSET_ATTR_PACKETS, + cpu_to_be64(ip_set_get_packets(counter)), + IPSET_ATTR_PAD); +} + +static inline void +ip_set_init_counter(struct ip_set_counter *counter, + const struct ip_set_ext *ext) +{ + if (ext->bytes != ULLONG_MAX) + atomic64_set(&(counter)->bytes, (long long)(ext->bytes)); + if (ext->packets != ULLONG_MAX) + atomic64_set(&(counter)->packets, (long long)(ext->packets)); +} + static inline void ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, const struct ip_set_ext *ext, @@ -372,27 +393,6 @@ ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, skbinfo->skbqueue = ext->skbqueue; } -static inline bool -ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter) -{ - return nla_put_net64(skb, IPSET_ATTR_BYTES, - cpu_to_be64(ip_set_get_bytes(counter)), - IPSET_ATTR_PAD) || - nla_put_net64(skb, IPSET_ATTR_PACKETS, - cpu_to_be64(ip_set_get_packets(counter)), - IPSET_ATTR_PAD); -} - -static inline void -ip_set_init_counter(struct ip_set_counter *counter, - const struct ip_set_ext *ext) -{ - if (ext->bytes != ULLONG_MAX) - atomic64_set(&(counter)->bytes, (long long)(ext->bytes)); - if (ext->packets != ULLONG_MAX) - atomic64_set(&(counter)->packets, (long long)(ext->packets)); -} - /* Netlink CB args */ enum { IPSET_CB_NET = 0, /* net namespace */ -- cgit v1.2.3 From bec810d973003b30bc477146904af6bd93fd2df8 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 5 May 2015 17:13:28 +0200 Subject: netfilter: ipset: Improve skbinfo get/init helpers Use struct ip_set_skbinfo in struct ip_set_ext instead of open coded fields and assign structure members in get/init helpers instead of copying members one by one. Explicitly note that struct ip_set_skbinfo must be padded to prevent non-aligned access in the extension blob. Ported from a patch proposed by Sergey Popovich . 
Suggested-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 1ea28e30a6dd..780262124632 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -92,17 +92,6 @@ struct ip_set_ext_type { extern const struct ip_set_ext_type ip_set_extensions[]; -struct ip_set_ext { - u64 packets; - u64 bytes; - u32 timeout; - u32 skbmark; - u32 skbmarkmask; - u32 skbprio; - u16 skbqueue; - char *comment; -}; - struct ip_set_counter { atomic64_t bytes; atomic64_t packets; @@ -122,6 +111,15 @@ struct ip_set_skbinfo { u32 skbmarkmask; u32 skbprio; u16 skbqueue; + u16 __pad; +}; + +struct ip_set_ext { + struct ip_set_skbinfo skbinfo; + u64 packets; + u64 bytes; + char *comment; + u32 timeout; }; struct ip_set; @@ -360,10 +358,7 @@ ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { - mext->skbmark = skbinfo->skbmark; - mext->skbmarkmask = skbinfo->skbmarkmask; - mext->skbprio = skbinfo->skbprio; - mext->skbqueue = skbinfo->skbqueue; + mext->skbinfo = *skbinfo; } static inline bool @@ -387,10 +382,7 @@ static inline void ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, const struct ip_set_ext *ext) { - skbinfo->skbmark = ext->skbmark; - skbinfo->skbmarkmask = ext->skbmarkmask; - skbinfo->skbprio = ext->skbprio; - skbinfo->skbqueue = ext->skbqueue; + *skbinfo = ext->skbinfo; } /* Netlink CB args */ -- cgit v1.2.3 From 1d0d6bd61d495d271b9774a15fbea93e4875474b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 6 May 2015 07:27:28 +0200 Subject: netfilter: ipset: Use kmalloc() in comment extension helper Allocate memory with kmalloc() rather than kzalloc(): the string is immediately initialized so it is unnecessary to zero out the allocated memory area. Ported from a patch proposed by Sergey Popovich . Suggested-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set_comment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index bae5c7609be2..5444b1bbe656 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -34,7 +34,7 @@ ip_set_init_comment(struct ip_set_comment *comment, return; if (unlikely(len > IPSET_MAX_COMMENT_SIZE)) len = IPSET_MAX_COMMENT_SIZE; - c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC); + c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC); if (unlikely(!c)) return; strlcpy(c->str, ext->comment, len + 1); -- cgit v1.2.3 From 57982edc2739b4473868e7579c0185270468bae1 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 10 Oct 2016 21:34:56 +0200 Subject: netfilter: ipset: Split extensions into separate files Cleanup to separate all extensions into individual files. Ported from a patch proposed by Sergey Popovich . 
Suggested-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 95 +------------------------- include/linux/netfilter/ipset/ip_set_counter.h | 75 ++++++++++++++++++++ include/linux/netfilter/ipset/ip_set_skbinfo.h | 46 +++++++++++++ 3 files changed, 123 insertions(+), 93 deletions(-) create mode 100644 include/linux/netfilter/ipset/ip_set_counter.h create mode 100644 include/linux/netfilter/ipset/ip_set_skbinfo.h (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 780262124632..b5bd0fb3d07b 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -292,99 +292,6 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set) return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags)); } -static inline void -ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) -{ - atomic64_add((long long)bytes, &(counter)->bytes); -} - -static inline void -ip_set_add_packets(u64 packets, struct ip_set_counter *counter) -{ - atomic64_add((long long)packets, &(counter)->packets); -} - -static inline u64 -ip_set_get_bytes(const struct ip_set_counter *counter) -{ - return (u64)atomic64_read(&(counter)->bytes); -} - -static inline u64 -ip_set_get_packets(const struct ip_set_counter *counter) -{ - return (u64)atomic64_read(&(counter)->packets); -} - -static inline void -ip_set_update_counter(struct ip_set_counter *counter, - const struct ip_set_ext *ext, - struct ip_set_ext *mext, u32 flags) -{ - if (ext->packets != ULLONG_MAX && - !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) { - ip_set_add_bytes(ext->bytes, counter); - ip_set_add_packets(ext->packets, counter); - } - if (flags & IPSET_FLAG_MATCH_COUNTERS) { - mext->packets = ip_set_get_packets(counter); - mext->bytes = ip_set_get_bytes(counter); - } -} - -static inline bool -ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter) -{ - return nla_put_net64(skb, IPSET_ATTR_BYTES, - cpu_to_be64(ip_set_get_bytes(counter)), - IPSET_ATTR_PAD) || - nla_put_net64(skb, IPSET_ATTR_PACKETS, - cpu_to_be64(ip_set_get_packets(counter)), - IPSET_ATTR_PAD); -} - -static inline void -ip_set_init_counter(struct ip_set_counter *counter, - const struct ip_set_ext *ext) -{ - if (ext->bytes != ULLONG_MAX) - atomic64_set(&(counter)->bytes, (long long)(ext->bytes)); - if (ext->packets != ULLONG_MAX) - atomic64_set(&(counter)->packets, (long long)(ext->packets)); -} - -static inline void -ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, - const struct ip_set_ext *ext, - struct ip_set_ext *mext, u32 flags) -{ - mext->skbinfo = *skbinfo; -} - -static inline bool -ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo) -{ - /* Send nonzero parameters only */ - return ((skbinfo->skbmark || skbinfo->skbmarkmask) && - nla_put_net64(skb, IPSET_ATTR_SKBMARK, - cpu_to_be64((u64)skbinfo->skbmark << 32 | - skbinfo->skbmarkmask), - IPSET_ATTR_PAD)) || - (skbinfo->skbprio && - nla_put_net32(skb, IPSET_ATTR_SKBPRIO, - cpu_to_be32(skbinfo->skbprio))) || - (skbinfo->skbqueue && - nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, - cpu_to_be16(skbinfo->skbqueue))); -} - -static inline void -ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, - const struct ip_set_ext *ext) -{ - *skbinfo = ext->skbinfo; -} - /* Netlink CB args */ enum { IPSET_CB_NET = 0, /* net namespace */ @@ -539,6 +446,8 @@ bitmap_bytes(u32 a, u32 b) #include #include +#include +#include int ip_set_put_extensions(struct sk_buff 
*skb, const struct ip_set *set, diff --git a/include/linux/netfilter/ipset/ip_set_counter.h b/include/linux/netfilter/ipset/ip_set_counter.h new file mode 100644 index 000000000000..bb6fba480118 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_counter.h @@ -0,0 +1,75 @@ +#ifndef _IP_SET_COUNTER_H +#define _IP_SET_COUNTER_H + +/* Copyright (C) 2015 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef __KERNEL__ + +static inline void +ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) +{ + atomic64_add((long long)bytes, &(counter)->bytes); +} + +static inline void +ip_set_add_packets(u64 packets, struct ip_set_counter *counter) +{ + atomic64_add((long long)packets, &(counter)->packets); +} + +static inline u64 +ip_set_get_bytes(const struct ip_set_counter *counter) +{ + return (u64)atomic64_read(&(counter)->bytes); +} + +static inline u64 +ip_set_get_packets(const struct ip_set_counter *counter) +{ + return (u64)atomic64_read(&(counter)->packets); +} + +static inline void +ip_set_update_counter(struct ip_set_counter *counter, + const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + if (ext->packets != ULLONG_MAX && + !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) { + ip_set_add_bytes(ext->bytes, counter); + ip_set_add_packets(ext->packets, counter); + } + if (flags & IPSET_FLAG_MATCH_COUNTERS) { + mext->packets = ip_set_get_packets(counter); + mext->bytes = ip_set_get_bytes(counter); + } +} + +static inline bool +ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter) +{ + return nla_put_net64(skb, IPSET_ATTR_BYTES, + cpu_to_be64(ip_set_get_bytes(counter)), + IPSET_ATTR_PAD) || + nla_put_net64(skb, IPSET_ATTR_PACKETS, + cpu_to_be64(ip_set_get_packets(counter)), + IPSET_ATTR_PAD); +} + +static inline void +ip_set_init_counter(struct ip_set_counter *counter, + const struct ip_set_ext *ext) +{ + if (ext->bytes != ULLONG_MAX) + atomic64_set(&(counter)->bytes, (long long)(ext->bytes)); + if (ext->packets != ULLONG_MAX) + atomic64_set(&(counter)->packets, (long long)(ext->packets)); +} + +#endif /* __KERNEL__ */ +#endif /* _IP_SET_COUNTER_H */ diff --git a/include/linux/netfilter/ipset/ip_set_skbinfo.h b/include/linux/netfilter/ipset/ip_set_skbinfo.h new file mode 100644 index 000000000000..29d7ef2bc3fa --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_skbinfo.h @@ -0,0 +1,46 @@ +#ifndef _IP_SET_SKBINFO_H +#define _IP_SET_SKBINFO_H + +/* Copyright (C) 2015 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifdef __KERNEL__ + +static inline void +ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, + const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + mext->skbinfo = *skbinfo; +} + +static inline bool +ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo) +{ + /* Send nonzero parameters only */ + return ((skbinfo->skbmark || skbinfo->skbmarkmask) && + nla_put_net64(skb, IPSET_ATTR_SKBMARK, + cpu_to_be64((u64)skbinfo->skbmark << 32 | + skbinfo->skbmarkmask), + IPSET_ATTR_PAD)) || + (skbinfo->skbprio && + nla_put_net32(skb, IPSET_ATTR_SKBPRIO, + cpu_to_be32(skbinfo->skbprio))) || + (skbinfo->skbqueue && + nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, + cpu_to_be16(skbinfo->skbqueue))); +} + +static inline void +ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, + const struct ip_set_ext *ext) +{ + *skbinfo = ext->skbinfo; +} + +#endif /* __KERNEL__ */ +#endif /* _IP_SET_SKBINFO_H */ -- cgit v1.2.3 From 837a90eab67edfa464dcc0ddef193449d23da408 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 10 Oct 2016 21:52:51 +0200 Subject: netfilter: ipset: Regroup ip_set_put_extensions and add extern Cleanup: group ip_set_put_extensions and ip_set_get_extensions together and add missing extern. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index b5bd0fb3d07b..7a218eb74887 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -331,6 +331,8 @@ extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len, size_t align); extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext); +extern int ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, + const void *e, bool active); static inline int ip_set_get_hostipaddr4(struct nlattr *nla, u32 *ipaddr) @@ -449,10 +451,6 @@ bitmap_bytes(u32 a, u32 b) #include #include -int -ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, - const void *e, bool active); - #define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, \ .timeout = ip_set_adt_opt_timeout(opt, set) } -- cgit v1.2.3 From 702b71e7c666a1c9be9d49e8cd173f0d4d1e859f Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 10 Oct 2016 22:07:41 +0200 Subject: netfilter: ipset: Add element count to all set types header It is better to list the set elements for all set types, thus the header information is uniform. Element counts are therefore added to the bitmap and list types. 
Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 2 ++ include/linux/netfilter/ipset/ip_set_bitmap.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 7a218eb74887..4671d740610f 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -250,6 +250,8 @@ struct ip_set { u8 flags; /* Default timeout value, if enabled */ u32 timeout; + /* Number of elements (vs timeout) */ + u32 elements; /* Element data size */ size_t dsize; /* Offsets to extensions in elements */ diff --git a/include/linux/netfilter/ipset/ip_set_bitmap.h b/include/linux/netfilter/ipset/ip_set_bitmap.h index 5e4662a71e01..366d6c0ea04f 100644 --- a/include/linux/netfilter/ipset/ip_set_bitmap.h +++ b/include/linux/netfilter/ipset/ip_set_bitmap.h @@ -6,8 +6,8 @@ #define IPSET_BITMAP_MAX_RANGE 0x0000FFFF enum { + IPSET_ADD_STORE_PLAIN_TIMEOUT = -1, IPSET_ADD_FAILED = 1, - IPSET_ADD_STORE_PLAIN_TIMEOUT, IPSET_ADD_START_STORED_TIMEOUT, }; -- cgit v1.2.3 From 9e41f26a505cca04b7122e65053cf6447007ea79 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 10 Nov 2016 12:05:34 +0100 Subject: netfilter: ipset: Count non-static extension memory for userspace Non-static (i.e. comment) extension was not counted into the memory size. A new internal counter is introduced for this. In the case of the hash types the sizes of the arrays are counted there as well so that we can avoid to scan the whole set when just the header data is requested. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 8 ++++++-- include/linux/netfilter/ipset/ip_set_comment.h | 7 +++++-- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 4671d740610f..8e42253e5d4d 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -79,10 +79,12 @@ enum ip_set_ext_id { IPSET_EXT_ID_MAX, }; +struct ip_set; + /* Extension type */ struct ip_set_ext_type { /* Destroy extension private data (can be NULL) */ - void (*destroy)(void *ext); + void (*destroy)(struct ip_set *set, void *ext); enum ip_set_extension type; enum ipset_cadt_flags flag; /* Size and minimal alignment */ @@ -252,6 +254,8 @@ struct ip_set { u32 timeout; /* Number of elements (vs timeout) */ u32 elements; + /* Size of the dynamic extensions (vs timeout) */ + size_t ext_size; /* Element data size */ size_t dsize; /* Offsets to extensions in elements */ @@ -268,7 +272,7 @@ ip_set_ext_destroy(struct ip_set *set, void *data) */ if (SET_WITH_COMMENT(set)) ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy( - ext_comment(data, set)); + set, ext_comment(data, set)); } static inline int diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index 5444b1bbe656..8e2bab1e8e90 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -20,13 +20,14 @@ ip_set_comment_uget(struct nlattr *tb) * The kadt functions don't use the comment extensions in any way. */ static inline void -ip_set_init_comment(struct ip_set_comment *comment, +ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, const struct ip_set_ext *ext) { struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1); size_t len = ext->comment ? 
strlen(ext->comment) : 0; if (unlikely(c)) { + set->ext_size -= sizeof(*c) + strlen(c->str) + 1; kfree_rcu(c, rcu); rcu_assign_pointer(comment->c, NULL); } @@ -38,6 +39,7 @@ ip_set_init_comment(struct ip_set_comment *comment, if (unlikely(!c)) return; strlcpy(c->str, ext->comment, len + 1); + set->ext_size += sizeof(*c) + strlen(c->str) + 1; rcu_assign_pointer(comment->c, c); } @@ -58,13 +60,14 @@ ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment) * of the set data anymore. */ static inline void -ip_set_comment_free(struct ip_set_comment *comment) +ip_set_comment_free(struct ip_set *set, struct ip_set_comment *comment) { struct ip_set_comment_rcu *c; c = rcu_dereference_protected(comment->c, 1); if (unlikely(!c)) return; + set->ext_size -= sizeof(*c) + strlen(c->str) + 1; kfree_rcu(c, rcu); rcu_assign_pointer(comment->c, NULL); } -- cgit v1.2.3 From c540594f864bb4645573c2c0a304919fabb3d7ea Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 9 Nov 2016 22:02:34 +0100 Subject: bpf, mlx4: fix prog refcount in mlx4_en_try_alloc_resources error path Commit 67f8b1dcb9ee ("net/mlx4_en: Refactor the XDP forwarding rings scheme") added a bug in that the prog's reference count is not dropped in the error path when mlx4_en_try_alloc_resources() is failing from mlx4_xdp_set(). We previously took bpf_prog_add(prog, priv->rx_ring_num - 1), that we need to release again. Earlier in the call path, dev_change_xdp_fd() itself holds a reference to the prog as well (hence the '- 1' in the bpf_prog_add()), so a simple atomic_sub() is safe to use here. When an error is propagated, then bpf_prog_put() is called eventually from dev_change_xdp_fd() Fixes: 67f8b1dcb9ee ("net/mlx4_en: Refactor the XDP forwarding rings scheme") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index edcd96ded8aa..01c1487277b2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -234,6 +234,7 @@ void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i); +void bpf_prog_sub(struct bpf_prog *prog, int i); struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); @@ -303,6 +304,10 @@ static inline struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) return ERR_PTR(-EOPNOTSUPP); } +static inline void bpf_prog_sub(struct bpf_prog *prog, int i) +{ +} + static inline void bpf_prog_put(struct bpf_prog *prog) { } -- cgit v1.2.3 From 372788f964c95a6fa0f677c43d6153c27896ef42 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Thu, 10 Nov 2016 17:10:46 -0600 Subject: net: phy: expose phy_aneg_done API for use by drivers Make phy_aneg_done() available to drivers so that the result of the auto-negotiation initiated by phy_start_aneg() can be determined. Remove the local implementation of phy_aneg_done() from the Aeroflex driver and use the phy library version. Signed-off-by: Tom Lendacky Signed-off-by: David S. 
Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index e7e1fd382564..9880d73a2c3d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -786,6 +786,7 @@ void phy_detach(struct phy_device *phydev); void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); +int phy_aneg_done(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); -- cgit v1.2.3 From 4a4f86cc7d6bc74522f581341a2cae3119d5a0f5 Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Sun, 13 Nov 2016 20:43:52 -0800 Subject: vxlan: avoid vlan processing in vxlan device. VxLan device does not have special handling for vlan taging on egress. Therefore it does not make sense to expose vlan offloading feature. This patch does not change vxlan functinality. Signed-off-by: Pravin B Shelar Acked-by: Jiri Benc Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 3319d97d789d..8d5fcd6284ce 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -399,22 +399,6 @@ static inline struct sk_buff *__vlan_hwaccel_push_inside(struct sk_buff *skb) skb->vlan_tci = 0; return skb; } -/* - * vlan_hwaccel_push_inside - pushes vlan tag to the payload - * @skb: skbuff to tag - * - * Checks is tag is present in @skb->vlan_tci and if it is, it pushes the - * VLAN tag from @skb->vlan_tci inside to the payload. - * - * Following the skb_unshare() example, in case of error, the calling function - * doesn't have to worry about freeing the original skb. - */ -static inline struct sk_buff *vlan_hwaccel_push_inside(struct sk_buff *skb) -{ - if (skb_vlan_tag_present(skb)) - skb = __vlan_hwaccel_push_inside(skb); - return skb; -} /** * __vlan_hwaccel_put_tag - hardware accelerated VLAN inserting -- cgit v1.2.3 From e86a8987e458a1826f509c41494b0b29a61144a7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 15 Nov 2016 10:06:30 -0800 Subject: net: phy: Add phy_ethtool_nway_reset This function just calls into genphy_restart_aneg() to perform an autonegotation restart. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 9880d73a2c3d..b9bd3b4f4ea1 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -860,6 +860,7 @@ int phy_ethtool_get_link_ksettings(struct net_device *ndev, struct ethtool_link_ksettings *cmd); int phy_ethtool_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *cmd); +int phy_ethtool_nway_reset(struct net_device *ndev); int __init mdio_bus_init(void); void mdio_bus_exit(void); -- cgit v1.2.3 From ff86aae3b4112b85d2231c23bccbc49589df1c06 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Tue, 15 Nov 2016 10:41:01 +0200 Subject: devres: add devm_alloc_percpu() Introduce managed counterparts for alloc_percpu() and free_percpu(). Add devm_alloc_percpu() and devm_free_percpu() into the managed interfaces list. Signed-off-by: Madalin Bucur Signed-off-by: David S. 
Miller --- include/linux/device.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index bc41e87a969b..a00105cf795e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -698,6 +698,25 @@ static inline int devm_add_action_or_reset(struct device *dev, return ret; } +/** + * devm_alloc_percpu - Resource-managed alloc_percpu + * @dev: Device to allocate per-cpu memory for + * @type: Type to allocate per-cpu memory for + * + * Managed alloc_percpu. Per-cpu memory allocated with this function is + * automatically freed on driver detach. + * + * RETURNS: + * Pointer to allocated memory on success, NULL on failure. + */ +#define devm_alloc_percpu(dev, type) \ + ((typeof(type) __percpu *)__devm_alloc_percpu((dev), sizeof(type), \ + __alignof__(type))) + +void __percpu *__devm_alloc_percpu(struct device *dev, size_t size, + size_t align); +void devm_free_percpu(struct device *dev, void __percpu *pdata); + struct device_dma_parameters { /* * a low level driver may set these to teach IOMMU code about -- cgit v1.2.3 From 217f6974368188fd8bd7804bf5a036aa5762c5e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Nov 2016 10:15:11 -0800 Subject: net: busy-poll: allow preemption in sk_busy_loop() After commit 4cd13c21b207 ("softirq: Let ksoftirqd do its job"), sk_busy_loop() needs a bit of care : softirqs might be delayed since we do not allow preemption yet. This patch adds preemptiom points in sk_busy_loop(), and makes sure no unnecessary cache line dirtying or atomic operations are done while looping. A new flag is added into napi->state : NAPI_STATE_IN_BUSY_POLL This prevents napi_complete_done() from clearing NAPIF_STATE_SCHED, so that sk_busy_loop() does not have to grab it again. Similarly, netpoll_poll_lock() is done one time. This gives about 10 to 20 % improvement in various busy polling tests, especially when many threads are busy polling in configurations with large number of NIC queues. This should allow experimenting with bigger delays without hurting overall latencies. Tested: On a 40Gb mlx4 NIC, 32 RX/TX queues. echo 70 >/proc/sys/net/core/busy_read for i in `seq 1 40`; do echo -n $i: ; ./super_netperf $i -H lpaa24 -t UDP_RR -- -N -n; done Before: After: 1: 90072 92819 2: 157289 184007 3: 235772 213504 4: 344074 357513 5: 394755 458267 6: 461151 487819 7: 549116 625963 8: 544423 716219 9: 720460 738446 10: 794686 837612 11: 915998 923960 12: 937507 925107 13: 1019677 971506 14: 1046831 1113650 15: 1114154 1148902 16: 1105221 1179263 17: 1266552 1299585 18: 1258454 1383817 19: 1341453 1312194 20: 1363557 1488487 21: 1387979 1501004 22: 1417552 1601683 23: 1550049 1642002 24: 1568876 1601915 25: 1560239 1683607 26: 1640207 1745211 27: 1706540 1723574 28: 1638518 1722036 29: 1734309 1757447 30: 1782007 1855436 31: 1724806 1888539 32: 1717716 1944297 33: 1778716 1869118 34: 1805738 1983466 35: 1815694 2020758 36: 1893059 2035632 37: 1843406 2034653 38: 1888830 2086580 39: 1972827 2143567 40: 1877729 2181851 Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Adam Belay Cc: Tariq Toukan Cc: Yuval Mintz Cc: Ariel Elior Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 86bacf6a64f0..e71de66e3792 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -334,6 +334,16 @@ enum { NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ + NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ +}; + +enum { + NAPIF_STATE_SCHED = (1UL << NAPI_STATE_SCHED), + NAPIF_STATE_DISABLE = (1UL << NAPI_STATE_DISABLE), + NAPIF_STATE_NPSVC = (1UL << NAPI_STATE_NPSVC), + NAPIF_STATE_HASHED = (1UL << NAPI_STATE_HASHED), + NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL), + NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL), }; enum gro_result { -- cgit v1.2.3 From 364b6055738b4c752c30ccaaf25c624e69d76195 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Nov 2016 10:15:13 -0800 Subject: net: busy-poll: return busypolling status to drivers NAPI drivers use napi_complete_done() or napi_complete() when they drained RX ring and right before re-enabling device interrupts. In busy polling, we can avoid interrupts being delivered since we are polling RX ring in a controlled loop. Drivers can chose to use napi_complete_done() return value to reduce interrupts overhead while busy polling is active. This is optional, legacy drivers should work fine even if not updated. Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Adam Belay Cc: Tariq Toukan Cc: Yuval Mintz Cc: Ariel Elior Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e71de66e3792..bcddf951ccee 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -463,16 +463,17 @@ static inline bool napi_reschedule(struct napi_struct *napi) return false; } -void __napi_complete(struct napi_struct *n); -void napi_complete_done(struct napi_struct *n, int work_done); +bool __napi_complete(struct napi_struct *n); +bool napi_complete_done(struct napi_struct *n, int work_done); /** * napi_complete - NAPI processing complete * @n: NAPI context * * Mark NAPI processing as complete. * Consider using napi_complete_done() instead. + * Return false if device should avoid rearming interrupts. */ -static inline void napi_complete(struct napi_struct *n) +static inline bool napi_complete(struct napi_struct *n) { return napi_complete_done(n, 0); } -- cgit v1.2.3 From 89c4b442b78bdba388337cc746fe63caba85f46c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Nov 2016 14:54:50 -0800 Subject: netpoll: more efficient locking Callers of netpoll_poll_lock() own NAPI_STATE_SCHED Callers of netpoll_poll_unlock() have BH blocked between the NAPI_STATE_SCHED being cleared and poll_lock is released. We can avoid the spinlock which has no contention, and use cmpxchg() on poll_owner which we need to set anyway. This removes a possible lockdep violation after the cited commit, since sk_busy_loop() re-enables BH before calling busy_poll_stop() Fixes: 217f69743681 ("net: busy-poll: allow preemption in sk_busy_loop()") Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 - include/linux/netpoll.h | 13 +++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bcddf951ccee..e84800edd249 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -316,7 +316,6 @@ struct napi_struct { unsigned int gro_count; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL - spinlock_t poll_lock; int poll_owner; #endif struct net_device *dev; diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index b25ee9ffdbe6..1828900c9411 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -78,8 +78,11 @@ static inline void *netpoll_poll_lock(struct napi_struct *napi) struct net_device *dev = napi->dev; if (dev && dev->npinfo) { - spin_lock(&napi->poll_lock); - napi->poll_owner = smp_processor_id(); + int owner = smp_processor_id(); + + while (cmpxchg(&napi->poll_owner, -1, owner) != -1) + cpu_relax(); + return napi; } return NULL; @@ -89,10 +92,8 @@ static inline void netpoll_poll_unlock(void *have) { struct napi_struct *napi = have; - if (napi) { - napi->poll_owner = -1; - spin_unlock(&napi->poll_lock); - } + if (napi) + smp_store_release(&napi->poll_owner, -1); } static inline bool netpoll_tx_running(struct net_device *dev) -- cgit v1.2.3 From 0ac3ea70897fb9f84b620aeda074ecccf481629d Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 17 Nov 2016 13:45:55 +0200 Subject: net/mlx5: Make the command interface cache more flexible Add more cache command size sets and more entries for each set based on the current commands set different sizes and commands frequency. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ecc451d89ccd..5e7dbbcf47f0 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -208,7 +208,7 @@ struct mlx5_cmd_first { struct mlx5_cmd_msg { struct list_head list; - struct cache_ent *cache; + struct cmd_msg_cache *parent; u32 len; struct mlx5_cmd_first first; struct mlx5_cmd_mailbox *next; @@ -228,17 +228,17 @@ struct mlx5_cmd_debug { u16 outlen; }; -struct cache_ent { +struct cmd_msg_cache { /* protect block chain allocations */ spinlock_t lock; struct list_head head; + unsigned int max_inbox_size; + unsigned int num_ent; }; -struct cmd_msg_cache { - struct cache_ent large; - struct cache_ent med; - +enum { + MLX5_NUM_COMMAND_CACHES = 5, }; struct mlx5_cmd_stats { @@ -281,7 +281,7 @@ struct mlx5_cmd { struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; struct pci_pool *pool; struct mlx5_cmd_debug dbg; - struct cmd_msg_cache cache; + struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES]; int checksum_disabled; struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; }; -- cgit v1.2.3 From 4ce3bf2fa8ba309b5ca19539fcc8671a0fc084f9 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 17 Nov 2016 13:45:56 +0200 Subject: net/mlx5: Port module event hardware structures Add hardware structures and constants definitions needed for module events support. Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky Signed-off-by: David S. 
Miller --- include/linux/mlx5/device.h | 11 +++++++++++ include/linux/mlx5/mlx5_ifc.h | 3 ++- include/linux/mlx5/port.h | 3 +++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 58276144ba81..52b437431c6a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -277,6 +277,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_INTERNAL_ERROR = 0x08, MLX5_EVENT_TYPE_PORT_CHANGE = 0x09, MLX5_EVENT_TYPE_GPIO_EVENT = 0x15, + MLX5_EVENT_TYPE_PORT_MODULE_EVENT = 0x16, MLX5_EVENT_TYPE_REMOTE_CONFIG = 0x19, MLX5_EVENT_TYPE_DB_BF_CONGESTION = 0x1a, @@ -552,6 +553,15 @@ struct mlx5_eqe_vport_change { __be32 rsvd1[6]; } __packed; +struct mlx5_eqe_port_module { + u8 reserved_at_0[1]; + u8 module; + u8 reserved_at_2[1]; + u8 module_status; + u8 reserved_at_4[2]; + u8 error_type; +} __packed; + union ev_data { __be32 raw[7]; struct mlx5_eqe_cmd cmd; @@ -565,6 +575,7 @@ union ev_data { struct mlx5_eqe_page_req req_pages; struct mlx5_eqe_page_fault page_fault; struct mlx5_eqe_vport_change vport_change; + struct mlx5_eqe_port_module port_module; } __packed; struct mlx5_eqe { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2632cb2caf10..cd1d530ca368 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -824,7 +824,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 early_vf_enable[0x1]; u8 reserved_at_1a9[0x2]; u8 local_ca_ack_delay[0x5]; - u8 reserved_at_1af[0x2]; + u8 port_module_event[0x1]; + u8 reserved_at_1b0[0x1]; u8 ports_check[0x1]; u8 reserved_at_1b2[0x1]; u8 disable_link_up[0x1]; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index b3065acd20b4..dde8c7ec5ff1 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -94,6 +94,9 @@ enum mlx5e_link_mode { #define MLX5E_PROT_MASK(link_mode) (1 << link_mode) +#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF +#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF + int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); -- cgit v1.2.3 From d4eb4cd78b0774c7061db56844ed2ea7790cc77c Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 17 Nov 2016 13:45:57 +0200 Subject: net/mlx5: Add handling for port module event For each asynchronous port module event: 1. print with ratelimit to the dmesg log 2. increment the corresponding event counter Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/linux/mlx5/driver.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5e7dbbcf47f0..7336c8e529d7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -498,6 +498,31 @@ struct mlx5_rl_table { struct mlx5_rl_entry *rl_entry; }; +enum port_module_event_status_type { + MLX5_MODULE_STATUS_PLUGGED = 0x1, + MLX5_MODULE_STATUS_UNPLUGGED = 0x2, + MLX5_MODULE_STATUS_ERROR = 0x3, + MLX5_MODULE_STATUS_NUM = 0x3, +}; + +enum port_module_event_error_type { + MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED, + MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE, + MLX5_MODULE_EVENT_ERROR_BUS_STUCK, + MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT, + MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST, + MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER, + MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE, + MLX5_MODULE_EVENT_ERROR_BAD_CABLE, + MLX5_MODULE_EVENT_ERROR_UNKNOWN, + MLX5_MODULE_EVENT_ERROR_NUM, +}; + +struct mlx5_port_module_event_stats { + u64 status_counters[MLX5_MODULE_STATUS_NUM]; + u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; +}; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; @@ -559,6 +584,8 @@ struct mlx5_priv { unsigned long pci_dev_data; struct mlx5_fc_stats fc_stats; struct mlx5_rl_table rl_table; + + struct mlx5_port_module_event_stats pme_stats; }; enum mlx5_device_state { -- cgit v1.2.3 From 0dbc6fe09fbe5f5191bcc606f3bdc9a829f97066 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 17 Nov 2016 13:45:59 +0200 Subject: net/mlx5: Set driver version infrastructure Add driver_version capability bit is enabled, and set driver version command in mlx5_ifc firmware header. The only purpose of this command is to store a driver version/OS string in FW to be reported and displayed in various management systems, such as IPMI/BMC. Signed-off-by: Saeed Mahameed Signed-off-by: Huy Nguyen Signed-off-by: Leon Romanovsky Signed-off-by: David S. 
Miller --- include/linux/mlx5/mlx5_ifc.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cd1d530ca368..f08a06247fba 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -83,6 +83,7 @@ enum { MLX5_CMD_OP_SET_HCA_CAP = 0x109, MLX5_CMD_OP_QUERY_ISSI = 0x10a, MLX5_CMD_OP_SET_ISSI = 0x10b, + MLX5_CMD_OP_SET_DRIVER_VERSION = 0x10d, MLX5_CMD_OP_CREATE_MKEY = 0x200, MLX5_CMD_OP_QUERY_MKEY = 0x201, MLX5_CMD_OP_DESTROY_MKEY = 0x202, @@ -909,7 +910,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_pg_sz[0x8]; u8 bf[0x1]; - u8 reserved_at_261[0x1]; + u8 driver_version[0x1]; u8 pad_tx_eth_packet[0x1]; u8 reserved_at_263[0x8]; u8 log_bf_reg_size[0x5]; @@ -4005,6 +4006,25 @@ struct mlx5_ifc_query_issi_in_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_set_driver_version_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_driver_version_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + u8 driver_version[64][0x8]; +}; + struct mlx5_ifc_query_hca_vport_pkey_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; -- cgit v1.2.3 From 7f503169cabd70c1f13b9279c50eca7dfb9a7d51 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 17 Nov 2016 13:46:01 +0200 Subject: net/mlx5: Add MPCNT register infrastructure Add the needed infrastructure for future use of MPCNT register. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 5 +++ include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 93 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 52b437431c6a..9f489365b3d3 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1071,6 +1071,11 @@ enum { MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; +enum { + MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP = 0x0, + MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP = 0x2, +}; + static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) { if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7336c8e529d7..ae1f451e8f89 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -121,6 +121,7 @@ enum { MLX5_REG_HOST_ENDIANNESS = 0x7004, MLX5_REG_MCIA = 0x9014, MLX5_REG_MLCR = 0x902b, + MLX5_REG_MPCNT = 0x9051, }; enum { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f08a06247fba..a5f0fbedf1e7 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1757,6 +1757,80 @@ struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits { u8 reserved_at_4c0[0x300]; }; +struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits { + u8 life_time_counter_high[0x20]; + + u8 life_time_counter_low[0x20]; + + u8 rx_errors[0x20]; + + u8 tx_errors[0x20]; + + u8 l0_to_recovery_eieos[0x20]; + + u8 l0_to_recovery_ts[0x20]; + + u8 l0_to_recovery_framing[0x20]; + + u8 l0_to_recovery_retrain[0x20]; + + u8 crc_error_dllp[0x20]; + + u8 crc_error_tlp[0x20]; + + u8 reserved_at_140[0x680]; +}; + +struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits { + u8 life_time_counter_high[0x20]; + + u8 life_time_counter_low[0x20]; + + u8 time_to_boot_image_start[0x20]; + + u8 
time_to_link_image[0x20]; + + u8 calibration_time[0x20]; + + u8 time_to_first_perst[0x20]; + + u8 time_to_detect_state[0x20]; + + u8 time_to_l0[0x20]; + + u8 time_to_crs_en[0x20]; + + u8 time_to_plastic_image_start[0x20]; + + u8 time_to_iron_image_start[0x20]; + + u8 perst_handler[0x20]; + + u8 times_in_l1[0x20]; + + u8 times_in_l23[0x20]; + + u8 dl_down[0x20]; + + u8 config_cycle1usec[0x20]; + + u8 config_cycle2to7usec[0x20]; + + u8 config_cycle_8to15usec[0x20]; + + u8 config_cycle_16_to_63usec[0x20]; + + u8 config_cycle_64usec[0x20]; + + u8 correctable_err_msg_sent[0x20]; + + u8 non_fatal_err_msg_sent[0x20]; + + u8 fatal_err_msg_sent[0x20]; + + u8 reserved_at_2e0[0x4e0]; +}; + struct mlx5_ifc_cmd_inter_comp_event_bits { u8 command_completion_vector[0x20]; @@ -2921,6 +2995,12 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { u8 reserved_at_0[0x7c0]; }; +union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits { + struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits pcie_perf_cntrs_grp_data_layout; + struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits pcie_tas_cntrs_grp_data_layout; + u8 reserved_at_0[0x7c0]; +}; + union mlx5_ifc_event_auto_bits { struct mlx5_ifc_comp_event_bits comp_event; struct mlx5_ifc_dct_events_bits dct_events; @@ -7240,6 +7320,18 @@ struct mlx5_ifc_ppcnt_reg_bits { union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set; }; +struct mlx5_ifc_mpcnt_reg_bits { + u8 reserved_at_0[0x8]; + u8 pcie_index[0x8]; + u8 reserved_at_10[0xa]; + u8 grp[0x6]; + + u8 clr[0x1]; + u8 reserved_at_21[0x1f]; + + union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits counter_set; +}; + struct mlx5_ifc_ppad_reg_bits { u8 reserved_at_0[0x3]; u8 single_mac[0x1]; @@ -7845,6 +7937,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_pmtu_reg_bits pmtu_reg; struct mlx5_ifc_ppad_reg_bits ppad_reg; struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg; + struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg; struct mlx5_ifc_pplm_reg_bits pplm_reg; struct mlx5_ifc_pplr_reg_bits pplr_reg; struct mlx5_ifc_ppsc_reg_bits ppsc_reg; -- cgit v1.2.3 From 968ad9da7e0e333e25442950e10a1b631981ce84 Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Thu, 17 Nov 2016 13:07:21 +0100 Subject: ethtool: Implements ETHTOOL_PHY_GTUNABLE/ETHTOOL_PHY_STUNABLE Adding get_tunable/set_tunable function pointer to the phy_driver structure, and uses these function pointers to implement the ETHTOOL_PHY_GTUNABLE/ETHTOOL_PHY_STUNABLE ioctls. Signed-off-by: Raju Lakkaraju Reviewed-by: Andrew Lunn Signed-off-by: Allan W. Nielsen Signed-off-by: David S. 
Miller --- include/linux/phy.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index b9bd3b4f4ea1..edde28ce163a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -611,6 +611,13 @@ struct phy_driver { void (*get_strings)(struct phy_device *dev, u8 *data); void (*get_stats)(struct phy_device *dev, struct ethtool_stats *stats, u64 *data); + + /* Get and Set PHY tunables */ + int (*get_tunable)(struct phy_device *dev, + struct ethtool_tunable *tuna, void *data); + int (*set_tunable)(struct phy_device *dev, + struct ethtool_tunable *tuna, + const void *data); }; #define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) -- cgit v1.2.3 From 603ab57363a0ba66c77ca4b3027bc0b4505df504 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 17 Nov 2016 11:19:12 -0800 Subject: bus: mvebu-bus: Provide inline stub for mvebu_mbus_get_dram_win_info In preparation for allowing CONFIG_MVNETA_BM to build with COMPILE_TEST, provide an inline stub for mvebu_mbus_get_dram_win_info(). Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mbus.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mbus.h b/include/linux/mbus.h index 2931aa43dab1..0d3f14fd2621 100644 --- a/include/linux/mbus.h +++ b/include/linux/mbus.h @@ -82,6 +82,7 @@ static inline int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size, } #endif +#ifdef CONFIG_MVEBU_MBUS int mvebu_mbus_save_cpu_target(u32 __iomem *store_addr); void mvebu_mbus_get_pcie_mem_aperture(struct resource *res); void mvebu_mbus_get_pcie_io_aperture(struct resource *res); @@ -97,5 +98,12 @@ int mvebu_mbus_init(const char *soc, phys_addr_t mbus_phys_base, size_t mbus_size, phys_addr_t sdram_phys_base, size_t sdram_size); int mvebu_mbus_dt_init(bool is_coherent); +#else +static inline int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target, + u8 *attr) +{ + return -EINVAL; +} +#endif /* CONFIG_MVEBU_MBUS */ #endif /* __LINUX_MBUS_H */ -- cgit v1.2.3 From d66016a77757b004b8637f44d87bedfc4a47b89c Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 18 Nov 2016 15:40:39 -0800 Subject: virtio_net.h: Fix comment. Fix incorrent comment after the final #endif. Signed-off-by: Jarno Rajahalme Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 1c912f85e041..74f1e3363506 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -98,4 +98,4 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, return 0; } -#endif /* _LINUX_VIRTIO_BYTEORDER */ +#endif /* _LINUX_VIRTIO_NET_H */ -- cgit v1.2.3 From 9403cd7cbb08aa3709c632decafa2014c8ed96e6 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Fri, 18 Nov 2016 15:40:40 -0800 Subject: virtio_net: Do not clear memory for struct virtio_net_hdr twice. virtio_net_hdr_from_skb() clears the memory for the header, so there is no point for the callers to do the same. Signed-off-by: Jarno Rajahalme Signed-off-by: David S. 
Miller --- include/linux/virtio_net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 74f1e3363506..66204007d7ac 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -58,7 +58,7 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, bool little_endian) { - memset(hdr, 0, sizeof(*hdr)); + memset(hdr, 0, sizeof(*hdr)); /* no info leak */ if (skb_is_gso(skb)) { struct skb_shared_info *sinfo = skb_shinfo(skb); -- cgit v1.2.3 From c72d8cdaa5dbd3baf918046ee5149ab69330923e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 19 Nov 2016 04:08:08 +0300 Subject: net: fix bogus cast in skb_pagelen() and use unsigned variables 1) cast to "int" is unnecessary: u8 will be promoted to int before decrementing, small positive numbers fit into "int", so their values won't be changed during promotion. Once everything is int including loop counters, signedness doesn't matter: 32-bit operations will stay 32-bit operations. But! Someone tried to make this loop smart by making everything of the same type apparently in an attempt to optimise it. Do the optimization, just differently. Do the cast where it matters. :^) 2) frag size is unsigned entity and sum of fragments sizes is also unsigned. Make everything unsigned, leave no MOVSX instruction behind. add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-4 (-4) function old new delta skb_cow_data 835 834 -1 ip_do_fragment 2549 2548 -1 ip6_fragment 3130 3128 -2 Total: Before=154865032, After=154865028, chg -0.00% Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a4aeeca7e805..9c535fbccf2c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1799,11 +1799,11 @@ static inline unsigned int skb_headlen(const struct sk_buff *skb) return skb->len - skb->data_len; } -static inline int skb_pagelen(const struct sk_buff *skb) +static inline unsigned int skb_pagelen(const struct sk_buff *skb) { - int i, len = 0; + unsigned int i, len = 0; - for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) + for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--) len += skb_frag_size(&skb_shinfo(skb)->frags[i]); return len + skb_headlen(skb); } -- cgit v1.2.3 From 6d67942dd0ebc3dddc86edf9208169d064a9b3d7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 19 Nov 2016 01:45:03 +0100 Subject: bpf: add __must_check attributes to refcount manipulating helpers Helpers like bpf_prog_add(), bpf_prog_inc(), bpf_map_inc() can fail with an error, so make sure the caller properly checks their return value and not just ignores it, which could worst-case lead to use after free. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 01c1487277b2..69d0a7f12a3b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -233,14 +233,14 @@ void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); -struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i); +struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); void bpf_prog_sub(struct bpf_prog *prog, int i); -struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); +struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); -struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref); +struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_precharge_memlock(u32 pages); @@ -299,7 +299,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, { return ERR_PTR(-EOPNOTSUPP); } -static inline struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) +static inline struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, + int i) { return ERR_PTR(-EOPNOTSUPP); } @@ -311,7 +312,8 @@ static inline void bpf_prog_sub(struct bpf_prog *prog, int i) static inline void bpf_prog_put(struct bpf_prog *prog) { } -static inline struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) + +static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog) { return ERR_PTR(-EOPNOTSUPP); } -- cgit v1.2.3 From d06f78c4232d6a84b50839f61d9d7fbb222d8118 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 22 Nov 2016 11:40:55 -0800 Subject: net: phy: broadcom: Add support code for downshift/Wirespeed Broadcom's Wirespeed feature allows us to configure how auto-negotiation should behave with fewer working pairs of wires on a cable. Add support code for retrieving and setting such downshift counters using the recently added ethtool downshift tunables. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 848dc508ef57..f9f8aaf9c943 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -114,6 +114,7 @@ #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x0007 #define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT 12 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN (1 << 8) +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN (1 << 4) #define MII_BCM54XX_AUXCTL_SHDWSEL_MASK 0x0007 @@ -130,6 +131,7 @@ #define BCM_LED_SRC_INTR 0x6 #define BCM_LED_SRC_QUALITY 0x7 #define BCM_LED_SRC_RCVLED 0x8 +#define BCM_LED_SRC_WIRESPEED 0x9 #define BCM_LED_SRC_MULTICOLOR1 0xa #define BCM_LED_SRC_OPENSHORT 0xb #define BCM_LED_SRC_OFF 0xe /* Tied high */ @@ -141,6 +143,14 @@ * Shadow values go into bits [14:10] of register 0x1c to select a shadow * register to access. 
*/ + +/* 00100: Reserved control register 2 */ +#define BCM54XX_SHD_SCR2 0x04 +#define BCM54XX_SHD_SCR2_WSPD_RTRY_DIS 0x100 +#define BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_SHIFT 2 +#define BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_OFFSET 2 +#define BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_MASK 0x7 + /* 00101: Spare Control Register 3 */ #define BCM54XX_SHD_SCR3 0x05 #define BCM54XX_SHD_SCR3_DEF_CLK125 0x0001 -- cgit v1.2.3 From 3df5b3c67546fb05266766b6abaf71563f82efe4 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 22 Nov 2016 23:09:54 +0200 Subject: net: Add net-device param to the get offloaded stats ndo Some drivers would need to check few internal matters for that. To be used in downstream mlx5 commit. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e84800edd249..ae32a27523f9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -925,7 +925,7 @@ struct netdev_xdp { * 3. Update dev->stats asynchronously and atomically, and define * neither operation. * - * bool (*ndo_has_offload_stats)(int attr_id) + * bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id) * Return true if this device supports offload stats of this attr_id. * * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, @@ -1165,7 +1165,7 @@ struct net_device_ops { struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, struct rtnl_link_stats64 *storage); - bool (*ndo_has_offload_stats)(int attr_id); + bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id); int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, void *attr_data); -- cgit v1.2.3 From 34e4e99078667d30f71a50c1e5181e4270e9d8bb Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 22 Nov 2016 23:09:58 +0200 Subject: net/mlx5: Enable to query min inline for a specific vport Also move the inline capablities enum to a shared header vport.h Signed-off-by: Roi Dayan Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/linux/mlx5/vport.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 451b0bde9083..ec35157ea725 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,6 +36,12 @@ #include #include +enum { + MLX5_CAP_INLINE_MODE_L2, + MLX5_CAP_INLINE_MODE_VPORT_CONTEXT, + MLX5_CAP_INLINE_MODE_NOT_REQUIRED, +}; + u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); @@ -43,8 +49,8 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u8 state); int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); -void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, - u8 *min_inline); +int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u16 vport, u8 *min_inline); int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, u16 vport, u8 min_inline); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, -- cgit v1.2.3 From 3007098494bec614fb55dee7bc0410bb7db5ad18 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 23 Nov 2016 16:52:26 +0100 Subject: cgroup: add support for eBPF programs This patch adds two sets of eBPF program pointers to struct cgroup. One for such that are directly pinned to a cgroup, and one for such that are effective for it. To illustrate the logic behind that, assume the following example cgroup hierarchy. A - B - C \ D - E If only B has a program attached, it will be effective for B, C, D and E. If D then attaches a program itself, that will be effective for both D and E, and the program in B will only affect B and C. Only one program of a given type is effective for a cgroup. Attaching and detaching programs will be done through the bpf(2) syscall. For now, ingress and egress inet socket filtering are the only supported use-cases. Signed-off-by: Daniel Mack Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 79 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/cgroup-defs.h | 4 +++ 2 files changed, 83 insertions(+) create mode 100644 include/linux/bpf-cgroup.h (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h new file mode 100644 index 000000000000..ec80d0c0953e --- /dev/null +++ b/include/linux/bpf-cgroup.h @@ -0,0 +1,79 @@ +#ifndef _BPF_CGROUP_H +#define _BPF_CGROUP_H + +#include +#include +#include + +struct sock; +struct cgroup; +struct sk_buff; + +#ifdef CONFIG_CGROUP_BPF + +extern struct static_key_false cgroup_bpf_enabled_key; +#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) + +struct cgroup_bpf { + /* + * Store two sets of bpf_prog pointers, one for programs that are + * pinned directly to this cgroup, and one for those that are effective + * when this cgroup is accessed. 
+ */ + struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE]; + struct bpf_prog *effective[MAX_BPF_ATTACH_TYPE]; +}; + +void cgroup_bpf_put(struct cgroup *cgrp); +void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent); + +void __cgroup_bpf_update(struct cgroup *cgrp, + struct cgroup *parent, + struct bpf_prog *prog, + enum bpf_attach_type type); + +/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */ +void cgroup_bpf_update(struct cgroup *cgrp, + struct bpf_prog *prog, + enum bpf_attach_type type); + +int __cgroup_bpf_run_filter(struct sock *sk, + struct sk_buff *skb, + enum bpf_attach_type type); + +/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */ +#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter(sk, skb, \ + BPF_CGROUP_INET_INGRESS); \ + \ + __ret; \ +}) + +#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled && sk && sk == skb->sk) { \ + typeof(sk) __sk = sk_to_full_sk(sk); \ + if (sk_fullsock(__sk)) \ + __ret = __cgroup_bpf_run_filter(__sk, skb, \ + BPF_CGROUP_INET_EGRESS); \ + } \ + __ret; \ +}) + +#else + +struct cgroup_bpf {}; +static inline void cgroup_bpf_put(struct cgroup *cgrp) {} +static inline void cgroup_bpf_inherit(struct cgroup *cgrp, + struct cgroup *parent) {} + +#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) + +#endif /* CONFIG_CGROUP_BPF */ + +#endif /* _BPF_CGROUP_H */ diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 5b17de62c962..861b4677fc5b 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef CONFIG_CGROUPS @@ -300,6 +301,9 @@ struct cgroup { /* used to schedule release agent */ struct work_struct release_agent_work; + /* used to store eBPF programs */ + struct cgroup_bpf bpf; + /* ids of the ancestors at each level including self */ int ancestor_ids[]; }; -- cgit v1.2.3 From 5a717f4f8f2830f297b5511022481bdc27b9d576 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 24 Nov 2016 07:04:08 +0200 Subject: netdevice: fix sparse warning for HARD_TX_LOCK sparse warns about context imbalance in any code that uses HARD_TX_LOCK/UNLOCK - this is because it's unable to determine that flags don't change so lock and unlock are paired. Seems easy enough to fix by adding __acquire/__release calls. With this patch af_packet.c is now sparse-clean, Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ff57cd2eba3b..4ffcd874cc20 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3462,6 +3462,17 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) txq->xmit_lock_owner = cpu; } +static inline bool __netif_tx_acquire(struct netdev_queue *txq) +{ + __acquire(&txq->_xmit_lock); + return true; +} + +static inline void __netif_tx_release(struct netdev_queue *txq) +{ + __release(&txq->_xmit_lock); +} + static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); @@ -3563,17 +3574,21 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) #define HARD_TX_LOCK(dev, txq, cpu) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ __netif_tx_lock(txq, cpu); \ + } else { \ + __netif_tx_acquire(txq); \ } \ } #define HARD_TX_TRYLOCK(dev, txq) \ (((dev->features & NETIF_F_LLTX) == 0) ? \ __netif_tx_trylock(txq) : \ - true ) + __netif_tx_acquire(txq)) #define HARD_TX_UNLOCK(dev, txq) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ __netif_tx_unlock(txq); \ + } else { \ + __netif_tx_release(txq); \ } \ } -- cgit v1.2.3 From 88575199cc65de99a156888629a68180c830eff2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 26 Nov 2016 01:28:04 +0100 Subject: bpf: drop unnecessary context cast from BPF_PROG_RUN Since long already bpf_func is not only about struct sk_buff * as input anymore. Make it generic as void *, so that callers don't need to cast for it each time they call BPF_PROG_RUN(). Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1f09c521adfe..7f246a281435 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -408,8 +408,8 @@ struct bpf_prog { enum bpf_prog_type type; /* Type of BPF program */ struct bpf_prog_aux *aux; /* Auxiliary fields */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ - unsigned int (*bpf_func)(const struct sk_buff *skb, - const struct bpf_insn *filter); + unsigned int (*bpf_func)(const void *ctx, + const struct bpf_insn *insn); /* Instructions for interpreter */ union { struct sock_filter insns[0]; @@ -504,7 +504,7 @@ static inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, u32 ret; rcu_read_lock(); - ret = BPF_PROG_RUN(prog, (void *)xdp); + ret = BPF_PROG_RUN(prog, xdp); rcu_read_unlock(); return ret; -- cgit v1.2.3 From c491680f8f489926eebfdf2cd006767fc8bdaa49 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 26 Nov 2016 01:28:06 +0100 Subject: bpf: reuse dev_is_mac_header_xmit for redirect Commit dcf800344a91 ("net/sched: act_mirred: Refactor detection whether dev needs xmit at mac header") added dev_is_mac_header_xmit(); since it's also useful elsewhere, move it to if_arp.h and reuse it for BPF. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/if_arp.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index f563907ed776..3355efc89781 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -44,4 +44,20 @@ static inline int arp_hdr_len(struct net_device *dev) return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2; } } + +static inline bool dev_is_mac_header_xmit(const struct net_device *dev) +{ + switch (dev->type) { + case ARPHRD_TUNNEL: + case ARPHRD_TUNNEL6: + case ARPHRD_SIT: + case ARPHRD_IPGRE: + case ARPHRD_VOID: + case ARPHRD_NONE: + return false; + default: + return true; + } +} + #endif /* _LINUX_IF_ARP_H */ -- cgit v1.2.3 From 3a6a931dfb8e49a7377825b465d84e110fe89f68 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Sun, 27 Nov 2016 17:02:04 +0200 Subject: net/mlx5e: Support DCBX CEE API Add DCBX CEE API interface for ConnectX-4. Configurations are stored in a temporary structure and are applied to the card's firmware when the CEE's setall callback function is called. Note: priority group in CEE is equivalent to traffic class in ConnectX-4 hardware spec. bw allocation per priority in CEE is not supported because ConnectX-4 only supports bw allocation per traffic class. user priority in CEE does not have an equivalent term in ConnectX-4. Therefore, user priority to priority mapping in CEE is not supported. Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/port.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index dde8c7ec5ff1..bdee439f8cf3 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -141,8 +141,12 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, int mlx5_max_tc(struct mlx5_core_dev *mdev); int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc); +int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, + u8 prio, u8 *tc); int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group); int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw); +int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, + u8 tc, u8 *bw_pct); int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, u8 *max_bw_value, u8 *max_bw_unit); -- cgit v1.2.3 From 341c5ee2fb78420ffc441df36f93226be8069b0a Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Sun, 27 Nov 2016 17:02:06 +0200 Subject: net/mlx5: Add DCBX firmware commands support Add set/query commands for DCBX_PARAM register Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/linux/mlx5/driver.h | 7 +++++++ include/linux/mlx5/port.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ae1f451e8f89..68b85efc3908 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -104,6 +104,8 @@ enum { enum { MLX5_REG_QETCR = 0x4005, MLX5_REG_QTCT = 0x400a, + MLX5_REG_DCBX_PARAM = 0x4020, + MLX5_REG_DCBX_APP = 0x4021, MLX5_REG_PCAP = 0x5001, MLX5_REG_PMTU = 0x5003, MLX5_REG_PTYS = 0x5004, @@ -124,6 +126,11 @@ enum { MLX5_REG_MPCNT = 0x9051, }; +enum mlx5_dcbx_oper_mode { + MLX5E_DCBX_PARAM_VER_OPER_HOST = 0x0, + MLX5E_DCBX_PARAM_VER_OPER_AUTO = 0x3, +}; + enum { MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0, MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1, diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index bdee439f8cf3..e527732fb31b 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -162,4 +162,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, u16 offset, u16 size, u8 *data); +int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out); +int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in); #endif /* __MLX5_PORT_H__ */ -- cgit v1.2.3 From d853d145ea3e63387a2ac759aa41d5e43876e561 Mon Sep 17 00:00:00 2001 From: jbrunet Date: Mon, 28 Nov 2016 10:46:46 +0100 Subject: net: phy: add an option to disable EEE advertisement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds an option to disable EEE advertisement in the generic PHY by providing a mask of prohibited modes corresponding to the value found in the MDIO_AN_EEE_ADV register. On some platforms, PHY Low power idle seems to be causing issues, even breaking the link some cases. The patch provides a convenient way for these platforms to disable EEE advertisement and work around the issue. Signed-off-by: Jerome Brunet Tested-by: Yegor Yefremov Tested-by: Andreas Färber Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index edde28ce163a..b53177fd38af 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -417,6 +417,9 @@ struct phy_device { u32 advertising; u32 lp_advertising; + /* Energy efficient ethernet modes which should be prohibited */ + u32 eee_broken_modes; + int autoneg; int link_timeout; -- cgit v1.2.3 From 05b055e89121394058c75dc354e9a46e1e765579 Mon Sep 17 00:00:00 2001 From: Francis Yan Date: Sun, 27 Nov 2016 23:07:13 -0800 Subject: tcp: instrument tcp sender limits chronographs This patch implements the skeleton of the TCP chronograph instrumentation on sender side limits: 1) idle (unspec) 2) busy sending data other than 3-4 below 3) rwnd-limited 4) sndbuf-limited The limits are enumerated 'tcp_chrono'. Since a connection in theory can idle forever, we do not track the actual length of this uninteresting idle period. For the rest we track how long the sender spends in each limit. At any point during the life time of a connection, the sender must be in one of the four states. If there are multiple conditions worthy of tracking in a chronograph then the highest priority enum takes precedence over the other conditions. So that if something "more interesting" starts happening, stop the previous chrono and start a new one. 
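As a rough sketch of that bookkeeping, using the chrono_start/chrono_stat/chrono_type fields added to tcp_sock below (the enum values and helper name here are illustrative, not necessarily the exact ones used in net/ipv4):

enum tcp_chrono {
	TCP_CHRONO_UNSPEC,		/* idle; duration is not tracked */
	TCP_CHRONO_BUSY,		/* actively sending data */
	TCP_CHRONO_RWND_LIMITED,	/* stalled by the receive window */
	TCP_CHRONO_SNDBUF_LIMITED,	/* stalled by the send buffer */
};

/* Close out the running chrono and start a new one.  Callers only do
 * this when the new state outranks the one currently being tracked,
 * per the priority rule above.  chrono_stat[] has three slots, so the
 * idle (UNSPEC) period is never accumulated.
 */
static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
{
	const u32 now = (u32)jiffies;

	if (tp->chrono_type > TCP_CHRONO_UNSPEC)
		tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
	tp->chrono_start = now;
	tp->chrono_type = new;
}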
The time unit is a jiffy (u32) in order to save space in tcp_sock. This implies the application must sample the stats at least once every 49 days (assuming a 1ms jiffy), before the counters wrap. Signed-off-by: Francis Yan Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 32a7c7e35b71..d5d3bd814338 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -211,8 +211,11 @@ struct tcp_sock { u8 reord; /* reordering detected */ } rack; u16 advmss; /* Advertised MSS */ - u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ - unused:7; + u32 chrono_start; /* Start time in jiffies of a TCP chrono */ + u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ + u8 chrono_type:2, /* current chronograph type */ + rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ + unused:5; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ -- cgit v1.2.3 From 1c885808e45601b2b6f68b30ac1d999e10b6f606 Mon Sep 17 00:00:00 2001 From: Francis Yan Date: Sun, 27 Nov 2016 23:07:18 -0800 Subject: tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING This patch exports the sender chronograph stats via the socket SO_TIMESTAMPING channel. Currently we can instrument how long a particular application unit of data was queued in TCP by tracking SOF_TIMESTAMPING_TX_SOFTWARE and SOF_TIMESTAMPING_TX_SCHED. Having these sender chronograph stats exported simultaneously along with these timestamps allows further breaking down of the various sender limitations. For example, a video server can tell if a particular chunk of video on a connection takes a long time to deliver because TCP was experiencing a small receive window. Before this patch it was not possible to tell this without packet traces. To prepare these stats, the user needs to set SOF_TIMESTAMPING_OPT_STATS and SOF_TIMESTAMPING_OPT_TSONLY flags while requesting other SOF_TIMESTAMPING TX timestamps. When the timestamps are available in the error queue, the stats are returned in a separate control message of type SCM_TIMESTAMPING_OPT_STATS, in a list of TLVs (struct nlattr) of types: TCP_NLA_BUSY_TIME, TCP_NLA_RWND_LIMITED, TCP_NLA_SNDBUF_LIMITED. The unit is microseconds. Signed-off-by: Francis Yan Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d5d3bd814338..00e0ee8f001f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -428,4 +428,6 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp) tp->saved_syn = NULL; } +struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk); + #endif /* _LINUX_TCP_H */ -- cgit v1.2.3 From 820ee17b8d3b2a57b1ea20b247cc6a1dddaf8b8d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 29 Nov 2016 09:57:17 -0800 Subject: net: phy: broadcom: Add support code for reading PHY counters Broadcom PHYs expose a number of PHY error counters: receive errors, false carrier sense, SerDes BER count, local and remote receive errors. Add support code to allow retrieving these error counters.
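The core of that support is a small read-and-accumulate helper per counter; schematically (the register number and the clear-on-read behaviour are assumptions for illustration, not the actual Broadcom register layout):

static u64 bcm_phy_read_counter(struct phy_device *phydev, u32 regnum,
				u64 *sw_copy)
{
	int val = phy_read(phydev, regnum);

	if (val < 0)
		return *sw_copy;	/* MDIO error: keep the last good value */

	*sw_copy += val;		/* assume the HW counter clears on read */
	return *sw_copy;
}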
Since the Broadcom PHY library code is used by several drivers, make it possible for them to specify the storage for the software copy of the statistics. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index f9f8aaf9c943..4f7d8be9ddbf 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -244,6 +244,9 @@ #define LPI_FEATURE_EN_DIG1000X 0x4000 /* Core register definitions*/ +#define MII_BRCM_CORE_BASE12 0x12 +#define MII_BRCM_CORE_BASE13 0x13 +#define MII_BRCM_CORE_BASE14 0x14 #define MII_BRCM_CORE_BASE1E 0x1E #define MII_BRCM_CORE_EXPB0 0xB0 #define MII_BRCM_CORE_EXPB1 0xB1 -- cgit v1.2.3 From 85de8576a0b14aecc99136cfbf90e367fa2142cb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Nov 2016 23:16:54 +0100 Subject: bpf, xdp: allow to pass flags to dev_change_xdp_fd Add an IFLA_XDP_FLAGS attribute that can be passed for setting up XDP along with IFLA_XDP_FD, which eventually allows user space to implement typical add/replace/delete logic for programs. Right now, calling into dev_change_xdp_fd() will always replace previous programs. When passed XDP_FLAGS_UPDATE_IF_NOEXIST, we can handle this more graceful when requested by returning -EBUSY in case we try to attach a new program, but we find that another one is already attached. This will be used by upcoming front-end for iproute2 as well. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4ffcd874cc20..3755317cc6a9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3253,7 +3253,7 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); int dev_change_proto_down(struct net_device *dev, bool proto_down); -int dev_change_xdp_fd(struct net_device *dev, int fd); +int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -- cgit v1.2.3 From b634d30a79ecc2d28e61cbe5b1f4443952f37a8f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Nov 2016 10:16:08 -0800 Subject: cgroup, bpf: remove unnecessary #include this #include is unnecessary and brings whole set of other headers into cgroup-defs.h. Remove it. Fixes: 3007098494be ("cgroup: add support for eBPF programs") Signed-off-by: Alexei Starovoitov Acked-by: Rami Rosen Acked-by: Daniel Borkmann Acked-by: Daniel Mack Signed-off-by: David S. 
Miller --- include/linux/bpf-cgroup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index ec80d0c0953e..0cf1adfadd2d 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -1,7 +1,6 @@ #ifndef _BPF_CGROUP_H #define _BPF_CGROUP_H -#include #include #include -- cgit v1.2.3 From 6d937acfb3f166f6e10abd978fafafa120d6f0d7 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Tue, 29 Nov 2016 16:47:01 +0200 Subject: qed: Optimize qed_chain datapath usage The chain structure and functions are widely used by the qed* modules, both for configuration and datapath. E.g., qede's Tx has one such chain and its Rx has two. Currently, the strucutre's fields which are required for datapath related functions [produce/consume] are intertwined with fields which are required only for configuration purposes [init/destroy/etc.]. This patch re-arranges the chain structure so that all the fields which are required for datapath usage could reside in a single cacheline instead of the two which are required today. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_chain.h | 144 +++++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 72d88cf3ca25..37dfba101c6c 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -56,23 +56,6 @@ struct qed_chain_pbl_u32 { u32 cons_page_idx; }; -struct qed_chain_pbl { - /* Base address of a pre-allocated buffer for pbl */ - dma_addr_t p_phys_table; - void *p_virt_table; - - /* Table for keeping the virtual addresses of the chain pages, - * respectively to the physical addresses in the pbl table. - */ - void **pp_virt_addr_tbl; - - /* Index to current used page by producer/consumer */ - union { - struct qed_chain_pbl_u16 pbl16; - struct qed_chain_pbl_u32 pbl32; - } u; -}; - struct qed_chain_u16 { /* Cyclic index of next element to produce/consme */ u16 prod_idx; @@ -86,46 +69,78 @@ struct qed_chain_u32 { }; struct qed_chain { - void *p_virt_addr; - dma_addr_t p_phys_addr; - void *p_prod_elem; - void *p_cons_elem; + /* fastpath portion of the chain - required for commands such + * as produce / consume. + */ + /* Point to next element to produce/consume */ + void *p_prod_elem; + void *p_cons_elem; + + /* Fastpath portions of the PBL [if exists] */ + struct { + /* Table for keeping the virtual addresses of the chain pages, + * respectively to the physical addresses in the pbl table. + */ + void **pp_virt_addr_tbl; - enum qed_chain_mode mode; - enum qed_chain_use_mode intended_use; /* used to produce/consume */ - enum qed_chain_cnt_type cnt_type; + union { + struct qed_chain_pbl_u16 u16; + struct qed_chain_pbl_u32 u32; + } c; + } pbl; union { struct qed_chain_u16 chain16; struct qed_chain_u32 chain32; } u; + /* Capacity counts only usable elements */ + u32 capacity; u32 page_cnt; - /* Number of elements - capacity is for usable elements only, - * while size will contain total number of elements [for entire chain]. + enum qed_chain_mode mode; + + /* Elements information for fast calculations */ + u16 elem_per_page; + u16 elem_per_page_mask; + u16 elem_size; + u16 next_page_mask; + u16 usable_per_page; + u8 elem_unusable; + + u8 cnt_type; + + /* Slowpath of the chain - required for initialization and destruction, + * but isn't involved in regular functionality. 
*/ - u32 capacity; + + /* Base address of a pre-allocated buffer for pbl */ + struct { + dma_addr_t p_phys_table; + void *p_virt_table; + } pbl_sp; + + /* Address of first page of the chain - the address is required + * for fastpath operation [consume/produce] but only for the the SINGLE + * flavour which isn't considered fastpath [== SPQ]. + */ + void *p_virt_addr; + dma_addr_t p_phys_addr; + + /* Total number of elements [for entire chain] */ u32 size; - /* Elements information for fast calculations */ - u16 elem_per_page; - u16 elem_per_page_mask; - u16 elem_unusable; - u16 usable_per_page; - u16 elem_size; - u16 next_page_mask; - struct qed_chain_pbl pbl; + u8 intended_use; }; #define QED_CHAIN_PBL_ENTRY_SIZE (8) #define QED_CHAIN_PAGE_SIZE (0x1000) #define ELEMS_PER_PAGE(elem_size) (QED_CHAIN_PAGE_SIZE / (elem_size)) -#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode) \ - ((mode == QED_CHAIN_MODE_NEXT_PTR) ? \ - (1 + ((sizeof(struct qed_chain_next) - 1) / \ - (elem_size))) : 0) +#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode) \ + (((mode) == QED_CHAIN_MODE_NEXT_PTR) ? \ + (u8)(1 + ((sizeof(struct qed_chain_next) - 1) / \ + (elem_size))) : 0) #define USABLE_ELEMS_PER_PAGE(elem_size, mode) \ ((u32)(ELEMS_PER_PAGE(elem_size) - \ @@ -186,7 +201,7 @@ static inline u16 qed_chain_get_usable_per_page(struct qed_chain *p_chain) return p_chain->usable_per_page; } -static inline u16 qed_chain_get_unusable_per_page(struct qed_chain *p_chain) +static inline u8 qed_chain_get_unusable_per_page(struct qed_chain *p_chain) { return p_chain->elem_unusable; } @@ -198,7 +213,7 @@ static inline u32 qed_chain_get_page_cnt(struct qed_chain *p_chain) static inline dma_addr_t qed_chain_get_pbl_phys(struct qed_chain *p_chain) { - return p_chain->pbl.p_phys_table; + return p_chain->pbl_sp.p_phys_table; } /** @@ -214,10 +229,10 @@ static inline dma_addr_t qed_chain_get_pbl_phys(struct qed_chain *p_chain) static inline void qed_chain_advance_page(struct qed_chain *p_chain, void **p_next_elem, void *idx_to_inc, void *page_to_inc) - { struct qed_chain_next *p_next = NULL; u32 page_index = 0; + switch (p_chain->mode) { case QED_CHAIN_MODE_NEXT_PTR: p_next = *p_next_elem; @@ -305,7 +320,7 @@ static inline void *qed_chain_produce(struct qed_chain *p_chain) if ((p_chain->u.chain16.prod_idx & p_chain->elem_per_page_mask) == p_chain->next_page_mask) { p_prod_idx = &p_chain->u.chain16.prod_idx; - p_prod_page_idx = &p_chain->pbl.u.pbl16.prod_page_idx; + p_prod_page_idx = &p_chain->pbl.c.u16.prod_page_idx; qed_chain_advance_page(p_chain, &p_chain->p_prod_elem, p_prod_idx, p_prod_page_idx); } @@ -314,7 +329,7 @@ static inline void *qed_chain_produce(struct qed_chain *p_chain) if ((p_chain->u.chain32.prod_idx & p_chain->elem_per_page_mask) == p_chain->next_page_mask) { p_prod_idx = &p_chain->u.chain32.prod_idx; - p_prod_page_idx = &p_chain->pbl.u.pbl32.prod_page_idx; + p_prod_page_idx = &p_chain->pbl.c.u32.prod_page_idx; qed_chain_advance_page(p_chain, &p_chain->p_prod_elem, p_prod_idx, p_prod_page_idx); } @@ -378,7 +393,7 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain) if ((p_chain->u.chain16.cons_idx & p_chain->elem_per_page_mask) == p_chain->next_page_mask) { p_cons_idx = &p_chain->u.chain16.cons_idx; - p_cons_page_idx = &p_chain->pbl.u.pbl16.cons_page_idx; + p_cons_page_idx = &p_chain->pbl.c.u16.cons_page_idx; qed_chain_advance_page(p_chain, &p_chain->p_cons_elem, p_cons_idx, p_cons_page_idx); } @@ -387,8 +402,8 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain) if 
((p_chain->u.chain32.cons_idx & p_chain->elem_per_page_mask) == p_chain->next_page_mask) { p_cons_idx = &p_chain->u.chain32.cons_idx; - p_cons_page_idx = &p_chain->pbl.u.pbl32.cons_page_idx; - qed_chain_advance_page(p_chain, &p_chain->p_cons_elem, + p_cons_page_idx = &p_chain->pbl.c.u32.cons_page_idx; + qed_chain_advance_page(p_chain, &p_chain->p_cons_elem, p_cons_idx, p_cons_page_idx); } p_chain->u.chain32.cons_idx++; @@ -429,25 +444,26 @@ static inline void qed_chain_reset(struct qed_chain *p_chain) u32 reset_val = p_chain->page_cnt - 1; if (is_chain_u16(p_chain)) { - p_chain->pbl.u.pbl16.prod_page_idx = (u16)reset_val; - p_chain->pbl.u.pbl16.cons_page_idx = (u16)reset_val; + p_chain->pbl.c.u16.prod_page_idx = (u16)reset_val; + p_chain->pbl.c.u16.cons_page_idx = (u16)reset_val; } else { - p_chain->pbl.u.pbl32.prod_page_idx = reset_val; - p_chain->pbl.u.pbl32.cons_page_idx = reset_val; + p_chain->pbl.c.u32.prod_page_idx = reset_val; + p_chain->pbl.c.u32.cons_page_idx = reset_val; } } switch (p_chain->intended_use) { - case QED_CHAIN_USE_TO_CONSUME_PRODUCE: - case QED_CHAIN_USE_TO_PRODUCE: - /* Do nothing */ - break; - case QED_CHAIN_USE_TO_CONSUME: /* produce empty elements */ for (i = 0; i < p_chain->capacity; i++) qed_chain_recycle_consumed(p_chain); break; + + case QED_CHAIN_USE_TO_CONSUME_PRODUCE: + case QED_CHAIN_USE_TO_PRODUCE: + default: + /* Do nothing */ + break; } } @@ -473,13 +489,13 @@ static inline void qed_chain_init_params(struct qed_chain *p_chain, p_chain->p_virt_addr = NULL; p_chain->p_phys_addr = 0; p_chain->elem_size = elem_size; - p_chain->intended_use = intended_use; + p_chain->intended_use = (u8)intended_use; p_chain->mode = mode; - p_chain->cnt_type = cnt_type; + p_chain->cnt_type = (u8)cnt_type; - p_chain->elem_per_page = ELEMS_PER_PAGE(elem_size); + p_chain->elem_per_page = ELEMS_PER_PAGE(elem_size); p_chain->usable_per_page = USABLE_ELEMS_PER_PAGE(elem_size, mode); - p_chain->elem_per_page_mask = p_chain->elem_per_page - 1; + p_chain->elem_per_page_mask = p_chain->elem_per_page - 1; p_chain->elem_unusable = UNUSABLE_ELEMS_PER_PAGE(elem_size, mode); p_chain->next_page_mask = (p_chain->usable_per_page & p_chain->elem_per_page_mask); @@ -488,8 +504,8 @@ static inline void qed_chain_init_params(struct qed_chain *p_chain, p_chain->capacity = p_chain->usable_per_page * page_cnt; p_chain->size = p_chain->elem_per_page * page_cnt; - p_chain->pbl.p_phys_table = 0; - p_chain->pbl.p_virt_table = NULL; + p_chain->pbl_sp.p_phys_table = 0; + p_chain->pbl_sp.p_virt_table = NULL; p_chain->pbl.pp_virt_addr_tbl = NULL; } @@ -530,8 +546,8 @@ static inline void qed_chain_init_pbl_mem(struct qed_chain *p_chain, dma_addr_t p_phys_pbl, void **pp_virt_addr_tbl) { - p_chain->pbl.p_phys_table = p_phys_pbl; - p_chain->pbl.p_virt_table = p_virt_pbl; + p_chain->pbl_sp.p_phys_table = p_phys_pbl; + p_chain->pbl_sp.p_virt_table = p_virt_pbl; p_chain->pbl.pp_virt_addr_tbl = pp_virt_addr_tbl; } -- cgit v1.2.3 From 3da7a37ae6886cfba9ef35428eb976fc2ef561fa Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Tue, 29 Nov 2016 16:47:06 +0200 Subject: qed*: Handle-based L2-queues. The driver needs to maintain several FW/HW-indices for each one of its queues. Currently, that mapping is done by the QED where it uses an rx/tx array of so-called hw-cids, populating them whenever a new queue is opened and clearing them upon destruction of said queues. This maintenance is far from ideal - there's no real reason why QED needs to maintain such a data-structure. 
It becomes even worse when considering the fact that the PF's queues and its child VFs' queues are all mapped into the same data-structure. As a by-product, the set of parameters an interface needs to supply for queue APIs is non-trivial, and some of the variables in the API structures have different meaning depending on their exact place in the configuration flow. This patch re-organizes the way L2 queues are configured and maintained. In short: - Required parameters for queue init are now well-defined. - Qed would allocate a queue-cid based on parameters. Upon initialization success, it would return a handle to caller. - Queue-handle would be maintained by entity requesting queue-init, not necessarily qed. - All further queue-APIs [update, destroy] would use the opaque handle as reference for the queue instead of various indices. The possible owners of such handles: - PF queues [qede] - complete handles based on provided configuration. - VF queues [qede] - fw-context-less handles, containing only relative information; Only the PF-side would need the absolute indices for configuration, so they're omitted here. - VF queues [qed, PF-side] - complete handles based on VF initialization. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 56 ++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 9755a3feb52e..7a52f7c58c37 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -15,6 +15,29 @@ #include #include +struct qed_queue_start_common_params { + /* Should always be relative to entity sending this. */ + u8 vport_id; + u16 queue_id; + + /* Relative, but relevant only for PFs */ + u8 stats_id; + + /* These are always absolute */ + u16 sb; + u8 sb_idx; +}; + +struct qed_rxq_start_ret_params { + void __iomem *p_prod; + void *p_handle; +}; + +struct qed_txq_start_ret_params { + void __iomem *p_doorbell; + void *p_handle; +}; + struct qed_dev_eth_info { struct qed_dev_info common; @@ -56,18 +79,6 @@ struct qed_start_vport_params { bool clear_stats; }; -struct qed_stop_rxq_params { - u8 rss_id; - u8 rx_queue_id; - u8 vport_id; - bool eq_completion_only; -}; - -struct qed_stop_txq_params { - u8 rss_id; - u8 tx_queue_id; -}; - enum qed_filter_rx_mode_type { QED_FILTER_RX_MODE_TYPE_REGULAR, QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC, @@ -112,15 +123,6 @@ struct qed_filter_params { union qed_filter_type_params filter; }; -struct qed_queue_start_common_params { - u8 rss_id; - u8 queue_id; - u8 vport_id; - u16 sb; - u16 sb_idx; - u16 vf_qid; -}; - struct qed_tunn_params { u16 vxlan_port; u8 update_vxlan_port; @@ -220,24 +222,24 @@ struct qed_eth_ops { struct qed_update_vport_params *params); int (*q_rx_start)(struct qed_dev *cdev, + u8 rss_num, struct qed_queue_start_common_params *params, u16 bd_max_bytes, dma_addr_t bd_chain_phys_addr, dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size, - void __iomem **pp_prod); + struct qed_rxq_start_ret_params *ret_params); - int (*q_rx_stop)(struct qed_dev *cdev, - struct qed_stop_rxq_params *params); + int (*q_rx_stop)(struct qed_dev *cdev, u8 rss_id, void *handle); int (*q_tx_start)(struct qed_dev *cdev, + u8 rss_num, struct qed_queue_start_common_params *params, dma_addr_t pbl_addr, u16 pbl_size, - void __iomem **pp_doorbell); + struct qed_txq_start_ret_params *ret_params); - int (*q_tx_stop)(struct qed_dev *cdev, - struct qed_stop_txq_params *params); + 
int (*q_tx_stop)(struct qed_dev *cdev, u8 rss_id, void *handle); int (*filter_config)(struct qed_dev *cdev, struct qed_filter_params *params); -- cgit v1.2.3 From f4ed2fe34fb793755ef8cfc3509e783c4709ffc1 Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Tue, 29 Nov 2016 15:16:46 +0530 Subject: net: phy: add mdix_ctrl to hold the user configuration. Add new parameter mdix_ctrl to hold the user configuration. Existing mdix maintain the current status of MDI(X) crossover performed or not. mdix_ctrl can configure either ETH_TP_MDI or ETH_TP_MDI_X orETH_TP_MDI_AUTO. Signed-off-by: Raju Lakkaraju Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index b53177fd38af..feb8a98e8dd3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -450,6 +450,7 @@ struct phy_device { struct net_device *attached_dev; u8 mdix; + u8 mdix_ctrl; void (*adjust_link)(struct net_device *dev); }; -- cgit v1.2.3 From 1c1b522808a18402f043c1418b4e48c7355480cc Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 30 Nov 2016 17:59:37 +0200 Subject: net/mlx5e: Implement Fragmented Work Queue (WQ) Add new type of struct mlx5_frag_buf which is used to allocate fragmented buffers rather than contiguous, and make the Completion Queues (CQs) use it as they are big (default of 2MB per CQ in Striding RQ). This fixes the failures of type: "mlx5e_open_locked: mlx5e_open_channels failed, -12" due to dma_zalloc_coherent insufficient contiguous coherent memory to satisfy the driver's request when the user tries to setup more or larger rings. Signed-off-by: Tariq Toukan Reported-by: Sebastian Ott Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 68b85efc3908..0ae55361e674 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -318,6 +318,13 @@ struct mlx5_buf { u8 page_shift; }; +struct mlx5_frag_buf { + struct mlx5_buf_list *frags; + int npages; + int size; + u8 page_shift; +}; + struct mlx5_eq_tasklet { struct list_head list; struct list_head process_list; @@ -822,6 +829,9 @@ int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf, int node); int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf); void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf); +int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, + struct mlx5_frag_buf *buf, int node); +void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, @@ -866,6 +876,7 @@ void mlx5_unregister_debugfs(void); int mlx5_eq_init(struct mlx5_core_dev *dev); void mlx5_eq_cleanup(struct mlx5_core_dev *dev); void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); +void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -- cgit v1.2.3 From 3a0af8fd61f90920f6fa04e4f1e9a6a73c1b4fd2 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 30 Nov 2016 17:10:10 +0100 Subject: bpf: BPF for 
lightweight tunnel infrastructure Registers new BPF program types which correspond to the LWT hooks: - BPF_PROG_TYPE_LWT_IN => dst_input() - BPF_PROG_TYPE_LWT_OUT => dst_output() - BPF_PROG_TYPE_LWT_XMIT => lwtunnel_xmit() The separate program types are required to differentiate between the capabilities each LWT hook allows: * Programs attached to dst_input() or dst_output() are restricted and may only read the data of an skb. This prevent modification and possible invalidation of already validated packet headers on receive and the construction of illegal headers while the IP headers are still being assembled. * Programs attached to lwtunnel_xmit() are allowed to modify packet content as well as prepending an L2 header via a newly introduced helper bpf_skb_change_head(). This is safe as lwtunnel_xmit() is invoked after the IP header has been assembled completely. All BPF programs receive an skb with L3 headers attached and may return one of the following error codes: BPF_OK - Continue routing as per nexthop BPF_DROP - Drop skb and return EPERM BPF_REDIRECT - Redirect skb to device as per redirect() helper. (Only valid in lwtunnel_xmit() context) The return codes are binary compatible with their TC_ACT_ relatives to ease compatibility. Signed-off-by: Thomas Graf Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 7f246a281435..7ba644626553 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -438,7 +438,7 @@ struct xdp_buff { }; /* compute the linear packet data range [data, data_end) which - * will be accessed by cls_bpf and act_bpf programs + * will be accessed by cls_bpf, act_bpf and lwt programs */ static inline void bpf_compute_data_end(struct sk_buff *skb) { -- cgit v1.2.3 From 366cbf2f46048d70005c6c33dc289330f24b54b0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Nov 2016 22:16:06 +0100 Subject: bpf, xdp: drop rcu_read_lock from bpf_prog_run_xdp and move to caller After 326fe02d1ed6 ("net/mlx4_en: protect ring->xdp_prog with rcu_read_lock"), the rcu_read_lock() in bpf_prog_run_xdp() is superfluous, since callers need to hold rcu_read_lock() already to make sure BPF program doesn't get released in the background. Thus, drop it from bpf_prog_run_xdp(), as it can otherwise be misleading. Still keeping the bpf_prog_run_xdp() is useful as it allows for grepping in XDP supported drivers and to keep the typecheck on the context intact. For mlx4, this means we don't have a double rcu_read_lock() anymore. nfp can just make use of bpf_prog_run_xdp(), too. For qede, just move rcu_read_lock() out of the helper. When the driver gets atomic replace support, this will move to call-sites eventually. mlx5 needs actual fixing as it has the same issue as described already in 326fe02d1ed6 ("net/mlx4_en: protect ring->xdp_prog with rcu_read_lock"), that is, we're under RCU bh at this time, BPF programs are released via call_rcu(), and call_rcu() != call_rcu_bh(), so we need to properly mark read side as programs can get xchg()'ed in mlx5e_xdp_set() without queue reset. Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- include/linux/filter.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 7ba644626553..97338134398f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -498,16 +498,16 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, return BPF_PROG_RUN(prog, skb); } -static inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, - struct xdp_buff *xdp) +static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, + struct xdp_buff *xdp) { - u32 ret; - - rcu_read_lock(); - ret = BPF_PROG_RUN(prog, xdp); - rcu_read_unlock(); - - return ret; + /* Caller needs to hold rcu_read_lock() (!), otherwise program + * can be released while still running, or map elements could be + * freed early while still having concurrent users. XDP fastpath + * already takes rcu_read_lock() when fetching the program, so + * it's not necessary here anymore. + */ + return BPF_PROG_RUN(prog, xdp); } static inline unsigned int bpf_prog_size(unsigned int proglen) -- cgit v1.2.3 From fc831825f99eb3a2f1bf3fe7307b392513b642a5 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 1 Dec 2016 00:21:06 -0800 Subject: qed: Add support for hardware offloaded iSCSI. This adds the backbone required for the various HW initalizations which are necessary for the iSCSI driver (qedi) for QLogic FastLinQ 4xxxx line of adapters - FW notification, resource initializations, etc. Signed-off-by: Arun Easi Signed-off-by: Yuval Mintz Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 2 + include/linux/qed/qed_iscsi_if.h | 229 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 231 insertions(+) create mode 100644 include/linux/qed/qed_iscsi_if.h (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index ea095b4893aa..4b454f4f5b25 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -166,6 +166,7 @@ struct qed_iscsi_pf_params { u32 max_cwnd; u16 cq_num_entries; u16 cmdq_num_entries; + u32 two_msl_timer; u16 dup_ack_threshold; u16 tx_sws_timer; u16 min_rto; @@ -275,6 +276,7 @@ struct qed_dev_info { enum qed_sb_type { QED_SB_TYPE_L2_QUEUE, QED_SB_TYPE_CNQ, + QED_SB_TYPE_STORAGE, }; enum qed_protocol { diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h new file mode 100644 index 000000000000..d27912480cb3 --- /dev/null +++ b/include/linux/qed/qed_iscsi_if.h @@ -0,0 +1,229 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ + +#ifndef _QED_ISCSI_IF_H +#define _QED_ISCSI_IF_H +#include +#include + +typedef int (*iscsi_event_cb_t) (void *context, + u8 fw_event_code, void *fw_handle); +struct qed_iscsi_stats { + u64 iscsi_rx_bytes_cnt; + u64 iscsi_rx_packet_cnt; + u64 iscsi_rx_new_ooo_isle_events_cnt; + u32 iscsi_cmdq_threshold_cnt; + u32 iscsi_rq_threshold_cnt; + u32 iscsi_immq_threshold_cnt; + + u64 iscsi_rx_dropped_pdus_task_not_valid; + + u64 iscsi_rx_data_pdu_cnt; + u64 iscsi_rx_r2t_pdu_cnt; + u64 iscsi_rx_total_pdu_cnt; + + u64 iscsi_tx_go_to_slow_start_event_cnt; + u64 iscsi_tx_fast_retransmit_event_cnt; + + u64 iscsi_tx_data_pdu_cnt; + u64 iscsi_tx_r2t_pdu_cnt; + u64 iscsi_tx_total_pdu_cnt; + + u64 iscsi_tx_bytes_cnt; + u64 iscsi_tx_packet_cnt; +}; + +struct qed_dev_iscsi_info { + struct qed_dev_info common; + + void __iomem *primary_dbq_rq_addr; + void __iomem *secondary_bdq_rq_addr; +}; + +struct qed_iscsi_id_params { + u8 mac[ETH_ALEN]; + u32 ip[4]; + u16 port; +}; + +struct qed_iscsi_params_offload { + u8 layer_code; + dma_addr_t sq_pbl_addr; + u32 initial_ack; + + struct qed_iscsi_id_params src; + struct qed_iscsi_id_params dst; + u16 vlan_id; + u8 tcp_flags; + u8 ip_version; + u8 default_cq; + + u8 ka_max_probe_cnt; + u8 dup_ack_theshold; + u32 rcv_next; + u32 snd_una; + u32 snd_next; + u32 snd_max; + u32 snd_wnd; + u32 rcv_wnd; + u32 snd_wl1; + u32 cwnd; + u32 ss_thresh; + u16 srtt; + u16 rtt_var; + u32 ts_time; + u32 ts_recent; + u32 ts_recent_age; + u32 total_rt; + u32 ka_timeout_delta; + u32 rt_timeout_delta; + u8 dup_ack_cnt; + u8 snd_wnd_probe_cnt; + u8 ka_probe_cnt; + u8 rt_cnt; + u32 flow_label; + u32 ka_timeout; + u32 ka_interval; + u32 max_rt_time; + u32 initial_rcv_wnd; + u8 ttl; + u8 tos_or_tc; + u16 remote_port; + u16 local_port; + u16 mss; + u8 snd_wnd_scale; + u8 rcv_wnd_scale; + u32 ts_ticks_per_second; + u16 da_timeout_value; + u8 ack_frequency; +}; + +struct qed_iscsi_params_update { + u8 update_flag; +#define QED_ISCSI_CONN_HD_EN BIT(0) +#define QED_ISCSI_CONN_DD_EN BIT(1) +#define QED_ISCSI_CONN_INITIAL_R2T BIT(2) +#define QED_ISCSI_CONN_IMMEDIATE_DATA BIT(3) + + u32 max_seq_size; + u32 max_recv_pdu_length; + u32 max_send_pdu_length; + u32 first_seq_length; + u32 exp_stat_sn; +}; + +#define MAX_TID_BLOCKS_ISCSI (512) +struct qed_iscsi_tid { + u32 size; /* In bytes per task */ + u32 num_tids_per_block; + u8 *blocks[MAX_TID_BLOCKS_ISCSI]; +}; + +struct qed_iscsi_cb_ops { + struct qed_common_cb_ops common; +}; + +/** + * struct qed_iscsi_ops - qed iSCSI operations. + * @common: common operations pointer + * @ll2: light L2 operations pointer + * @fill_dev_info: fills iSCSI specific information + * @param cdev + * @param info + * @return 0 on sucesss, otherwise error value. + * @register_ops: register iscsi operations + * @param cdev + * @param ops - specified using qed_iscsi_cb_ops + * @param cookie - driver private + * @start: iscsi in FW + * @param cdev + * @param tasks - qed will fill information about tasks + * return 0 on success, otherwise error value. + * @stop: iscsi in FW + * @param cdev + * return 0 on success, otherwise error value. + * @acquire_conn: acquire a new iscsi connection + * @param cdev + * @param handle - qed will fill handle that should be + * used henceforth as identifier of the + * connection. + * @param p_doorbell - qed will fill the address of the + * doorbell. + * @return 0 on sucesss, otherwise error value. + * @release_conn: release a previously acquired iscsi connection + * @param cdev + * @param handle - the connection handle. 
+ * @return 0 on success, otherwise error value. + * @offload_conn: configures an offloaded connection + * @param cdev + * @param handle - the connection handle. + * @param conn_info - the configuration to use for the + * offload. + * @return 0 on success, otherwise error value. + * @update_conn: updates an offloaded connection + * @param cdev + * @param handle - the connection handle. + * @param conn_info - the configuration to use for the + * offload. + * @return 0 on success, otherwise error value. + * @destroy_conn: stops an offloaded connection + * @param cdev + * @param handle - the connection handle. + * @return 0 on success, otherwise error value. + * @clear_sq: clear all task in sq + * @param cdev + * @param handle - the connection handle. + * @return 0 on success, otherwise error value. + * @get_stats: iSCSI related statistics + * @param cdev + * @param stats - pointer to struck that would be filled + * we stats + * @return 0 on success, error otherwise. + */ +struct qed_iscsi_ops { + const struct qed_common_ops *common; + + const struct qed_ll2_ops *ll2; + + int (*fill_dev_info)(struct qed_dev *cdev, + struct qed_dev_iscsi_info *info); + + void (*register_ops)(struct qed_dev *cdev, + struct qed_iscsi_cb_ops *ops, void *cookie); + + int (*start)(struct qed_dev *cdev, + struct qed_iscsi_tid *tasks, + void *event_context, iscsi_event_cb_t async_event_cb); + + int (*stop)(struct qed_dev *cdev); + + int (*acquire_conn)(struct qed_dev *cdev, + u32 *handle, + u32 *fw_cid, void __iomem **p_doorbell); + + int (*release_conn)(struct qed_dev *cdev, u32 handle); + + int (*offload_conn)(struct qed_dev *cdev, + u32 handle, + struct qed_iscsi_params_offload *conn_info); + + int (*update_conn)(struct qed_dev *cdev, + u32 handle, + struct qed_iscsi_params_update *conn_info); + + int (*destroy_conn)(struct qed_dev *cdev, u32 handle, u8 abrt_conn); + + int (*clear_sq)(struct qed_dev *cdev, u32 handle); + + int (*get_stats)(struct qed_dev *cdev, + struct qed_iscsi_stats *stats); +}; + +const struct qed_iscsi_ops *qed_get_iscsi_ops(void); +void qed_put_iscsi_ops(void); +#endif -- cgit v1.2.3 From 95a22caee396cef0bb2ca8fafdd82966a49367bb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 1 Dec 2016 11:32:06 +0100 Subject: tcp: randomize tcp timestamp offsets for each connection jiffies based timestamps allow for easy inference of number of devices behind NAT translators and also makes tracking of hosts simpler. commit ceaa1fef65a7c2e ("tcp: adding a per-socket timestamp offset") added the main infrastructure that is needed for per-connection ts randomization, in particular writing/reading the on-wire tcp header format takes the offset into account so rest of stack can use normal tcp_time_stamp (jiffies). So only two items are left: - add a tsoffset for request sockets - extend the tcp isn generator to also return another 32bit number in addition to the ISN. Re-use of ISN generator also means timestamps are still monotonically increasing for same connection quadruple, i.e. PAWS will still work. Includes fixes from Eric Dumazet. Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Acked-by: Yuchung Cheng Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 00e0ee8f001f..734bab4c3bef 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -123,6 +123,7 @@ struct tcp_request_sock { u32 txhash; u32 rcv_isn; u32 snt_isn; + u32 ts_off; u32 last_oow_ack_time; /* last SYNACK */ u32 rcv_nxt; /* the ack # by SYNACK. For * FastOpen it's the seq# -- cgit v1.2.3 From 7091d8c7055d7310339435ae3af2fb490a92524d Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 1 Dec 2016 14:06:37 +0200 Subject: net/sched: cls_flower: Add offload support using egress Hardware device In order to support hardware offloading when the device given by the tc rule is different from the Hardware underline device, extract the mirred (egress) device from the tc action when a filter is added, using the new tc_action_ops, get_dev(). Flower caches the information about the mirred device and use it for calling ndo_setup_tc in filter change, update stats and delete. Calling ndo_setup_tc of the mirred (egress) device instead of the ingress device will allow a resolution between the software ingress device and the underline hardware device. The resolution will take place inside the offloading driver using 'egress_device' flag added to tc_to_netdev struct which is provided to the offloading driver. Signed-off-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3755317cc6a9..1ff5ea6e1221 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -802,6 +802,7 @@ struct tc_to_netdev { struct tc_cls_matchall_offload *cls_mall; struct tc_cls_bpf_offload *cls_bpf; }; + bool egress_dev; }; /* These structures hold the attributes of xdp state that are being passed -- cgit v1.2.3 From b2cd12574aa3e1625f471ff57cde7f628a18a46b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 1 Dec 2016 08:48:03 -0800 Subject: bpf: Refactor cgroups code in prep for new type Code move and rename only; no functional change intended. Signed-off-by: David Ahern Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 0cf1adfadd2d..af2ca8b432c0 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -36,31 +36,31 @@ void cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type); -int __cgroup_bpf_run_filter(struct sock *sk, - struct sk_buff *skb, - enum bpf_attach_type type); - -/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */ -#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) \ -({ \ - int __ret = 0; \ - if (cgroup_bpf_enabled) \ - __ret = __cgroup_bpf_run_filter(sk, skb, \ - BPF_CGROUP_INET_INGRESS); \ - \ - __ret; \ +int __cgroup_bpf_run_filter_skb(struct sock *sk, + struct sk_buff *skb, + enum bpf_attach_type type); + +/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. 
*/ +#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ + BPF_CGROUP_INET_INGRESS); \ + \ + __ret; \ }) -#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) \ -({ \ - int __ret = 0; \ - if (cgroup_bpf_enabled && sk && sk == skb->sk) { \ - typeof(sk) __sk = sk_to_full_sk(sk); \ - if (sk_fullsock(__sk)) \ - __ret = __cgroup_bpf_run_filter(__sk, skb, \ - BPF_CGROUP_INET_EGRESS); \ - } \ - __ret; \ +#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled && sk && sk == skb->sk) { \ + typeof(sk) __sk = sk_to_full_sk(sk); \ + if (sk_fullsock(__sk)) \ + __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ + BPF_CGROUP_INET_EGRESS); \ + } \ + __ret; \ }) #else -- cgit v1.2.3 From 61023658760032e97869b07d54be9681d2529e77 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 1 Dec 2016 08:48:04 -0800 Subject: bpf: Add new cgroup attach type to enable sock modifications Add new cgroup based program type, BPF_PROG_TYPE_CGROUP_SOCK. Similar to BPF_PROG_TYPE_CGROUP_SKB programs can be attached to a cgroup and run any time a process in the cgroup opens an AF_INET or AF_INET6 socket. Currently only sk_bound_dev_if is exported to userspace for modification by a bpf program. This allows a cgroup to be configured such that AF_INET{6} sockets opened by processes are automatically bound to a specific device. In turn, this enables the running of programs that do not support SO_BINDTODEVICE in a specific VRF context / L3 domain. Signed-off-by: David Ahern Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index af2ca8b432c0..7b6e5d168c95 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -40,6 +40,9 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, enum bpf_attach_type type); +int __cgroup_bpf_run_filter_sk(struct sock *sk, + enum bpf_attach_type type); + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ @@ -63,6 +66,16 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, __ret; \ }) +#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled && sk) { \ + __ret = __cgroup_bpf_run_filter_sk(sk, \ + BPF_CGROUP_INET_SOCK_CREATE); \ + } \ + __ret; \ +}) + #else struct cgroup_bpf {}; @@ -72,6 +85,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp, #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #endif /* CONFIG_CGROUP_BPF */ -- cgit v1.2.3 From adc176c5472214971d77c1a61c83db9b01e9cdc7 Mon Sep 17 00:00:00 2001 From: Erik Nordmark Date: Fri, 2 Dec 2016 14:00:08 -0800 Subject: ipv6 addrconf: Implemented enhanced DAD (RFC7527) Implemented RFC7527 Enhanced DAD. IPv6 duplicate address detection can fail if there is some temporary loopback of Ethernet frames. RFC7527 solves this by including a random nonce in the NS messages used for DAD, and if an NS is received with the same nonce it is assumed to be a looped back DAD probe and is ignored. RFC7527 is enabled by default. Can be disabled by setting both of conf/{all,interface}/enhanced_dad to zero. 
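The mechanism itself is small; in rough terms the DAD paths gain the following shape (field and constant names are simplified for illustration, not the exact ones used in the ndisc/addrconf code):

#define IFP_DAD_NONCE_LEN 6	/* RFC 3971 nonce option payload */

/* TX: pick a fresh random nonce for this probe and carry it in the NS. */
get_random_bytes(ifp->dad_nonce, IFP_DAD_NONCE_LEN);

/* RX: an NS for our tentative address that carries our own nonce is
 * just the looped-back probe; ignore it instead of failing DAD. */
if (nonce_present &&
    !memcmp(ns_nonce, ifp->dad_nonce, IFP_DAD_NONCE_LEN))
	return;				/* RFC 7527: ignore looped-back probe */
/* otherwise: a genuine duplicate, normal DAD failure handling follows */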
Signed-off-by: Erik Nordmark Signed-off-by: Bob Gilligan Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 3f95233b2733..671d014e6429 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -68,6 +68,7 @@ struct ipv6_devconf { #ifdef CONFIG_IPV6_SEG6_HMAC __s32 seg6_require_hmac; #endif + __u32 enhanced_dad; struct ctl_table_header *sysctl_header; }; -- cgit v1.2.3 From c51d39010a1bccc9c1294e2d7c00005aefeb2b5c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 15 Nov 2016 15:08:25 +0100 Subject: netfilter: conntrack: built-in support for DCCP CONFIG_NF_CT_PROTO_DCCP is no more a tristate. When set to y, connection tracking support for DCCP protocol is built-in into nf_conntrack.ko. footprint test: $ ls -l net/netfilter/nf_conntrack{_proto_dccp,}.ko \ net/ipv4/netfilter/nf_conntrack_ipv4.ko \ net/ipv6/netfilter/nf_conntrack_ipv6.ko (builtin)|| dccp | ipv4 | ipv6 | nf_conntrack ---------++--------+--------+--------+-------------- none || 469140 | 828755 | 828676 | 6141434 DCCP || - | 830566 | 829935 | 6533526 Signed-off-by: Davide Caratti Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_dccp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_dccp.h b/include/linux/netfilter/nf_conntrack_dccp.h index 40dcc82058d1..ff721d7325cf 100644 --- a/include/linux/netfilter/nf_conntrack_dccp.h +++ b/include/linux/netfilter/nf_conntrack_dccp.h @@ -25,7 +25,7 @@ enum ct_dccp_roles { #define CT_DCCP_ROLE_MAX (__CT_DCCP_ROLE_MAX - 1) #ifdef __KERNEL__ -#include +#include struct nf_ct_dccp { u_int8_t role[IP_CT_DIR_MAX]; -- cgit v1.2.3 From 40fc3423b983b864bf70b03199191260ae9b2ea6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 3 Dec 2016 11:14:50 -0800 Subject: tcp: tsq: add tsq_flags / tsq_enum This is a cleanup, to ease code review of following patches. Old 'enum tsq_flags' is renamed, and a new enumeration is added with the flags used in cmpxchg() operations as opposed to single bit operations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 734bab4c3bef..d8be083ab0b0 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -364,7 +364,7 @@ struct tcp_sock { u32 *saved_syn; }; -enum tsq_flags { +enum tsq_enum { TSQ_THROTTLED, TSQ_QUEUED, TCP_TSQ_DEFERRED, /* tcp_tasklet_func() found socket was owned */ @@ -375,6 +375,15 @@ enum tsq_flags { */ }; +enum tsq_flags { + TSQF_THROTTLED = (1UL << TSQ_THROTTLED), + TSQF_QUEUED = (1UL << TSQ_QUEUED), + TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED), + TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED), + TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED), + TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), +}; + static inline struct tcp_sock *tcp_sk(const struct sock *sk) { return (struct tcp_sock *)sk; -- cgit v1.2.3 From 7aa5470c2c09265902b5e4289afa82e4e7c2987e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 3 Dec 2016 11:14:57 -0800 Subject: tcp: tsq: move tsq_flags close to sk_wmem_alloc tsq_flags being in the same cache line than sk_wmem_alloc makes a lot of sense. 
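Concretely, the deferral check in tcp_wfree() then becomes a cmpxchg loop over that single word, using the TSQF_/TCPF_ masks introduced in the previous patch (a sketch of its shape only; the tasklet queueing that follows is omitted):

	unsigned long oval, nval;

	for (oval = READ_ONCE(sk->sk_tsq_flags); ; oval = nval) {
		/* Only defer if throttled and not already queued. */
		if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
			return;

		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
		if (nval == oval)
			break;	/* we won the transition: queue the tasklet */
	}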
Both fields are changed from tcp_wfree() and more generally by various TSQ related functions. Prior patch made room in struct sock and added sk_tsq_flags, this patch deletes tsq_flags from struct tcp_sock. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d8be083ab0b0..fc5848dad7a4 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -186,7 +186,6 @@ struct tcp_sock { u32 tsoffset; /* timestamp offset */ struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - unsigned long tsq_flags; /* Data for direct copy to user */ struct { -- cgit v1.2.3 From 7bd509e311f408f7a5132fcdde2069af65fa05ae Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 4 Dec 2016 23:19:41 +0100 Subject: bpf: add prog_digest and expose it via fdinfo/netlink When loading a BPF program via bpf(2), calculate the digest over the program's instruction stream and store it in struct bpf_prog's digest member. This is done at a point in time before any instructions are rewritten by the verifier. Any unstable map file descriptor number part of the imm field will be zeroed for the hash. fdinfo example output for progs: # cat /proc/1590/fdinfo/5 pos: 0 flags: 02000002 mnt_id: 11 prog_type: 1 prog_jited: 1 prog_digest: b27e8b06da22707513aa97363dfb11c7c3675d28 memlock: 4096 When programs are pinned and retrieved by an ELF loader, the loader can check the program's digest through fdinfo and compare it against one that was generated over the ELF file's program section to see if the program needs to be reloaded. Furthermore, this can also be exposed through other means such as netlink in case of a tc cls/act dump (or xdp in future), but also through tracepoints or other facilities to identify the program. Other than that, the digest can also serve as a base name for the work in progress kallsyms support of programs. The digest doesn't depend/select the crypto layer, since we need to keep dependencies to a minimum. iproute2 will get support for this facility. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/linux/filter.h | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 69d0a7f12a3b..8796ff03f472 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -216,6 +216,7 @@ u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); +void bpf_prog_calc_digest(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); diff --git a/include/linux/filter.h b/include/linux/filter.h index 97338134398f..f078d2b1cff6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -56,6 +57,9 @@ struct bpf_prog_aux; /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 +/* Maximum BPF program size in bytes. */ +#define MAX_BPF_SIZE (BPF_MAXINSNS * sizeof(struct bpf_insn)) + /* Helper macros for filter block array initializers. */ /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ @@ -404,8 +408,9 @@ struct bpf_prog { cb_access:1, /* Is control block accessed? */ dst_needed:1; /* Do we need dst entry? 
*/ kmemcheck_bitfield_end(meta); - u32 len; /* Number of filter blocks */ enum bpf_prog_type type; /* Type of BPF program */ + u32 len; /* Number of filter blocks */ + u32 digest[SHA_DIGEST_WORDS]; /* Program digest */ struct bpf_prog_aux *aux; /* Auxiliary fields */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ unsigned int (*bpf_func)(const void *ctx, -- cgit v1.2.3 From 0aa8c57a04907a5d02068ff9f917629be97ea78d Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Tue, 15 Nov 2016 17:48:44 -0500 Subject: netfilter: introduce accessor functions for hook entries This allows easier future refactoring. Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 69230140215b..575aa198097e 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -79,6 +79,33 @@ struct nf_hook_entry { const struct nf_hook_ops *orig_ops; }; +static inline void +nf_hook_entry_init(struct nf_hook_entry *entry, const struct nf_hook_ops *ops) +{ + entry->next = NULL; + entry->ops = *ops; + entry->orig_ops = ops; +} + +static inline int +nf_hook_entry_priority(const struct nf_hook_entry *entry) +{ + return entry->ops.priority; +} + +static inline int +nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb, + struct nf_hook_state *state) +{ + return entry->ops.hook(entry->ops.priv, skb, state); +} + +static inline const struct nf_hook_ops * +nf_hook_entry_ops(const struct nf_hook_entry *entry) +{ + return entry->orig_ops; +} + static inline void nf_hook_state_init(struct nf_hook_state *p, unsigned int hook, u_int8_t pf, -- cgit v1.2.3 From d415b9eb76fc55c03ef5451691170aa5771dcea3 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Tue, 15 Nov 2016 17:48:45 -0500 Subject: netfilter: decouple nf_hook_entry and nf_hook_ops During nfhook traversal we only need a very small subset of nf_hook_ops members. We need: - next element - hook function to call - hook function priv argument Bridge netfilter also needs 'thresh'; can be obtained via ->orig_ops. nf_hook_entry struct is now 32 bytes on x86_64. A followup patch will turn the run-time list into an array that only stores hook functions plus their priv arguments, eliminating the ->next element. 
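The effect on the hot path is that traversal now touches only the slimmed-down entry, i.e. four pointer-sized members (next, hook, priv, orig_ops), which is where the 32 bytes quoted above come from. A sketch of the shape of such a traversal (not the verbatim nf_hook_slow code; hook_head, skb and state are assumed to come from the caller):

	const struct nf_hook_entry *entry = rcu_dereference(hook_head);
	int verdict = NF_ACCEPT;

	while (entry) {
		verdict = nf_hook_entry_hookfn(entry, skb, state);
		if ((verdict & NF_VERDICT_MASK) != NF_ACCEPT)
			break;		/* drop, steal or queue: stop walking */
		entry = rcu_dereference(entry->next);
	}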
Suggested-by: Florian Westphal Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 575aa198097e..a4b97be30b28 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -75,7 +75,8 @@ struct nf_hook_ops { struct nf_hook_entry { struct nf_hook_entry __rcu *next; - struct nf_hook_ops ops; + nf_hookfn *hook; + void *priv; const struct nf_hook_ops *orig_ops; }; @@ -83,21 +84,22 @@ static inline void nf_hook_entry_init(struct nf_hook_entry *entry, const struct nf_hook_ops *ops) { entry->next = NULL; - entry->ops = *ops; + entry->hook = ops->hook; + entry->priv = ops->priv; entry->orig_ops = ops; } static inline int nf_hook_entry_priority(const struct nf_hook_entry *entry) { - return entry->ops.priority; + return entry->orig_ops->priority; } static inline int nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb, struct nf_hook_state *state) { - return entry->ops.hook(entry->ops.priv, skb, state); + return entry->hook(entry->priv, skb, state); } static inline const struct nf_hook_ops * -- cgit v1.2.3 From 4d31eef5176df06f218201bc9c0ce40babb41660 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Nov 2016 14:44:17 +0100 Subject: netfilter: x_tables: pass xt_counters struct instead of packet counter On SMP we overload the packet counter (unsigned long) to contain percpu offset. Hide this from callers and pass xt_counters address instead. Preparation patch to allocate the percpu counters in page-sized batch chunks. Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index cd4eaf8df445..6e61edeb68e3 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -430,11 +430,7 @@ static inline unsigned long xt_percpu_counter_alloc(void) return 0; } -static inline void xt_percpu_counter_free(u64 pcnt) -{ - if (nr_cpu_ids > 1) - free_percpu((void __percpu *) (unsigned long) pcnt); -} +void xt_percpu_counter_free(struct xt_counters *cnt); static inline struct xt_counters * xt_get_this_cpu_counter(struct xt_counters *cnt) -- cgit v1.2.3 From f28e15bacedd444608e25421c72eb2cf4527c9ca Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Nov 2016 14:44:18 +0100 Subject: netfilter: x_tables: pass xt_counters struct to counter allocator Keeps some noise away from a followup patch. Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 6e61edeb68e3..05a94bd32c55 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -404,32 +404,7 @@ static inline unsigned long ifname_compare_aligned(const char *_a, } -/* On SMP, ip(6)t_entry->counters.pcnt holds address of the - * real (percpu) counter. On !SMP, its just the packet count, - * so nothing needs to be done there. - * - * xt_percpu_counter_alloc returns the address of the percpu - * counter, or 0 on !SMP. 
We force an alignment of 16 bytes * so that bytes/packets share a common cache line. * * Hence caller must use IS_ERR_VALUE to check for error, this * allows us to return 0 for single core systems without forcing * callers to deal with SMP vs. NONSMP issues. */ -static inline unsigned long xt_percpu_counter_alloc(void) -{ - if (nr_cpu_ids > 1) { - void __percpu *res = __alloc_percpu(sizeof(struct xt_counters), - sizeof(struct xt_counters)); - - if (res == NULL) - return -ENOMEM; - - return (__force unsigned long) res; - } - - return 0; -} +bool xt_percpu_counter_alloc(struct xt_counters *counters); void xt_percpu_counter_free(struct xt_counters *cnt); static inline struct xt_counters * -- cgit v1.2.3 From ae0ac0ed6fcf5af3be0f63eb935f483f44a402d2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Nov 2016 14:44:19 +0100 Subject: netfilter: x_tables: pack percpu counter allocations Instead of allocating each xt_counter individually, allocate 4k chunks and then use these for counter allocation requests. This should speed up rule evaluation by increasing data locality; it also speeds up ruleset loading because we reduce calls to the percpu allocator. As Eric points out, we can't use PAGE_SIZE; the page allocator would fail on arches with a 64k page size. Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 05a94bd32c55..5117e4d2ddfa 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -403,8 +403,13 @@ static inline unsigned long ifname_compare_aligned(const char *_a, return ret; } +struct xt_percpu_counter_alloc_state { + unsigned int off; + const char __percpu *mem; +}; -bool xt_percpu_counter_alloc(struct xt_counters *counters); +bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state, + struct xt_counters *counter); void xt_percpu_counter_free(struct xt_counters *cnt); static inline struct xt_counters * -- cgit v1.2.3 From df122f58b834b24c27d7e2ac02a4910d3e56f6ae Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 28 Nov 2016 11:40:05 +0100 Subject: netfilter: ingress: translate 0 nf_hook_slow retval to -1 The caller assumes that < 0 means that skb was stolen (or free'd). All other return values continue skb processing. nf_hook_slow returns 3 different return value types: A) a (negative) errno value: the skb was dropped (NF_DROP, e.g. by iptables '-j DROP' rule). B) 0. The skb was stolen by the hook or queued to userspace. C) 1. All hooks returned NF_ACCEPT, so the caller should invoke the okfn so packet processing can continue. The nft ingress facility currently doesn't have the 'okfn' that the NF_HOOK() macros use; there is no nfqueue support either. So 1 means that the nf_hook_ingress() caller should go on processing the skb. In order to allow use of NF_STOLEN from ingress, we need to translate this to an errno number; otherwise we'd crash because we'd continue with an already-freed (or about-to-be-freed) skb. The errno value isn't checked; it's just important that it's less than 0, so return -1.
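Seen from the caller, the contract is simply "negative means hands off the skb". A minimal sketch of how an ingress caller consumes nf_hook_ingress() (illustrative only; example_handle_ingress() is a made-up wrapper, not the actual net/core/dev.c code):

/* Negative return: the skb was dropped or stolen, don't touch it again.
 * Zero: carry on with the normal receive path.
 */
static int example_handle_ingress(struct sk_buff *skb)
{
        if (nf_hook_ingress_active(skb)) {
                int ret = nf_hook_ingress(skb);

                if (ret < 0)
                        return ret;     /* skb gone: dropped or stolen */
        }
        return 0;                       /* continue normal RX processing */
}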
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ingress.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 2dc3b49b804a..59476061de86 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -19,6 +19,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb) { struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress); struct nf_hook_state state; + int ret; /* Must recheck the ingress hook head, in the event it became NULL * after the check in nf_hook_ingress_active evaluated to true. @@ -29,7 +30,11 @@ static inline int nf_hook_ingress(struct sk_buff *skb) nf_hook_state_init(&state, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); - return nf_hook_slow(skb, &state, e); + ret = nf_hook_slow(skb, &state, e); + if (ret == 0) + return -1; + + return ret; } static inline void nf_hook_ingress_init(struct net_device *dev) -- cgit v1.2.3 From 89caaa2d80b7bf9bd8632cd3137254f8c685e5db Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 7 Dec 2016 15:20:07 +0100 Subject: net: stmmac: add support for independent DMA pbl for tx/rx GMAC and newer support independent programmable burst lengths for DMA tx/rx. Add new optional devicetree properties representing this. To be backwards compatible, snps,pbl will still be valid, but snps,txpbl/snps,rxpbl will override the value in snps,pbl if set. If the IP is synthesized to use the AXI interface, there is a register and a matching DT property inside the optional stmmac-axi-config DT node for controlling burst lengths, named snps,blen. However, using this register, it is not possible to control tx and rx independently. Also, this register is not available if the IP was synthesized with, e.g., the AHB interface. Signed-off-by: Niklas Cassel Acked-by: Alexandre Torgue Signed-off-by: David S.
Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index e6d7a5940819..266dab9ad782 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -90,6 +90,7 @@ struct stmmac_dma_cfg { int pbl; int txpbl; int rxpbl; + bool pblx8; int fixed_burst; int mixed_burst; bool aal; -- cgit v1.2.3 From 13bfff25c081f4e060af761c4082b5a96f756810 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Dec 2016 08:29:10 -0800 Subject: net: rfs: add a jump label RFS is not commonly used, so add a jump label to avoid some conditionals in fast path. Signed-off-by: Eric Dumazet Cc: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1ff5ea6e1221..994f7423a74b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -192,6 +192,7 @@ struct net_device_stats { #ifdef CONFIG_RPS #include <linux/static_key.h> extern struct static_key rps_needed; +extern struct static_key rfs_needed; #endif struct neighbour; -- cgit v1.2.3 From c8c8b127091b758f5768f906bcdeeb88bc9951ca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Dec 2016 09:19:33 -0800 Subject: udp: under rx pressure, try to condense skbs Under UDP flood, many softirq producers try to add packets to the UDP receive queue, and one user thread is burning one cpu trying to dequeue packets as fast as possible. Two parts of the per-packet cost are: - copying payload from kernel space to user space, - freeing memory pieces associated with skb. If the socket is under pressure, softirq handler(s) can try to pull in skb->head the payload of the packet if it fits. This means the softirq handler(s) can free/reuse the page fragment immediately, instead of letting udp_recvmsg() do this hundreds of usec later, possibly from another node. Additional gains: - We reduce skb->truesize and thus can store more packets per SO_RCVBUF - We avoid cache line misses at copyout() time and consume_skb() time, and avoid one put_page() with potential alien freeing on NUMA hosts. This comes at the cost of a copy, bounded to available tail room, which is usually small. (We might have to fix GRO_MAX_HEAD which looks bigger than necessary) This patch gave me about a 5% increase in throughput in my tests. The skb_condense() helper could probably be used in other contexts. Signed-off-by: Eric Dumazet Cc: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9c535fbccf2c..0cd92b0f2af5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1966,6 +1966,8 @@ static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL; } +void skb_condense(struct sk_buff *skb); + /** * skb_headroom - bytes at buffer head * @skb: buffer to check -- cgit v1.2.3 From d2a4dd37f6b41fbcad76efbf63124eb3126c66fe Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 7 Dec 2016 10:57:59 -0800 Subject: bpf: fix state equivalence Commits 57a09bf0a416 ("bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers") and 484611357c19 ("bpf: allow access into map value arrays") by themselves are correct, but in combination they make state equivalence ignore the 'id' field of the register state, which can lead to accepting an invalid program.
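The fix works because the verifier's state comparison can then cover 'id' together with the exactly-matched fields while treating the trailing min_value/max_value range separately, which is what the "must be last" comment in the diff below refers to. A purely illustrative sketch of that idea, assuming the struct bpf_reg_state layout from the diff (this is not the actual states_equal() code):

/* One bounded memcmp() covers type, the pointer/immediate union and id;
 * the trailing min_value/max_value range can then be compared more
 * loosely by the caller.
 */
static bool example_regs_strictly_equal(const struct bpf_reg_state *rold,
                                        const struct bpf_reg_state *rcur)
{
        return memcmp(rold, rcur,
                      offsetof(struct bpf_reg_state, min_value)) == 0;
}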
Fixes: 57a09bf0a416 ("bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers") Fixes: 484611357c19 ("bpf: allow access into map value arrays") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7453c1281531..a13b031dc6b8 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -18,13 +18,6 @@ struct bpf_reg_state { enum bpf_reg_type type; - /* - * Used to determine if any memory access using this register will - * result in a bad access. - */ - s64 min_value; - u64 max_value; - u32 id; union { /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ s64 imm; @@ -40,6 +33,13 @@ struct bpf_reg_state { */ struct bpf_map *map_ptr; }; + u32 id; + /* Used to determine if any memory access using this register will + * result in a bad access. These two fields must be last. + * See states_equal() + */ + s64 min_value; + u64 max_value; }; enum bpf_stack_slot_type { -- cgit v1.2.3 From f38e7a32ee4fc9c8aeeac59e6e0462cd281586e3 Mon Sep 17 00:00:00 2001 From: "Woojung.Huh@microchip.com" Date: Wed, 7 Dec 2016 20:26:07 +0000 Subject: phy: add phy fixup unregister functions From: Woojung Huh Add functions to unregister phy fixup for modules. int phy_unregister_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask) Unregister phy fixup from phy_fixup_list per bus_id, phy_uid & phy_uid_mask int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask) Unregister phy fixup from phy_fixup_list. Use it for fixup registered by phy_register_fixup_for_uid() int phy_unregister_fixup_for_id(const char *bus_id) Unregister phy fixup from phy_fixup_list. Use it for fixup registered by phy_register_fixup_for_id() Signed-off-by: Woojung Huh Signed-off-by: David S. Miller --- include/linux/phy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index feb8a98e8dd3..f7d95f644eed 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -860,6 +860,10 @@ int phy_register_fixup_for_id(const char *bus_id, int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); +int phy_unregister_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask); +int phy_unregister_fixup_for_id(const char *bus_id); +int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask); + int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable); int phy_get_eee_err(struct phy_device *phydev); int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data); -- cgit v1.2.3 From 17bedab2723145d17b14084430743549e6943d03 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 7 Dec 2016 15:53:11 -0800 Subject: bpf: xdp: Allow head adjustment in XDP prog This patch allows an XDP prog to extend/remove the packet data at the head (like adding or removing a header). It is done by adding a new XDP helper, bpf_xdp_adjust_head(). It also renames bpf_helper_changes_skb_data() to bpf_helper_changes_pkt_data() to better reflect that an XDP prog does not work on skbs. This patch adds one "xdp_adjust_head" bit to bpf_prog for the XDP-capable driver to check if the XDP prog requires bpf_xdp_adjust_head() support. The driver can then decide to error out during XDP_SETUP_PROG.
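A sketch of the driver-side check this enables (illustrative only; example_xdp_setup() and example_dev_has_headroom() are made-up placeholders for whatever a given driver's XDP_SETUP_PROG handler actually tests):

static bool example_dev_has_headroom(const struct net_device *dev); /* placeholder */

/* A driver whose RX buffers leave no headroom for the program to grow
 * the packet can refuse such programs at attach time instead of
 * misbehaving at run time.
 */
static int example_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
{
        if (prog && prog->xdp_adjust_head && !example_dev_has_headroom(dev))
                return -EOPNOTSUPP;

        /* ... install the program on the RX path ... */
        return 0;
}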
Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/filter.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index f078d2b1cff6..6a1658308612 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -406,7 +406,8 @@ struct bpf_prog { u16 jited:1, /* Is our filter JIT'ed? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ - dst_needed:1; /* Do we need dst entry? */ + dst_needed:1, /* Do we need dst entry? */ + xdp_adjust_head:1; /* Adjusting pkt head? */ kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ @@ -440,6 +441,7 @@ struct bpf_skb_data_end { struct xdp_buff { void *data; void *data_end; + void *data_hard_start; }; /* compute the linear packet data range [data, data_end) which @@ -595,7 +597,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); -bool bpf_helper_changes_skb_data(void *func); +bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); -- cgit v1.2.3 From c84d949057cab262b4d3110ead9a42a58c2958f7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Dec 2016 11:41:55 -0800 Subject: udp: copy skb->truesize in the first cache line In the UDP RX handler, we currently clear skb->dev before the skb is added to the receive queue, because the device pointer is no longer available once we exit the RCU section. Since this first cache line is always hot, let's reuse this space to store skb->truesize and thus avoid a cache line miss at udp_recvmsg()/udp_skb_destructor time while the receive queue spinlock is held. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0cd92b0f2af5..332e76756f54 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -645,8 +645,15 @@ struct sk_buff { struct rb_node rbnode; /* used in netem & tcp stack */ }; struct sock *sk; - struct net_device *dev; + union { + struct net_device *dev; + /* Some protocols might use this space to store information, + * while device pointer would be NULL. + * UDP receive path is one user. + */ + unsigned long dev_scratch; + }; /* * This is the control buffer. It is free to use for every * layer. Please put your private variables there. If you -- cgit v1.2.3 From 6b229cf77d683f634f0edd876c6d1015402303ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Dec 2016 11:41:56 -0800 Subject: udp: add batching to udp_rmem_release() If udp_recvmsg() constantly releases sk_rmem_alloc for every read packet, it gives producers the opportunity to immediately grab spinlocks and desperately try adding another packet, causing false sharing. We can add a simple heuristic to release the memory in batches of ~25% of the queue capacity. This patch considerably increases performance under flood, by about 50%, since the thread draining the queue is no longer slowed by false sharing. Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/linux/udp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index d1fd8cd39478..c0f530809d1f 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -79,6 +79,9 @@ struct udp_sock { int (*gro_complete)(struct sock *sk, struct sk_buff *skb, int nhoff); + + /* This field is dirtied by udp_recvmsg() */ + int forward_deficit; }; static inline struct udp_sock *udp_sk(const struct sock *sk) -- cgit v1.2.3
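A simplified sketch of the batching heuristic described above (illustrative only, not the exact net/ipv4/udp.c code): freed receive memory is parked in forward_deficit and only returned to the socket accounting once roughly a quarter of sk_rcvbuf has accumulated.

static void example_udp_rmem_release(struct sock *sk, int size, int partial)
{
        struct udp_sock *up = udp_sk(sk);

        if (partial) {
                up->forward_deficit += size;
                if (up->forward_deficit < (sk->sk_rcvbuf >> 2))
                        return;         /* keep batching */
                size = up->forward_deficit;
        } else {
                size += up->forward_deficit;
        }
        up->forward_deficit = 0;

        /* One batched release instead of one per packet. */
        atomic_sub(size, &sk->sk_rmem_alloc);
        sk->sk_forward_alloc += size;
}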