From 6e7333d315a768170a59ac771297ee0551bdddbf Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Mon, 1 Feb 2016 18:51:05 -0500 Subject: net: add rx_nohandler stat counter This adds an rx_nohandler stat counter, along with a sysfs statistics node, and copies the counter out via netlink as well. CC: "David S. Miller" CC: Eric Dumazet CC: Jiri Pirko CC: Daniel Borkmann CC: Tom Herbert CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a30b78090594..d3e90b91e07e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -35,6 +35,8 @@ struct rtnl_link_stats { /* for cslip etc */ __u32 rx_compressed; __u32 tx_compressed; + + __u32 rx_nohandler; /* dropped, no handler found */ }; /* The main device statistics structure */ @@ -68,6 +70,8 @@ struct rtnl_link_stats64 { /* for cslip etc */ __u64 rx_compressed; __u64 tx_compressed; + + __u64 rx_nohandler; /* dropped, no handler found */ }; /* The struct should be in sync with struct ifmap */ -- cgit v1.2.3 From 824bd0ce6c7c43a9e1e210abf124958e54d88342 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Feb 2016 22:39:53 -0800 Subject: bpf: introduce BPF_MAP_TYPE_PERCPU_HASH map Introduce BPF_MAP_TYPE_PERCPU_HASH map type which is used to do accurate counters without need to use BPF_XADD instruction which turned out to be too costly for high-performance network monitoring. In the typical use case the 'key' is the flow tuple or other long living object that sees a lot of events per second. bpf_map_lookup_elem() returns per-cpu area. Example: struct { u32 packets; u32 bytes; } * ptr = bpf_map_lookup_elem(&map, &key); /* ptr points to this_cpu area of the value, so the following * increments will not collide with other cpus */ ptr->packets ++; ptr->bytes += skb->len; bpf_update_elem() atomically creates a new element where all per-cpu values are zero initialized and this_cpu value is populated with given 'value'. Note that non-per-cpu hash map always allocates new element and then deletes old after rcu grace period to maintain atomicity of update. Per-cpu hash map updates element values in-place. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index aa6f8571de13..43ae40c8763e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -81,6 +81,7 @@ enum bpf_map_type { BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PROG_ARRAY, BPF_MAP_TYPE_PERF_EVENT_ARRAY, + BPF_MAP_TYPE_PERCPU_HASH, }; enum bpf_prog_type { -- cgit v1.2.3 From a10423b87a7eae75da79ce80a8d9475047a674ee Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Feb 2016 22:39:54 -0800 Subject: bpf: introduce BPF_MAP_TYPE_PERCPU_ARRAY map Primary use case is a histogram array of latency where bpf program computes the latency of block requests or other events and stores histogram of latency into array of 64 elements. All cpus are constantly running, so normal increment is not accurate, bpf_xadd causes cache ping-pong and this per-cpu approach allows fastest collision-free counters. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 43ae40c8763e..2ee0fde1bf96 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -82,6 +82,7 @@ enum bpf_map_type { BPF_MAP_TYPE_PROG_ARRAY, BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_MAP_TYPE_PERCPU_HASH, + BPF_MAP_TYPE_PERCPU_ARRAY, }; enum bpf_prog_type { -- cgit v1.2.3 From 67eb03318bc5fe170ae832423fda7a23b0d801cf Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 2 Feb 2016 07:43:45 -0800 Subject: net: Add support for fill_slave_info to VRF device Allows userspace to have direct access to VRF table association versus looking up master device and its table. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d3e90b91e07e..d452cea59020 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -405,6 +405,14 @@ enum { #define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1) +enum { + IFLA_VRF_PORT_UNSPEC, + IFLA_VRF_PORT_TABLE, + __IFLA_VRF_PORT_MAX +}; + +#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1) + /* IPVLAN section */ enum { IFLA_IPVLAN_UNSPEC, -- cgit v1.2.3 From 103a8ad1fa3b261c78dfc842cb315defe9d40be0 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 3 Feb 2016 04:04:36 +0100 Subject: ethtool: add speed/duplex validation functions Add functions which check if the speed/duplex are defined. Signed-off-by: Nikolay Aleksandrov Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 57fa39005e79..b2e180181629 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1319,11 +1319,45 @@ enum ethtool_sfeatures_retval_bits { #define SPEED_UNKNOWN -1 +static inline int ethtool_validate_speed(__u32 speed) +{ + switch (speed) { + case SPEED_10: + case SPEED_100: + case SPEED_1000: + case SPEED_2500: + case SPEED_5000: + case SPEED_10000: + case SPEED_20000: + case SPEED_25000: + case SPEED_40000: + case SPEED_50000: + case SPEED_56000: + case SPEED_100000: + case SPEED_UNKNOWN: + return 1; + } + + return 0; +} + /* Duplex, half or full. */ #define DUPLEX_HALF 0x00 #define DUPLEX_FULL 0x01 #define DUPLEX_UNKNOWN 0xff +static inline int ethtool_validate_duplex(__u8 duplex) +{ + switch (duplex) { + case DUPLEX_HALF: + case DUPLEX_FULL: + case DUPLEX_UNKNOWN: + return 1; + } + + return 0; +} + /* Which connector port. */ #define PORT_TP 0x00 #define PORT_AUI 0x01 -- cgit v1.2.3 From 157ede6784ba2837c7dc43f195418c75927f8488 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Wed, 3 Feb 2016 09:57:04 +0100 Subject: bridge: mdb: add support for offloaded mdb entries Add new bitmask member 'flags' to br_mdb_entry structure. Adding MDB_FLAGS_OFFLOAD bit which indicates MDB entries is offloaded to hardware. Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 18db14477bdd..ec3547234998 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -183,6 +183,8 @@ struct br_mdb_entry { #define MDB_TEMPORARY 0 #define MDB_PERMANENT 1 __u8 state; +#define MDB_FLAGS_OFFLOAD (1 << 0) + __u8 flags; __u16 vid; struct { union { -- cgit v1.2.3 From 12b74dfadb5a7a23baf4db941dc9fd9d371f249a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 4 Feb 2016 13:31:17 +0100 Subject: ipv4: add option to drop unicast encapsulated in L2 multicast In order to solve a problem with 802.11, the so-called hole-196 attack, add an option (sysctl) called "drop_unicast_in_l2_multicast" which, if enabled, causes the stack to drop IPv4 unicast packets encapsulated in link-layer multi- or broadcast frames. Such frames can (as an attack) be created by any member of the same wireless network and transmitted as valid encrypted frames since the symmetric key for broadcast frames is shared between all stations. Additionally, enabling this option provides compliance with a SHOULD clause of RFC 1122. Reviewed-by: Julian Anastasov Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/uapi/linux/ip.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 08f894d2ddbd..584834f7e95c 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -165,6 +165,7 @@ enum IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL, IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL, IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, + IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, __IPV4_DEVCONF_MAX }; -- cgit v1.2.3 From 97daf331455077645ae1f13438bebd3d1a2e94ee Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 4 Feb 2016 13:31:18 +0100 Subject: ipv4: add option to drop gratuitous ARP packets In certain 802.11 wireless deployments, there will be ARP proxies that use knowledge of the network to correctly answer requests. To prevent gratuitous ARP frames on the shared medium from being a problem, on such deployments wireless needs to drop them. Enable this by providing an option called "drop_gratuitous_arp". Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/uapi/linux/ip.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 584834f7e95c..f291569768dd 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -166,6 +166,7 @@ enum IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL, IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, + IPV4_DEVCONF_DROP_GRATUITOUS_ARP, __IPV4_DEVCONF_MAX }; -- cgit v1.2.3 From abbc30436d39dfed8ebfca338d253f211ac7b094 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 4 Feb 2016 13:31:19 +0100 Subject: ipv6: add option to drop unicast encapsulated in L2 multicast In order to solve a problem with 802.11, the so-called hole-196 attack, add an option (sysctl) called "drop_unicast_in_l2_multicast" which, if enabled, causes the stack to drop IPv6 unicast packets encapsulated in link-layer multi- or broadcast frames. Such frames can (as an attack) be created by any member of the same wireless network and transmitted as valid encrypted frames since the symmetric key for broadcast frames is shared between all stations. Reviewed-by: Julian Anastasov Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/uapi/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 38b4fef20219..4c413570efe8 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -174,6 +174,7 @@ enum { DEVCONF_USE_OIF_ADDRS_ONLY, DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT, DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, + DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, DEVCONF_MAX }; -- cgit v1.2.3 From 7a02bf892d8f1e5298af1676f001bee410509d80 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 4 Feb 2016 13:31:20 +0100 Subject: ipv6: add option to drop unsolicited neighbor advertisements In certain 802.11 wireless deployments, there will be NA proxies that use knowledge of the network to correctly answer requests. To prevent unsolicitd advertisements on the shared medium from being a problem, on such deployments wireless needs to drop them. Enable this by providing an option called "drop_unsolicited_na". Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/uapi/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 4c413570efe8..ec117b65d5a5 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -175,6 +175,7 @@ enum { DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT, DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, + DEVCONF_DROP_UNSOLICITED_NA, DEVCONF_MAX }; -- cgit v1.2.3 From 72bb68721f80a1441e871b6afc9ab0b3793d5031 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 5 Feb 2016 11:16:21 +0000 Subject: ethtool: add IPv6 to the NFC API Signed-off-by: Edward Cree Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 70 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index b2e180181629..5e0940dcbfe8 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -748,6 +748,56 @@ struct ethtool_usrip4_spec { __u8 proto; }; +/** + * struct ethtool_tcpip6_spec - flow specification for TCP/IPv6 etc. + * @ip6src: Source host + * @ip6dst: Destination host + * @psrc: Source port + * @pdst: Destination port + * @tclass: Traffic Class + * + * This can be used to specify a TCP/IPv6, UDP/IPv6 or SCTP/IPv6 flow. + */ +struct ethtool_tcpip6_spec { + __be32 ip6src[4]; + __be32 ip6dst[4]; + __be16 psrc; + __be16 pdst; + __u8 tclass; +}; + +/** + * struct ethtool_ah_espip6_spec - flow specification for IPsec/IPv6 + * @ip6src: Source host + * @ip6dst: Destination host + * @spi: Security parameters index + * @tclass: Traffic Class + * + * This can be used to specify an IPsec transport or tunnel over IPv6. + */ +struct ethtool_ah_espip6_spec { + __be32 ip6src[4]; + __be32 ip6dst[4]; + __be32 spi; + __u8 tclass; +}; + +/** + * struct ethtool_usrip6_spec - general flow specification for IPv6 + * @ip6src: Source host + * @ip6dst: Destination host + * @l4_4_bytes: First 4 bytes of transport (layer 4) header + * @tclass: Traffic Class + * @l4_proto: Transport protocol number (nexthdr after any Extension Headers) + */ +struct ethtool_usrip6_spec { + __be32 ip6src[4]; + __be32 ip6dst[4]; + __be32 l4_4_bytes; + __u8 tclass; + __u8 l4_proto; +}; + union ethtool_flow_union { struct ethtool_tcpip4_spec tcp_ip4_spec; struct ethtool_tcpip4_spec udp_ip4_spec; @@ -755,6 +805,12 @@ union ethtool_flow_union { struct ethtool_ah_espip4_spec ah_ip4_spec; struct ethtool_ah_espip4_spec esp_ip4_spec; struct ethtool_usrip4_spec usr_ip4_spec; + struct ethtool_tcpip6_spec tcp_ip6_spec; + struct ethtool_tcpip6_spec udp_ip6_spec; + struct ethtool_tcpip6_spec sctp_ip6_spec; + struct ethtool_ah_espip6_spec ah_ip6_spec; + struct ethtool_ah_espip6_spec esp_ip6_spec; + struct ethtool_usrip6_spec usr_ip6_spec; struct ethhdr ether_spec; __u8 hdata[52]; }; @@ -1401,15 +1457,17 @@ static inline int ethtool_validate_duplex(__u8 duplex) #define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ #define SCTP_V4_FLOW 0x03 /* hash or spec (sctp_ip4_spec) */ #define AH_ESP_V4_FLOW 0x04 /* hash only */ -#define TCP_V6_FLOW 0x05 /* hash only */ -#define UDP_V6_FLOW 0x06 /* hash only */ -#define SCTP_V6_FLOW 0x07 /* hash only */ +#define TCP_V6_FLOW 0x05 /* hash or spec (tcp_ip6_spec; nfc only) */ +#define UDP_V6_FLOW 0x06 /* hash or spec (udp_ip6_spec; nfc only) */ +#define SCTP_V6_FLOW 0x07 /* hash or spec (sctp_ip6_spec; nfc only) */ #define AH_ESP_V6_FLOW 0x08 /* hash only */ #define AH_V4_FLOW 0x09 /* hash or spec (ah_ip4_spec) */ #define ESP_V4_FLOW 0x0a /* hash or spec (esp_ip4_spec) */ -#define AH_V6_FLOW 0x0b /* hash only */ -#define ESP_V6_FLOW 0x0c /* hash only */ -#define IP_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */ +#define AH_V6_FLOW 0x0b /* hash or spec (ah_ip6_spec; nfc only) */ +#define ESP_V6_FLOW 0x0c /* hash or spec (esp_ip6_spec; nfc only) */ +#define IPV4_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */ +#define IP_USER_FLOW IPV4_USER_FLOW +#define IPV6_USER_FLOW 0x0e /* spec only (usr_ip6_spec; nfc only) */ #define IPV4_FLOW 0x10 /* hash only */ #define IPV6_FLOW 0x11 /* hash only */ #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ -- cgit v1.2.3 From 4456ed04ea44b800d691b18c14a68ec9894d2aca Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 7 Feb 2016 23:27:55 +0200 Subject: ethtool: future-proof interface for speed extensions Many virtual and not quite virtual devices allow any speed to be set through ethtool. In particular, this applies to the virtio-net devices. Document this fact to make sure people don't assume the enum lists all possible values. Reserve values greater than INT_MAX for future extension and to avoid conflict with SPEED_UNKNOWN. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 5e0940dcbfe8..4345f80a2e33 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -31,7 +31,7 @@ * physical connectors and other link features that are * advertised through autonegotiation or enabled for * auto-detection. - * @speed: Low bits of the speed + * @speed: Low bits of the speed, 1Mb units, 0 to INT_MAX or SPEED_UNKNOWN * @duplex: Duplex mode; one of %DUPLEX_* * @port: Physical connector type; one of %PORT_* * @phy_address: MDIO address of PHY (transceiver); 0 or 255 if not @@ -47,7 +47,7 @@ * obsoleted by &struct ethtool_coalesce. Read-only; deprecated. * @maxrxpkt: Historically used to report RX IRQ coalescing; now * obsoleted by &struct ethtool_coalesce. Read-only; deprecated. - * @speed_hi: High bits of the speed + * @speed_hi: High bits of the speed, 1Mb units, 0 to INT_MAX or SPEED_UNKNOWN * @eth_tp_mdix: Ethernet twisted-pair MDI(-X) status; one of * %ETH_TP_MDI_*. If the status is unknown or not applicable, the * value will be %ETH_TP_MDI_INVALID. Read-only. @@ -1359,7 +1359,7 @@ enum ethtool_sfeatures_retval_bits { * it was forced up into this mode or autonegotiated. */ -/* The forced speed, 10Mb, 100Mb, gigabit, [2.5|5|10|20|25|40|50|56|100]GbE. */ +/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal. */ #define SPEED_10 10 #define SPEED_100 100 #define SPEED_1000 1000 -- cgit v1.2.3 From 4a92602aa1cd5bbaeedbd9536ff992f7d26fe9d1 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Fri, 5 Feb 2016 09:20:52 -0700 Subject: openvswitch: allow management from inside user namespaces Operations with the GENL_ADMIN_PERM flag fail permissions checks because this flag means we call netlink_capable, which uses the init user ns. Instead, let's introduce a new flag, GENL_UNS_ADMIN_PERM for operations which should be allowed inside a user namespace. The motivation for this is to be able to run openvswitch in unprivileged containers. I've tested this and it seems to work, but I really have no idea about the security consequences of this patch, so thoughts would be much appreciated. v2: use the GENL_UNS_ADMIN_PERM flag instead of a check in each function v3: use separate ifs for UNS_ADMIN_PERM and ADMIN_PERM, instead of one massive one Reported-by: James Page Signed-off-by: Tycho Andersen CC: Eric Biederman CC: Pravin Shelar CC: Justin Pettit CC: "David S. Miller" Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/genetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index c3363ba1ae05..5512c90af7e3 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -21,6 +21,7 @@ struct genlmsghdr { #define GENL_CMD_CAP_DO 0x02 #define GENL_CMD_CAP_DUMP 0x04 #define GENL_CMD_CAP_HASPOL 0x08 +#define GENL_UNS_ADMIN_PERM 0x10 /* * List of reserved static generic netlink identifiers: -- cgit v1.2.3 From e02564ee334a7ae46b71fc18576391cb9455433e Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 7 Feb 2016 21:52:23 +0100 Subject: ethtool: make validate_speed accept all speeds between 0 and INT_MAX Devices these days can have any speed and as was recently pointed out any speed from 0 to INT_MAX is valid so adjust speed validation to accept such values. Signed-off-by: Nikolay Aleksandrov Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 4345f80a2e33..190aea0faaf4 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1377,24 +1377,7 @@ enum ethtool_sfeatures_retval_bits { static inline int ethtool_validate_speed(__u32 speed) { - switch (speed) { - case SPEED_10: - case SPEED_100: - case SPEED_1000: - case SPEED_2500: - case SPEED_5000: - case SPEED_10000: - case SPEED_20000: - case SPEED_25000: - case SPEED_40000: - case SPEED_50000: - case SPEED_56000: - case SPEED_100000: - case SPEED_UNKNOWN: - return 1; - } - - return 0; + return speed <= INT_MAX || speed == SPEED_UNKNOWN; } /* Duplex, half or full. */ -- cgit v1.2.3 From cd9b266095f422267bddbec88f9098b48ea548fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Feb 2016 22:02:53 -0800 Subject: tcp: add tcpi_min_rtt and tcpi_notsent_bytes to tcp_info tcpi_min_rtt reports the minimal rtt observed by TCP stack for the flow, in usec unit. Might be ~0U if not yet known. tcpi_notsent_bytes reports the amount of bytes in the write queue that were not yet sent. This is done in a single patch to not add a temporary 32bit padding hole in tcp_info. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 65a77b071e22..fe95446e9abf 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -196,6 +196,9 @@ struct tcp_info { __u64 tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */ __u32 tcpi_segs_out; /* RFC4898 tcpEStatsPerfSegsOut */ __u32 tcpi_segs_in; /* RFC4898 tcpEStatsPerfSegsIn */ + + __u32 tcpi_notsent_bytes; + __u32 tcpi_min_rtt; }; /* for TCP_MD5SIG socket option */ -- cgit v1.2.3 From d1b4c689d4130bcfd3532680b64db562300716b6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 18 Feb 2016 15:03:24 +0100 Subject: netlink: remove mmapped netlink support mmapped netlink has a number of unresolved issues: - TX zerocopy support had to be disabled more than a year ago via commit 4682a0358639b29cf ("netlink: Always copy on mmap TX.") because the content of the mmapped area can change after netlink attribute validation but before message processing. - RX support was implemented mainly to speed up nfqueue dumping packet payload to userspace. However, since commit ae08ce0021087a5d812d2 ("netfilter: nfnetlink_queue: zero copy support") we avoid one copy with the socket-based interface too (via the skb_zerocopy helper). The other problem is that skbs attached to mmaped netlink socket behave different from normal skbs: - they don't have a shinfo area, so all functions that use skb_shinfo() (e.g. skb_clone) cannot be used. - reserving headroom prevents userspace from seeing the content as it expects message to start at skb->head. See for instance commit aa3a022094fa ("netlink: not trim skb for mmaped socket when dump"). - skbs handed e.g. to netlink_ack must have non-NULL skb->sk, else we crash because it needs the sk to check if a tx ring is attached. Also not obvious, leads to non-intuitive bug fixes such as 7c7bdf359 ("netfilter: nfnetlink: use original skbuff when acking batches"). mmaped netlink also didn't play nicely with the skb_zerocopy helper used by nfqueue and openvswitch. Daniel Borkmann fixed this via commit 6bb0fef489f6 ("netlink, mmap: fix edge-case leakages in nf queue zero-copy")' but at the cost of also needing to provide remaining length to the allocation function. nfqueue also has problems when used with mmaped rx netlink: - mmaped netlink doesn't allow use of nfqueue batch verdict messages. Problem is that in the mmap case, the allocation time also determines the ordering in which the frame will be seen by userspace (A allocating before B means that A is located in earlier ring slot, but this also means that B might get a lower sequence number then A since seqno is decided later. To fix this we would need to extend the spinlocked region to also cover the allocation and message setup which isn't desirable. - nfqueue can now be configured to queue large (GSO) skbs to userspace. Queing GSO packets is faster than having to force a software segmentation in the kernel, so this is a desirable option. However, with a mmap based ring one has to use 64kb per ring slot element, else mmap has to fall back to the socket path (NL_MMAP_STATUS_COPY) for all large packets. To use the mmap interface, userspace not only has to probe for mmap netlink support, it also has to implement a recv/socket receive path in order to handle messages that exceed the size of an rx ring element. Cc: Daniel Borkmann Cc: Ken-ichirou MATSUZAWA Cc: Pablo Neira Ayuso Cc: Patrick McHardy Cc: Thomas Graf Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/uapi/linux/netlink.h | 4 ++++ include/uapi/linux/netlink_diag.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index f095155d8749..0dba4e4ed2be 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -107,8 +107,10 @@ struct nlmsgerr { #define NETLINK_PKTINFO 3 #define NETLINK_BROADCAST_ERROR 4 #define NETLINK_NO_ENOBUFS 5 +#ifndef __KERNEL__ #define NETLINK_RX_RING 6 #define NETLINK_TX_RING 7 +#endif #define NETLINK_LISTEN_ALL_NSID 8 #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 @@ -134,6 +136,7 @@ struct nl_mmap_hdr { __u32 nm_gid; }; +#ifndef __KERNEL__ enum nl_mmap_status { NL_MMAP_STATUS_UNUSED, NL_MMAP_STATUS_RESERVED, @@ -145,6 +148,7 @@ enum nl_mmap_status { #define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO #define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) #define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) +#endif #define NET_MAJOR 36 /* Major 36 is reserved for networking */ diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h index f2159d30d1f5..d79399394b46 100644 --- a/include/uapi/linux/netlink_diag.h +++ b/include/uapi/linux/netlink_diag.h @@ -48,6 +48,8 @@ enum { #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */ #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */ +#ifndef __KERNEL__ #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ +#endif #endif -- cgit v1.2.3 From 2125715635053d4207a756a35aa718f548824e58 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 16 Feb 2016 12:46:54 +0100 Subject: bridge: mdb: add support for more attributes and export timer Currently mdb entries are exported directly as a structure inside MDBA_MDB_ENTRY_INFO attribute, we can't really extend it without breaking user-space. In order to export new mdb fields, I've converted the MDBA_MDB_ENTRY_INFO into a nested attribute which starts like before with struct br_mdb_entry (without header, as it's casted directly in iproute2) and continues with MDBA_MDB_EATTR_ attributes. This way we keep compatibility with older users and can export new data. I've tested this with iproute2, both with and without support for the added attribute and it works fine. So basically we again have MDBA_MDB_ENTRY_INFO with struct br_mdb_entry inside but it may contain also some additional MDBA_MDB_EATTR_ attributes such as MDBA_MDB_EATTR_TIMER which can be parsed by user-space. So the new structure is: [MDBA_MDB] = { [MDBA_MDB_ENTRY] = { [MDBA_MDB_ENTRY_INFO] [MDBA_MDB_ENTRY_INFO] { <- Nested attribute struct br_mdb_entry <- nla_put_nohdr() [MDBA_MDB_ENTRY attributes] <- normal netlink attributes } } } Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index ec3547234998..0890b217580d 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -137,7 +137,10 @@ struct bridge_vlan_info { /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { - * [MDBA_MDB_ENTRY_INFO] + * [MDBA_MDB_ENTRY_INFO] { + * struct br_mdb_entry + * [MDBA_MDB_EATTR attributes] + * } * } * } * [MDBA_ROUTER] = { @@ -166,6 +169,14 @@ enum { }; #define MDBA_MDB_ENTRY_MAX (__MDBA_MDB_ENTRY_MAX - 1) +/* per mdb entry additional attributes */ +enum { + MDBA_MDB_EATTR_UNSPEC, + MDBA_MDB_EATTR_TIMER, + __MDBA_MDB_EATTR_MAX +}; +#define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1) + enum { MDBA_ROUTER_UNSPEC, MDBA_ROUTER_PORT, -- cgit v1.2.3 From ac2c7ad0e5d6030452c9af2fafd192e17fd04264 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 19 Feb 2016 09:24:01 -0500 Subject: net/ethtool: introduce a new ioctl for per queue setting Introduce a new ioctl ETHTOOL_PERQUEUE for per queue parameters setting. The following patches will enable some SUB_COMMANDs for per queue setting. Signed-off-by: Kan Liang Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 190aea0faaf4..f15ae02621a1 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1202,6 +1202,21 @@ enum ethtool_sfeatures_retval_bits { #define ETHTOOL_F_WISH (1 << ETHTOOL_F_WISH__BIT) #define ETHTOOL_F_COMPAT (1 << ETHTOOL_F_COMPAT__BIT) +#define MAX_NUM_QUEUE 4096 + +/** + * struct ethtool_per_queue_op - apply sub command to the queues in mask. + * @cmd: ETHTOOL_PERQUEUE + * @sub_command: the sub command which apply to each queues + * @queue_mask: Bitmap of the queues which sub command apply to + * @data: A complete command structure following for each of the queues addressed + */ +struct ethtool_per_queue_op { + __u32 cmd; + __u32 sub_command; + __u32 queue_mask[DIV_ROUND_UP(MAX_NUM_QUEUE, 32)]; + char data[]; +}; /* CMDs currently supported */ #define ETHTOOL_GSET 0x00000001 /* Get settings. */ @@ -1285,6 +1300,8 @@ enum ethtool_sfeatures_retval_bits { #define ETHTOOL_STUNABLE 0x00000049 /* Set tunable configuration */ #define ETHTOOL_GPHYSTATS 0x0000004a /* get PHY-specific statistics */ +#define ETHTOOL_PERQUEUE 0x0000004b /* Set per queue options */ + /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET #define SPARC_ETH_SSET ETHTOOL_SSET -- cgit v1.2.3 From d5a3b1f691865be576c2bffa708549b8cdccda19 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 17 Feb 2016 19:58:58 -0800 Subject: bpf: introduce BPF_MAP_TYPE_STACK_TRACE add new map type to store stack traces and corresponding helper bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id @ctx: struct pt_regs* @map: pointer to stack_trace map @flags: bits 0-7 - numer of stack frames to skip bit 8 - collect user stack instead of kernel bit 9 - compare stacks by hash only bit 10 - if two different stacks hash into the same stackid discard old other bits - reserved Return: >= 0 stackid on success or negative error stackid is a 32-bit integer handle that can be further combined with other data (including other stackid) and used as a key into maps. Userspace will access stackmap using standard lookup/delete syscall commands to retrieve full stack trace for given stackid. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2ee0fde1bf96..d3e77da8e9e8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -83,6 +83,7 @@ enum bpf_map_type { BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_MAP_TYPE_PERCPU_HASH, BPF_MAP_TYPE_PERCPU_ARRAY, + BPF_MAP_TYPE_STACK_TRACE, }; enum bpf_prog_type { @@ -272,6 +273,20 @@ enum bpf_func_id { */ BPF_FUNC_perf_event_output, BPF_FUNC_skb_load_bytes, + + /** + * bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id + * @ctx: struct pt_regs* + * @map: pointer to stack_trace map + * @flags: bits 0-7 - numer of stack frames to skip + * bit 8 - collect user stack instead of kernel + * bit 9 - compare stacks by hash only + * bit 10 - if two different stacks hash into the same stackid + * discard old + * other bits - reserved + * Return: >= 0 stackid on success or negative error + */ + BPF_FUNC_get_stackid, __BPF_FUNC_MAX_ID, }; @@ -294,6 +309,12 @@ enum bpf_func_id { /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ #define BPF_F_TUNINFO_IPV6 (1ULL << 0) +/* BPF_FUNC_get_stackid flags. */ +#define BPF_F_SKIP_FIELD_MASK 0xffULL +#define BPF_F_USER_STACK (1ULL << 8) +#define BPF_F_FAST_STACK_CMP (1ULL << 9) +#define BPF_F_REUSE_STACKID (1ULL << 10) + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ -- cgit v1.2.3 From 7d672345ed295b1356a5d9f7111da1d1d7d65867 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 19 Feb 2016 23:05:23 +0100 Subject: bpf: add generic bpf_csum_diff helper For L4 checksums, we currently have bpf_l4_csum_replace() helper. It's currently limited to handle 2 and 4 byte changes in a header and feeds the from/to into inet_proto_csum_replace{2,4}() helpers of the kernel. When working with IPv6, for example, this makes it rather cumbersome to deal with, similarly when editing larger parts of a header. Instead, extend the API in a more generic way: For bpf_l4_csum_replace(), add a case for header field mask of 0 to change the checksum at a given offset through inet_proto_csum_replace_by_diff(), and provide a helper bpf_csum_diff() that can generically calculate a from/to diff for arbitrary amounts of data. This can be used in multiple ways: for the bpf_l4_csum_replace() only part, this even provides us with the option to insert precalculated diffs from user space f.e. from a map, or from bpf_csum_diff() during runtime. bpf_csum_diff() has a optional from/to stack buffer input, so we can calculate a diff by using a scratchbuffer for scenarios where we're inserting (from is NULL), removing (to is NULL) or diffing (from/to buffers don't need to be of equal size) data. Also, bpf_csum_diff() allows to feed a previous csum into csum_partial(), so the function can also be cascaded. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d3e77da8e9e8..48d0a6c54609 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -287,6 +287,17 @@ enum bpf_func_id { * Return: >= 0 stackid on success or negative error */ BPF_FUNC_get_stackid, + + /** + * bpf_csum_diff(from, from_size, to, to_size, seed) - calculate csum diff + * @from: raw from buffer + * @from_size: length of from buffer + * @to: raw to buffer + * @to_size: length of to buffer + * @seed: optional seed + * Return: csum result + */ + BPF_FUNC_csum_diff, __BPF_FUNC_MAX_ID, }; -- cgit v1.2.3 From 2f72959a9c1260ade234f353ccca91118151af66 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 19 Feb 2016 23:05:26 +0100 Subject: bpf: fix csum update in bpf_l4_csum_replace helper for udp When using this helper for updating UDP checksums, we need to extend this in order to write CSUM_MANGLED_0 for csum computations that result into 0 as sum. Reason we need this is because packets with a checksum could otherwise become incorrectly marked as a packet without a checksum. Likewise, if the user indicates BPF_F_MARK_MANGLED_0, then we should not turn packets without a checksum into ones with a checksum. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 48d0a6c54609..6496f98d3d68 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -313,6 +313,7 @@ enum bpf_func_id { /* BPF_FUNC_l4_csum_replace flags. */ #define BPF_F_PSEUDO_HDR (1ULL << 4) +#define BPF_F_MARK_MANGLED_0 (1ULL << 5) /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ #define BPF_F_INGRESS (1ULL << 0) -- cgit v1.2.3 From d4634e8dea13ccc969dd3f33dab3873cfdf3bc51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= Date: Tue, 19 Jan 2016 10:42:42 -0500 Subject: rfkill: Update userspace API documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a note to userspace on the effect of RFKILL_OP_CHANGE_ALL also updating the default state for hotplugged devices. Signed-off-by: João Paulo Rechi Vita [reword a bit] Signed-off-by: Johannes Berg --- include/uapi/linux/rfkill.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h index 058757f7a733..2e00dcebebd0 100644 --- a/include/uapi/linux/rfkill.h +++ b/include/uapi/linux/rfkill.h @@ -59,6 +59,8 @@ enum rfkill_type { * @RFKILL_OP_DEL: a device was removed * @RFKILL_OP_CHANGE: a device's state changed -- userspace changes one device * @RFKILL_OP_CHANGE_ALL: userspace changes all devices (of a type, or all) + * into a state, also updating the default state used for devices that + * are hot-plugged later. */ enum rfkill_operation { RFKILL_OP_ADD = 0, -- cgit v1.2.3 From 34d505193bd10668acf1caba02d2f66bddc23fea Mon Sep 17 00:00:00 2001 From: Lior David Date: Thu, 28 Jan 2016 10:58:25 +0200 Subject: cfg80211: basic support for PBSS network type PBSS (Personal Basic Service Set) is a new BSS type for DMG networks. It is similar to infrastructure BSS, having an AP-like entity called PCP (PBSS Control Point), but it has few differences. PBSS support is mandatory for 11ad devices. Add support for PBSS by introducing a new PBSS flag attribute. The PBSS flag is used in the START_AP command to request starting a PCP instead of an AP, and in the CONNECT command to request connecting to a PCP instead of an AP. Signed-off-by: Lior David Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5b7b5ebe7ca8..7758969a2a8e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1789,6 +1789,10 @@ enum nl80211_commands { * thus it must not specify the number of iterations, only the interval * between scans. The scan plans are executed sequentially. * Each scan plan is a nested attribute of &enum nl80211_sched_scan_plan. + * @NL80211_ATTR_PBSS: flag attribute. If set it means operate + * in a PBSS. Specified in %NL80211_CMD_CONNECT to request + * connecting to a PCP, and in %NL80211_CMD_START_AP to start + * a PCP instead of AP. Relevant for DMG networks only. * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -2164,6 +2168,8 @@ enum nl80211_attrs { NL80211_ATTR_MAX_SCAN_PLAN_ITERATIONS, NL80211_ATTR_SCHED_SCAN_PLANS, + NL80211_ATTR_PBSS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 0c9ca11b1ae8eb16c1b6bbae91991392d2321372 Mon Sep 17 00:00:00 2001 From: Beni Lev Date: Wed, 17 Feb 2016 20:30:00 +0200 Subject: cfg80211: Add global RRM capability Today, the supplicant will add the RRM capabilities Information Element in the association request only if Quiet period is supported (NL80211_FEATURE_QUIET). Quiet is one of many RRM features, and there are other RRM features that are not related to Quiet (e.g. neighbor report). Therefore, requiring Quiet to enable RRM is too restrictive. Some of the features, like neighbor report, can be supported by user space without any help from the kernel. Hence adding the RRM capabilities IE to association request should be the sole user space's decision. Removing the RRM dependency on Quiet in the driver solves this problem, but using an old driver with a user space tool that would not require Quiet feature would be problematic: the user space would add NL80211_ATTR_USE_RRM in the association request even if the kernel doesn't advertize NL80211_FEATURE_QUIET and the association would be denied by the kernel. This solution adds a global RRM capability, that tells user space that it can request RRM capabilities IE publishment without any specific feature support in the kernel. Signed-off-by: Beni Lev Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 7758969a2a8e..5a30a7563633 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1727,6 +1727,8 @@ enum nl80211_commands { * underlying device supports these minimal RRM features: * %NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES, * %NL80211_FEATURE_QUIET, + * Or, if global RRM is supported, see: + * %NL80211_EXT_FEATURE_RRM * If this flag is used, driver must add the Power Capabilities IE to the * association request. In addition, it must also set the RRM capability * flag in the association request's Capability Info field. @@ -4402,12 +4404,18 @@ enum nl80211_feature_flags { /** * enum nl80211_ext_feature_index - bit index of extended features. * @NL80211_EXT_FEATURE_VHT_IBSS: This driver supports IBSS with VHT datarates. + * @NL80211_EXT_FEATURE_RRM: This driver supports RRM. When featured, user can + * can request to use RRM (see %NL80211_ATTR_USE_RRM) with + * %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests, which will set + * the ASSOC_REQ_USE_RRM flag in the association request even if + * NL80211_FEATURE_QUIET is not advertized. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_VHT_IBSS, + NL80211_EXT_FEATURE_RRM, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From f1705ec197e705b79ea40fe7a2cc5acfa1d3bfac Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Feb 2016 09:25:37 -0800 Subject: net: ipv6: Make address flushing on ifdown optional Currently, all ipv6 addresses are flushed when the interface is configured down, including global, static addresses: $ ip -6 addr show dev eth1 3: eth1: mtu 1500 state UP qlen 1000 inet6 2100:1::2/120 scope global valid_lft forever preferred_lft forever inet6 fe80::e0:f9ff:fe79:34bd/64 scope link valid_lft forever preferred_lft forever $ ip link set dev eth1 down $ ip -6 addr show dev eth1 << nothing; all addresses have been flushed>> Add a new sysctl to make this behavior optional. The new setting defaults to flush all addresses to maintain backwards compatibility. When the set global addresses with no expire times are not flushed on an admin down. The sysctl is per-interface or system-wide for all interfaces $ sysctl -w net.ipv6.conf.eth1.keep_addr_on_down=1 or $ sysctl -w net.ipv6.conf.all.keep_addr_on_down=1 Will keep addresses on eth1 on an admin down. $ ip -6 addr show dev eth1 3: eth1: mtu 1500 state UP qlen 1000 inet6 2100:1::2/120 scope global valid_lft forever preferred_lft forever inet6 fe80::e0:f9ff:fe79:34bd/64 scope link valid_lft forever preferred_lft forever $ ip link set dev eth1 down $ ip -6 addr show dev eth1 3: eth1: mtu 1500 state DOWN qlen 1000 inet6 2100:1::2/120 scope global tentative valid_lft forever preferred_lft forever inet6 fe80::e0:f9ff:fe79:34bd/64 scope link tentative valid_lft forever preferred_lft forever Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index ec117b65d5a5..395876060f50 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -176,6 +176,7 @@ enum { DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, DEVCONF_DROP_UNSOLICITED_NA, + DEVCONF_KEEP_ADDR_ON_DOWN, DEVCONF_MAX }; -- cgit v1.2.3 From 3f1ac7a700d039c61d8d8b99f28d605d489a60cf Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Wed, 24 Feb 2016 10:57:59 -0800 Subject: net: ethtool: add new ETHTOOL_xLINKSETTINGS API This patch defines a new ETHTOOL_GLINKSETTINGS/SLINKSETTINGS API, handled by the new get_link_ksettings/set_link_ksettings callbacks. This API provides support for most legacy ethtool_cmd fields, adds support for larger link mode masks (up to 4064 bits, variable length), and removes ethtool_cmd deprecated fields (transceiver/maxrxpkt/maxtxpkt). This API is deprecating the legacy ETHTOOL_GSET/SSET API and provides the following backward compatibility properties: - legacy ethtool with legacy drivers: no change, still using the get_settings/set_settings callbacks. - legacy ethtool with new get/set_link_ksettings drivers: the new driver callbacks are used, data internally converted to legacy ethtool_cmd. ETHTOOL_GSET will return only the 1st 32b of each link mode mask. ETHTOOL_SSET will fail if user tries to set the ethtool_cmd deprecated fields to non-0 (transceiver/maxrxpkt/maxtxpkt). A kernel warning is logged if driver sets higher bits. - future ethtool with legacy drivers: no change, still using the get_settings/set_settings callbacks, internally converted to new data structure. Deprecated fields (transceiver/maxrxpkt/maxtxpkt) will be ignored and seen as 0 from user space. Note that that "future" ethtool tool will not allow changes to these deprecated fields. - future ethtool with new drivers: direct call to the new callbacks. By "future" ethtool, what is meant is: - query: first try ETHTOOL_GLINKSETTINGS, and revert to ETHTOOL_GSET if fails - set: query first and remember which of ETHTOOL_GLINKSETTINGS or ETHTOOL_GSET was successful + if ETHTOOL_GLINKSETTINGS was successful, then change config with ETHTOOL_SLINKSETTINGS. A failure there is final (do not try ETHTOOL_SSET). + otherwise ETHTOOL_GSET was successful, change config with ETHTOOL_SSET. A failure there is final (do not try ETHTOOL_SLINKSETTINGS). The interaction user/kernel via the new API requires a small ETHTOOL_GLINKSETTINGS handshake first to agree on the length of the link mode bitmaps. If kernel doesn't agree with user, it returns the bitmap length it is expecting from user as a negative length (and cmd field is 0). When kernel and user agree, kernel returns valid info in all fields (ie. link mode length > 0 and cmd is ETHTOOL_GLINKSETTINGS). Data structure crossing user/kernel boundary is 32/64-bit agnostic. Converted internally to a legal kernel bitmap. The internal __ethtool_get_settings kernel helper will gradually be replaced by __ethtool_get_link_ksettings by the time the first "link_settings" drivers start to appear. So this patch doesn't change it, it will be removed before it needs to be changed. Signed-off-by: David Decotigny Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 322 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 256 insertions(+), 66 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index f15ae02621a1..37fd6dc33de4 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -21,7 +21,8 @@ */ /** - * struct ethtool_cmd - link control and status + * struct ethtool_cmd - DEPRECATED, link control and status + * This structure is DEPRECATED, please use struct ethtool_link_settings. * @cmd: Command number = %ETHTOOL_GSET or %ETHTOOL_SSET * @supported: Bitmask of %SUPPORTED_* flags for the link modes, * physical connectors and other link features for which the @@ -1219,8 +1220,12 @@ struct ethtool_per_queue_op { }; /* CMDs currently supported */ -#define ETHTOOL_GSET 0x00000001 /* Get settings. */ -#define ETHTOOL_SSET 0x00000002 /* Set settings. */ +#define ETHTOOL_GSET 0x00000001 /* DEPRECATED, Get settings. + * Please use ETHTOOL_GLINKSETTINGS + */ +#define ETHTOOL_SSET 0x00000002 /* DEPRECATED, Set settings. + * Please use ETHTOOL_SLINKSETTINGS + */ #define ETHTOOL_GDRVINFO 0x00000003 /* Get driver info. */ #define ETHTOOL_GREGS 0x00000004 /* Get NIC registers. */ #define ETHTOOL_GWOL 0x00000005 /* Get wake-on-lan options. */ @@ -1302,73 +1307,139 @@ struct ethtool_per_queue_op { #define ETHTOOL_PERQUEUE 0x0000004b /* Set per queue options */ +#define ETHTOOL_GLINKSETTINGS 0x0000004c /* Get ethtool_link_settings */ +#define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */ + + /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET #define SPARC_ETH_SSET ETHTOOL_SSET -#define SUPPORTED_10baseT_Half (1 << 0) -#define SUPPORTED_10baseT_Full (1 << 1) -#define SUPPORTED_100baseT_Half (1 << 2) -#define SUPPORTED_100baseT_Full (1 << 3) -#define SUPPORTED_1000baseT_Half (1 << 4) -#define SUPPORTED_1000baseT_Full (1 << 5) -#define SUPPORTED_Autoneg (1 << 6) -#define SUPPORTED_TP (1 << 7) -#define SUPPORTED_AUI (1 << 8) -#define SUPPORTED_MII (1 << 9) -#define SUPPORTED_FIBRE (1 << 10) -#define SUPPORTED_BNC (1 << 11) -#define SUPPORTED_10000baseT_Full (1 << 12) -#define SUPPORTED_Pause (1 << 13) -#define SUPPORTED_Asym_Pause (1 << 14) -#define SUPPORTED_2500baseX_Full (1 << 15) -#define SUPPORTED_Backplane (1 << 16) -#define SUPPORTED_1000baseKX_Full (1 << 17) -#define SUPPORTED_10000baseKX4_Full (1 << 18) -#define SUPPORTED_10000baseKR_Full (1 << 19) -#define SUPPORTED_10000baseR_FEC (1 << 20) -#define SUPPORTED_20000baseMLD2_Full (1 << 21) -#define SUPPORTED_20000baseKR2_Full (1 << 22) -#define SUPPORTED_40000baseKR4_Full (1 << 23) -#define SUPPORTED_40000baseCR4_Full (1 << 24) -#define SUPPORTED_40000baseSR4_Full (1 << 25) -#define SUPPORTED_40000baseLR4_Full (1 << 26) -#define SUPPORTED_56000baseKR4_Full (1 << 27) -#define SUPPORTED_56000baseCR4_Full (1 << 28) -#define SUPPORTED_56000baseSR4_Full (1 << 29) -#define SUPPORTED_56000baseLR4_Full (1 << 30) - -#define ADVERTISED_10baseT_Half (1 << 0) -#define ADVERTISED_10baseT_Full (1 << 1) -#define ADVERTISED_100baseT_Half (1 << 2) -#define ADVERTISED_100baseT_Full (1 << 3) -#define ADVERTISED_1000baseT_Half (1 << 4) -#define ADVERTISED_1000baseT_Full (1 << 5) -#define ADVERTISED_Autoneg (1 << 6) -#define ADVERTISED_TP (1 << 7) -#define ADVERTISED_AUI (1 << 8) -#define ADVERTISED_MII (1 << 9) -#define ADVERTISED_FIBRE (1 << 10) -#define ADVERTISED_BNC (1 << 11) -#define ADVERTISED_10000baseT_Full (1 << 12) -#define ADVERTISED_Pause (1 << 13) -#define ADVERTISED_Asym_Pause (1 << 14) -#define ADVERTISED_2500baseX_Full (1 << 15) -#define ADVERTISED_Backplane (1 << 16) -#define ADVERTISED_1000baseKX_Full (1 << 17) -#define ADVERTISED_10000baseKX4_Full (1 << 18) -#define ADVERTISED_10000baseKR_Full (1 << 19) -#define ADVERTISED_10000baseR_FEC (1 << 20) -#define ADVERTISED_20000baseMLD2_Full (1 << 21) -#define ADVERTISED_20000baseKR2_Full (1 << 22) -#define ADVERTISED_40000baseKR4_Full (1 << 23) -#define ADVERTISED_40000baseCR4_Full (1 << 24) -#define ADVERTISED_40000baseSR4_Full (1 << 25) -#define ADVERTISED_40000baseLR4_Full (1 << 26) -#define ADVERTISED_56000baseKR4_Full (1 << 27) -#define ADVERTISED_56000baseCR4_Full (1 << 28) -#define ADVERTISED_56000baseSR4_Full (1 << 29) -#define ADVERTISED_56000baseLR4_Full (1 << 30) +/* Link mode bit indices */ +enum ethtool_link_mode_bit_indices { + ETHTOOL_LINK_MODE_10baseT_Half_BIT = 0, + ETHTOOL_LINK_MODE_10baseT_Full_BIT = 1, + ETHTOOL_LINK_MODE_100baseT_Half_BIT = 2, + ETHTOOL_LINK_MODE_100baseT_Full_BIT = 3, + ETHTOOL_LINK_MODE_1000baseT_Half_BIT = 4, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT = 5, + ETHTOOL_LINK_MODE_Autoneg_BIT = 6, + ETHTOOL_LINK_MODE_TP_BIT = 7, + ETHTOOL_LINK_MODE_AUI_BIT = 8, + ETHTOOL_LINK_MODE_MII_BIT = 9, + ETHTOOL_LINK_MODE_FIBRE_BIT = 10, + ETHTOOL_LINK_MODE_BNC_BIT = 11, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT = 12, + ETHTOOL_LINK_MODE_Pause_BIT = 13, + ETHTOOL_LINK_MODE_Asym_Pause_BIT = 14, + ETHTOOL_LINK_MODE_2500baseX_Full_BIT = 15, + ETHTOOL_LINK_MODE_Backplane_BIT = 16, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT = 17, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT = 18, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT = 19, + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT = 20, + ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT = 21, + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT = 22, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT = 23, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT = 24, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT = 25, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT = 26, + ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT = 27, + ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT = 28, + ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT = 29, + ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT = 30, + + /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit + * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* + * macro for bits > 31. The only way to use indices > 31 is to + * use the new ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. + */ + + __ETHTOOL_LINK_MODE_LAST + = ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT, +}; + +#define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ + (1UL << (ETHTOOL_LINK_MODE_ ## base_name ## _BIT)) + +/* DEPRECATED macros. Please migrate to + * ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. Please do NOT + * define any new SUPPORTED_* macro for bits > 31. + */ +#define SUPPORTED_10baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Half) +#define SUPPORTED_10baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Full) +#define SUPPORTED_100baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Half) +#define SUPPORTED_100baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Full) +#define SUPPORTED_1000baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Half) +#define SUPPORTED_1000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Full) +#define SUPPORTED_Autoneg __ETHTOOL_LINK_MODE_LEGACY_MASK(Autoneg) +#define SUPPORTED_TP __ETHTOOL_LINK_MODE_LEGACY_MASK(TP) +#define SUPPORTED_AUI __ETHTOOL_LINK_MODE_LEGACY_MASK(AUI) +#define SUPPORTED_MII __ETHTOOL_LINK_MODE_LEGACY_MASK(MII) +#define SUPPORTED_FIBRE __ETHTOOL_LINK_MODE_LEGACY_MASK(FIBRE) +#define SUPPORTED_BNC __ETHTOOL_LINK_MODE_LEGACY_MASK(BNC) +#define SUPPORTED_10000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseT_Full) +#define SUPPORTED_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Pause) +#define SUPPORTED_Asym_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Asym_Pause) +#define SUPPORTED_2500baseX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(2500baseX_Full) +#define SUPPORTED_Backplane __ETHTOOL_LINK_MODE_LEGACY_MASK(Backplane) +#define SUPPORTED_1000baseKX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseKX_Full) +#define SUPPORTED_10000baseKX4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKX4_Full) +#define SUPPORTED_10000baseKR_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKR_Full) +#define SUPPORTED_10000baseR_FEC __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseR_FEC) +#define SUPPORTED_20000baseMLD2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseMLD2_Full) +#define SUPPORTED_20000baseKR2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseKR2_Full) +#define SUPPORTED_40000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseKR4_Full) +#define SUPPORTED_40000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseCR4_Full) +#define SUPPORTED_40000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseSR4_Full) +#define SUPPORTED_40000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseLR4_Full) +#define SUPPORTED_56000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseKR4_Full) +#define SUPPORTED_56000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseCR4_Full) +#define SUPPORTED_56000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseSR4_Full) +#define SUPPORTED_56000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseLR4_Full) +/* Please do not define any new SUPPORTED_* macro for bits > 31, see + * notice above. + */ + +/* + * DEPRECATED macros. Please migrate to + * ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. Please do NOT + * define any new ADERTISE_* macro for bits > 31. + */ +#define ADVERTISED_10baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Half) +#define ADVERTISED_10baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Full) +#define ADVERTISED_100baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Half) +#define ADVERTISED_100baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Full) +#define ADVERTISED_1000baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Half) +#define ADVERTISED_1000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Full) +#define ADVERTISED_Autoneg __ETHTOOL_LINK_MODE_LEGACY_MASK(Autoneg) +#define ADVERTISED_TP __ETHTOOL_LINK_MODE_LEGACY_MASK(TP) +#define ADVERTISED_AUI __ETHTOOL_LINK_MODE_LEGACY_MASK(AUI) +#define ADVERTISED_MII __ETHTOOL_LINK_MODE_LEGACY_MASK(MII) +#define ADVERTISED_FIBRE __ETHTOOL_LINK_MODE_LEGACY_MASK(FIBRE) +#define ADVERTISED_BNC __ETHTOOL_LINK_MODE_LEGACY_MASK(BNC) +#define ADVERTISED_10000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseT_Full) +#define ADVERTISED_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Pause) +#define ADVERTISED_Asym_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Asym_Pause) +#define ADVERTISED_2500baseX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(2500baseX_Full) +#define ADVERTISED_Backplane __ETHTOOL_LINK_MODE_LEGACY_MASK(Backplane) +#define ADVERTISED_1000baseKX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseKX_Full) +#define ADVERTISED_10000baseKX4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKX4_Full) +#define ADVERTISED_10000baseKR_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKR_Full) +#define ADVERTISED_10000baseR_FEC __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseR_FEC) +#define ADVERTISED_20000baseMLD2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseMLD2_Full) +#define ADVERTISED_20000baseKR2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseKR2_Full) +#define ADVERTISED_40000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseKR4_Full) +#define ADVERTISED_40000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseCR4_Full) +#define ADVERTISED_40000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseSR4_Full) +#define ADVERTISED_40000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseLR4_Full) +#define ADVERTISED_56000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseKR4_Full) +#define ADVERTISED_56000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseCR4_Full) +#define ADVERTISED_56000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseSR4_Full) +#define ADVERTISED_56000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseLR4_Full) +/* Please do not define any new ADVERTISED_* macro for bits > 31, see + * notice above. + */ /* The following are all involved in forcing a particular link * mode for the device for setting things. When getting the @@ -1533,4 +1604,123 @@ enum ethtool_reset_flags { }; #define ETH_RESET_SHARED_SHIFT 16 + +/** + * struct ethtool_link_settings - link control and status + * + * IMPORTANT, Backward compatibility notice: When implementing new + * user-space tools, please first try %ETHTOOL_GLINKSETTINGS, and + * if it succeeds use %ETHTOOL_SLINKSETTINGS to change link + * settings; do not use %ETHTOOL_SSET if %ETHTOOL_GLINKSETTINGS + * succeeded: stick to %ETHTOOL_GLINKSETTINGS/%SLINKSETTINGS in + * that case. Conversely, if %ETHTOOL_GLINKSETTINGS fails, use + * %ETHTOOL_GSET to query and %ETHTOOL_SSET to change link + * settings; do not use %ETHTOOL_SLINKSETTINGS if + * %ETHTOOL_GLINKSETTINGS failed: stick to + * %ETHTOOL_GSET/%ETHTOOL_SSET in that case. + * + * @cmd: Command number = %ETHTOOL_GLINKSETTINGS or %ETHTOOL_SLINKSETTINGS + * @speed: Link speed (Mbps) + * @duplex: Duplex mode; one of %DUPLEX_* + * @port: Physical connector type; one of %PORT_* + * @phy_address: MDIO address of PHY (transceiver); 0 or 255 if not + * applicable. For clause 45 PHYs this is the PRTAD. + * @autoneg: Enable/disable autonegotiation and auto-detection; + * either %AUTONEG_DISABLE or %AUTONEG_ENABLE + * @mdio_support: Bitmask of %ETH_MDIO_SUPPORTS_* flags for the MDIO + * protocols supported by the interface; 0 if unknown. + * Read-only. + * @eth_tp_mdix: Ethernet twisted-pair MDI(-X) status; one of + * %ETH_TP_MDI_*. If the status is unknown or not applicable, the + * value will be %ETH_TP_MDI_INVALID. Read-only. + * @eth_tp_mdix_ctrl: Ethernet twisted pair MDI(-X) control; one of + * %ETH_TP_MDI_*. If MDI(-X) control is not implemented, reads + * yield %ETH_TP_MDI_INVALID and writes may be ignored or rejected. + * When written successfully, the link should be renegotiated if + * necessary. + * @link_mode_masks_nwords: Number of 32-bit words for each of the + * supported, advertising, lp_advertising link mode bitmaps. For + * %ETHTOOL_GLINKSETTINGS: on entry, number of words passed by user + * (>= 0); on return, if handshake in progress, negative if + * request size unsupported by kernel: absolute value indicates + * kernel recommended size and cmd field is 0, as well as all the + * other fields; otherwise (handshake completed), strictly + * positive to indicate size used by kernel and cmd field is + * %ETHTOOL_GLINKSETTINGS, all other fields populated by driver. For + * %ETHTOOL_SLINKSETTINGS: must be valid on entry, ie. a positive + * value returned previously by %ETHTOOL_GLINKSETTINGS, otherwise + * refused. For drivers: ignore this field (use kernel's + * __ETHTOOL_LINK_MODE_MASK_NBITS instead), any change to it will + * be overwritten by kernel. + * @supported: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features for which the interface + * supports autonegotiation or auto-detection. Read-only. + * @advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features that are advertised through + * autonegotiation or enabled for auto-detection. + * @lp_advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, and other + * link features that the link partner advertised through + * autonegotiation; 0 if unknown or not applicable. Read-only. + * + * If autonegotiation is disabled, the speed and @duplex represent the + * fixed link mode and are writable if the driver supports multiple + * link modes. If it is enabled then they are read-only; if the link + * is up they represent the negotiated link mode; if the link is down, + * the speed is 0, %SPEED_UNKNOWN or the highest enabled speed and + * @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode. + * + * Some hardware interfaces may have multiple PHYs and/or physical + * connectors fitted or do not allow the driver to detect which are + * fitted. For these interfaces @port and/or @phy_address may be + * writable, possibly dependent on @autoneg being %AUTONEG_DISABLE. + * Otherwise, attempts to write different values may be ignored or + * rejected. + * + * Deprecated %ethtool_cmd fields transceiver, maxtxpkt and maxrxpkt + * are not available in %ethtool_link_settings. Until all drivers are + * converted to ignore them or to the new %ethtool_link_settings API, + * for both queries and changes, users should always try + * %ETHTOOL_GLINKSETTINGS first, and if it fails with -ENOTSUPP stick + * only to %ETHTOOL_GSET and %ETHTOOL_SSET consistently. If it + * succeeds, then users should stick to %ETHTOOL_GLINKSETTINGS and + * %ETHTOOL_SLINKSETTINGS (which would support drivers implementing + * either %ethtool_cmd or %ethtool_link_settings). + * + * Users should assume that all fields not marked read-only are + * writable and subject to validation by the driver. They should use + * %ETHTOOL_GLINKSETTINGS to get the current values before making specific + * changes and then applying them with %ETHTOOL_SLINKSETTINGS. + * + * Drivers that implement %get_link_ksettings and/or + * %set_link_ksettings should ignore the @cmd + * and @link_mode_masks_nwords fields (any change to them overwritten + * by kernel), and rely only on kernel's internal + * %__ETHTOOL_LINK_MODE_MASK_NBITS and + * %ethtool_link_mode_mask_t. Drivers that implement + * %set_link_ksettings() should validate all fields other than @cmd + * and @link_mode_masks_nwords that are not described as read-only or + * deprecated, and must ignore all fields described as read-only. + */ +struct ethtool_link_settings { + __u32 cmd; + __u32 speed; + __u8 duplex; + __u8 port; + __u8 phy_address; + __u8 autoneg; + __u8 mdio_support; + __u8 eth_tp_mdix; + __u8 eth_tp_mdix_ctrl; + __s8 link_mode_masks_nwords; + __u32 reserved[8]; + __u32 link_mode_masks[0]; + /* layout of link_mode_masks fields: + * __u32 map_supported[link_mode_masks_nwords]; + * __u32 map_advertising[link_mode_masks_nwords]; + * __u32 map_lp_advertising[link_mode_masks_nwords]; + */ +}; #endif /* _UAPI_LINUX_ETHTOOL_H */ -- cgit v1.2.3 From b07edbe1cf3dae9ba81f24888e2f2a9dbe778918 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 16 Feb 2016 17:24:08 +0100 Subject: netfilter: meta: add PRANDOM support Can be used to randomly match packets e.g. for statistic traffic sampling. See commit 3ad0040573b0c00f8848 ("bpf: split state from prandom_u32() and consolidate {c, e}BPF prngs") for more info why this doesn't use prandom_u32 directly. Unlike bpf nft_meta can be built as a module, so add an EXPORT_SYMBOL for prandom_seed_full_state too. Cc: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index be41ffc128b8..b19be0a098c0 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -681,6 +681,7 @@ enum nft_exthdr_attributes { * @NFT_META_IIFGROUP: packet input interface group * @NFT_META_OIFGROUP: packet output interface group * @NFT_META_CGROUP: socket control group (skb->sk->sk_classid) + * @NFT_META_PRANDOM: a 32bit pseudo-random number */ enum nft_meta_keys { NFT_META_LEN, @@ -707,6 +708,7 @@ enum nft_meta_keys { NFT_META_IIFGROUP, NFT_META_OIFGROUP, NFT_META_CGROUP, + NFT_META_PRANDOM, }; /** -- cgit v1.2.3 From 9e8ce79cd711d4dfe09d8bba6822cd9bb7db96bd Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 26 Feb 2016 07:54:39 -0800 Subject: net: sched: cls_u32 add bit to specify software only rules In the initial implementation the only way to stop a rule from being inserted into the hardware table was via the device feature flag. However this doesn't work well when working on an end host system where packets are expect to hit both the hardware and software datapaths. For example we can imagine a rule that will match an IP address and increment a field. If we install this rule in both hardware and software we may increment the field twice. To date we have only added support for the drop action so we have been able to ignore these cases. But as we extend the action support we will hit this example plus more such cases. Arguably these are not even corner cases in many working systems these cases will be common. To avoid forcing the driver to always abort (i.e. the above example) this patch adds a flag to add a rule in software only. A careful user can use this flag to build software and hardware datapaths that work together. One example we have found particularly useful is to use hardware resources to set the skb->mark on the skb when the match may be expensive to run in software but a mark lookup in a hash table is cheap. The idea here is hardware can do in one lookup what the u32 classifier may need to traverse multiple lists and hash tables to compute. The flag is only passed down on inserts. On deletion to avoid stale references in hardware we always try to remove a rule if it exists. The flags field is part of the classifier specific options. Although it is tempting to lift this into the generic structure doing this proves difficult do to how the tc netlink attributes are implemented along with how the dump/change routines are called. There is also precedence for putting seemingly generic pieces in the specific classifier options such as TCA_U32_POLICE, TCA_U32_ACT, etc. So although not ideal I've left FLAGS in the u32 options as well as it simplifies the code greatly and user space has already learned how to manage these bits ala 'tc' tool. Another thing if trying to update a rule we require the flags to be unchanged. This is to force user space, software u32 and the hardware u32 to keep in sync. Thanks to Simon Horman for catching this case. Signed-off-by: John Fastabend Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 439873775d49..9874f5680926 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -172,6 +172,7 @@ enum { TCA_U32_INDEV, TCA_U32_PCNT, TCA_U32_MARK, + TCA_U32_FLAGS, __TCA_U32_MAX }; -- cgit v1.2.3 From bfcd3a46617209454cfc0947ab093e37fd1e84ef Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 Feb 2016 17:32:23 +0100 Subject: Introduce devlink infrastructure Introduce devlink infrastructure for drivers to register and expose to userspace via generic Netlink interface. There are two basic objects defined: devlink - one instance for every "parent device", for example switch ASIC devlink port - one instance for every physical port of the device. This initial portion implements basic get/dump of objects to userspace. Also, port splitter and port type setting is implemented. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 72 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 include/uapi/linux/devlink.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h new file mode 100644 index 000000000000..c9fee5781eb1 --- /dev/null +++ b/include/uapi/linux/devlink.h @@ -0,0 +1,72 @@ +/* + * include/uapi/linux/devlink.h - Network physical device Netlink interface + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_DEVLINK_H_ +#define _UAPI_LINUX_DEVLINK_H_ + +#define DEVLINK_GENL_NAME "devlink" +#define DEVLINK_GENL_VERSION 0x1 +#define DEVLINK_GENL_MCGRP_CONFIG_NAME "config" + +enum devlink_command { + /* don't change the order or add anything between, this is ABI! */ + DEVLINK_CMD_UNSPEC, + + DEVLINK_CMD_GET, /* can dump */ + DEVLINK_CMD_SET, + DEVLINK_CMD_NEW, + DEVLINK_CMD_DEL, + + DEVLINK_CMD_PORT_GET, /* can dump */ + DEVLINK_CMD_PORT_SET, + DEVLINK_CMD_PORT_NEW, + DEVLINK_CMD_PORT_DEL, + + DEVLINK_CMD_PORT_SPLIT, + DEVLINK_CMD_PORT_UNSPLIT, + + /* add new commands above here */ + + __DEVLINK_CMD_MAX, + DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 +}; + +enum devlink_port_type { + DEVLINK_PORT_TYPE_NOTSET, + DEVLINK_PORT_TYPE_AUTO, + DEVLINK_PORT_TYPE_ETH, + DEVLINK_PORT_TYPE_IB, +}; + +enum devlink_attr { + /* don't change the order or add anything between, this is ABI! */ + DEVLINK_ATTR_UNSPEC, + + /* bus name + dev name together are a handle for devlink entity */ + DEVLINK_ATTR_BUS_NAME, /* string */ + DEVLINK_ATTR_DEV_NAME, /* string */ + + DEVLINK_ATTR_PORT_INDEX, /* u32 */ + DEVLINK_ATTR_PORT_TYPE, /* u16 */ + DEVLINK_ATTR_PORT_DESIRED_TYPE, /* u16 */ + DEVLINK_ATTR_PORT_NETDEV_IFINDEX, /* u32 */ + DEVLINK_ATTR_PORT_NETDEV_NAME, /* string */ + DEVLINK_ATTR_PORT_IBDEV_NAME, /* string */ + DEVLINK_ATTR_PORT_SPLIT_COUNT, /* u32 */ + DEVLINK_ATTR_PORT_SPLIT_GROUP, /* u32 */ + + /* add new attributes above here, update the policy in devlink.c */ + + __DEVLINK_ATTR_MAX, + DEVLINK_ATTR_MAX = __DEVLINK_ATTR_MAX - 1 +}; + +#endif /* _UAPI_LINUX_DEVLINK_H_ */ -- cgit v1.2.3 From 7f0aec7a668419bdbff12de6e8016544f874e708 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 26 Feb 2016 21:20:01 +0100 Subject: bridge: mcast: use names for the different multicast_router types Using raw values makes it difficult to extend and also understand the code, give them names and do explicit per-option manipulation in br_multicast_set_port_router. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 0890b217580d..e47f3bc7f323 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -177,6 +177,13 @@ enum { }; #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1) +/* multicast router types */ +enum { + MDB_RTR_TYPE_DISABLED, + MDB_RTR_TYPE_TEMP_QUERY, + MDB_RTR_TYPE_PERM, +}; + enum { MDBA_ROUTER_UNSPEC, MDBA_ROUTER_PORT, -- cgit v1.2.3 From a55d8246abcc910346771175b521ee2bce5a69b3 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 26 Feb 2016 21:20:03 +0100 Subject: bridge: mcast: add support for temporary port router Add support for a temporary router port which doesn't depend only on the incoming query. It can be refreshed if set to the same value, which is a no-op for the rest. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index e47f3bc7f323..74ee03a47e79 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -182,6 +182,7 @@ enum { MDB_RTR_TYPE_DISABLED, MDB_RTR_TYPE_TEMP_QUERY, MDB_RTR_TYPE_PERM, + MDB_RTR_TYPE_TEMP }; enum { -- cgit v1.2.3 From 59f78f9f6c2e80dcf0f520be85b660f856217b79 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 26 Feb 2016 21:20:04 +0100 Subject: bridge: mcast: add support for more router port information dumping Allow for more multicast router port information to be dumped such as timer and type attributes. For that that purpose we need to extend the MDBA_ROUTER_PORT attribute similar to how it was done for the mdb entries recently. The new format is thus: [MDBA_ROUTER_PORT] = { <- nested attribute u32 ifindex <- router port ifindex for user-space compatibility [MDBA_ROUTER_PATTR attributes] } This way it remains compatible with older users (they'll simply retrieve the u32 in the beginning) and new users can parse the remaining attributes. It would also allow to add future extensions to the router port without breaking compatibility. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 74ee03a47e79..0536eefff9bf 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -144,7 +144,10 @@ struct bridge_vlan_info { * } * } * [MDBA_ROUTER] = { - * [MDBA_ROUTER_PORT] + * [MDBA_ROUTER_PORT] = { + * u32 ifindex + * [MDBA_ROUTER_PATTR attributes] + * } * } */ enum { @@ -192,6 +195,15 @@ enum { }; #define MDBA_ROUTER_MAX (__MDBA_ROUTER_MAX - 1) +/* router port attributes */ +enum { + MDBA_ROUTER_PATTR_UNSPEC, + MDBA_ROUTER_PATTR_TIMER, + MDBA_ROUTER_PATTR_TYPE, + __MDBA_ROUTER_PATTR_MAX +}; +#define MDBA_ROUTER_PATTR_MAX (__MDBA_ROUTER_PATTR_MAX - 1) + struct br_port_msg { __u8 family; __u32 ifindex; -- cgit v1.2.3 From ef6980b6becb1afd9d82a4f043749a10ae81bf14 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 27 Feb 2016 08:08:54 -0500 Subject: introduce IFE action This action allows for a sending side to encapsulate arbitrary metadata which is decapsulated by the receiving end. The sender runs in encoding mode and the receiver in decode mode. Both sender and receiver must specify the same ethertype. At some point we hope to have a registered ethertype and we'll then provide a default so the user doesnt have to specify it. For now we enforce the user specify it. Lets show example usage where we encode icmp from a sender towards a receiver with an skbmark of 17; both sender and receiver use ethertype of 0xdead to interop. YYYY: Lets start with Receiver-side policy config: xxx: add an ingress qdisc sudo tc qdisc add dev $ETH ingress xxx: any packets with ethertype 0xdead will be subjected to ife decoding xxx: we then restart the classification so we can match on icmp at prio 3 sudo $TC filter add dev $ETH parent ffff: prio 2 protocol 0xdead \ u32 match u32 0 0 flowid 1:1 \ action ife decode reclassify xxx: on restarting the classification from above if it was an icmp xxx: packet, then match it here and continue to the next rule at prio 4 xxx: which will match based on skb mark of 17 sudo tc filter add dev $ETH parent ffff: prio 3 protocol ip \ u32 match ip protocol 1 0xff flowid 1:1 \ action continue xxx: match on skbmark of 0x11 (decimal 17) and accept sudo tc filter add dev $ETH parent ffff: prio 4 protocol ip \ handle 0x11 fw flowid 1:1 \ action ok xxx: Lets show the decoding policy sudo tc -s filter ls dev $ETH parent ffff: protocol 0xdead xxx: filter pref 2 u32 filter pref 2 u32 fh 800: ht divisor 1 filter pref 2 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 (rule hit 0 success 0) match 00000000/00000000 at 0 (success 0 ) action order 1: ife decode action reclassify index 1 ref 1 bind 1 installed 14 sec used 14 sec type: 0x0 Metadata: allow mark allow hash allow prio allow qmap Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 xxx: Observe that above lists all metadatum it can decode. Typically these submodules will already be compiled into a monolithic kernel or loaded as modules YYYY: Lets show the sender side now .. xxx: Add an egress qdisc on the sender netdev sudo tc qdisc add dev $ETH root handle 1: prio xxx: xxx: Match all icmp packets to 192.168.122.237/24, then xxx: tag the packet with skb mark of decimal 17, then xxx: Encode it with: xxx: ethertype 0xdead xxx: add skb->mark to whitelist of metadatum to send xxx: rewrite target dst MAC address to 02:15:15:15:15:15 xxx: sudo $TC filter add dev $ETH parent 1: protocol ip prio 10 u32 \ match ip dst 192.168.122.237/24 \ match ip protocol 1 0xff \ flowid 1:2 \ action skbedit mark 17 \ action ife encode \ type 0xDEAD \ allow mark \ dst 02:15:15:15:15:15 xxx: Lets show the encoding policy sudo tc -s filter ls dev $ETH parent 1: protocol ip xxx: filter pref 10 u32 filter pref 10 u32 fh 800: ht divisor 1 filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:2 (rule hit 0 success 0) match c0a87aed/ffffffff at 16 (success 0 ) match 00010000/00ff0000 at 8 (success 0 ) action order 1: skbedit mark 17 index 6 ref 1 bind 1 Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 action order 2: ife encode action pipe index 3 ref 1 bind 1 dst MAC: 02:15:15:15:15:15 type: 0xDEAD Metadata: allow mark Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 xxx: test by sending ping from sender to destination Signed-off-by: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/tc_ife.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 include/uapi/linux/tc_act/tc_ife.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h new file mode 100644 index 000000000000..d648ff66586f --- /dev/null +++ b/include/uapi/linux/tc_act/tc_ife.h @@ -0,0 +1,38 @@ +#ifndef __UAPI_TC_IFE_H +#define __UAPI_TC_IFE_H + +#include +#include + +#define TCA_ACT_IFE 25 +/* Flag bits for now just encoding/decoding; mutually exclusive */ +#define IFE_ENCODE 1 +#define IFE_DECODE 0 + +struct tc_ife { + tc_gen; + __u16 flags; +}; + +/*XXX: We need to encode the total number of bytes consumed */ +enum { + TCA_IFE_UNSPEC, + TCA_IFE_PARMS, + TCA_IFE_TM, + TCA_IFE_DMAC, + TCA_IFE_SMAC, + TCA_IFE_TYPE, + TCA_IFE_METALST, + __TCA_IFE_MAX +}; +#define TCA_IFE_MAX (__TCA_IFE_MAX - 1) + +#define IFE_META_SKBMARK 1 +#define IFE_META_HASHID 2 +#define IFE_META_PRIO 3 +#define IFE_META_QMAP 4 +/*Can be overridden at runtime by module option*/ +#define __IFE_META_MAX 5 +#define IFE_META_MAX (__IFE_META_MAX - 1) + +#endif -- cgit v1.2.3 From 8a6bf5da1aefdafd60b73d9122c7af9fd2d7bb9c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 1 Mar 2016 19:55:14 +0100 Subject: netfilter: nft_masq: support port range Complete masquerading support by allowing port range selection. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index b19be0a098c0..eeffde196f80 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -951,10 +951,14 @@ enum nft_nat_attributes { * enum nft_masq_attributes - nf_tables masquerade expression attributes * * @NFTA_MASQ_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32) + * @NFTA_MASQ_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_MASQ_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) */ enum nft_masq_attributes { NFTA_MASQ_UNSPEC, NFTA_MASQ_FLAGS, + NFTA_MASQ_REG_PROTO_MIN, + NFTA_MASQ_REG_PROTO_MAX, __NFTA_MASQ_MAX }; #define NFTA_MASQ_MAX (__NFTA_MASQ_MAX - 1) -- cgit v1.2.3 From b5d3755a22e0cc4c369c0985aef0c52c2477c1e7 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 4 Mar 2016 11:52:16 +0100 Subject: uapi: define DIV_ROUND_UP for userland DIV_ROUND_UP is defined in linux/kernel.h only for the kernel. When ethtool.h is included by a userland app, we got the following error: include/linux/ethtool.h:1218:8: error: variably modified 'queue_mask' at file scope __u32 queue_mask[DIV_ROUND_UP(MAX_NUM_QUEUE, 32)]; ^ Let's add a common definition in uapi and use it everywhere. Fixes: ac2c7ad0e5d6 ("net/ethtool: introduce a new ioctl for per queue setting") CC: Kan Liang Suggested-by: Ben Hutchings Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 3 ++- include/uapi/linux/kernel.h | 1 + include/uapi/linux/mroute6.h | 9 ++------- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 37fd6dc33de4..9c22249ebf35 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -13,6 +13,7 @@ #ifndef _UAPI_LINUX_ETHTOOL_H #define _UAPI_LINUX_ETHTOOL_H +#include #include #include @@ -1215,7 +1216,7 @@ enum ethtool_sfeatures_retval_bits { struct ethtool_per_queue_op { __u32 cmd; __u32 sub_command; - __u32 queue_mask[DIV_ROUND_UP(MAX_NUM_QUEUE, 32)]; + __u32 queue_mask[__KERNEL_DIV_ROUND_UP(MAX_NUM_QUEUE, 32)]; char data[]; }; diff --git a/include/uapi/linux/kernel.h b/include/uapi/linux/kernel.h index 321e399457f5..466073f0ce46 100644 --- a/include/uapi/linux/kernel.h +++ b/include/uapi/linux/kernel.h @@ -9,5 +9,6 @@ #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) +#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) #endif /* _UAPI_LINUX_KERNEL_H */ diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index ce91215cf7e6..5062fb5751e1 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -1,6 +1,7 @@ #ifndef _UAPI__LINUX_MROUTE6_H #define _UAPI__LINUX_MROUTE6_H +#include #include #include @@ -46,14 +47,8 @@ typedef unsigned short mifi_t; typedef __u32 if_mask; #define NIFBITS (sizeof(if_mask) * 8) /* bits per mask */ -#if !defined(__KERNEL__) -#if !defined(DIV_ROUND_UP) -#define DIV_ROUND_UP(x,y) (((x) + ((y) - 1)) / (y)) -#endif -#endif - typedef struct if_set { - if_mask ifs_bits[DIV_ROUND_UP(IF_SETSIZE, NIFBITS)]; + if_mask ifs_bits[__KERNEL_DIV_ROUND_UP(IF_SETSIZE, NIFBITS)]; } if_set; #define IF_SET(n, p) ((p)->ifs_bits[(n)/NIFBITS] |= (1 << ((n) % NIFBITS))) -- cgit v1.2.3 From 14e2037902d65213842b4e40305ff54a64abbcb6 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 4 Mar 2016 11:52:19 +0100 Subject: ethtool.h: define INT_MAX for userland INT_MAX needs limits.h in userland. When ethtool.h is included by a userland app, we got the following error: .../usr/include/linux/ethtool.h: In function 'ethtool_validate_speed': .../usr/include/linux/ethtool.h:1471:18: error: 'INT_MAX' undeclared (first use in this function) return speed <= INT_MAX || speed == SPEED_UNKNOWN ^ Fixes: e02564ee334a ("ethtool: make validate_speed accept all speeds between 0 and INT_MAX") CC: Nikolay Aleksandrov Signed-off-by: Nicolas Dichtel Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 9c22249ebf35..2835b07416b7 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -17,6 +17,10 @@ #include #include +#ifndef __KERNEL__ +#include /* for INT_MAX */ +#endif + /* All structures exposed to userland should be defined such that they * have the same layout for 32-bit and 64-bit userland. */ -- cgit v1.2.3 From 8afd54c87ad7089734ef0527937a256586ba828a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Mar 2016 15:15:03 +0100 Subject: bpf: add flags to bpf_skb_store_bytes for clearing hash When overwriting parts of the packet with bpf_skb_store_bytes() that were fed previously into skb->hash calculation, we should clear the current hash with skb_clear_hash(), so that a next skb_get_hash() call can determine the correct hash related to this skb. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ee2193287cbe..2e3e90309904 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -305,6 +305,7 @@ enum bpf_func_id { /* BPF_FUNC_skb_store_bytes flags. */ #define BPF_F_RECOMPUTE_CSUM (1ULL << 0) +#define BPF_F_INVALIDATE_HASH (1ULL << 1) /* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. * First 4 bits are for passing the header field size. -- cgit v1.2.3 From 2208087061c4ad88de188911367effc550144836 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Mar 2016 15:15:05 +0100 Subject: bpf: allow to propagate df in bpf_skb_set_tunnel_key Added by 9a628224a61b ("ip_tunnel: Add dont fragment flag."), allow to feed df flag into tunneling facilities (currently supported on TX by vxlan, geneve and gre) as a hint from eBPF's bpf_skb_set_tunnel_key() helper. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2e3e90309904..21ee6d52016f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -330,6 +330,7 @@ enum bpf_func_id { /* BPF_FUNC_skb_set_tunnel_key flags. */ #define BPF_F_ZERO_CSUM_TX (1ULL << 1) +#define BPF_F_DONT_FRAGMENT (1ULL << 2) /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure -- cgit v1.2.3 From 14ca0751c96f8d3d0f52e8ed3b3236f8b34d3460 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Mar 2016 15:15:06 +0100 Subject: bpf: support for access to tunnel options After eBPF being able to programmatically access/manage tunnel key meta data via commit d3aa45ce6b94 ("bpf: add helpers to access tunnel metadata") and more recently also for IPv6 through c6c33454072f ("bpf: support ipv6 for bpf_skb_{set,get}_tunnel_key"), this work adds two complementary helpers to generically access their auxiliary tunnel options. Geneve and vxlan support this facility. For geneve, TLVs can be pushed, and for the vxlan case its GBP extension. I.e. setting tunnel key for geneve case only makes sense, if we can also read/write TLVs into it. In the GBP case, it provides the flexibility to easily map the group policy ID in combination with other helpers or maps. I chose to model this as two separate helpers, bpf_skb_{set,get}_tunnel_opt(), for a couple of reasons. bpf_skb_{set,get}_tunnel_key() is already rather complex by itself, and there may be cases for tunnel key backends where tunnel options are not always needed. If we would have integrated this into bpf_skb_{set,get}_tunnel_key() nevertheless, we are very limited with remaining helper arguments, so keeping compatibility on structs in case of passing in a flat buffer gets more cumbersome. Separating both also allows for more flexibility and future extensibility, f.e. options could be fed directly from a map, etc. Moreover, change geneve's xmit path to test only for info->options_len instead of TUNNEL_GENEVE_OPT flag. This makes it more consistent with vxlan's xmit path and allows for avoiding to specify a protocol flag in the API on xmit, so it can be protocol agnostic. Having info->options_len is enough information that is needed. Tested with vxlan and geneve. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 21ee6d52016f..9221f653fee3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -298,6 +298,17 @@ enum bpf_func_id { * Return: csum result */ BPF_FUNC_csum_diff, + + /** + * bpf_skb_[gs]et_tunnel_opt(skb, opt, size) + * retrieve or populate tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: 0 on success for set, option size for get + */ + BPF_FUNC_skb_get_tunnel_opt, + BPF_FUNC_skb_set_tunnel_opt, __BPF_FUNC_MAX_ID, }; -- cgit v1.2.3 From 6c90598174322b8888029e40dd84a4eb01f56afe Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 7 Mar 2016 21:57:15 -0800 Subject: bpf: pre-allocate hash map elements If kprobe is placed on spin_unlock then calling kmalloc/kfree from bpf programs is not safe, since the following dead lock is possible: kfree->spin_lock(kmem_cache_node->lock)...spin_unlock->kprobe-> bpf_prog->map_update->kmalloc->spin_lock(of the same kmem_cache_node->lock) and deadlocks. The following solutions were considered and some implemented, but eventually discarded - kmem_cache_create for every map - add recursion check to slow-path of slub - use reserved memory in bpf_map_update for in_irq or in preempt_disabled - kmalloc via irq_work At the end pre-allocation of all map elements turned out to be the simplest solution and since the user is charged upfront for all the memory, such pre-allocation doesn't affect the user space visible behavior. Since it's impossible to tell whether kprobe is triggered in a safe location from kmalloc point of view, use pre-allocation by default and introduce new BPF_F_NO_PREALLOC flag. While testing of per-cpu hash maps it was discovered that alloc_percpu(GFP_ATOMIC) has odd corner cases and often fails to allocate memory even when 90% of it is free. The pre-allocation of per-cpu hash elements solves this problem as well. Turned out that bpf_map_update() quickly followed by bpf_map_lookup()+bpf_map_delete() is very common pattern used in many of iovisor/bcc/tools, so there is additional benefit of pre-allocation, since such use cases are must faster. Since all hash map elements are now pre-allocated we can remove atomic increment of htab->count and save few more cycles. Also add bpf_map_precharge_memlock() to check rlimit_memlock early to avoid large malloc/free done by users who don't have sufficient limits. Pre-allocation is done with vmalloc and alloc/free is done via percpu_freelist. Here are performance numbers for different pre-allocation algorithms that were implemented, but discarded in favor of percpu_freelist: 1 cpu: pcpu_ida 2.1M pcpu_ida nolock 2.3M bt 2.4M kmalloc 1.8M hlist+spinlock 2.3M pcpu_freelist 2.6M 4 cpu: pcpu_ida 1.5M pcpu_ida nolock 1.8M bt w/smp_align 1.7M bt no/smp_align 1.1M kmalloc 0.7M hlist+spinlock 0.2M pcpu_freelist 2.0M 8 cpu: pcpu_ida 0.7M bt w/smp_align 0.8M kmalloc 0.4M pcpu_freelist 1.5M 32 cpu: kmalloc 0.13M pcpu_freelist 0.49M pcpu_ida nolock is a modified percpu_ida algorithm without percpu_ida_cpu locks and without cross-cpu tag stealing. It's faster than existing percpu_ida, but not as fast as pcpu_freelist. bt is a variant of block/blk-mq-tag.c simlified and customized for bpf use case. bt w/smp_align is using cache line for every 'long' (similar to blk-mq-tag). bt no/smp_align allocates 'long' bitmasks continuously to save memory. It's comparable to percpu_ida and in some cases faster, but slower than percpu_freelist hlist+spinlock is the simplest free list with single spinlock. As expeceted it has very bad scaling in SMP. kmalloc is existing implementation which is still available via BPF_F_NO_PREALLOC flag. It's significantly slower in single cpu and in 8 cpu setup it's 3 times slower than pre-allocation with pcpu_freelist, but saves memory, so in cases where map->max_entries can be large and number of map update/delete per second is low, it may make sense to use it. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9221f653fee3..0e30b19012a5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -101,12 +101,15 @@ enum bpf_prog_type { #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */ +#define BPF_F_NO_PREALLOC (1U << 0) + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ __u32 key_size; /* size of key in bytes */ __u32 value_size; /* size of value in bytes */ __u32 max_entries; /* max number of entries in a map */ + __u32 map_flags; /* prealloc or not */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ -- cgit v1.2.3 From ab7ac4eb9832e32a09f4e8042705484d2fb0aad3 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 7 Mar 2016 14:11:06 -0800 Subject: kcm: Kernel Connection Multiplexor module This module implements the Kernel Connection Multiplexor. Kernel Connection Multiplexor (KCM) is a facility that provides a message based interface over TCP for generic application protocols. With KCM an application can efficiently send and receive application protocol messages over TCP using datagram sockets. For more information see the included Documentation/networking/kcm.txt Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/uapi/linux/kcm.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 include/uapi/linux/kcm.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kcm.h b/include/uapi/linux/kcm.h new file mode 100644 index 000000000000..a5a530940b99 --- /dev/null +++ b/include/uapi/linux/kcm.h @@ -0,0 +1,40 @@ +/* + * Kernel Connection Multiplexor + * + * Copyright (c) 2016 Tom Herbert + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * User API to clone KCM sockets and attach transport socket to a KCM + * multiplexor. + */ + +#ifndef KCM_KERNEL_H +#define KCM_KERNEL_H + +struct kcm_attach { + int fd; + int bpf_fd; +}; + +struct kcm_unattach { + int fd; +}; + +struct kcm_clone { + int fd; +}; + +#define SIOCKCMATTACH (SIOCPROTOPRIVATE + 0) +#define SIOCKCMUNATTACH (SIOCPROTOPRIVATE + 1) +#define SIOCKCMCLONE (SIOCPROTOPRIVATE + 2) + +#define KCMPROTO_CONNECTED 0 + +/* Socket options */ +#define KCM_RECV_DISABLE 1 + +#endif + -- cgit v1.2.3 From 5b33f48842fa1e13e9c0ea8cc59c1d0df19042db Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Tue, 8 Mar 2016 12:42:29 +0200 Subject: net/flower: Introduce hardware offload support This patch is based on a patch made by John Fastabend. It adds support for offloading cls_flower. when NETIF_F_HW_TC is on: flags = 0 => Rule will be processed twice - by hardware, and if still relevant, by software. flags = SKIP_HW => Rull will be processed by software only If hardware fail/not capabale to apply the rule, operation will NOT fail. Filter will be processed by SW only. Acked-by: Jiri Pirko Suggested-by: John Fastabend Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 9874f5680926..c43c5f78b9c4 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -417,6 +417,8 @@ enum { TCA_FLOWER_KEY_TCP_DST, /* be16 */ TCA_FLOWER_KEY_UDP_SRC, /* be16 */ TCA_FLOWER_KEY_UDP_DST, /* be16 */ + + TCA_FLOWER_FLAGS, __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From e7f70af111f086a20800ad2e17f544b2e3e0f375 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 9 Mar 2016 03:00:03 +0100 Subject: vxlan: support setting IPv6 flow label This work adds support for setting the IPv6 flow label for vxlan per device and through collect metadata (ip_tunnel_key) frontends. The vxlan dst cache does not need any special considerations here, for the cases where caches can be used, the label is static per cache. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d452cea59020..6bebc975031d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -456,6 +456,7 @@ enum { IFLA_VXLAN_GBP, IFLA_VXLAN_REMCSUM_NOPARTIAL, IFLA_VXLAN_COLLECT_METADATA, + IFLA_VXLAN_LABEL, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) -- cgit v1.2.3 From 8eb3b99554b82da968d1fbc00df9f3156c5e2d63 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 9 Mar 2016 03:00:04 +0100 Subject: geneve: support setting IPv6 flow label This work adds support for setting the IPv6 flow label for geneve per device and through collect metadata (ip_tunnel_key) frontends. Also here, the geneve dst cache does not need any special considerations, for the cases where caches can be used, the label is static per cache. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6bebc975031d..249eef9a21bd 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -479,6 +479,7 @@ enum { IFLA_GENEVE_UDP_CSUM, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, + IFLA_GENEVE_LABEL, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) -- cgit v1.2.3 From 4018ab1875e0d00b84ac61bc15427136ad55849e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 9 Mar 2016 03:00:05 +0100 Subject: bpf: support flow label for bpf_skb_{set, get}_tunnel_key This patch extends bpf_tunnel_key with a tunnel_label member, that maps to ip_tunnel_key's label so underlying backends like vxlan and geneve can propagate the label to udp_tunnel6_xmit_skb(), where it's being set in the IPv6 header. It allows for having 20 more bits to encode/decode flow related meta information programmatically. Tested with vxlan and geneve. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0e30b19012a5..924f537183fd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -375,6 +375,7 @@ struct bpf_tunnel_key { }; __u8 tunnel_tos; __u8 tunnel_ttl; + __u32 tunnel_label; }; #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 136ba622de49a6bf1f6e5eab3391ed5d5dbe30e3 Mon Sep 17 00:00:00 2001 From: Zhang Shengju Date: Thu, 10 Mar 2016 08:55:50 +0000 Subject: netconf: add macro to represent all attributes This patch adds macro NETCONFA_ALL to represent all type of netconf attributes for IPv4 and IPv6. Signed-off-by: Zhang Shengju Signed-off-by: David S. Miller --- include/uapi/linux/netconf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h index 23cbd34e4ac7..45dfad509c4d 100644 --- a/include/uapi/linux/netconf.h +++ b/include/uapi/linux/netconf.h @@ -19,6 +19,7 @@ enum { __NETCONFA_MAX }; #define NETCONFA_MAX (__NETCONFA_MAX - 1) +#define NETCONFA_ALL -1 #define NETCONFA_IFINDEX_ALL -1 #define NETCONFA_IFINDEX_DEFAULT -2 -- cgit v1.2.3 From dece8d2b78d19df7fe5e4e965f1f0d1a3e188d1b Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 11 Mar 2016 18:07:31 +0100 Subject: uapi: add MACsec bits Signed-off-by: Sabrina Dubroca Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/uapi/linux/Kbuild | 1 + include/uapi/linux/if_ether.h | 1 + include/uapi/linux/if_link.h | 29 ++++++++ include/uapi/linux/if_macsec.h | 161 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 192 insertions(+) create mode 100644 include/uapi/linux/if_macsec.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index ebd10e624598..e25ebcfbcb48 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -173,6 +173,7 @@ header-y += if_hippi.h header-y += if_infiniband.h header-y += if_link.h header-y += if_ltalk.h +header-y += if_macsec.h header-y += if_packet.h header-y += if_phonet.h header-y += if_plip.h diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index ea9221b0331a..4a93051c578c 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -83,6 +83,7 @@ #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ #define ETH_P_TIPC 0x88CA /* TIPC */ +#define ETH_P_MACSEC 0x88E5 /* 802.1ae MACsec */ #define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */ #define ETH_P_MVRP 0x88F5 /* 802.1Q MVRP */ #define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 249eef9a21bd..8e3f88fa5b59 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -413,6 +413,35 @@ enum { #define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1) +/* MACSEC section */ +enum { + IFLA_MACSEC_UNSPEC, + IFLA_MACSEC_SCI, + IFLA_MACSEC_PORT, + IFLA_MACSEC_ICV_LEN, + IFLA_MACSEC_CIPHER_SUITE, + IFLA_MACSEC_WINDOW, + IFLA_MACSEC_ENCODING_SA, + IFLA_MACSEC_ENCRYPT, + IFLA_MACSEC_PROTECT, + IFLA_MACSEC_INC_SCI, + IFLA_MACSEC_ES, + IFLA_MACSEC_SCB, + IFLA_MACSEC_REPLAY_PROTECT, + IFLA_MACSEC_VALIDATION, + __IFLA_MACSEC_MAX, +}; + +#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1) + +enum macsec_validation_type { + MACSEC_VALIDATE_DISABLED = 0, + MACSEC_VALIDATE_CHECK = 1, + MACSEC_VALIDATE_STRICT = 2, + __MACSEC_VALIDATE_END, + MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1, +}; + /* IPVLAN section */ enum { IFLA_IPVLAN_UNSPEC, diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h new file mode 100644 index 000000000000..26b0d1e3e3e7 --- /dev/null +++ b/include/uapi/linux/if_macsec.h @@ -0,0 +1,161 @@ +/* + * include/uapi/linux/if_macsec.h - MACsec device + * + * Copyright (c) 2015 Sabrina Dubroca + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _UAPI_MACSEC_H +#define _UAPI_MACSEC_H + +#include + +#define MACSEC_GENL_NAME "macsec" +#define MACSEC_GENL_VERSION 1 + +#define MACSEC_MAX_KEY_LEN 128 + +#define DEFAULT_CIPHER_ID 0x0080020001000001ULL +#define DEFAULT_CIPHER_ALT 0x0080C20001000001ULL + +#define MACSEC_MIN_ICV_LEN 8 +#define MACSEC_MAX_ICV_LEN 32 + +enum macsec_attrs { + MACSEC_ATTR_UNSPEC, + MACSEC_ATTR_IFINDEX, /* u32, ifindex of the MACsec netdevice */ + MACSEC_ATTR_RXSC_CONFIG, /* config, nested macsec_rxsc_attrs */ + MACSEC_ATTR_SA_CONFIG, /* config, nested macsec_sa_attrs */ + MACSEC_ATTR_SECY, /* dump, nested macsec_secy_attrs */ + MACSEC_ATTR_TXSA_LIST, /* dump, nested, macsec_sa_attrs for each TXSA */ + MACSEC_ATTR_RXSC_LIST, /* dump, nested, macsec_rxsc_attrs for each RXSC */ + MACSEC_ATTR_TXSC_STATS, /* dump, nested, macsec_txsc_stats_attr */ + MACSEC_ATTR_SECY_STATS, /* dump, nested, macsec_secy_stats_attr */ + __MACSEC_ATTR_END, + NUM_MACSEC_ATTR = __MACSEC_ATTR_END, + MACSEC_ATTR_MAX = __MACSEC_ATTR_END - 1, +}; + +enum macsec_secy_attrs { + MACSEC_SECY_ATTR_UNSPEC, + MACSEC_SECY_ATTR_SCI, + MACSEC_SECY_ATTR_ENCODING_SA, + MACSEC_SECY_ATTR_WINDOW, + MACSEC_SECY_ATTR_CIPHER_SUITE, + MACSEC_SECY_ATTR_ICV_LEN, + MACSEC_SECY_ATTR_PROTECT, + MACSEC_SECY_ATTR_REPLAY, + MACSEC_SECY_ATTR_OPER, + MACSEC_SECY_ATTR_VALIDATE, + MACSEC_SECY_ATTR_ENCRYPT, + MACSEC_SECY_ATTR_INC_SCI, + MACSEC_SECY_ATTR_ES, + MACSEC_SECY_ATTR_SCB, + __MACSEC_SECY_ATTR_END, + NUM_MACSEC_SECY_ATTR = __MACSEC_SECY_ATTR_END, + MACSEC_SECY_ATTR_MAX = __MACSEC_SECY_ATTR_END - 1, +}; + +enum macsec_rxsc_attrs { + MACSEC_RXSC_ATTR_UNSPEC, + MACSEC_RXSC_ATTR_SCI, /* config/dump, u64 */ + MACSEC_RXSC_ATTR_ACTIVE, /* config/dump, u8 0..1 */ + MACSEC_RXSC_ATTR_SA_LIST, /* dump, nested */ + MACSEC_RXSC_ATTR_STATS, /* dump, nested, macsec_rxsc_stats_attr */ + __MACSEC_RXSC_ATTR_END, + NUM_MACSEC_RXSC_ATTR = __MACSEC_RXSC_ATTR_END, + MACSEC_RXSC_ATTR_MAX = __MACSEC_RXSC_ATTR_END - 1, +}; + +enum macsec_sa_attrs { + MACSEC_SA_ATTR_UNSPEC, + MACSEC_SA_ATTR_AN, /* config/dump, u8 0..3 */ + MACSEC_SA_ATTR_ACTIVE, /* config/dump, u8 0..1 */ + MACSEC_SA_ATTR_PN, /* config/dump, u32 */ + MACSEC_SA_ATTR_KEY, /* config, data */ + MACSEC_SA_ATTR_KEYID, /* config/dump, u64 */ + MACSEC_SA_ATTR_STATS, /* dump, nested, macsec_sa_stats_attr */ + __MACSEC_SA_ATTR_END, + NUM_MACSEC_SA_ATTR = __MACSEC_SA_ATTR_END, + MACSEC_SA_ATTR_MAX = __MACSEC_SA_ATTR_END - 1, +}; + +enum macsec_nl_commands { + MACSEC_CMD_GET_TXSC, + MACSEC_CMD_ADD_RXSC, + MACSEC_CMD_DEL_RXSC, + MACSEC_CMD_UPD_RXSC, + MACSEC_CMD_ADD_TXSA, + MACSEC_CMD_DEL_TXSA, + MACSEC_CMD_UPD_TXSA, + MACSEC_CMD_ADD_RXSA, + MACSEC_CMD_DEL_RXSA, + MACSEC_CMD_UPD_RXSA, +}; + +/* u64 per-RXSC stats */ +enum macsec_rxsc_stats_attr { + MACSEC_RXSC_STATS_ATTR_UNSPEC, + MACSEC_RXSC_STATS_ATTR_IN_OCTETS_VALIDATED, + MACSEC_RXSC_STATS_ATTR_IN_OCTETS_DECRYPTED, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNCHECKED, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_DELAYED, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_OK, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_INVALID, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_LATE, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_VALID, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_USING_SA, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNUSED_SA, + __MACSEC_RXSC_STATS_ATTR_END, + NUM_MACSEC_RXSC_STATS_ATTR = __MACSEC_RXSC_STATS_ATTR_END, + MACSEC_RXSC_STATS_ATTR_MAX = __MACSEC_RXSC_STATS_ATTR_END - 1, +}; + +/* u32 per-{RX,TX}SA stats */ +enum macsec_sa_stats_attr { + MACSEC_SA_STATS_ATTR_UNSPEC, + MACSEC_SA_STATS_ATTR_IN_PKTS_OK, + MACSEC_SA_STATS_ATTR_IN_PKTS_INVALID, + MACSEC_SA_STATS_ATTR_IN_PKTS_NOT_VALID, + MACSEC_SA_STATS_ATTR_IN_PKTS_NOT_USING_SA, + MACSEC_SA_STATS_ATTR_IN_PKTS_UNUSED_SA, + MACSEC_SA_STATS_ATTR_OUT_PKTS_PROTECTED, + MACSEC_SA_STATS_ATTR_OUT_PKTS_ENCRYPTED, + __MACSEC_SA_STATS_ATTR_END, + NUM_MACSEC_SA_STATS_ATTR = __MACSEC_SA_STATS_ATTR_END, + MACSEC_SA_STATS_ATTR_MAX = __MACSEC_SA_STATS_ATTR_END - 1, +}; + +/* u64 per-TXSC stats */ +enum macsec_txsc_stats_attr { + MACSEC_TXSC_STATS_ATTR_UNSPEC, + MACSEC_TXSC_STATS_ATTR_OUT_PKTS_PROTECTED, + MACSEC_TXSC_STATS_ATTR_OUT_PKTS_ENCRYPTED, + MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_PROTECTED, + MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_ENCRYPTED, + __MACSEC_TXSC_STATS_ATTR_END, + NUM_MACSEC_TXSC_STATS_ATTR = __MACSEC_TXSC_STATS_ATTR_END, + MACSEC_TXSC_STATS_ATTR_MAX = __MACSEC_TXSC_STATS_ATTR_END - 1, +}; + +/* u64 per-SecY stats */ +enum macsec_secy_stats_attr { + MACSEC_SECY_STATS_ATTR_UNSPEC, + MACSEC_SECY_STATS_ATTR_OUT_PKTS_UNTAGGED, + MACSEC_SECY_STATS_ATTR_IN_PKTS_UNTAGGED, + MACSEC_SECY_STATS_ATTR_OUT_PKTS_TOO_LONG, + MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_TAG, + MACSEC_SECY_STATS_ATTR_IN_PKTS_BAD_TAG, + MACSEC_SECY_STATS_ATTR_IN_PKTS_UNKNOWN_SCI, + MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_SCI, + MACSEC_SECY_STATS_ATTR_IN_PKTS_OVERRUN, + __MACSEC_SECY_STATS_ATTR_END, + NUM_MACSEC_SECY_STATS_ATTR = __MACSEC_SECY_STATS_ATTR_END, + MACSEC_SECY_STATS_ATTR_MAX = __MACSEC_SECY_STATS_ATTR_END - 1, +}; + +#endif /* _UAPI_MACSEC_H */ -- cgit v1.2.3 From a44d6eacdaf56f74fad699af7f4925a5f5ac0e7f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 14 Mar 2016 10:52:15 -0700 Subject: tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In Per RFC4898, they count segments sent/received containing a positive length data segment (that includes retransmission segments carrying data). Unlike tcpi_segs_out/in, tcpi_data_segs_out/in excludes segments carrying no data (e.g. pure ack). The patch also updates the segs_in in tcp_fastopen_add_skb() so that segs_in >= data_segs_in property is kept. Together with retransmission data, tcpi_data_segs_out gives a better signal on the rxmit rate. v6: Rebase on the latest net-next v5: Eric pointed out that checking skb->len is still needed in tcp_fastopen_add_skb() because skb can carry a FIN without data. Hence, instead of open coding segs_in and data_segs_in, tcp_segs_in() helper is used. Comment is added to the fastopen case to explain why segs_in has to be reset and tcp_segs_in() has to be called before __skb_pull(). v4: Add comment to the changes in tcp_fastopen_add_skb() and also add remark on this case in the commit message. v3: Add const modifier to the skb parameter in tcp_segs_in() v2: Rework based on recent fix by Eric: commit a9d99ce28ed3 ("tcp: fix tcpi_segs_in after connection establishment") Signed-off-by: Martin KaFai Lau Cc: Chris Rapier Cc: Eric Dumazet Cc: Marcelo Ricardo Leitner Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index fe95446e9abf..53e8e3fe6b1b 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -199,6 +199,8 @@ struct tcp_info { __u32 tcpi_notsent_bytes; __u32 tcpi_min_rtt; + __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */ + __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ }; /* for TCP_MD5SIG socket option */ -- cgit v1.2.3 From bfa3f9d7f3b349acea8982d2248e33a0ed84c687 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 10 Mar 2016 10:54:16 -0800 Subject: netfilter: Remove IP_CT_NEW_REPLY definition. Remove the definition of IP_CT_NEW_REPLY from the kernel as it does not make sense. This allows the definition of IP_CT_NUMBER to be simplified as well. Signed-off-by: Jarno Rajahalme Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 319f47128db8..6d074d14ee27 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -20,9 +20,15 @@ enum ip_conntrack_info { IP_CT_ESTABLISHED_REPLY = IP_CT_ESTABLISHED + IP_CT_IS_REPLY, IP_CT_RELATED_REPLY = IP_CT_RELATED + IP_CT_IS_REPLY, - IP_CT_NEW_REPLY = IP_CT_NEW + IP_CT_IS_REPLY, - /* Number of distinct IP_CT types (no NEW in reply dirn). */ - IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1 + /* No NEW in reply direction. */ + + /* Number of distinct IP_CT types. */ + IP_CT_NUMBER, + + /* only for userspace compatibility */ +#ifndef __KERNEL__ + IP_CT_NEW_REPLY = IP_CT_NUMBER, +#endif }; #define NF_CT_STATE_INVALID_BIT (1 << 0) -- cgit v1.2.3 From 05752523e56502cd9975aec0a2ded465d51a71f3 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 10 Mar 2016 10:54:23 -0800 Subject: openvswitch: Interface with NAT. Extend OVS conntrack interface to cover NAT. New nested OVS_CT_ATTR_NAT attribute may be used to include NAT with a CT action. A bare OVS_CT_ATTR_NAT only mangles existing and expected connections. If OVS_NAT_ATTR_SRC or OVS_NAT_ATTR_DST is included within the nested attributes, new (non-committed/non-confirmed) connections are mangled according to the rest of the nested attributes. The corresponding OVS userspace patch series includes test cases (in tests/system-traffic.at) that also serve as example uses. This work extends on a branch by Thomas Graf at https://github.com/tgraf/ovs/tree/nat. Signed-off-by: Jarno Rajahalme Acked-by: Thomas Graf Acked-by: Joe Stringer Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/openvswitch.h | 49 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index a27222d5b413..616d04761730 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -454,6 +454,14 @@ struct ovs_key_ct_labels { #define OVS_CS_F_REPLY_DIR 0x08 /* Flow is in the reply direction. */ #define OVS_CS_F_INVALID 0x10 /* Could not track connection. */ #define OVS_CS_F_TRACKED 0x20 /* Conntrack has occurred. */ +#define OVS_CS_F_SRC_NAT 0x40 /* Packet's source address/port was + * mangled by NAT. + */ +#define OVS_CS_F_DST_NAT 0x80 /* Packet's destination address/port + * was mangled by NAT. + */ + +#define OVS_CS_F_NAT_MASK (OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT) /** * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. @@ -632,6 +640,8 @@ struct ovs_action_hash { * mask. For each bit set in the mask, the corresponding bit in the value is * copied to the connection tracking label field in the connection. * @OVS_CT_ATTR_HELPER: variable length string defining conntrack ALG. + * @OVS_CT_ATTR_NAT: Nested OVS_NAT_ATTR_* for performing L3 network address + * translation (NAT) on the packet. */ enum ovs_ct_attr { OVS_CT_ATTR_UNSPEC, @@ -641,11 +651,50 @@ enum ovs_ct_attr { OVS_CT_ATTR_LABELS, /* labels to associate with this connection. */ OVS_CT_ATTR_HELPER, /* netlink helper to assist detection of related connections. */ + OVS_CT_ATTR_NAT, /* Nested OVS_NAT_ATTR_* */ __OVS_CT_ATTR_MAX }; #define OVS_CT_ATTR_MAX (__OVS_CT_ATTR_MAX - 1) +/** + * enum ovs_nat_attr - Attributes for %OVS_CT_ATTR_NAT. + * + * @OVS_NAT_ATTR_SRC: Flag for Source NAT (mangle source address/port). + * @OVS_NAT_ATTR_DST: Flag for Destination NAT (mangle destination + * address/port). Only one of (@OVS_NAT_ATTR_SRC, @OVS_NAT_ATTR_DST) may be + * specified. Effective only for packets for ct_state NEW connections. + * Packets of committed connections are mangled by the NAT action according to + * the committed NAT type regardless of the flags specified. As a corollary, a + * NAT action without a NAT type flag will only mangle packets of committed + * connections. The following NAT attributes only apply for NEW + * (non-committed) connections, and they may be included only when the CT + * action has the @OVS_CT_ATTR_COMMIT flag and either @OVS_NAT_ATTR_SRC or + * @OVS_NAT_ATTR_DST is also included. + * @OVS_NAT_ATTR_IP_MIN: struct in_addr or struct in6_addr + * @OVS_NAT_ATTR_IP_MAX: struct in_addr or struct in6_addr + * @OVS_NAT_ATTR_PROTO_MIN: u16 L4 protocol specific lower boundary (port) + * @OVS_NAT_ATTR_PROTO_MAX: u16 L4 protocol specific upper boundary (port) + * @OVS_NAT_ATTR_PERSISTENT: Flag for persistent IP mapping across reboots + * @OVS_NAT_ATTR_PROTO_HASH: Flag for pseudo random L4 port mapping (MD5) + * @OVS_NAT_ATTR_PROTO_RANDOM: Flag for fully randomized L4 port mapping + */ +enum ovs_nat_attr { + OVS_NAT_ATTR_UNSPEC, + OVS_NAT_ATTR_SRC, + OVS_NAT_ATTR_DST, + OVS_NAT_ATTR_IP_MIN, + OVS_NAT_ATTR_IP_MAX, + OVS_NAT_ATTR_PROTO_MIN, + OVS_NAT_ATTR_PROTO_MAX, + OVS_NAT_ATTR_PERSISTENT, + OVS_NAT_ATTR_PROTO_HASH, + OVS_NAT_ATTR_PROTO_RANDOM, + __OVS_NAT_ATTR_MAX, +}; + +#define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1) + /** * enum ovs_action_attr - Action types. * -- cgit v1.2.3 From 93e68cd6115f67d8363c94dae8206af36f6d3b00 Mon Sep 17 00:00:00 2001 From: Zhang Shengju Date: Wed, 16 Mar 2016 09:12:46 +0000 Subject: net: fix a comment typo Fix a comment typo. Signed-off-by: Zhang Shengju Signed-off-by: David S. Miller --- include/uapi/linux/if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index 9cf2394f0bcf..f80277569f24 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -37,7 +37,7 @@ * are shared for all types of net_devices. The sysfs entries are available * via /sys/class/net//flags. Flags which can be toggled through sysfs * are annotated below, note that only a few flags can be toggled and some - * other flags are always always preserved from the original net_device flags + * other flags are always preserved from the original net_device flags * even if you try to set them via sysfs. Flags which are always preserved * are kept under the flag grouping @IFF_VOLATILE. Flags which are volatile * are annotated below as such. -- cgit v1.2.3