From 0c317a02ca982ca093e71bf07cb562265ba40032 Mon Sep 17 00:00:00 2001 From: Purushottam Kushwaha Date: Wed, 12 Oct 2016 18:26:51 +0530 Subject: cfg80211: support virtual interfaces with different beacon intervals This commit provides a mechanism for the host drivers to advertise the support for different beacon intervals among the respective interface combinations in a group, through NL80211_IFACE_COMB_BI_MIN_GCD (u32). This value will be compared against GCD of all beaconing interfaces of matching combinations. If the driver doesn't advertise this value, the old behaviour where all beacon intervals must be identical is retained. If it is specified, then any beacon interval for an interface in the interface combination as well as the GCD of all active beacon intervals in the combination must be greater or equal to this value. Signed-off-by: Purushottam Kushwaha [change commit message, some variable names, small other things] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 56368e9b4622..1362d24957b5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4280,6 +4280,9 @@ enum nl80211_iface_limit_attrs { * of supported channel widths for radar detection. * @NL80211_IFACE_COMB_RADAR_DETECT_REGIONS: u32 attribute containing the bitmap * of supported regulatory regions for radar detection. + * @NL80211_IFACE_COMB_BI_MIN_GCD: u32 attribute specifying the minimum GCD of + * different beacon intervals supported by all the interface combinations + * in this group (if not present, all beacon intervals be identical). 
* @NUM_NL80211_IFACE_COMB: number of attributes * @MAX_NL80211_IFACE_COMB: highest attribute number * @@ -4287,8 +4290,8 @@ enum nl80211_iface_limit_attrs { * limits = [ #{STA} <= 1, #{AP} <= 1 ], matching BI, channels = 1, max = 2 * => allows an AP and a STA that must match BIs * - * numbers = [ #{AP, P2P-GO} <= 8 ], channels = 1, max = 8 - * => allows 8 of AP/GO + * numbers = [ #{AP, P2P-GO} <= 8 ], BI min gcd, channels = 1, max = 8, + * => allows 8 of AP/GO that can have BI gcd >= min gcd * * numbers = [ #{STA} <= 2 ], channels = 2, max = 2 * => allows two STAs on different channels @@ -4314,6 +4317,7 @@ enum nl80211_if_combination_attrs { NL80211_IFACE_COMB_NUM_CHANNELS, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, + NL80211_IFACE_COMB_BI_MIN_GCD, /* keep last */ NUM_NL80211_IFACE_COMB, -- cgit v1.2.3 From a52ad514fdf3b8a57ca4322c92d2d8d5c6182485 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 7 Oct 2016 22:04:34 -0400 Subject: net: deprecate eth_change_mtu, remove usage With centralized MTU checking, there's nothing productive done by eth_change_mtu that isn't already done in dev_set_mtu, so mark it as deprecated and remove all usage of it in the kernel. All callers have been audited for calls to alloc_etherdev* or ether_setup directly, which means they all have a valid dev->min_mtu and dev->max_mtu. Now eth_change_mtu prints out a netdev_warn about being deprecated, for the benefit of out-of-tree drivers that might be utilizing it. Of note, dvb_net.c actually had dev->mtu = 4096, while using eth_change_mtu, meaning that if you ever tried changing its mtu, you couldn't set it above 1500 anymore. It's now getting dev->max_mtu also set to 4096 to remedy that. v2: fix up lantiq_etop, missed breakage due to driver not compiling on x86 CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. 
Miller --- include/uapi/linux/if_ether.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 117d02e0fc31..864d6f2b2cb0 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -35,6 +35,8 @@ #define ETH_FRAME_LEN 1514 /* Max. octets in frame sans FCS */ #define ETH_FCS_LEN 4 /* Octets in the FCS */ +#define ETH_MIN_MTU 68 /* Min IPv4 MTU per RFC791 */ + /* * These are the defined Ethernet Protocol ID's. */ -- cgit v1.2.3 From d894be57ca92c8a8819ab544d550809e8731137b Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Thu, 20 Oct 2016 13:55:16 -0400 Subject: ethernet: use net core MTU range checking in more drivers Somehow, I missed a healthy number of ethernet drivers in the last pass. Most of these drivers either were in need of an updated max_mtu to make jumbo frames possible to enable again. In a few cases, also setting a different min_mtu to match previous lower bounds. There are also a few drivers that had no upper bounds checking, so they're getting a brand new ETH_MAX_MTU that is identical to IP_MAX_MTU, but accessible by includes all ethernet and ethernet-like drivers all have already. 
acenic: - min_mtu = 0, max_mtu = 9000 amazon/ena: - min_mtu = 128, max_mtu = adapter->max_mtu amd/xgbe: - min_mtu = 0, max_mtu = 9000 sb1250: - min_mtu = 0, max_mtu = 1518 cxgb3: - min_mtu = 81, max_mtu = 65535 cxgb4: - min_mtu = 81, max_mtu = 9600 cxgb4vf: - min_mtu = 81, max_mtu = 65535 benet: - min_mtu = 256, max_mtu = 9000 ibmveth: - min_mtu = 68, max_mtu = 65535 ibmvnic: - min_mtu = adapter->min_mtu, max_mtu = adapter->max_mtu - remove now redundant ibmvnic_change_mtu jme: - min_mtu = 1280, max_mtu = 9202 mv643xx_eth: - min_mtu = 64, max_mtu = 9500 mlxsw: - min_mtu = 0, max_mtu = 65535 - Basically bypassing the core checks, and instead relying on dynamic checks in the respective switch drivers' ndo_change_mtu functions ns83820: - min_mtu = 0 - remove redundant ns83820_change_mtu, only checked for mtu > 1500 netxen: - min_mtu = 0, max_mtu = 8000 (P2), max_mtu = 9600 (P3) qlge: - min_mtu = 1500, max_mtu = 9000 - driver only supports setting mtu to 1500 or 9000, so the core check only rules out < 1500 and > 9000, qlge_change_mtu still needs to check that the value is 1500 or 9000 qualcomm/emac: - min_mtu = 46, max_mtu = 9194 xilinx_axienet: - min_mtu = 64, max_mtu = 9000 Fixes: 61e84623ace3 ("net: centralize net_device min/max MTU checking") CC: netdev@vger.kernel.org CC: Jes Sorensen CC: Netanel Belgazal CC: Tom Lendacky CC: Santosh Raspatur CC: Hariprasad S CC: Sathya Perla CC: Ajit Khaparde CC: Sriharsha Basavapatna CC: Somnath Kotur CC: Thomas Falcon CC: John Allen CC: Guo-Fu Tseng CC: Sebastian Hesselbarth CC: Jiri Pirko CC: Ido Schimmel CC: Manish Chopra CC: Sony Chacko CC: Rajesh Borundia CC: Timur Tabi CC: Anirudha Sarangi CC: John Linn Signed-off-by: Jarod Wilson Signed-off-by: David S. 
Miller --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 864d6f2b2cb0..3e5185e9ef03 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -36,6 +36,7 @@ #define ETH_FCS_LEN 4 /* Octets in the FCS */ #define ETH_MIN_MTU 68 /* Min IPv4 MTU per RFC791 */ +#define ETH_MAX_MTU 0xFFFFU /* 65535, same as IP_MAX_MTU */ /* * These are the defined Ethernet Protocol ID's. -- cgit v1.2.3 From 2d0e30c30f84d08dc16f0f2af41f1b8a85f0755e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Oct 2016 12:46:33 +0200 Subject: bpf: add helper for retrieving current numa node id Use case is mainly for soreuseport to select sockets for the local numa node, but since it is generic, let's also add this for other networking and tracing program types. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f09c70b97eca..374ef582ae18 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -426,6 +426,12 @@ enum bpf_func_id { */ BPF_FUNC_set_hash_invalid, + /** + * bpf_get_numa_node_id() + * Returns the id of the current NUMA node. + */ + BPF_FUNC_get_numa_node_id, + __BPF_FUNC_MAX_ID, }; -- cgit v1.2.3 From 432490f9d455fb842d70219f22d9d2c812371676 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 21 Oct 2016 13:03:44 +0300 Subject: net: ip, diag -- Add diag interface for raw sockets In criu we are actively using the diag interface to collect sockets present in the system when dumping applications. And while for unix, tcp, udp[lite], packet, netlink it works as expected, raw sockets do not have one. Thus add it. 
v2: - add missing sock_put calls in raw_diag_dump_one (by eric.dumazet@) - implement @destroy for diag requests (by dsa@) v3: - add export of raw_abort for IPv6 (by dsa@) - pass net-admin flag into inet_sk_diag_fill due to changes in net-next branch (by dsa@) v4: - use @pad in struct inet_diag_req_v2 for raw socket protocol specification: raw module carries sockets which may have custom protocol passed from socket() syscall and sole @sdiag_protocol is not enough to match the underlying ones - start reporting protocol specified in socket() call when sockets are raw ones for the same reason: user space tools like ss may parse this attribute and use it for socket matching v5 (by eric.dumazet@): - use sock_hold in raw_sock_get instead of atomic_inc, we're holding (raw_v4_hashinfo|raw_v6_hashinfo)->lock when looking up so counter won't be zero here. v6: - use sdiag_raw_protocol() helper which will access @pad structure used for raw sockets protocol specification: we can't simply rename this member without breaking uapi v7: - since sdiag_raw_protocol() helper is not suitable for uapi let's rather make an alias structure with proper names. __check_inet_diag_req_raw helper will catch it if either structure is unintentionally changed. CC: David S. Miller CC: Eric Dumazet CC: David Ahern CC: Alexey Kuznetsov CC: James Morris CC: Hideaki YOSHIFUJI CC: Patrick McHardy CC: Andrey Vagin CC: Stephen Hemminger Signed-off-by: Cyrill Gorcunov Signed-off-by: David S. 
Miller --- include/uapi/linux/inet_diag.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 509cd961068d..bbe201047df6 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -43,6 +43,23 @@ struct inet_diag_req_v2 { struct inet_diag_sockid id; }; +/* + * SOCK_RAW sockets require the underlied protocol to be + * additionally specified so we can use @pad member for + * this, but we can't rename it because userspace programs + * still may depend on this name. Instead lets use another + * structure definition as an alias for struct + * @inet_diag_req_v2. + */ +struct inet_diag_req_raw { + __u8 sdiag_family; + __u8 sdiag_protocol; + __u8 idiag_ext; + __u8 sdiag_raw_protocol; + __u32 idiag_states; + struct inet_diag_sockid id; +}; + enum { INET_DIAG_REQ_NONE, INET_DIAG_REQ_BYTECODE, -- cgit v1.2.3 From 11b6b5a4ced2f2c76073b97ee08ca0eab8358fde Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 27 Oct 2016 00:41:58 +0300 Subject: cfg80211: Rename SAE_DATA to more generic AUTH_DATA This adds defines and nl80211 extensions to allow FILS Authentication to be implemented similarly to SAE. FILS does not need the special rules for the Authentication transaction number and Status code fields, but it does need to add non-IE fields. The previously used NL80211_ATTR_SAE_DATA can be reused for this to avoid having to duplicate that implementation. Rename that attribute to more generic NL80211_ATTR_AUTH_DATA (with backwards compatibility define for NL80211_ATTR_SAE_DATA). Also document the special rules related to the Authentication transaction number and Status code fields. 
Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1362d24957b5..18bcf44899aa 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1638,8 +1638,16 @@ enum nl80211_commands { * the connection request from a station. nl80211_connect_failed_reason * enum has different reasons of connection failure. * - * @NL80211_ATTR_SAE_DATA: SAE elements in Authentication frames. This starts - * with the Authentication transaction sequence number field. + * @NL80211_ATTR_AUTH_DATA: Fields and elements in Authentication frames. + * This contains the authentication frame body (non-IE and IE data), + * excluding the Authentication algorithm number, i.e., starting at the + * Authentication transaction sequence number field. It is used with + * authentication algorithms that need special fields to be added into + * the frames (SAE and FILS). Currently, only the SAE cases use the + * initial two fields (Authentication transaction sequence number and + * Status code). However, those fields are included in the attribute data + * for all authentication algorithms to keep the attribute definition + * consistent. 
* * @NL80211_ATTR_VHT_CAPABILITY: VHT Capability information element (from * association request when used with NL80211_CMD_NEW_STATION) @@ -2195,7 +2203,7 @@ enum nl80211_attrs { NL80211_ATTR_CONN_FAILED_REASON, - NL80211_ATTR_SAE_DATA, + NL80211_ATTR_AUTH_DATA, NL80211_ATTR_VHT_CAPABILITY, @@ -2347,6 +2355,7 @@ enum nl80211_attrs { #define NL80211_ATTR_SCAN_GENERATION NL80211_ATTR_GENERATION #define NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG #define NL80211_ATTR_IFACE_SOCKET_OWNER NL80211_ATTR_SOCKET_OWNER +#define NL80211_ATTR_SAE_DATA NL80211_ATTR_AUTH_DATA /* * Allow user space programs to use #ifdef on new attributes by defining them -- cgit v1.2.3 From 60b8084e844814631b57da3d35f272e0ff799ab2 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 27 Oct 2016 00:42:00 +0300 Subject: cfg80211: Add feature flag for Fast Initial Link Setup (FILS) as STA This defines a feature flag that drivers can use to indicate that they support FILS authentication/association (IEEE 802.11ai) when using user space SME (NL80211_CMD_AUTHENTICATE) in station mode. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 18bcf44899aa..7825fd4db19e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4647,6 +4647,8 @@ enum nl80211_feature_flags { * configuration (AP/mesh) with HT rates. * @NL80211_EXT_FEATURE_BEACON_RATE_VHT: Driver supports beacon rate * configuration (AP/mesh) with VHT rates. + * @NL80211_EXT_FEATURE_FILS_STA: This driver supports Fast Initial Link Setup + * with user space SME (NL80211_CMD_AUTHENTICATE) in station mode. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. 
@@ -4661,6 +4663,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_BEACON_RATE_LEGACY, NL80211_EXT_FEATURE_BEACON_RATE_HT, NL80211_EXT_FEATURE_BEACON_RATE_VHT, + NL80211_EXT_FEATURE_FILS_STA, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 631810603a20874554b2f17adf42b72d0f15eda5 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 27 Oct 2016 00:42:02 +0300 Subject: cfg80211: Add Fast Initial Link Setup (FILS) auth algs This defines authentication algorithms for FILS (IEEE 802.11ai). Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 7825fd4db19e..4dc21265cd12 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3669,6 +3669,9 @@ enum nl80211_bss_status { * @NL80211_AUTHTYPE_FT: Fast BSS Transition (IEEE 802.11r) * @NL80211_AUTHTYPE_NETWORK_EAP: Network EAP (some Cisco APs and mainly LEAP) * @NL80211_AUTHTYPE_SAE: Simultaneous authentication of equals + * @NL80211_AUTHTYPE_FILS_SK: Fast Initial Link Setup shared key + * @NL80211_AUTHTYPE_FILS_SK_PFS: Fast Initial Link Setup shared key with PFS + * @NL80211_AUTHTYPE_FILS_PK: Fast Initial Link Setup public key * @__NL80211_AUTHTYPE_NUM: internal * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by @@ -3681,6 +3684,9 @@ enum nl80211_auth_type { NL80211_AUTHTYPE_FT, NL80211_AUTHTYPE_NETWORK_EAP, NL80211_AUTHTYPE_SAE, + NL80211_AUTHTYPE_FILS_SK, + NL80211_AUTHTYPE_FILS_SK_PFS, + NL80211_AUTHTYPE_FILS_PK, /* keep last */ __NL80211_AUTHTYPE_NUM, -- cgit v1.2.3 From 348bd456699801920a309c66e382380809fbdf41 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 27 Oct 2016 00:42:03 +0300 Subject: cfg80211: Add KEK/nonces for FILS association frames The new nl80211 
attributes can be used to provide KEK and nonces to allow the driver to encrypt and decrypt FILS (Re)Association Request/Response frames in station mode. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4dc21265cd12..a268a009528a 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1944,6 +1944,11 @@ enum nl80211_commands { * attribute. * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute. * See &enum nl80211_nan_match_attributes. + * @NL80211_ATTR_FILS_KEK: KEK for FILS (Re)Association Request/Response frame + * protection. + * @NL80211_ATTR_FILS_NONCES: Nonces (part of AAD) for FILS (Re)Association + * Request/Response frame protection. This attribute contains the 16 octet + * STA Nonce followed by 16 octets of AP Nonce. * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -2344,6 +2349,9 @@ enum nl80211_attrs { NL80211_ATTR_NAN_FUNC, NL80211_ATTR_NAN_MATCH, + NL80211_ATTR_FILS_KEK, + NL80211_ATTR_FILS_NONCES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From ce0ce13a1c89ff8b94b7f8fb32eb4c43e111c82e Mon Sep 17 00:00:00 2001 From: Michael Braun Date: Mon, 10 Oct 2016 19:12:22 +0200 Subject: cfg80211: configure multicast to unicast for AP interfaces Add the ability to configure if an AP (and associated VLANs) will do multicast-to-unicast conversion for ARP, IPv4 and IPv6 frames (possibly within 802.1Q). If enabled, such frames are to be sent to each station separately, with the DA replaced by their own MAC address rather than the group address. 
Note that this may break certain expectations of the receiver, such as the ability to drop unicast IP packets received within multicast L2 frames, or the ability to not send ICMP destination unreachable messages for packets received in L2 multicast (which is required, but the receiver can't tell the difference if this new option is enabled.) This also doesn't implement the 802.11 DMS (directed multicast service). Signed-off-by: Michael Braun [fix disabling, add better documentation & commit message] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a268a009528a..e21d23dcb588 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -600,6 +600,20 @@ * * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface. * + * @NL80211_CMD_SET_MULTICAST_TO_UNICAST: Configure if this AP should perform + * multicast to unicast conversion. When enabled, all multicast packets + * with ethertype ARP, IPv4 or IPv6 (possibly within an 802.1Q header) + * will be sent out to each station once with the destination (multicast) + * MAC address replaced by the station's MAC address. Note that this may + * break certain expectations of the receiver, e.g. the ability to drop + * unicast IP packets encapsulated in multicast L2 frames, or the ability + * to not send destination unreachable messages in such cases. + * This can only be toggled per BSS. Configure this on an interface of + * type %NL80211_IFTYPE_AP. It applies to all its VLAN interfaces + * (%NL80211_IFTYPE_AP_VLAN), except for those in 4addr (WDS) mode. + * If %NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED is not present with this + * command, the feature is disabled. + * * @NL80211_CMD_JOIN_MESH: Join a mesh. The mesh ID must be given, and initial * mesh config parameters may be given. 
* @NL80211_CMD_LEAVE_MESH: Leave the mesh network -- no special arguments, the @@ -1069,6 +1083,8 @@ enum nl80211_commands { NL80211_CMD_CHANGE_NAN_CONFIG, NL80211_CMD_NAN_MATCH, + NL80211_CMD_SET_MULTICAST_TO_UNICAST, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1950,6 +1966,9 @@ enum nl80211_commands { * Request/Response frame protection. This attribute contains the 16 octet * STA Nonce followed by 16 octets of AP Nonce. * + * @NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED: Indicates whether or not multicast + * packets should be send out as unicast to all stations (flag attribute). + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2352,6 +2371,8 @@ enum nl80211_attrs { NL80211_ATTR_FILS_KEK, NL80211_ATTR_FILS_NONCES, + NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 088e8df82f91a24728d49d9532cab7ebdee5117f Mon Sep 17 00:00:00 2001 From: vamsi krishna Date: Thu, 27 Oct 2016 16:51:11 +0300 Subject: cfg80211: Add support to update connection parameters Add functionality to update the connection parameters when in connected state, so that driver/firmware uses the updated parameters for subsequent roaming. This is for drivers that support internal BSS selection and roaming. The new command does not change the current association state, i.e., it can be used to update IE contents for future (re)associations without causing an immediate disassociation or reassociation with the current BSS. This commit implements the required functionality for updating IEs for (Re)Association Request frame only. Other parameters can be added in future when required. 
Signed-off-by: vamsi krishna Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e21d23dcb588..259c9c77fdc1 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -888,6 +888,12 @@ * This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and * %NL80211_ATTR_COOKIE. * + * @NL80211_CMD_UPDATE_CONNECT_PARAMS: Update one or more connect parameters + * for subsequent roaming cases if the driver or firmware uses internal + * BSS selection. This command can be issued only while connected and it + * does not result in a change for the current association. Currently, + * only the %NL80211_ATTR_IE data is used and updated with this command. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1085,6 +1091,8 @@ enum nl80211_commands { NL80211_CMD_SET_MULTICAST_TO_UNICAST, + NL80211_CMD_UPDATE_CONNECT_PARAMS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ -- cgit v1.2.3 From 4fe77d82ef80c77031c9c6f8554cd0dee2aa423a Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 24 Oct 2016 20:32:57 +0800 Subject: skbedit: allow the user to specify bitmask for mark The user may want to use only some bits of the skb mark in his skbedit rules because the remaining part might be used by something else. Introduce the "mask" parameter to the skbedit actor in order to implement such functionality. When the mask is specified, only those bits selected by the latter are really changed by the actor, while the rest is left untouched. Signed-off-by: Antonio Quartulli Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/uapi/linux/tc_act/tc_skbedit.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index a4d00c608d8f..2884425738ce 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -28,6 +28,7 @@ #define SKBEDIT_F_QUEUE_MAPPING 0x2 #define SKBEDIT_F_MARK 0x4 #define SKBEDIT_F_PTYPE 0x8 +#define SKBEDIT_F_MASK 0x10 struct tc_skbedit { tc_gen; @@ -42,6 +43,7 @@ enum { TCA_SKBEDIT_MARK, TCA_SKBEDIT_PAD, TCA_SKBEDIT_PTYPE, + TCA_SKBEDIT_MASK, __TCA_SKBEDIT_MAX }; #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) -- cgit v1.2.3 From a07ea4d9941af5a0c6f0be2a71b51ac9c083c5e5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Oct 2016 14:40:02 +0200 Subject: genetlink: no longer support using static family IDs Static family IDs have never really been used, the only use case was the workaround I introduced for those users that assumed their family ID was also their multicast group ID. Additionally, because static family IDs would never be reserved by the generic netlink code, using a relatively low ID would only work for built-in families that can be registered immediately after generic netlink is started, which is basically only the control family (apart from the workaround code, which I also had to add code for so it would reserve those IDs) Thus, anything other than GENL_ID_GENERATE is flawed and luckily not used except in the cases I mentioned. Move those workarounds into a few lines of code, and then get rid of GENL_ID_GENERATE entirely, making it more robust. Signed-off-by: Johannes Berg Signed-off-by: David S. 
Miller --- include/uapi/linux/genetlink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index 5512c90af7e3..d9b2db4a29c6 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -26,7 +26,6 @@ struct genlmsghdr { /* * List of reserved static generic netlink identifiers: */ -#define GENL_ID_GENERATE 0 #define GENL_ID_CTRL NLMSG_MIN_TYPE #define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) #define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) -- cgit v1.2.3 From 2ae0f17df1cd52aafd1ab0415ea1f1dd56dc0e2a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Oct 2016 14:40:04 +0200 Subject: genetlink: use idr to track families Since generic netlink family IDs are small integers, allocated densely, IDR is an ideal match for lookups. Replace the existing hand-written hash-table with IDR for allocation and lookup. This lets the families only be written to once, during register, since the list_head can be removed and removal of a family won't cause any writes. It also slightly reduces the code size (by about 1.3k on x86-64). Signed-off-by: Johannes Berg Signed-off-by: David S. 
Miller --- include/uapi/linux/genetlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index d9b2db4a29c6..adc899381e0d 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -29,6 +29,8 @@ struct genlmsghdr { #define GENL_ID_CTRL NLMSG_MIN_TYPE #define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) #define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) +/* must be last reserved + 1 */ +#define GENL_START_ALLOC (NLMSG_MIN_TYPE + 3) /************************************************************************** * Controller -- cgit v1.2.3 From ebb676daa1a340ccef25eb769aefc09b79c01f8a Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 27 Oct 2016 11:23:51 +0200 Subject: bpf: Print function name in addition to function id The verifier currently prints raw function ids when printing CALL instructions or when complaining: 5: (85) call 23 unknown func 23 print a meaningful function name instead: 5: (85) call bpf_redirect#23 unknown func bpf_redirect#23 Moves the function documentation to a single comment and renames all helper names in the list to conform to the bpf_ prefix notation so they can be grepped in the kernel source. Signed-off-by: Thomas Graf Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/uapi/linux/bpf.h | 574 ++++++++++++++++++++++++----------------------- 1 file changed, 289 insertions(+), 285 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 374ef582ae18..e2f38e0091b6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -143,297 +143,301 @@ union bpf_attr { }; } __attribute__((aligned(8))); +/* BPF helper function descriptions: + * + * void *bpf_map_lookup_elem(&map, &key) + * Return: Map value or NULL + * + * int bpf_map_update_elem(&map, &key, &value, flags) + * Return: 0 on success or negative error + * + * int bpf_map_delete_elem(&map, &key) + * Return: 0 on success or negative error + * + * int bpf_probe_read(void *dst, int size, void *src) + * Return: 0 on success or negative error + * + * u64 bpf_ktime_get_ns(void) + * Return: current ktime + * + * int bpf_trace_printk(const char *fmt, int fmt_size, ...) + * Return: length of buffer written or negative error + * + * u32 bpf_prandom_u32(void) + * Return: random value + * + * u32 bpf_raw_smp_processor_id(void) + * Return: SMP processor ID + * + * int bpf_skb_store_bytes(skb, offset, from, len, flags) + * store bytes into packet + * @skb: pointer to skb + * @offset: offset within packet from skb->mac_header + * @from: pointer where to copy bytes from + * @len: number of bytes to store into packet + * @flags: bit 0 - if true, recompute skb->csum + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_l3_csum_replace(skb, offset, from, to, flags) + * recompute IP checksum + * @skb: pointer to skb + * @offset: offset within packet where IP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_l4_csum_replace(skb, offset, from, to, flags) + * recompute TCP/UDP checksum + * @skb: pointer to skb + * 
@offset: offset within packet where TCP/UDP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * bit 4 - is pseudo header + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_tail_call(ctx, prog_array_map, index) + * jump into another BPF program + * @ctx: context pointer passed to next program + * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY + * @index: index inside array that selects specific program to run + * Return: 0 on success or negative error + * + * int bpf_clone_redirect(skb, ifindex, flags) + * redirect to another netdev + * @skb: pointer to skb + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: 0 on success or negative error + * + * u64 bpf_get_current_pid_tgid(void) + * Return: current->tgid << 32 | current->pid + * + * u64 bpf_get_current_uid_gid(void) + * Return: current_gid << 32 | current_uid + * + * int bpf_get_current_comm(char *buf, int size_of_buf) + * stores current->comm into buf + * Return: 0 on success or negative error + * + * u32 bpf_get_cgroup_classid(skb) + * retrieve a proc's classid + * @skb: pointer to skb + * Return: classid if != 0 + * + * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) + * Return: 0 on success or negative error + * + * int bpf_skb_vlan_pop(skb) + * Return: 0 on success or negative error + * + * int bpf_skb_get_tunnel_key(skb, key, size, flags) + * int bpf_skb_set_tunnel_key(skb, key, size, flags) + * retrieve or populate tunnel metadata + * @skb: pointer to skb + * @key: pointer to 'struct bpf_tunnel_key' + * @size: size of 'struct bpf_tunnel_key' + * @flags: room for future extensions + * Return: 0 on success or negative error + * + * u64 bpf_perf_event_read(&map, index) + * Return: Number events read or error code + * + * int bpf_redirect(ifindex, flags) + * redirect to another 
netdev + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: TC_ACT_REDIRECT + * + * u32 bpf_get_route_realm(skb) + * retrieve a dst's tclassid + * @skb: pointer to skb + * Return: realm if != 0 + * + * int bpf_perf_event_output(ctx, map, index, data, size) + * output perf raw sample + * @ctx: struct pt_regs* + * @map: pointer to perf_event_array map + * @index: index of event in the map + * @data: data on stack to be output as raw data + * @size: size of data + * Return: 0 on success or negative error + * + * int bpf_get_stackid(ctx, map, flags) + * walk user or kernel stack and return id + * @ctx: struct pt_regs* + * @map: pointer to stack_trace map + * @flags: bits 0-7 - numer of stack frames to skip + * bit 8 - collect user stack instead of kernel + * bit 9 - compare stacks by hash only + * bit 10 - if two different stacks hash into the same stackid + * discard old + * other bits - reserved + * Return: >= 0 stackid on success or negative error + * + * s64 bpf_csum_diff(from, from_size, to, to_size, seed) + * calculate csum diff + * @from: raw from buffer + * @from_size: length of from buffer + * @to: raw to buffer + * @to_size: length of to buffer + * @seed: optional seed + * Return: csum result or negative error code + * + * int bpf_skb_get_tunnel_opt(skb, opt, size) + * retrieve tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: option size + * + * int bpf_skb_set_tunnel_opt(skb, opt, size) + * populate tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: 0 on success or negative error + * + * int bpf_skb_change_proto(skb, proto, flags) + * Change protocol of the skb. Currently supported is v4 -> v6, + * v6 -> v4 transitions. The helper will also resize the skb. 
eBPF + * program is expected to fill the new headers via skb_store_bytes + * and lX_csum_replace. + * @skb: pointer to skb + * @proto: new skb->protocol type + * @flags: reserved + * Return: 0 on success or negative error + * + * int bpf_skb_change_type(skb, type) + * Change packet type of skb. + * @skb: pointer to skb + * @type: new skb->pkt_type type + * Return: 0 on success or negative error + * + * int bpf_skb_under_cgroup(skb, map, index) + * Check cgroup2 membership of skb + * @skb: pointer to skb + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 skb failed the cgroup2 descendant test + * == 1 skb succeeded the cgroup2 descendant test + * < 0 error + * + * u32 bpf_get_hash_recalc(skb) + * Retrieve and possibly recalculate skb->hash. + * @skb: pointer to skb + * Return: hash + * + * u64 bpf_get_current_task(void) + * Returns current task_struct + * Return: current + * + * int bpf_probe_write_user(void *dst, void *src, int len) + * safely attempt to write to a location + * @dst: destination address in userspace + * @src: source address on stack + * @len: number of bytes to copy + * Return: 0 on success or negative error + * + * int bpf_current_task_under_cgroup(map, index) + * Check cgroup2 membership of current task + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 current failed the cgroup2 descendant test + * == 1 current succeeded the cgroup2 descendant test + * < 0 error + * + * int bpf_skb_change_tail(skb, len, flags) + * The helper will resize the skb to the given new size, to be used f.e. + * with control messages. + * @skb: pointer to skb + * @len: new skb length + * @flags: reserved + * Return: 0 on success or negative error + * + * int bpf_skb_pull_data(skb, len) + * The helper will pull in non-linear data in case the skb is non-linear + * and not all of len are part of the linear section. 
Only needed for + * read/write with direct packet access. + * @skb: pointer to skb + * @len: len to make read/writeable + * Return: 0 on success or negative error + * + * s64 bpf_csum_update(skb, csum) + * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. + * @skb: pointer to skb + * @csum: csum to add + * Return: csum on success or negative error + * + * void bpf_set_hash_invalid(skb) + * Invalidate current skb->hash. + * @skb: pointer to skb + * + * int bpf_get_numa_node_id() + * Return: Id of current NUMA node. + */ +#define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ + FN(map_lookup_elem), \ + FN(map_update_elem), \ + FN(map_delete_elem), \ + FN(probe_read), \ + FN(ktime_get_ns), \ + FN(trace_printk), \ + FN(get_prandom_u32), \ + FN(get_smp_processor_id), \ + FN(skb_store_bytes), \ + FN(l3_csum_replace), \ + FN(l4_csum_replace), \ + FN(tail_call), \ + FN(clone_redirect), \ + FN(get_current_pid_tgid), \ + FN(get_current_uid_gid), \ + FN(get_current_comm), \ + FN(get_cgroup_classid), \ + FN(skb_vlan_push), \ + FN(skb_vlan_pop), \ + FN(skb_get_tunnel_key), \ + FN(skb_set_tunnel_key), \ + FN(perf_event_read), \ + FN(redirect), \ + FN(get_route_realm), \ + FN(perf_event_output), \ + FN(skb_load_bytes), \ + FN(get_stackid), \ + FN(csum_diff), \ + FN(skb_get_tunnel_opt), \ + FN(skb_set_tunnel_opt), \ + FN(skb_change_proto), \ + FN(skb_change_type), \ + FN(skb_under_cgroup), \ + FN(get_hash_recalc), \ + FN(get_current_task), \ + FN(probe_write_user), \ + FN(current_task_under_cgroup), \ + FN(skb_change_tail), \ + FN(skb_pull_data), \ + FN(csum_update), \ + FN(set_hash_invalid), \ + FN(get_numa_node_id), + /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call */ +#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x enum bpf_func_id { - BPF_FUNC_unspec, - BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ - BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ - 
BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ - BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ - BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ - BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */ - BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ - BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ - - /** - * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet - * @skb: pointer to skb - * @offset: offset within packet from skb->mac_header - * @from: pointer where to copy bytes from - * @len: number of bytes to store into packet - * @flags: bit 0 - if true, recompute skb->csum - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_skb_store_bytes, - - /** - * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum - * @skb: pointer to skb - * @offset: offset within packet where IP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_l3_csum_replace, - - /** - * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum - * @skb: pointer to skb - * @offset: offset within packet where TCP/UDP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * bit 4 - is pseudo header - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_l4_csum_replace, - - /** - * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program - * @ctx: context pointer passed to next program - * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY - * @index: index inside array that selects specific program to run - * Return: 0 on success - */ - BPF_FUNC_tail_call, - - /** - * bpf_clone_redirect(skb, ifindex, flags) - redirect to another netdev - * 
@skb: pointer to skb - * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_clone_redirect, - - /** - * u64 bpf_get_current_pid_tgid(void) - * Return: current->tgid << 32 | current->pid - */ - BPF_FUNC_get_current_pid_tgid, - - /** - * u64 bpf_get_current_uid_gid(void) - * Return: current_gid << 32 | current_uid - */ - BPF_FUNC_get_current_uid_gid, - - /** - * bpf_get_current_comm(char *buf, int size_of_buf) - * stores current->comm into buf - * Return: 0 on success - */ - BPF_FUNC_get_current_comm, - - /** - * bpf_get_cgroup_classid(skb) - retrieve a proc's classid - * @skb: pointer to skb - * Return: classid if != 0 - */ - BPF_FUNC_get_cgroup_classid, - BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */ - BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */ - - /** - * bpf_skb_[gs]et_tunnel_key(skb, key, size, flags) - * retrieve or populate tunnel metadata - * @skb: pointer to skb - * @key: pointer to 'struct bpf_tunnel_key' - * @size: size of 'struct bpf_tunnel_key' - * @flags: room for future extensions - * Retrun: 0 on success - */ - BPF_FUNC_skb_get_tunnel_key, - BPF_FUNC_skb_set_tunnel_key, - BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */ - /** - * bpf_redirect(ifindex, flags) - redirect to another netdev - * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: TC_ACT_REDIRECT - */ - BPF_FUNC_redirect, - - /** - * bpf_get_route_realm(skb) - retrieve a dst's tclassid - * @skb: pointer to skb - * Return: realm if != 0 - */ - BPF_FUNC_get_route_realm, - - /** - * bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample - * @ctx: struct pt_regs* - * @map: pointer to perf_event_array map - * @index: index of event in the map - * @data: data on stack to be output as raw data - * @size: size of data - 
* Return: 0 on success - */ - BPF_FUNC_perf_event_output, - BPF_FUNC_skb_load_bytes, - - /** - * bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id - * @ctx: struct pt_regs* - * @map: pointer to stack_trace map - * @flags: bits 0-7 - numer of stack frames to skip - * bit 8 - collect user stack instead of kernel - * bit 9 - compare stacks by hash only - * bit 10 - if two different stacks hash into the same stackid - * discard old - * other bits - reserved - * Return: >= 0 stackid on success or negative error - */ - BPF_FUNC_get_stackid, - - /** - * bpf_csum_diff(from, from_size, to, to_size, seed) - calculate csum diff - * @from: raw from buffer - * @from_size: length of from buffer - * @to: raw to buffer - * @to_size: length of to buffer - * @seed: optional seed - * Return: csum result - */ - BPF_FUNC_csum_diff, - - /** - * bpf_skb_[gs]et_tunnel_opt(skb, opt, size) - * retrieve or populate tunnel options metadata - * @skb: pointer to skb - * @opt: pointer to raw tunnel option data - * @size: size of @opt - * Return: 0 on success for set, option size for get - */ - BPF_FUNC_skb_get_tunnel_opt, - BPF_FUNC_skb_set_tunnel_opt, - - /** - * bpf_skb_change_proto(skb, proto, flags) - * Change protocol of the skb. Currently supported is - * v4 -> v6, v6 -> v4 transitions. The helper will also - * resize the skb. eBPF program is expected to fill the - * new headers via skb_store_bytes and lX_csum_replace. - * @skb: pointer to skb - * @proto: new skb->protocol type - * @flags: reserved - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_change_proto, - - /** - * bpf_skb_change_type(skb, type) - * Change packet type of skb. 
- * @skb: pointer to skb - * @type: new skb->pkt_type type - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_change_type, - - /** - * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb - * @skb: pointer to skb - * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type - * @index: index of the cgroup in the bpf_map - * Return: - * == 0 skb failed the cgroup2 descendant test - * == 1 skb succeeded the cgroup2 descendant test - * < 0 error - */ - BPF_FUNC_skb_under_cgroup, - - /** - * bpf_get_hash_recalc(skb) - * Retrieve and possibly recalculate skb->hash. - * @skb: pointer to skb - * Return: hash - */ - BPF_FUNC_get_hash_recalc, - - /** - * u64 bpf_get_current_task(void) - * Returns current task_struct - * Return: current - */ - BPF_FUNC_get_current_task, - - /** - * bpf_probe_write_user(void *dst, void *src, int len) - * safely attempt to write to a location - * @dst: destination address in userspace - * @src: source address on stack - * @len: number of bytes to copy - * Return: 0 on success or negative error - */ - BPF_FUNC_probe_write_user, - - /** - * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task - * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type - * @index: index of the cgroup in the bpf_map - * Return: - * == 0 current failed the cgroup2 descendant test - * == 1 current succeeded the cgroup2 descendant test - * < 0 error - */ - BPF_FUNC_current_task_under_cgroup, - - /** - * bpf_skb_change_tail(skb, len, flags) - * The helper will resize the skb to the given new size, - * to be used f.e. with control messages. - * @skb: pointer to skb - * @len: new skb length - * @flags: reserved - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_change_tail, - - /** - * bpf_skb_pull_data(skb, len) - * The helper will pull in non-linear data in case the - * skb is non-linear and not all of len are part of the - * linear section. 
Only needed for read/write with direct - * packet access. - * @skb: pointer to skb - * @len: len to make read/writeable - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_pull_data, - - /** - * bpf_csum_update(skb, csum) - * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. - * @skb: pointer to skb - * @csum: csum to add - * Return: csum on success or negative error - */ - BPF_FUNC_csum_update, - - /** - * bpf_set_hash_invalid(skb) - * Invalidate current skb>hash. - * @skb: pointer to skb - */ - BPF_FUNC_set_hash_invalid, - - /** - * bpf_get_numa_node_id() - * Returns the id of the current NUMA node. - */ - BPF_FUNC_get_numa_node_id, - + __BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, }; +#undef __BPF_ENUM_FN /* All flags used by eBPF helper functions, placed here. */ -- cgit v1.2.3 From c62cce2caee558e18aa05c01c2fd3b40f07174f2 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 24 Oct 2016 18:29:13 -0700 Subject: net: add an ioctl to get a socket network namespace Each socket operates in a network namespace where it has been created, so if we want to dump and restore a socket, we have to know its network namespace. We have a socket_diag to get information about sockets, it doesn't report sockets which are not bound or connected. This patch introduces a new socket ioctl, which is called SIOCGSKNS and used to get a file descriptor for a socket network namespace. A task must have CAP_NET_ADMIN in a target network namespace to use this ioctl. Cc: "David S. Miller" Cc: Eric W. Biederman Signed-off-by: Andrei Vagin Signed-off-by: David S. 
Miller --- include/uapi/linux/sockios.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h index 8e7890b26d9a..83cc54ce6081 100644 --- a/include/uapi/linux/sockios.h +++ b/include/uapi/linux/sockios.h @@ -84,6 +84,7 @@ #define SIOCWANDEV 0x894A /* get/set netdev parameters */ #define SIOCOUTQNSD 0x894B /* output queue size (not sent only) */ +#define SIOCGSKNS 0x894C /* get socket network namespace */ /* ARP cache control calls. */ /* 0x8950 - 0x8952 * obsolete calls, don't re-use */ -- cgit v1.2.3 From 20861f26e33d76a4f3587bcc866fa1dab3e01094 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 27 Oct 2016 09:05:22 +0800 Subject: driver: tun: Use new macro SOCK_IOC_TYPE instead of literal number 0x89 The current codes use _IOC_TYPE(cmd) == 0x89 to check if the cmd is one socket ioctl command like SIOCGIFHWADDR. But the literal number 0x89 may confuse readers. So create one macro SOCK_IOC_TYPE to enhance the readability. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- include/uapi/linux/sockios.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h index 83cc54ce6081..79d029d25310 100644 --- a/include/uapi/linux/sockios.h +++ b/include/uapi/linux/sockios.h @@ -24,6 +24,8 @@ #define SIOCINQ FIONREAD #define SIOCOUTQ TIOCOUTQ /* output queue size (not sent + not acked) */ +#define SOCK_IOC_TYPE 0x89 + /* Routing table calls. */ #define SIOCADDRT 0x890B /* add routing table entry */ #define SIOCDELRT 0x890C /* delete routing table entry */ -- cgit v1.2.3 From f6d0cbcf09c506b9b022df8f9d7693a7cec3c732 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 24 Oct 2016 16:56:40 +0200 Subject: netfilter: nf_tables: add fib expression Add FIB expression, supported for ipv4, ipv6 and inet family (the latter just dispatches to ipv4 or ipv6 one based on nfproto). 
Currently supports fetching output interface index/name and the rtm_type associated with an address. This can be used for adding path filtering. rtm_type is useful to e.g. enforce a strong-end host model where packets are only accepted if daddr is configured on the interface the packet arrived on. The fib expression is a native nftables alternative to the xtables addrtype and rp_filter matches. FIB result order for oif/oifname retrieval is as follows: - if packet is local (skb has rtable, RTF_LOCAL set, this will also catch looped-back multicast packets), set oif to the loopback interface. - if fib lookup returns an error, or result points to local, store zero result. This means '--local' option of -m rpfilter is not supported. It is possible to use 'fib type local' or add explicit saddr/daddr matching rules to create exceptions if this is really needed. - store result in the destination register. In case of multiple routes, search set for desired oif in case strict matching is requested. The ipv4 and ipv6 fib expressions are supposed to behave the same. [ I have collapsed Arnd Bergmann's ("netfilter: nf_tables: fib warnings") http://patchwork.ozlabs.org/patch/688615/ to address fallout from this patch after rebasing nf-next, that was posted to address compilation warnings.
--pablo ] Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 36 ++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index c6c4477c136b..a054ad2c8853 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1109,6 +1109,42 @@ enum nft_gen_attributes { }; #define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1) +/* + * enum nft_fib_attributes - nf_tables fib expression netlink attributes + * + * @NFTA_FIB_DREG: destination register (NLA_U32) + * @NFTA_FIB_RESULT: desired result (NLA_U32) + * @NFTA_FIB_FLAGS: flowi fields to initialize when querying the FIB (NLA_U32) + * + * The FIB expression performs a route lookup according + * to the packet data. + */ +enum nft_fib_attributes { + NFTA_FIB_UNSPEC, + NFTA_FIB_DREG, + NFTA_FIB_RESULT, + NFTA_FIB_FLAGS, + __NFTA_FIB_MAX +}; +#define NFTA_FIB_MAX (__NFTA_FIB_MAX - 1) + +enum nft_fib_result { + NFT_FIB_RESULT_UNSPEC, + NFT_FIB_RESULT_OIF, + NFT_FIB_RESULT_OIFNAME, + NFT_FIB_RESULT_ADDRTYPE, + __NFT_FIB_RESULT_MAX +}; +#define NFT_FIB_RESULT_MAX (__NFT_FIB_RESULT_MAX - 1) + +enum nft_fib_flags { + NFTA_FIB_F_SADDR = 1 << 0, /* look up src */ + NFTA_FIB_F_DADDR = 1 << 1, /* look up dst */ + NFTA_FIB_F_MARK = 1 << 2, /* use skb->mark */ + NFTA_FIB_F_IIF = 1 << 3, /* restrict to iif */ + NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */ +}; + /** * enum nft_trace_attributes - nf_tables trace netlink attributes * -- cgit v1.2.3 From 2fa841938c648fe4359691f41e8e1f37ff1a3aa2 Mon Sep 17 00:00:00 2001 From: "Anders K. Pedersen" Date: Fri, 28 Oct 2016 05:54:15 +0000 Subject: netfilter: nf_tables: introduce routing expression Introduces an nftables rt expression for routing related data with support for nexthop (i.e. 
the directly connected IP address that an outgoing packet is sent to), which can be used either for matching or accounting, eg. # nft add rule filter postrouting \ ip daddr 192.168.1.0/24 rt nexthop != 192.168.0.1 drop This will drop any traffic to 192.168.1.0/24 that is not routed via 192.168.0.1. # nft add rule filter postrouting \ flow table acct { rt nexthop timeout 600s counter } # nft add rule ip6 filter postrouting \ flow table acct { rt nexthop timeout 600s counter } These rules count outgoing traffic per nexthop. Note that the timeout releases an entry if no traffic is seen for this nexthop within 10 minutes. # nft add rule inet filter postrouting \ ether type ip \ flow table acct { rt nexthop timeout 600s counter } # nft add rule inet filter postrouting \ ether type ip6 \ flow table acct { rt nexthop timeout 600s counter } Same as above, but via the inet family, where the ether type must be specified explicitly. "rt classid" is also implemented identical to "meta rtclassid", since it is more logical to have this match in the routing expression going forward. Signed-off-by: Anders K. 
Pedersen Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index a054ad2c8853..14e5f619167e 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -758,6 +758,19 @@ enum nft_meta_keys { NFT_META_PRANDOM, }; +/** + * enum nft_rt_keys - nf_tables routing expression keys + * + * @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid) + * @NFT_RT_NEXTHOP4: routing nexthop for IPv4 + * @NFT_RT_NEXTHOP6: routing nexthop for IPv6 + */ +enum nft_rt_keys { + NFT_RT_CLASSID, + NFT_RT_NEXTHOP4, + NFT_RT_NEXTHOP6, +}; + /** * enum nft_hash_attributes - nf_tables hash expression netlink attributes * @@ -796,6 +809,20 @@ enum nft_meta_attributes { }; #define NFTA_META_MAX (__NFTA_META_MAX - 1) +/** + * enum nft_rt_attributes - nf_tables routing expression netlink attributes + * + * @NFTA_RT_DREG: destination register (NLA_U32) + * @NFTA_RT_KEY: routing data item to load (NLA_U32: nft_rt_keys) + */ +enum nft_rt_attributes { + NFTA_RT_UNSPEC, + NFTA_RT_DREG, + NFTA_RT_KEY, + __NFTA_RT_MAX +}; +#define NFTA_RT_MAX (__NFTA_RT_MAX - 1) + /** * enum nft_ct_keys - nf_tables ct expression keys * -- cgit v1.2.3 From 06fd3a392bb36ff162d10cb7d5794185b94edb2f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 3 Nov 2016 10:56:17 +0100 Subject: netfilter: deprecate NF_STOP NF_STOP is only used by br_netfilter these days, and it can be emulated with a combination of NF_STOLEN plus explicit call to the ->okfn() function as Florian suggests. To retain binary compatibility with userspace nf_queue application, we have to keep NF_STOP around, so libnetfilter_queue userspace applications still work if they use NF_STOP for some exotic reason.
Out of tree modules using NF_STOP would break, but we don't care about those. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index d93f949d1d9a..7550e9176a54 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -13,7 +13,7 @@ #define NF_STOLEN 2 #define NF_QUEUE 3 #define NF_REPEAT 4 -#define NF_STOP 5 +#define NF_STOP 5 /* Deprecated, for userspace nf_queue compatibility. */ #define NF_MAX_VERDICT NF_STOP /* we overload the higher bits for encoding auxiliary data such as the queue -- cgit v1.2.3 From 70ecc24841326396a827deb55c3fefac582a729d Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 2 Nov 2016 11:02:16 -0400 Subject: ipv4: add IP_RECVFRAGSIZE cmsg The IP stack records the largest fragment of a reassembled packet in IPCB(skb)->frag_max_size. When reading a datagram or raw packet that arrived fragmented, expose the value to allow applications to estimate receive path MTU. Tested: Sent data over a veth pair of which the source has a small mtu. Sent data using netcat, received using a dedicated process. Verified that the cmsg IP_RECVFRAGSIZE is returned only when data arrives fragmented, and in that case matches the veth mtu.
ip link add veth0 type veth peer name veth1 ip netns add from ip netns add to ip link set dev veth1 netns to ip netns exec to ip addr add dev veth1 192.168.10.1/24 ip netns exec to ip link set dev veth1 up ip link set dev veth0 netns from ip netns exec from ip addr add dev veth0 192.168.10.2/24 ip netns exec from ip link set dev veth0 up ip netns exec from ip link set dev veth0 mtu 1300 ip netns exec from ethtool -K veth0 ufo off dd if=/dev/zero bs=1 count=1400 2>/dev/null > payload ip netns exec to ./recv_cmsg_recvfragsize -4 -u -p 6000 & ip netns exec from nc -q 1 -u 192.168.10.1 6000 < payload using github.com/wdebruij/kerneltools/blob/master/tests/recvfragsize.c Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index eaf94919291a..4e557f4e9553 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -117,6 +117,7 @@ struct in_addr { #define IP_NODEFRAG 22 #define IP_CHECKSUM 23 #define IP_BIND_ADDRESS_NO_PORT 24 +#define IP_RECVFRAGSIZE 25 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ -- cgit v1.2.3 From 0cc0aa614b4c24b21b2492c0a1753035ee8c6edb Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 2 Nov 2016 11:02:17 -0400 Subject: ipv6: add IPV6_RECVFRAGSIZE cmsg When reading a datagram or raw packet that arrived fragmented, expose the maximum fragment size if recorded to allow applications to estimate receive path MTU. At this point, the field is only recorded when ipv6 connection tracking is enabled. A follow-up patch will record this field also in the ipv6 input path. 
Tested using the test for IP_RECVFRAGSIZE plus ip netns exec to ip addr add dev veth1 fc07::1/64 ip netns exec from ip addr add dev veth0 fc07::2/64 ip netns exec to ./recv_cmsg_recvfragsize -6 -u -p 6000 & ip netns exec from nc -q 1 -u fc07::1 6000 < payload Both with and without enabling connection tracking ip6tables -A INPUT -m state --state NEW -p udp -j LOG Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/in6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index b39ea4f2e701..46444f8fbee4 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -283,6 +283,7 @@ struct in6_flowlabel_req { #define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR #define IPV6_TRANSPARENT 75 #define IPV6_UNICAST_IF 76 +#define IPV6_RECVFRAGSIZE 77 /* * Multicast Routing: -- cgit v1.2.3 From 5976c5f45c40588b90dda173ded9010917f8f45e Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 3 Nov 2016 13:24:21 +0100 Subject: net/sched: cls_flower: Support matching on SCTP ports Support matching on SCTP ports in the same way that matching on TCP and UDP ports is already supported. Example usage: tc qdisc add dev eth0 ingress tc filter add dev eth0 protocol ip parent ffff: \ flower indev eth0 ip_proto sctp dst_port 80 \ action drop Signed-off-by: Simon Horman Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_cls.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 8fd715f806a2..eb94781757ee 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -447,6 +447,11 @@ enum { TCA_FLOWER_KEY_TCP_DST_MASK, /* be16 */ TCA_FLOWER_KEY_UDP_SRC_MASK, /* be16 */ TCA_FLOWER_KEY_UDP_DST_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST_MASK, /* be16 */ + + TCA_FLOWER_KEY_SCTP_SRC, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST, /* be16 */ __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From 622ec2c9d52405973c9f1ca5116eb1c393adfc7d Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 4 Nov 2016 02:23:42 +0900 Subject: net: core: add UID to flows, rules, and routes - Define a new FIB rule attribute, FRA_UID_RANGE, to describe a range of UIDs. - Define a RTA_UID attribute for per-UID route lookups and dumps. - Support passing these attributes to and from userspace via rtnetlink. The value INVALID_UID indicates no UID was specified. - Add a UID field to the flow structures. Signed-off-by: Lorenzo Colitti Signed-off-by: David S.
Miller --- include/uapi/linux/fib_rules.h | 6 ++++++ include/uapi/linux/rtnetlink.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 14404b3ebb89..bbf02a63a011 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -29,6 +29,11 @@ struct fib_rule_hdr { __u32 flags; }; +struct fib_rule_uid_range { + __u32 start; + __u32 end; +}; + enum { FRA_UNSPEC, FRA_DST, /* destination address */ @@ -51,6 +56,7 @@ enum { FRA_OIFNAME, FRA_PAD, FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ + FRA_UID_RANGE, /* UID range */ __FRA_MAX }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 5a78be518101..e14377f2ec27 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -318,6 +318,7 @@ enum rtattr_type_t { RTA_ENCAP, RTA_EXPIRES, RTA_PAD, + RTA_UID, __RTA_MAX }; -- cgit v1.2.3 From f4d997fd613001e612543339e0275c037f94ffe9 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Mon, 7 Nov 2016 15:14:39 +0200 Subject: net/sched: cls_flower: Add UDP port to tunnel parameters The current IP tunneling classification supports only IP addresses and key. Enhance UDP based IP tunneling classification parameters by adding UDP src and dst port. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_cls.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index eb94781757ee..86786d45ee66 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -452,6 +452,11 @@ enum { TCA_FLOWER_KEY_SCTP_SRC, /* be16 */ TCA_FLOWER_KEY_SCTP_DST, /* be16 */ + + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, /* be16 */ __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From 75bfbca01e48d2d62e8321609ae32aaf6c6fab0e Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Mon, 7 Nov 2016 15:14:41 +0200 Subject: net/sched: act_tunnel_key: Add UDP dst port option The current tunnel set action supports only IP addresses and key options. Add UDP dst port option. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/tc_tunnel_key.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h index 890106ff16e6..84ea55e1076b 100644 --- a/include/uapi/linux/tc_act/tc_tunnel_key.h +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -33,6 +33,7 @@ enum { TCA_TUNNEL_KEY_ENC_IPV6_DST, /* struct in6_addr */ TCA_TUNNEL_KEY_ENC_KEY_ID, /* be64 */ TCA_TUNNEL_KEY_PAD, + TCA_TUNNEL_KEY_ENC_DST_PORT, /* be16 */ __TCA_TUNNEL_KEY_MAX, }; -- cgit v1.2.3 From 3f11ec045fecf2c0fb21f08f68ebc9237bd1d03c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Mon, 7 Nov 2016 20:39:24 +0000 Subject: net: l2tp: change L2TP_ATTR_UDP_ZERO_CSUM6_{RX, TX} attribute types The attributes L2TP_ATTR_UDP_ZERO_CSUM6_RX and L2TP_ATTR_UDP_ZERO_CSUM6_TX are used as flags, but are defined as a u8 in a comment. This patch redocuments them as flags.
Adding nla_policy entries would break API, so not doing that. CC: Tom Herbert Signed-off-by: Asbjoern Sloth Toennesen Signed-off-by: David S. Miller --- include/uapi/linux/l2tp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 4bd27d0270a2..5daa48e2571e 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -124,8 +124,8 @@ enum { L2TP_ATTR_STATS, /* nested */ L2TP_ATTR_IP6_SADDR, /* struct in6_addr */ L2TP_ATTR_IP6_DADDR, /* struct in6_addr */ - L2TP_ATTR_UDP_ZERO_CSUM6_TX, /* u8 */ - L2TP_ATTR_UDP_ZERO_CSUM6_RX, /* u8 */ + L2TP_ATTR_UDP_ZERO_CSUM6_TX, /* flag */ + L2TP_ATTR_UDP_ZERO_CSUM6_RX, /* flag */ L2TP_ATTR_PAD, __L2TP_ATTR_MAX, }; -- cgit v1.2.3 From 1ababeba4a21f3dba3da3523c670b207fb2feb62 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:39 +0100 Subject: ipv6: implement dataplane support for rthdr type 4 (Segment Routing Header) Implement minimal support for processing of SR-enabled packets as described in https://tools.ietf.org/html/draft-ietf-6man-segment-routing-header-02. This patch implements the following operations: - Intermediate segment endpoint: incrementation of active segment and rerouting. - Egress for SR-encapsulated packets: decapsulation of outer IPv6 header + SRH and routing of inner packet. - Cleanup flag support for SR-inlined packets: removal of SRH if we are the penultimate segment endpoint. A per-interface sysctl seg6_enabled is provided, to accept/deny SR-enabled packets. Default is deny. This patch does not provide support for HMAC-signed packets. Signed-off-by: David Lebrun Signed-off-by: David S. 
Miller --- include/uapi/linux/ipv6.h | 2 ++ include/uapi/linux/seg6.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 include/uapi/linux/seg6.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 8c2772340c3f..7ff1d654e333 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -39,6 +39,7 @@ struct in6_ifreq { #define IPV6_SRCRT_STRICT 0x01 /* Deprecated; will be removed */ #define IPV6_SRCRT_TYPE_0 0 /* Deprecated; will be removed */ #define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */ +#define IPV6_SRCRT_TYPE_4 4 /* Segment Routing with IPv6 */ /* * routing header @@ -178,6 +179,7 @@ enum { DEVCONF_DROP_UNSOLICITED_NA, DEVCONF_KEEP_ADDR_ON_DOWN, DEVCONF_RTR_SOLICIT_MAX_INTERVAL, + DEVCONF_SEG6_ENABLED, DEVCONF_MAX }; diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h new file mode 100644 index 000000000000..c396a8052f73 --- /dev/null +++ b/include/uapi/linux/seg6.h @@ -0,0 +1,54 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _UAPI_LINUX_SEG6_H +#define _UAPI_LINUX_SEG6_H + +/* + * SRH + */ +struct ipv6_sr_hdr { + __u8 nexthdr; + __u8 hdrlen; + __u8 type; + __u8 segments_left; + __u8 first_segment; + __u8 flag_1; + __u8 flag_2; + __u8 reserved; + + struct in6_addr segments[0]; +}; + +#define SR6_FLAG1_CLEANUP (1 << 7) +#define SR6_FLAG1_PROTECTED (1 << 6) +#define SR6_FLAG1_OAM (1 << 5) +#define SR6_FLAG1_ALERT (1 << 4) +#define SR6_FLAG1_HMAC (1 << 3) + +#define SR6_TLV_INGRESS 1 +#define SR6_TLV_EGRESS 2 +#define SR6_TLV_OPAQUE 3 +#define SR6_TLV_PADDING 4 +#define SR6_TLV_HMAC 5 + +#define sr_has_cleanup(srh) ((srh)->flag_1 & SR6_FLAG1_CLEANUP) +#define sr_has_hmac(srh) ((srh)->flag_1 & SR6_FLAG1_HMAC) + +struct sr6_tlv { + __u8 type; + __u8 len; + __u8 data[0]; +}; + +#endif -- cgit v1.2.3 From 915d7e5e5930b4f01d0971d93b9b25ed17d221aa Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:40 +0100 Subject: ipv6: sr: add code base for control plane support of SR-IPv6 This patch adds the necessary hooks and structures to provide support for SR-IPv6 control plane, essentially the Generic Netlink commands that will be used for userspace control over the Segment Routing kernel structures. The genetlink commands provide control over two different structures: tunnel source and HMAC data. The tunnel source is the source address that will be used by default when encapsulating packets into an outer IPv6 header + SRH. If the tunnel source is set to :: then an address of the outgoing interface will be selected as the source. The HMAC commands currently just return ENOTSUPP and will be implemented in a future patch. Signed-off-by: David Lebrun Signed-off-by: David S. 
Miller --- include/uapi/linux/seg6_genl.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/uapi/linux/seg6_genl.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/seg6_genl.h b/include/uapi/linux/seg6_genl.h new file mode 100644 index 000000000000..fcf1c60d7df3 --- /dev/null +++ b/include/uapi/linux/seg6_genl.h @@ -0,0 +1,32 @@ +#ifndef _UAPI_LINUX_SEG6_GENL_H +#define _UAPI_LINUX_SEG6_GENL_H + +#define SEG6_GENL_NAME "SEG6" +#define SEG6_GENL_VERSION 0x1 + +enum { + SEG6_ATTR_UNSPEC, + SEG6_ATTR_DST, + SEG6_ATTR_DSTLEN, + SEG6_ATTR_HMACKEYID, + SEG6_ATTR_SECRET, + SEG6_ATTR_SECRETLEN, + SEG6_ATTR_ALGID, + SEG6_ATTR_HMACINFO, + __SEG6_ATTR_MAX, +}; + +#define SEG6_ATTR_MAX (__SEG6_ATTR_MAX - 1) + +enum { + SEG6_CMD_UNSPEC, + SEG6_CMD_SETHMAC, + SEG6_CMD_DUMPHMAC, + SEG6_CMD_SET_TUNSRC, + SEG6_CMD_GET_TUNSRC, + __SEG6_CMD_MAX, +}; + +#define SEG6_CMD_MAX (__SEG6_CMD_MAX - 1) + +#endif -- cgit v1.2.3 From 6c8702c60b88651072460f3f4026c7dfe2521d12 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:41 +0100 Subject: ipv6: sr: add support for SRH encapsulation and injection with lwtunnels This patch creates a new type of interfaceless lightweight tunnel (SEG6), enabling the encapsulation and injection of SRH within locally emitted packets and forwarded packets. From a configuration viewpoint, a seg6 tunnel would be configured as follows: ip -6 ro ad fc00::1/128 encap seg6 mode encap segs fc42::1,fc42::2,fc42::3 dev eth0 Any packet whose destination address is fc00::1 would thus be encapsulated within an outer IPv6 header containing the SRH with three segments, and would actually be routed to the first segment of the list. If `mode inline' was specified instead of `mode encap', then the SRH would be directly inserted after the IPv6 header without outer encapsulation. The inline mode is only available if CONFIG_IPV6_SEG6_INLINE is enabled.
This feature was made configurable because direct header insertion may break several mechanisms such as PMTUD or IPSec AH. Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/uapi/linux/lwtunnel.h | 1 + include/uapi/linux/seg6_iptunnel.h | 44 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 include/uapi/linux/seg6_iptunnel.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index a478fe80e203..453cc6215bfd 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -9,6 +9,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_IP, LWTUNNEL_ENCAP_ILA, LWTUNNEL_ENCAP_IP6, + LWTUNNEL_ENCAP_SEG6, __LWTUNNEL_ENCAP_MAX, }; diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h new file mode 100644 index 000000000000..0f7dbd280a9c --- /dev/null +++ b/include/uapi/linux/seg6_iptunnel.h @@ -0,0 +1,44 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _UAPI_LINUX_SEG6_IPTUNNEL_H +#define _UAPI_LINUX_SEG6_IPTUNNEL_H + +enum { + SEG6_IPTUNNEL_UNSPEC, + SEG6_IPTUNNEL_SRH, + __SEG6_IPTUNNEL_MAX, +}; +#define SEG6_IPTUNNEL_MAX (__SEG6_IPTUNNEL_MAX - 1) + +struct seg6_iptunnel_encap { + int mode; + struct ipv6_sr_hdr srh[0]; +}; + +#define SEG6_IPTUN_ENCAP_SIZE(x) ((sizeof(*x)) + (((x)->srh->hdrlen + 1) << 3)) + +enum { + SEG6_IPTUN_MODE_INLINE, + SEG6_IPTUN_MODE_ENCAP, +}; + +static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) +{ + int encap = (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP); + + return ((tuninfo->srh->hdrlen + 1) << 3) + + (encap * sizeof(struct ipv6hdr)); +} + +#endif -- cgit v1.2.3 From bf355b8d2c30a289232042cacc1cfaea4923936c Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:42 +0100 Subject: ipv6: sr: add core files for SR HMAC support This patch adds the necessary functions to compute and check the HMAC signature of an SR-enabled packet. Two HMAC algorithms are supported: hmac(sha1) and hmac(sha256). In order to avoid dynamic memory allocation for each HMAC computation, a per-cpu ring buffer is allocated for this purpose. A new per-interface sysctl called seg6_require_hmac is added, allowing a user-defined policy for processing HMAC-signed SR-enabled packets. A value of -1 means that the HMAC field will always be ignored. A value of 0 means that if an HMAC field is present, its validity will be enforced (the packet is dropped if the signature is incorrect). Finally, a value of 1 means that any SR-enabled packet that does not contain an HMAC signature or whose signature is incorrect will be dropped. Signed-off-by: David Lebrun Signed-off-by: David S.
Miller --- include/uapi/linux/ipv6.h | 1 + include/uapi/linux/seg6_hmac.h | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 include/uapi/linux/seg6_hmac.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 7ff1d654e333..53561be1ac21 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -180,6 +180,7 @@ enum { DEVCONF_KEEP_ADDR_ON_DOWN, DEVCONF_RTR_SOLICIT_MAX_INTERVAL, DEVCONF_SEG6_ENABLED, + DEVCONF_SEG6_REQUIRE_HMAC, DEVCONF_MAX }; diff --git a/include/uapi/linux/seg6_hmac.h b/include/uapi/linux/seg6_hmac.h new file mode 100644 index 000000000000..b652dfd51bc5 --- /dev/null +++ b/include/uapi/linux/seg6_hmac.h @@ -0,0 +1,21 @@ +#ifndef _UAPI_LINUX_SEG6_HMAC_H +#define _UAPI_LINUX_SEG6_HMAC_H + +#include <linux/seg6.h> + +#define SEG6_HMAC_SECRET_LEN 64 +#define SEG6_HMAC_FIELD_LEN 32 + +struct sr6_tlv_hmac { + struct sr6_tlv tlvhdr; + __u16 reserved; + __be32 hmackeyid; + __u8 hmac[SEG6_HMAC_FIELD_LEN]; +}; + +enum { + SEG6_HMAC_ALGO_SHA1 = 1, + SEG6_HMAC_ALGO_SHA256 = 2, +}; + +#endif -- cgit v1.2.3 From 91820da6ae85904d95ed53bf3a83f9ec44a6b80a Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 10 Nov 2016 16:28:23 +0100 Subject: openvswitch: add Ethernet push and pop actions It's not allowed to push Ethernet header in front of another Ethernet header. It's not allowed to pop Ethernet header if there's a vlan tag. This preserves the invariant that L3 packet never has a vlan tag. Based on previous versions by Lorand Jakab and Simon Horman. Signed-off-by: Lorand Jakab Signed-off-by: Simon Horman Signed-off-by: Jiri Benc Acked-by: Pravin B Shelar Signed-off-by: David S.
Miller --- include/uapi/linux/openvswitch.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 59ed3992c760..375d812fea36 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -705,6 +705,15 @@ enum ovs_nat_attr { #define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1) +/* + * struct ovs_action_push_eth - %OVS_ACTION_ATTR_PUSH_ETH action argument. + * @addresses: Source and destination MAC addresses. + * @eth_type: Ethernet type + */ +struct ovs_action_push_eth { + struct ovs_key_ethernet addresses; +}; + /** * enum ovs_action_attr - Action types. * @@ -738,6 +747,10 @@ enum ovs_nat_attr { * is no MPLS label stack, as determined by ethertype, no action is taken. * @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related * entries in the flow key. + * @OVS_ACTION_ATTR_PUSH_ETH: Push a new outermost Ethernet header onto the + * packet. + * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the + * packet. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -765,6 +778,8 @@ enum ovs_action_attr { * bits. */ OVS_ACTION_ATTR_CT, /* Nested OVS_CT_ATTR_* . */ OVS_ACTION_ATTR_TRUNC, /* u32 struct ovs_action_trunc. */ + OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */ + OVS_ACTION_ATTR_POP_ETH, /* No argument. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ -- cgit v1.2.3 From 29ba732acbeece1e34c68483d1ec1f3720fa1bb3 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 11 Nov 2016 10:55:09 -0800 Subject: bpf: Add BPF_MAP_TYPE_LRU_HASH Provide a LRU version of the existing BPF_MAP_TYPE_HASH. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/uapi/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e2f38e0091b6..ed8c6799fb14 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -85,6 +85,7 @@ enum bpf_map_type { BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_STACK_TRACE, BPF_MAP_TYPE_CGROUP_ARRAY, + BPF_MAP_TYPE_LRU_HASH, }; enum bpf_prog_type { @@ -106,6 +107,13 @@ enum bpf_prog_type { #define BPF_EXIST 2 /* update existing element */ #define BPF_F_NO_PREALLOC (1U << 0) +/* Instead of having one common LRU list in the + * BPF_MAP_TYPE_LRU_HASH map, use a percpu LRU list + * which can scale and perform better. + * Note, the LRU nodes (including free nodes) cannot be moved + * across different LRU lists. + */ +#define BPF_F_NO_COMMON_LRU (1U << 1) union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ -- cgit v1.2.3 From 8f8449384ec364ba2a654f11f94e754e4ff719e0 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 11 Nov 2016 10:55:10 -0800 Subject: bpf: Add BPF_MAP_TYPE_LRU_PERCPU_HASH Provide a LRU version of the existing BPF_MAP_TYPE_PERCPU_HASH Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ed8c6799fb14..7d9b2832c280 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -86,6 +86,7 @@ enum bpf_map_type { BPF_MAP_TYPE_STACK_TRACE, BPF_MAP_TYPE_CGROUP_ARRAY, BPF_MAP_TYPE_LRU_HASH, + BPF_MAP_TYPE_LRU_PERCPU_HASH, }; enum bpf_prog_type { @@ -108,7 +109,7 @@ enum bpf_prog_type { #define BPF_F_NO_PREALLOC (1U << 0) /* Instead of having one common LRU list in the - * BPF_MAP_TYPE_LRU_HASH map, use a percpu LRU list + * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list * which can scale and perform better. * Note, the LRU nodes (including free nodes) cannot be moved * across different LRU lists. -- cgit v1.2.3 From 0d27f4e437e448c4ff440a31567b9729d1634d66 Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Thu, 17 Nov 2016 13:07:20 +0100 Subject: ethtool: (uapi) Add ETHTOOL_PHY_GTUNABLE and ETHTOOL_PHY_STUNABLE Defines a generic API to get/set phy tunables. The API is using the existing ethtool_tunable/tunable_type_id types which are already being used for mac level tunables. Signed-off-by: Raju Lakkaraju Reviewed-by: Andrew Lunn Signed-off-by: Allan W. Nielsen Signed-off-by: David S.
Miller --- include/uapi/linux/ethtool.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 8e547231c1b7..42f696f139ec 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -248,6 +248,16 @@ struct ethtool_tunable { void *data[0]; }; +enum phy_tunable_id { + ETHTOOL_PHY_ID_UNSPEC, + + /* + * Add your fresh new phy tunable attribute above and remember to update + * phy_tunable_strings[] in net/core/ethtool.c + */ + __ETHTOOL_PHY_TUNABLE_COUNT, +}; + /** * struct ethtool_regs - hardware register dump * @cmd: Command number = %ETHTOOL_GREGS @@ -548,6 +558,7 @@ struct ethtool_pauseparam { * @ETH_SS_FEATURES: Device feature names * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS + * @ETH_SS_PHY_TUNABLES: PHY tunable names */ enum ethtool_stringset { ETH_SS_TEST = 0, @@ -558,6 +569,7 @@ enum ethtool_stringset { ETH_SS_RSS_HASH_FUNCS, ETH_SS_TUNABLES, ETH_SS_PHY_STATS, + ETH_SS_PHY_TUNABLES, }; /** @@ -1313,7 +1325,8 @@ struct ethtool_per_queue_op { #define ETHTOOL_GLINKSETTINGS 0x0000004c /* Get ethtool_link_settings */ #define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */ - +#define ETHTOOL_PHY_GTUNABLE 0x0000004e /* Get PHY tunable configuration */ +#define ETHTOOL_PHY_STUNABLE 0x0000004f /* Set PHY tunable configuration */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET -- cgit v1.2.3 From 607c7029146790201e90b58c4235ddff0304d6e0 Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Thu, 17 Nov 2016 13:07:22 +0100 Subject: ethtool: (uapi) Add ETHTOOL_PHY_DOWNSHIFT to PHY tunables For operation in cabling environments that are incompatible with 1000BASE-T, PHY device may provide an automatic link speed downshift operation. 
When enabled, the device automatically changes its 1000BASE-T auto-negotiation to the next slower speed after a configured number of failed attempts at 1000BASE-T. This feature is useful in setting up in networks using older cable installations that include only pairs A and B, and not pairs C and D. Signed-off-by: Raju Lakkaraju Signed-off-by: Allan W. Nielsen Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 42f696f139ec..f0db7788f887 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -248,9 +248,12 @@ struct ethtool_tunable { void *data[0]; }; +#define DOWNSHIFT_DEV_DEFAULT_COUNT 0xff +#define DOWNSHIFT_DEV_DISABLE 0 + enum phy_tunable_id { ETHTOOL_PHY_ID_UNSPEC, - + ETHTOOL_PHY_DOWNSHIFT, /* * Add your fresh new phy tunable attribute above and remember to update * phy_tunable_strings[] in net/core/ethtool.c -- cgit v1.2.3 From 5e9235853d652a295d5f56cb8652950b6b5bf56b Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 21 Nov 2016 13:03:24 +0100 Subject: bridge: mcast: add IGMPv3 query support This patch adds basic support for IGMPv3 queries, the default is IGMPv2 as before. A new multicast option - multicast_igmp_version, adds the ability to change it between 2 and 3 via netlink and sysfs. The option struct member is in a 4 byte hole in net_bridge. There are also a few minor style adjustments in br_multicast_new_group and br_multicast_add_group. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S.
Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b4fba662cd32..325d2601150d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -275,6 +275,7 @@ enum { IFLA_BR_PAD, IFLA_BR_VLAN_STATS_ENABLED, IFLA_BR_MCAST_STATS_ENABLED, + IFLA_BR_MCAST_IGMP_VERSION, __IFLA_BR_MAX, }; -- cgit v1.2.3 From aa2ae3e71c74cc00ec22f133dc900b3817415785 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 21 Nov 2016 13:03:25 +0100 Subject: bridge: mcast: add MLDv2 querier support This patch adds basic support for MLDv2 queries, the default is MLDv1 as before. A new multicast option - multicast_mld_version, adds the ability to change it between 1 and 2 via netlink and sysfs. The MLD option is disabled if CONFIG_IPV6 is disabled. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 325d2601150d..92b2d4928bf1 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -276,6 +276,7 @@ enum { IFLA_BR_VLAN_STATS_ENABLED, IFLA_BR_MCAST_STATS_ENABLED, IFLA_BR_MCAST_IGMP_VERSION, + IFLA_BR_MCAST_MLD_VERSION, __IFLA_BR_MAX, }; -- cgit v1.2.3 From 59bfde01fab0c4550778cd53e8d266f1dfddf7b7 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 22 Nov 2016 23:09:57 +0200 Subject: devlink: Add E-Switch inline mode control Some HWs need the VF driver to put part of the packet headers on the TX descriptor so the e-switch can do proper matching and steering. The supported modes: none, link, network, transport. Signed-off-by: Roi Dayan Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/uapi/linux/devlink.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 915bfa74458c..9014c33d4e77 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -102,6 +102,13 @@ enum devlink_eswitch_mode { DEVLINK_ESWITCH_MODE_SWITCHDEV, }; +enum devlink_eswitch_inline_mode { + DEVLINK_ESWITCH_INLINE_MODE_NONE, + DEVLINK_ESWITCH_INLINE_MODE_LINK, + DEVLINK_ESWITCH_INLINE_MODE_NETWORK, + DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT, +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -133,6 +140,7 @@ enum devlink_attr { DEVLINK_ATTR_SB_OCC_CUR, /* u32 */ DEVLINK_ATTR_SB_OCC_MAX, /* u32 */ DEVLINK_ATTR_ESWITCH_MODE, /* u16 */ + DEVLINK_ATTR_ESWITCH_INLINE_MODE, /* u8 */ /* add new attributes above here, update the policy in devlink.c */ -- cgit v1.2.3 From 0e33661de493db325435d565a4a722120ae4cbf3 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 23 Nov 2016 16:52:25 +0100 Subject: bpf: add new prog type for cgroup socket filtering This program type is similar to BPF_PROG_TYPE_SOCKET_FILTER, except that it does not allow BPF_LD_[ABS|IND] instructions and hooks up the bpf_skb_load_bytes() helper. Programs of this type will be attached to cgroups for network filtering and accounting. Signed-off-by: Daniel Mack Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7d9b2832c280..5ae679fac993 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -98,8 +98,17 @@ enum bpf_prog_type { BPF_PROG_TYPE_TRACEPOINT, BPF_PROG_TYPE_XDP, BPF_PROG_TYPE_PERF_EVENT, + BPF_PROG_TYPE_CGROUP_SKB, }; +enum bpf_attach_type { + BPF_CGROUP_INET_INGRESS, + BPF_CGROUP_INET_EGRESS, + __MAX_BPF_ATTACH_TYPE +}; + +#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ -- cgit v1.2.3 From f4324551489e8781d838f941b7aee4208e52e8bf Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 23 Nov 2016 16:52:27 +0100 Subject: bpf: add BPF_PROG_ATTACH and BPF_PROG_DETACH commands Extend the bpf(2) syscall by two new commands, BPF_PROG_ATTACH and BPF_PROG_DETACH which allow attaching and detaching eBPF programs to a target. On the API level, the target could be anything that has an fd in userspace, hence the name of the field in union bpf_attr is called 'target_fd'. When called with BPF_ATTACH_TYPE_CGROUP_INET_{E,IN}GRESS, the target is expected to be a valid file descriptor of a cgroup v2 directory which has the bpf controller enabled. These are the only use-cases implemented by this patch at this point, but more can be added. If a program of the given type already exists in the given cgroup, the program is swapped atomically, so userspace does not have to drop an existing program first before installing a new one, which would otherwise leave a gap in which no program is attached. For more information on the propagation logic to subcgroups, please refer to the bpf cgroup controller implementation. The API is guarded by CAP_NET_ADMIN. Signed-off-by: Daniel Mack Acked-by: Alexei Starovoitov Signed-off-by: David S.
Miller --- include/uapi/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5ae679fac993..1370a9d1456f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -73,6 +73,8 @@ enum bpf_cmd { BPF_PROG_LOAD, BPF_OBJ_PIN, BPF_OBJ_GET, + BPF_PROG_ATTACH, + BPF_PROG_DETACH, }; enum bpf_map_type { @@ -159,6 +161,12 @@ union bpf_attr { __aligned_u64 pathname; __u32 bpf_fd; }; + + struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ + __u32 target_fd; /* container object to attach to */ + __u32 attach_bpf_fd; /* eBPF program to attach */ + __u32 attach_type; + }; } __attribute__((aligned(8))); /* BPF helper function descriptions: -- cgit v1.2.3 From efd90174167530c67a54273fd5d8369c87f9bd32 Mon Sep 17 00:00:00 2001 From: Francis Yan Date: Sun, 27 Nov 2016 23:07:17 -0800 Subject: tcp: export sender limits chronographs to TCP_INFO This patch exports all the sender chronograph measurements collected in the previous patches to TCP_INFO interface. Note that busy time exported includes all the other sending limits (rwnd-limited, sndbuf-limited). Internally the time unit is jiffy but externally the measurements are in microseconds for future extensions. Signed-off-by: Francis Yan Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- include/uapi/linux/tcp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 73ac0db487f8..2863b661d6e1 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -214,6 +214,10 @@ struct tcp_info { __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ __u64 tcpi_delivery_rate; + + __u64 tcpi_busy_time; /* Time (usec) busy sending data */ + __u64 tcpi_rwnd_limited; /* Time (usec) limited by receive window */ + __u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */ }; /* for TCP_MD5SIG socket option */ -- cgit v1.2.3 From 1c885808e45601b2b6f68b30ac1d999e10b6f606 Mon Sep 17 00:00:00 2001 From: Francis Yan Date: Sun, 27 Nov 2016 23:07:18 -0800 Subject: tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING This patch exports the sender chronograph stats via the socket SO_TIMESTAMPING channel. Currently we can instrument how long a particular application unit of data was queued in TCP by tracking SOF_TIMESTAMPING_TX_SOFTWARE and SOF_TIMESTAMPING_TX_SCHED. Having these sender chronograph stats exported simultaneously along with these timestamps allow further breaking down the various sender limitation. For example, a video server can tell if a particular chunk of video on a connection takes a long time to deliver because TCP was experiencing small receive window. It is not possible to tell before this patch without packet traces. To prepare these stats, the user needs to set SOF_TIMESTAMPING_OPT_STATS and SOF_TIMESTAMPING_OPT_TSONLY flags while requesting other SOF_TIMESTAMPING TX timestamps. When the timestamps are available in the error queue, the stats are returned in a separate control message of type SCM_TIMESTAMPING_OPT_STATS, in a list of TLVs (struct nlattr) of types: TCP_NLA_BUSY_TIME, TCP_NLA_RWND_LIMITED, TCP_NLA_SNDBUF_LIMITED. Unit is microsecond. 
Signed-off-by: Francis Yan Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/uapi/linux/net_tstamp.h | 3 ++- include/uapi/linux/tcp.h | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 264e515de16f..464dcca5ed68 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -25,8 +25,9 @@ enum { SOF_TIMESTAMPING_TX_ACK = (1<<9), SOF_TIMESTAMPING_OPT_CMSG = (1<<10), SOF_TIMESTAMPING_OPT_TSONLY = (1<<11), + SOF_TIMESTAMPING_OPT_STATS = (1<<12), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TSONLY, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_STATS, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 2863b661d6e1..c53de2691cec 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -220,6 +220,14 @@ struct tcp_info { __u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */ }; +/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ +enum { + TCP_NLA_PAD, + TCP_NLA_BUSY, /* Time (usec) busy sending data */ + TCP_NLA_RWND_LIMITED, /* Time (usec) limited by receive window */ + TCP_NLA_SNDBUF_LIMITED, /* Time (usec) limited by send buffer */ +}; + /* for TCP_MD5SIG socket option */ #define TCP_MD5SIG_MAXKEYLEN 80 -- cgit v1.2.3 From 85de8576a0b14aecc99136cfbf90e367fa2142cb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Nov 2016 23:16:54 +0100 Subject: bpf, xdp: allow to pass flags to dev_change_xdp_fd Add an IFLA_XDP_FLAGS attribute that can be passed for setting up XDP along with IFLA_XDP_FD, which eventually allows user space to implement typical add/replace/delete logic for programs. Right now, calling into dev_change_xdp_fd() will always replace previous programs. 
When passed XDP_FLAGS_UPDATE_IF_NOEXIST, we can handle this more gracefully when requested by returning -EBUSY in case we try to attach a new program, but we find that another one is already attached. This will be used by the upcoming front-end for iproute2 as well. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 92b2d4928bf1..6b13e591abc9 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -876,10 +876,14 @@ enum { /* XDP section */ +#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) +#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST) + enum { IFLA_XDP_UNSPEC, IFLA_XDP_FD, IFLA_XDP_ATTACHED, + IFLA_XDP_FLAGS, __IFLA_XDP_MAX, }; -- cgit v1.2.3 From 3a0af8fd61f90920f6fa04e4f1e9a6a73c1b4fd2 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 30 Nov 2016 17:10:10 +0100 Subject: bpf: BPF for lightweight tunnel infrastructure Registers new BPF program types which correspond to the LWT hooks: - BPF_PROG_TYPE_LWT_IN => dst_input() - BPF_PROG_TYPE_LWT_OUT => dst_output() - BPF_PROG_TYPE_LWT_XMIT => lwtunnel_xmit() The separate program types are required to differentiate between the capabilities each LWT hook allows: * Programs attached to dst_input() or dst_output() are restricted and may only read the data of an skb. This prevents modification and possible invalidation of already validated packet headers on receive and the construction of illegal headers while the IP headers are still being assembled. * Programs attached to lwtunnel_xmit() are allowed to modify packet content as well as prepending an L2 header via a newly introduced helper bpf_skb_change_head(). This is safe as lwtunnel_xmit() is invoked after the IP header has been assembled completely.
All BPF programs receive an skb with L3 headers attached and may return one of the following error codes: BPF_OK - Continue routing as per nexthop BPF_DROP - Drop skb and return EPERM BPF_REDIRECT - Redirect skb to device as per redirect() helper. (Only valid in lwtunnel_xmit() context) The return codes are binary compatible with their TC_ACT_ relatives to ease compatibility. Signed-off-by: Thomas Graf Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 32 +++++++++++++++++++++++++++++++- include/uapi/linux/lwtunnel.h | 23 +++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1370a9d1456f..22ac82792687 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -101,6 +101,9 @@ enum bpf_prog_type { BPF_PROG_TYPE_XDP, BPF_PROG_TYPE_PERF_EVENT, BPF_PROG_TYPE_CGROUP_SKB, + BPF_PROG_TYPE_LWT_IN, + BPF_PROG_TYPE_LWT_OUT, + BPF_PROG_TYPE_LWT_XMIT, }; enum bpf_attach_type { @@ -409,6 +412,16 @@ union bpf_attr { * * int bpf_get_numa_node_id() * Return: Id of current NUMA node. + * + * int bpf_skb_change_head() + * Grows headroom of skb and adjusts MAC header offset accordingly. + * Will extends/reallocae as required automatically. + * May change skb data pointer and will thus invalidate any check + * performed for direct packet access. 
+ * @skb: pointer to skb + * @len: length of header to be pushed in front + * @flags: Flags (unused for now) + * Return: 0 on success or negative error */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -453,7 +466,8 @@ union bpf_attr { FN(skb_pull_data), \ FN(csum_update), \ FN(set_hash_invalid), \ - FN(get_numa_node_id), + FN(get_numa_node_id), \ + FN(skb_change_head), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -537,6 +551,22 @@ struct bpf_tunnel_key { __u32 tunnel_label; }; +/* Generic BPF return codes which all BPF program types may support. + * The values are binary compatible with their TC_ACT_* counter-part to + * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT + * programs. + * + * XDP is handled seprately, see XDP_*. + */ +enum bpf_ret_code { + BPF_OK = 0, + /* 1 reserved */ + BPF_DROP = 2, + /* 3-6 reserved */ + BPF_REDIRECT = 7, + /* >127 are reserved for prog type specific return codes */ +}; + /* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other * return codes are reserved for future use. 
Unknown return codes will result diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 453cc6215bfd..92724cba1eba 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -10,6 +10,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_ILA, LWTUNNEL_ENCAP_IP6, LWTUNNEL_ENCAP_SEG6, + LWTUNNEL_ENCAP_BPF, __LWTUNNEL_ENCAP_MAX, }; @@ -43,4 +44,26 @@ enum lwtunnel_ip6_t { #define LWTUNNEL_IP6_MAX (__LWTUNNEL_IP6_MAX - 1) +enum { + LWT_BPF_PROG_UNSPEC, + LWT_BPF_PROG_FD, + LWT_BPF_PROG_NAME, + __LWT_BPF_PROG_MAX, +}; + +#define LWT_BPF_PROG_MAX (__LWT_BPF_PROG_MAX - 1) + +enum { + LWT_BPF_UNSPEC, + LWT_BPF_IN, + LWT_BPF_OUT, + LWT_BPF_XMIT, + LWT_BPF_XMIT_HEADROOM, + __LWT_BPF_MAX, +}; + +#define LWT_BPF_MAX (__LWT_BPF_MAX - 1) + +#define LWT_BPF_MAX_HEADROOM 256 + #endif /* _UAPI_LWTUNNEL_H_ */ -- cgit v1.2.3 From 61023658760032e97869b07d54be9681d2529e77 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 1 Dec 2016 08:48:04 -0800 Subject: bpf: Add new cgroup attach type to enable sock modifications Add new cgroup based program type, BPF_PROG_TYPE_CGROUP_SOCK. Similar to BPF_PROG_TYPE_CGROUP_SKB programs can be attached to a cgroup and run any time a process in the cgroup opens an AF_INET or AF_INET6 socket. Currently only sk_bound_dev_if is exported to userspace for modification by a bpf program. This allows a cgroup to be configured such that AF_INET{6} sockets opened by processes are automatically bound to a specific device. In turn, this enables the running of programs that do not support SO_BINDTODEVICE in a specific VRF context / L3 domain. Signed-off-by: David Ahern Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/uapi/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 22ac82792687..bfe5e31a1288 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -101,6 +101,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_XDP, BPF_PROG_TYPE_PERF_EVENT, BPF_PROG_TYPE_CGROUP_SKB, + BPF_PROG_TYPE_CGROUP_SOCK, BPF_PROG_TYPE_LWT_IN, BPF_PROG_TYPE_LWT_OUT, BPF_PROG_TYPE_LWT_XMIT, @@ -109,6 +110,7 @@ enum bpf_prog_type { enum bpf_attach_type { BPF_CGROUP_INET_INGRESS, BPF_CGROUP_INET_EGRESS, + BPF_CGROUP_INET_SOCK_CREATE, __MAX_BPF_ATTACH_TYPE }; @@ -567,6 +569,10 @@ enum bpf_ret_code { /* >127 are reserved for prog type specific return codes */ }; +struct bpf_sock { + __u32 bound_dev_if; +}; + /* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other * return codes are reserved for future use. Unknown return codes will result -- cgit v1.2.3 From aa4c1037a30f4e88f444e83d42c2befbe0d5caf5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 1 Dec 2016 08:48:06 -0800 Subject: bpf: Add support for reading socket family, type, protocol Add socket family, type and protocol to bpf_sock allowing bpf programs read-only access. Add __sk_flags_offset[0] to struct sock before the bitfield to programmatically determine the offset of the unsigned int containing protocol and type. Signed-off-by: David Ahern Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bfe5e31a1288..6123d9b8e828 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -571,6 +571,9 @@ enum bpf_ret_code { struct bpf_sock { __u32 bound_dev_if; + __u32 family; + __u32 type; + __u32 protocol; }; /* User return codes for XDP prog type.
-- cgit v1.2.3 From adc176c5472214971d77c1a61c83db9b01e9cdc7 Mon Sep 17 00:00:00 2001 From: Erik Nordmark Date: Fri, 2 Dec 2016 14:00:08 -0800 Subject: ipv6 addrconf: Implemented enhanced DAD (RFC7527) Implemented RFC7527 Enhanced DAD. IPv6 duplicate address detection can fail if there is some temporary loopback of Ethernet frames. RFC7527 solves this by including a random nonce in the NS messages used for DAD, and if an NS is received with the same nonce it is assumed to be a looped back DAD probe and is ignored. RFC7527 is enabled by default. Can be disabled by setting both of conf/{all,interface}/enhanced_dad to zero. Signed-off-by: Erik Nordmark Signed-off-by: Bob Gilligan Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/uapi/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 53561be1ac21..eaf65dc82e22 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -181,6 +181,7 @@ enum { DEVCONF_RTR_SOLICIT_MAX_INTERVAL, DEVCONF_SEG6_ENABLED, DEVCONF_SEG6_REQUIRE_HMAC, + DEVCONF_ENHANCED_DAD, DEVCONF_MAX }; -- cgit v1.2.3 From 3fefeb88d002850e591339fed291eb6a795d9f21 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 15 Nov 2016 15:08:24 +0100 Subject: netfilter: nf_conntrack_tuple_common.h: fix #include To allow usage of enum ip_conntrack_dir in include/net/netns/conntrack.h, this patch encloses #include in a #ifndef __KERNEL__ directive, so that compiler errors caused by unwanted inclusion of include/linux/netfilter.h are avoided. In addition, #include line has been added to resolve correctly CTINFO2DIR macro. 
Signed-off-by: Davide Caratti Acked-by: Mikko Rapeli Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_tuple_common.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h index a9c3834abdd4..526b42496b78 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h @@ -2,7 +2,10 @@ #define _NF_CONNTRACK_TUPLE_COMMON_H #include +#ifndef __KERNEL__ #include +#endif +#include /* IP_CT_IS_REPLY */ enum ip_conntrack_dir { IP_CT_DIR_ORIGINAL, -- cgit v1.2.3 From 7bd509e311f408f7a5132fcdde2069af65fa05ae Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 4 Dec 2016 23:19:41 +0100 Subject: bpf: add prog_digest and expose it via fdinfo/netlink When loading a BPF program via bpf(2), calculate the digest over the program's instruction stream and store it in struct bpf_prog's digest member. This is done at a point in time before any instructions are rewritten by the verifier. Any unstable map file descriptor number part of the imm field will be zeroed for the hash. fdinfo example output for progs: # cat /proc/1590/fdinfo/5 pos: 0 flags: 02000002 mnt_id: 11 prog_type: 1 prog_jited: 1 prog_digest: b27e8b06da22707513aa97363dfb11c7c3675d28 memlock: 4096 When programs are pinned and retrieved by an ELF loader, the loader can check the program's digest through fdinfo and compare it against one that was generated over the ELF file's program section to see if the program needs to be reloaded. Furthermore, this can also be exposed through other means such as netlink in case of a tc cls/act dump (or xdp in future), but also through tracepoints or other facilities to identify the program. Other than that, the digest can also serve as a base name for the work in progress kallsyms support of programs. 
The digest doesn't depend/select the crypto layer, since we need to keep dependencies to a minimum. iproute2 will get support for this facility. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 1 + include/uapi/linux/tc_act/tc_bpf.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 86786d45ee66..1adc0b654996 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -397,6 +397,7 @@ enum { TCA_BPF_NAME, TCA_BPF_FLAGS, TCA_BPF_FLAGS_GEN, + TCA_BPF_DIGEST, __TCA_BPF_MAX, }; diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h index 063d9d465119..a6b88a6f7f71 100644 --- a/include/uapi/linux/tc_act/tc_bpf.h +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -27,6 +27,7 @@ enum { TCA_ACT_BPF_FD, TCA_ACT_BPF_NAME, TCA_ACT_BPF_PAD, + TCA_ACT_BPF_DIGEST, __TCA_ACT_BPF_MAX, }; #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) -- cgit v1.2.3 From 1814096980bbe546c4384b7b064126cbe7d40d30 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 24 Nov 2016 12:04:55 +0100 Subject: netfilter: nft_payload: layer 4 checksum adjustment for pseudoheader fields This patch adds a new flag that signals the kernel to update layer 4 checksum if the packet field belongs to the layer 4 pseudoheader. This implicitly provides stateless NAT 1:1 that is useful under very specific usecases. Since rules mangling layer 3 fields that are part of the pseudoheader may potentially convey any layer 4 packet, we have to deal with the layer 4 checksum adjustment using protocol specific code. This patch adds support for TCP, UDP and ICMPv6, since they include the pseudoheader in the layer 4 checksum calculation. ICMP doesn't, so we can skip it. 
Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 14e5f619167e..f030e59aa2ec 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -659,6 +659,10 @@ enum nft_payload_csum_types { NFT_PAYLOAD_CSUM_INET, }; +enum nft_payload_csum_flags { + NFT_PAYLOAD_L4CSUM_PSEUDOHDR = (1 << 0), +}; + /** * enum nft_payload_attributes - nf_tables payload expression netlink attributes * @@ -669,6 +673,7 @@ enum nft_payload_csum_types { * @NFTA_PAYLOAD_SREG: source register to load data from (NLA_U32: nft_registers) * @NFTA_PAYLOAD_CSUM_TYPE: checksum type (NLA_U32) * @NFTA_PAYLOAD_CSUM_OFFSET: checksum offset relative to base (NLA_U32) + * @NFTA_PAYLOAD_CSUM_FLAGS: checksum flags (NLA_U32) */ enum nft_payload_attributes { NFTA_PAYLOAD_UNSPEC, @@ -679,6 +684,7 @@ enum nft_payload_attributes { NFTA_PAYLOAD_SREG, NFTA_PAYLOAD_CSUM_TYPE, NFTA_PAYLOAD_CSUM_OFFSET, + NFTA_PAYLOAD_CSUM_FLAGS, __NFTA_PAYLOAD_MAX }; #define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1) -- cgit v1.2.3 From e50092404c1bc7aaeb0a0f4077fa6f07b073a20f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Nov 2016 00:04:32 +0100 Subject: netfilter: nf_tables: add stateful objects This patch augments nf_tables to support stateful objects. This new infrastructure allows you to create, dump and delete stateful objects, that are identified by a user-defined name. This patch adds the generic infrastructure, follow up patches add support for two stateful objects: counters and quotas. This patch provides a native infrastructure for nf_tables to replace nfacct, the extended accounting infrastructure for iptables. 
Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index f030e59aa2ec..18e30dbc8c3f 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -4,6 +4,7 @@ #define NFT_TABLE_MAXNAMELEN 32 #define NFT_CHAIN_MAXNAMELEN 32 #define NFT_SET_MAXNAMELEN 32 +#define NFT_OBJ_MAXNAMELEN 32 #define NFT_USERDATA_MAXLEN 256 /** @@ -85,6 +86,9 @@ enum nft_verdicts { * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes) * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes) * @NFT_MSG_TRACE: trace event (enum nft_trace_attributes) + * @NFT_MSG_NEWOBJ: create a stateful object (enum nft_obj_attributes) + * @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes) + * @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -105,6 +109,9 @@ enum nf_tables_msg_types { NFT_MSG_NEWGEN, NFT_MSG_GETGEN, NFT_MSG_TRACE, + NFT_MSG_NEWOBJ, + NFT_MSG_GETOBJ, + NFT_MSG_DELOBJ, NFT_MSG_MAX, }; @@ -1178,6 +1185,28 @@ enum nft_fib_flags { NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */ }; +#define NFT_OBJECT_UNSPEC 0 + +/** + * enum nft_object_attributes - nf_tables stateful object netlink attributes + * + * @NFTA_OBJ_TABLE: name of the table containing the expression (NLA_STRING) + * @NFTA_OBJ_NAME: name of this expression type (NLA_STRING) + * @NFTA_OBJ_TYPE: stateful object type (NLA_U32) + * @NFTA_OBJ_DATA: stateful object data (NLA_NESTED) + * @NFTA_OBJ_USE: number of references to this expression (NLA_U32) + */ +enum nft_object_attributes { + NFTA_OBJ_UNSPEC, + NFTA_OBJ_TABLE, + NFTA_OBJ_NAME, + NFTA_OBJ_TYPE, + NFTA_OBJ_DATA, + NFTA_OBJ_USE, + __NFTA_OBJ_MAX +}; +#define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1) 
+ /** * enum nft_trace_attributes - nf_tables trace netlink attributes * -- cgit v1.2.3 From b1ce0ced101ee134c5d0bbb378b2c3cadc617f20 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Nov 2016 00:04:36 +0100 Subject: netfilter: nft_counter: add stateful object type Register a new percpu counter stateful object type into the stateful object infrastructure. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 18e30dbc8c3f..e352ef65d753 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1186,6 +1186,7 @@ enum nft_fib_flags { }; #define NFT_OBJECT_UNSPEC 0 +#define NFT_OBJECT_COUNTER 1 /** * enum nft_object_attributes - nf_tables stateful object netlink attributes -- cgit v1.2.3 From 173705d9a2df1490478bf0d39f1b517bd489c8fa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Nov 2016 00:04:43 +0100 Subject: netfilter: nft_quota: add stateful object type Register a new quota stateful object type into the new stateful object infrastructure. 
Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index e352ef65d753..ad0577ba5d2a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1187,6 +1187,7 @@ enum nft_fib_flags { #define NFT_OBJECT_UNSPEC 0 #define NFT_OBJECT_COUNTER 1 +#define NFT_OBJECT_QUOTA 2 /** * enum nft_object_attributes - nf_tables stateful object netlink attributes -- cgit v1.2.3 From c97d22e68bfedfacb9e752dee536c69916ae0933 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Nov 2016 00:05:38 +0100 Subject: netfilter: nf_tables: add stateful object reference expression This new expression allows us to refer to existing stateful objects from rules. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index ad0577ba5d2a..1043ce4250c5 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1137,6 +1137,20 @@ enum nft_fwd_attributes { }; #define NFTA_FWD_MAX (__NFTA_FWD_MAX - 1) +/** + * enum nft_objref_attributes - nf_tables stateful object expression netlink attributes + * + * @NFTA_OBJREF_IMM_TYPE: object type for immediate reference (NLA_U32: nft_register) + * @NFTA_OBJREF_IMM_NAME: object name for immediate reference (NLA_STRING) + */ +enum nft_objref_attributes { + NFTA_OBJREF_UNSPEC, + NFTA_OBJREF_IMM_TYPE, + NFTA_OBJREF_IMM_NAME, + __NFTA_OBJREF_MAX +}; +#define NFTA_OBJREF_MAX (__NFTA_OBJREF_MAX - 1) + /** * enum nft_gen_attributes - nf_tables ruleset generation attributes * -- cgit v1.2.3 From 795595f68d6c787028345804bb06f5a633af24a2 Mon Sep 17 00:00:00 2001 From: Pablo Neira 
Ayuso Date: Mon, 28 Nov 2016 00:05:52 +0100 Subject: netfilter: nft_quota: dump consumed quota Add a new attribute NFTA_QUOTA_CONSUMED that displays the amount of quota that has been already consumed. This allows us to restore the internal state of the quota object between reboots as well as to monitor how wasted it is. This patch changes the logic to account for the consumed bytes, instead of the bytes that remain to be consumed. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 1043ce4250c5..3d47582caa80 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -988,12 +988,14 @@ enum nft_quota_flags { * * @NFTA_QUOTA_BYTES: quota in bytes (NLA_U16) * @NFTA_QUOTA_FLAGS: flags (NLA_U32) + * @NFTA_QUOTA_CONSUMED: quota already consumed in bytes (NLA_U64) */ enum nft_quota_attributes { NFTA_QUOTA_UNSPEC, NFTA_QUOTA_BYTES, NFTA_QUOTA_FLAGS, NFTA_QUOTA_PAD, + NFTA_QUOTA_CONSUMED, __NFTA_QUOTA_MAX }; #define NFTA_QUOTA_MAX (__NFTA_QUOTA_MAX - 1) -- cgit v1.2.3 From 43da04a593d8b2626f1cf4b56efe9402f6b53652 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Nov 2016 00:05:44 +0100 Subject: netfilter: nf_tables: atomic dump and reset for stateful objects This patch adds a new NFT_MSG_GETOBJ_RESET command to perform an atomic dump-and-reset of the stateful object. This also adds support for atomic dump and reset for counter and quota objects.
Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 3d47582caa80..399eac1eee91 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -89,6 +89,7 @@ enum nft_verdicts { * @NFT_MSG_NEWOBJ: create a stateful object (enum nft_obj_attributes) * @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes) * @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes) + * @NFT_MSG_GETOBJ_RESET: get and reset a stateful object (enum nft_obj_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -112,6 +113,7 @@ enum nf_tables_msg_types { NFT_MSG_NEWOBJ, NFT_MSG_GETOBJ, NFT_MSG_DELOBJ, + NFT_MSG_GETOBJ_RESET, NFT_MSG_MAX, }; -- cgit v1.2.3 From 1896531710abcd9a961a17d0c5c6a9f537d479b6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Nov 2016 00:05:56 +0100 Subject: netfilter: nft_quota: add depleted flag for objects Notify on depleted quota objects. The NFT_QUOTA_F_DEPLETED flag indicates we have reached overquota. Add pointer to table from nft_object, so we can use it when sending the depletion notification to userspace. 
Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 399eac1eee91..4864caca1e8e 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -983,6 +983,7 @@ enum nft_queue_attributes { enum nft_quota_flags { NFT_QUOTA_F_INV = (1 << 0), + NFT_QUOTA_F_DEPLETED = (1 << 1), }; /** -- cgit v1.2.3 From 8aeff920dcc9b3f8cf43042a76428582634d9208 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Nov 2016 00:06:00 +0100 Subject: netfilter: nf_tables: add stateful object reference to set elements This patch allows you to refer to stateful objects from set elements. This provides the infrastructure to create maps where the right hand side of the mapping is a stateful object. This allows us to build dictionaries of stateful objects, that you can use to perform fast lookups using any arbitrary key combination. 
Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 4864caca1e8e..a6b52dbff08c 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -255,6 +255,7 @@ enum nft_rule_compat_attributes { * @NFT_SET_MAP: set is used as a dictionary * @NFT_SET_TIMEOUT: set uses timeouts * @NFT_SET_EVAL: set contains expressions for evaluation + * @NFT_SET_OBJECT: set contains stateful objects */ enum nft_set_flags { NFT_SET_ANONYMOUS = 0x1, @@ -263,6 +264,7 @@ enum nft_set_flags { NFT_SET_MAP = 0x8, NFT_SET_TIMEOUT = 0x10, NFT_SET_EVAL = 0x20, + NFT_SET_OBJECT = 0x40, }; /** @@ -304,6 +306,7 @@ enum nft_set_desc_attributes { * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64) * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32) * @NFTA_SET_USERDATA: user data (NLA_BINARY) + * @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*) */ enum nft_set_attributes { NFTA_SET_UNSPEC, @@ -321,6 +324,7 @@ enum nft_set_attributes { NFTA_SET_GC_INTERVAL, NFTA_SET_USERDATA, NFTA_SET_PAD, + NFTA_SET_OBJ_TYPE, __NFTA_SET_MAX }; #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) @@ -344,6 +348,7 @@ enum nft_set_elem_flags { * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64) * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY) * @NFTA_SET_ELEM_EXPR: expression (NLA_NESTED: nft_expr_attributes) + * @NFTA_SET_ELEM_OBJREF: stateful object reference (NLA_STRING) */ enum nft_set_elem_attributes { NFTA_SET_ELEM_UNSPEC, @@ -355,6 +360,7 @@ enum nft_set_elem_attributes { NFTA_SET_ELEM_USERDATA, NFTA_SET_ELEM_EXPR, NFTA_SET_ELEM_PAD, + NFTA_SET_ELEM_OBJREF, __NFTA_SET_ELEM_MAX }; #define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) @@ -1207,6 +1213,8 @@ enum nft_fib_flags { #define NFT_OBJECT_UNSPEC 0 #define NFT_OBJECT_COUNTER 1 #define 
NFT_OBJECT_QUOTA 2 +#define __NFT_OBJECT_MAX 3 +#define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1) /** * enum nft_object_attributes - nf_tables stateful object netlink attributes -- cgit v1.2.3 From 63aea29060025fd2732680aa48a6b97687b93af8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Nov 2016 00:06:03 +0100 Subject: netfilter: nft_objref: support for stateful object maps This patch allows us to refer to stateful object dictionaries, the source register indicates the key data to be used to look up for the corresponding state object. We can refer to these maps through names or, alternatively, the map transaction id. This allows us to refer to both anonymous and named maps. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index a6b52dbff08c..881d49e94569 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1153,11 +1153,17 @@ enum nft_fwd_attributes { * * @NFTA_OBJREF_IMM_TYPE: object type for immediate reference (NLA_U32: nft_register) * @NFTA_OBJREF_IMM_NAME: object name for immediate reference (NLA_STRING) + * @NFTA_OBJREF_SET_SREG: source register of the data to look for (NLA_U32: nft_registers) + * @NFTA_OBJREF_SET_NAME: name of the set where to look for (NLA_STRING) + * @NFTA_OBJREF_SET_ID: id of the set where to look for in this transaction (NLA_U32) */ enum nft_objref_attributes { NFTA_OBJREF_UNSPEC, NFTA_OBJREF_IMM_TYPE, NFTA_OBJREF_IMM_NAME, + NFTA_OBJREF_SET_SREG, + NFTA_OBJREF_SET_NAME, + NFTA_OBJREF_SET_ID, __NFTA_OBJREF_MAX }; #define NFTA_OBJREF_MAX (__NFTA_OBJREF_MAX - 1) -- cgit v1.2.3 From 2c16d60332643e90d4fa244f4a706c454b8c7569 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 6 Dec 2016 16:25:02 -0500 Subject: netfilter: xt_bpf: support ebpf Add support for attaching an eBPF 
object by file descriptor. The iptables binary can be called with a path to an elf object or a pinned bpf object. Also pass the mode and path to the kernel to be able to return it later for iptables dump and save. Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_bpf.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h index 1fad2c27ac32..b97725af2ac0 100644 --- a/include/uapi/linux/netfilter/xt_bpf.h +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -2,9 +2,11 @@ #define _XT_BPF_H #include +#include #include #define XT_BPF_MAX_NUM_INSTR 64 +#define XT_BPF_PATH_MAX (XT_BPF_MAX_NUM_INSTR * sizeof(struct sock_filter)) struct bpf_prog; @@ -16,4 +18,23 @@ struct xt_bpf_info { struct bpf_prog *filter __attribute__((aligned(8))); }; +enum xt_bpf_modes { + XT_BPF_MODE_BYTECODE, + XT_BPF_MODE_FD_PINNED, + XT_BPF_MODE_FD_ELF, +}; + +struct xt_bpf_info_v1 { + __u16 mode; + __u16 bpf_program_num_elem; + __s32 fd; + union { + struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; + char path[XT_BPF_PATH_MAX]; + }; + + /* only used in the kernel */ + struct bpf_prog *filter __attribute__((aligned(8))); +}; + #endif /*_XT_BPF_H */ -- cgit v1.2.3 From faa3ffce78298b2b782297765cffd05f52fed9d4 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 7 Dec 2016 14:03:10 +0200 Subject: net/sched: cls_flower: Add support for matching on flags Add UAPI to provide set of flags for matching, where the flags provided from user-space are mapped to flow-dissector flags. The 1st flag allows to match on whether the packet is an IP fragment and corresponds to the FLOW_DIS_IS_FRAGMENT flag. Signed-off-by: Or Gerlitz Reviewed-by: Paul Blakey Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_cls.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 1adc0b654996..0ad9f0bce043 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -458,11 +458,18 @@ enum { TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, /* be16 */ TCA_FLOWER_KEY_ENC_UDP_DST_PORT, /* be16 */ TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, /* be16 */ + + TCA_FLOWER_KEY_FLAGS, /* be32 */ + TCA_FLOWER_KEY_FLAGS_MASK, /* be32 */ __TCA_FLOWER_MAX, }; #define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) +enum { + TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), +}; + /* Match-all classifier */ enum { -- cgit v1.2.3 From 7b684884fbfab33251115fa5054fb821c34b93be Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 7 Dec 2016 13:48:28 +0100 Subject: net/sched: cls_flower: Support matching on ICMP type and code Support matching on ICMP type and code. Example usage: tc qdisc add dev eth0 ingress tc filter add dev eth0 protocol ip parent ffff: flower \ indev eth0 ip_proto icmp type 8 code 0 action drop tc filter add dev eth0 protocol ipv6 parent ffff: flower \ indev eth0 ip_proto icmpv6 type 128 code 0 action drop Signed-off-by: Simon Horman Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_cls.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 0ad9f0bce043..cb4bcdc58543 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -461,6 +461,16 @@ enum { TCA_FLOWER_KEY_FLAGS, /* be32 */ TCA_FLOWER_KEY_FLAGS_MASK, /* be32 */ + + TCA_FLOWER_KEY_ICMPV4_CODE, /* u8 */ + TCA_FLOWER_KEY_ICMPV4_CODE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE, /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE, /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE, /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,/* u8 */ + __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From 17bedab2723145d17b14084430743549e6943d03 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 7 Dec 2016 15:53:11 -0800 Subject: bpf: xdp: Allow head adjustment in XDP prog This patch allows XDP prog to extend/remove the packet data at the head (like adding or removing header). It is done by adding a new XDP helper bpf_xdp_adjust_head(). It also renames bpf_helper_changes_skb_data() to bpf_helper_changes_pkt_data() to better reflect that XDP prog does not work on skb. This patch adds one "xdp_adjust_head" bit to bpf_prog for the XDP-capable driver to check if the XDP prog requires bpf_xdp_adjust_head() support. The driver can then decide to error out during XDP_SETUP_PROG. Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. 
Miller --- include/uapi/linux/bpf.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6123d9b8e828..0eb0e87dbe9f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -424,6 +424,12 @@ union bpf_attr { * @len: length of header to be pushed in front * @flags: Flags (unused for now) * Return: 0 on success or negative error + * + * int bpf_xdp_adjust_head(xdp_md, delta) + * Adjust the xdp_md.data by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data + * Return: 0 on success or negative on error */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -469,7 +475,8 @@ union bpf_attr { FN(csum_update), \ FN(set_hash_invalid), \ FN(get_numa_node_id), \ - FN(skb_change_head), + FN(skb_change_head), \ + FN(xdp_adjust_head), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -576,6 +583,8 @@ struct bpf_sock { __u32 protocol; }; +#define XDP_PACKET_HEADROOM 256 + /* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other * return codes are reserved for future use. Unknown return codes will result -- cgit v1.2.3 From 2fa436b3a2a7009c11a3bc03fe0ff4c26e80fd87 Mon Sep 17 00:00:00 2001 From: Vamsi Krishna Date: Fri, 2 Dec 2016 23:59:08 +0200 Subject: nl80211: Use different attrs for BSSID and random MAC addr in scan req NL80211_ATTR_MAC was used to set both the specific BSSID to be scanned and the random MAC address to be used when privacy is enabled. When both the features are enabled, both the BSSID and the local MAC address were getting same value causing Probe Request frames to go with unintended DA. Hence, this has been fixed by using a different NL80211_ATTR_BSSID attribute to set the specific BSSID (which was the more recent addition in cfg80211) for a scan. 
Backwards compatibility with old userspace software is maintained to some extent by allowing NL80211_ATTR_MAC to be used to set the specific BSSID when scanning without enabling random MAC address use. Scanning with random source MAC address was introduced by commit ad2b26abc157 ("cfg80211: allow drivers to support random MAC addresses for scan") and the issue was introduced with the addition of the second user for the same attribute in commit 818965d39177 ("cfg80211: Allow a scan request for a specific BSSID"). Fixes: 818965d39177 ("cfg80211: Allow a scan request for a specific BSSID") Signed-off-by: Vamsi Krishna Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 259c9c77fdc1..6b76e3b0c18e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -323,7 +323,7 @@ * @NL80211_CMD_GET_SCAN: get scan results * @NL80211_CMD_TRIGGER_SCAN: trigger a new scan with the given parameters * %NL80211_ATTR_TX_NO_CCK_RATE is used to decide whether to send the - * probe requests at CCK rate or not. %NL80211_ATTR_MAC can be used to + * probe requests at CCK rate or not. %NL80211_ATTR_BSSID can be used to * specify a BSSID to scan for; if not included, the wildcard BSSID will * be used. * @NL80211_CMD_NEW_SCAN_RESULTS: scan notification (as a reply to @@ -1977,6 +1977,9 @@ enum nl80211_commands { * @NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED: Indicates whether or not multicast * packets should be send out as unicast to all stations (flag attribute). * + * @NL80211_ATTR_BSSID: The BSSID of the AP. Note that %NL80211_ATTR_MAC is also + * used in various commands/events for specifying the BSSID. 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2381,6 +2384,8 @@ enum nl80211_attrs { NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED, + NL80211_ATTR_BSSID, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 41c43fbee68f4f9a2a9675d83bca91c77862d7f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Sun, 11 Dec 2016 00:18:57 +0000 Subject: net: l2tp: export debug flags to UAPI Move the L2TP_MSG_* definitions to UAPI, as it is part of the netlink API. Signed-off-by: Asbjoern Sloth Toennesen Signed-off-by: David S. Miller --- include/uapi/linux/l2tp.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 5daa48e2571e..85ddb74fcd1c 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -108,7 +108,7 @@ enum { L2TP_ATTR_VLAN_ID, /* u16 */ L2TP_ATTR_COOKIE, /* 0, 4 or 8 bytes */ L2TP_ATTR_PEER_COOKIE, /* 0, 4 or 8 bytes */ - L2TP_ATTR_DEBUG, /* u32 */ + L2TP_ATTR_DEBUG, /* u32, enum l2tp_debug_flags */ L2TP_ATTR_RECV_SEQ, /* u8 */ L2TP_ATTR_SEND_SEQ, /* u8 */ L2TP_ATTR_LNS_MODE, /* u8 */ @@ -175,6 +175,21 @@ enum l2tp_seqmode { L2TP_SEQ_ALL = 2, }; +/** + * enum l2tp_debug_flags - debug message categories for L2TP tunnels/sessions + * + * @L2TP_MSG_DEBUG: verbose debug (if compiled in) + * @L2TP_MSG_CONTROL: userspace - kernel interface + * @L2TP_MSG_SEQ: sequence numbers + * @L2TP_MSG_DATA: data packets + */ +enum l2tp_debug_flags { + L2TP_MSG_DEBUG = (1 << 0), + L2TP_MSG_CONTROL = (1 << 1), + L2TP_MSG_SEQ = (1 << 2), + L2TP_MSG_DATA = (1 << 3), +}; + /* * NETLINK_GENERIC related info */ -- cgit v1.2.3 From 47c3e7783be4e142b861d34b5c2e223330b05d8a Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Sun, 11 Dec 2016 00:18:58 +0000 Subject: net: l2tp: deprecate PPPOL2TP_MSG_* in favour of L2TP_MSG_* PPPOL2TP_MSG_* and L2TP_MSG_* are duplicates, and are being used interchangeably in the kernel, so let's standardize on L2TP_MSG_* internally, and keep PPPOL2TP_MSG_* defined in UAPI for compatibility. Signed-off-by: Asbjoern Sloth Toennesen Signed-off-by: David S. Miller --- include/uapi/linux/if_pppol2tp.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_pppol2tp.h b/include/uapi/linux/if_pppol2tp.h index 4bd1f55d6377..6418c4d10241 100644 --- a/include/uapi/linux/if_pppol2tp.h +++ b/include/uapi/linux/if_pppol2tp.h @@ -18,6 +18,7 @@ #include #include #include +#include /* Structure used to connect() the socket to a particular tunnel UDP * socket over IPv4. @@ -90,14 +91,12 @@ enum { PPPOL2TP_SO_REORDERTO = 5, }; -/* Debug message categories for the DEBUG socket option */ +/* Debug message categories for the DEBUG socket option (deprecated) */ enum { - PPPOL2TP_MSG_DEBUG = (1 << 0), /* verbose debug (if - * compiled in) */ - PPPOL2TP_MSG_CONTROL = (1 << 1), /* userspace - kernel - * interface */ - PPPOL2TP_MSG_SEQ = (1 << 2), /* sequence numbers */ - PPPOL2TP_MSG_DATA = (1 << 3), /* data packets */ + PPPOL2TP_MSG_DEBUG = L2TP_MSG_DEBUG, + PPPOL2TP_MSG_CONTROL = L2TP_MSG_CONTROL, + PPPOL2TP_MSG_SEQ = L2TP_MSG_SEQ, + PPPOL2TP_MSG_DATA = L2TP_MSG_DATA, }; -- cgit v1.2.3