From 5037a00992e5fcb3d8509964313565a3dab6697c Mon Sep 17 00:00:00 2001 From: Sunil Dutt Date: Thu, 25 Jan 2018 17:13:37 +0200 Subject: nl80211: Introduce scan flags to emphasize requested scan behavior This commit defines new scan flags (LOW_SPAN, LOW_POWER, HIGH_LATENCY) to emphasize the requested scan behavior for the driver. These flags are optional and are mutually exclusive. The implementation of the respective functionality can be driver/hardware specific. These flags can be used to control the compromise between how long a scan takes, how much power it uses, and high accurate/complete the scan is in finding the BSSs. Signed-off-by: Sunil Dutt Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c587a61c32bf..6f60503fa617 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4945,6 +4945,9 @@ enum nl80211_feature_flags { * probe request tx deferral and suppression * @NL80211_EXT_FEATURE_MFP_OPTIONAL: Driver supports the %NL80211_MFP_OPTIONAL * value in %NL80211_ATTR_USE_MFP. + * @NL80211_EXT_FEATURE_LOW_SPAN_SCAN: Driver supports low span scan. + * @NL80211_EXT_FEATURE_LOW_POWER_SCAN: Driver supports low power scan. + * @NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN: Driver supports high accuracy scan. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -4972,6 +4975,9 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE, NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION, NL80211_EXT_FEATURE_MFP_OPTIONAL, + NL80211_EXT_FEATURE_LOW_SPAN_SCAN, + NL80211_EXT_FEATURE_LOW_POWER_SCAN, + NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, @@ -5032,6 +5038,10 @@ enum nl80211_timeout_reason { * of NL80211_CMD_TRIGGER_SCAN and NL80211_CMD_START_SCHED_SCAN * requests. * + * NL80211_SCAN_FLAG_LOW_SPAN, NL80211_SCAN_FLAG_LOW_POWER, and + * NL80211_SCAN_FLAG_HIGH_ACCURACY flags are exclusive of each other, i.e., only + * one of them can be used in the request. + * * @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority * @NL80211_SCAN_FLAG_FLUSH: flush cache before scanning * @NL80211_SCAN_FLAG_AP: force a scan even if the interface is configured @@ -5059,7 +5069,20 @@ enum nl80211_timeout_reason { * and suppression (if it has received a broadcast Probe Response frame, * Beacon frame or FILS Discovery frame from an AP that the STA considers * a suitable candidate for (re-)association - suitable in terms of - * SSID and/or RSSI + * SSID and/or RSSI. + * @NL80211_SCAN_FLAG_LOW_SPAN: Span corresponds to the total time taken to + * accomplish the scan. Thus, this flag intends the driver to perform the + * scan request with lesser span/duration. It is specific to the driver + * implementations on how this is accomplished. Scan accuracy may get + * impacted with this flag. + * @NL80211_SCAN_FLAG_LOW_POWER: This flag intends the scan attempts to consume + * optimal possible power. Drivers can resort to their specific means to + * optimize the power. Scan accuracy may get impacted with this flag. + * @NL80211_SCAN_FLAG_HIGH_ACCURACY: Accuracy here intends to the extent of scan + * results obtained. Thus HIGH_ACCURACY scan flag aims to get maximum + * possible scan results. This flag hints the driver to use the best + * possible scan configuration to improve the accuracy in scanning. + * Latency and power use may get impacted with this flag. */ enum nl80211_scan_flags { NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, @@ -5070,6 +5093,9 @@ enum nl80211_scan_flags { NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP = 1<<5, NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE = 1<<6, NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION = 1<<7, + NL80211_SCAN_FLAG_LOW_SPAN = 1<<8, + NL80211_SCAN_FLAG_LOW_POWER = 1<<9, + NL80211_SCAN_FLAG_HIGH_ACCURACY = 1<<10, }; /** -- cgit v1.2.3 From 40cbfa90218bc570a7959b436b9d48a18c361041 Mon Sep 17 00:00:00 2001 From: Srinivas Dasari Date: Thu, 25 Jan 2018 17:13:38 +0200 Subject: cfg80211/nl80211: Optional authentication offload to userspace This interface allows the host driver to offload the authentication to user space. This is exclusively defined for host drivers that do not define separate commands for authentication and association, but rely on userspace SME (e.g., in wpa_supplicant for the ~WPA_DRIVER_FLAGS_SME case) for the authentication to happen. This can be used to implement SAE without full implementation in the kernel/firmware while still being able to use NL80211_CMD_CONNECT with driver-based BSS selection. Host driver sends NL80211_CMD_EXTERNAL_AUTH event to start/abort authentication to the port on which connect is triggered and status of authentication is further indicated by user space to host driver through the same command response interface. User space entities advertise this capability through the NL80211_ATTR_EXTERNAL_AUTH_SUPP flag in the NL80211_CMD_CONNECT request. Host drivers shall look at this capability to offload the authentication. Signed-off-by: Srinivas Dasari Signed-off-by: Jouni Malinen [add socket connection ownership check] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 47 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6f60503fa617..c2342456cf16 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -992,6 +992,27 @@ * * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded. * + * @NL80211_CMD_EXTERNAL_AUTH: This interface is exclusively defined for host + * drivers that do not define separate commands for authentication and + * association, but rely on user space for the authentication to happen. + * This interface acts both as the event request (driver to user space) + * to trigger the authentication and command response (userspace to + * driver) to indicate the authentication status. + * + * User space uses the %NL80211_CMD_CONNECT command to the host driver to + * trigger a connection. The host driver selects a BSS and further uses + * this interface to offload only the authentication part to the user + * space. Authentication frames are passed between the driver and user + * space through the %NL80211_CMD_FRAME interface. Host driver proceeds + * further with the association after getting successful authentication + * status. User space indicates the authentication status through + * %NL80211_ATTR_STATUS_CODE attribute in %NL80211_CMD_EXTERNAL_AUTH + * command interface. + * + * Host driver reports this status on an authentication failure to the + * user space through the connect result as the user space would have + * initiated the connection through the connect request. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1198,6 +1219,8 @@ enum nl80211_commands { NL80211_CMD_RELOAD_REGDB, + NL80211_CMD_EXTERNAL_AUTH, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2153,6 +2176,16 @@ enum nl80211_commands { * @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT. * @NL80211_ATTR_PORT_AUTHORIZED: (reserved) * + * @NL80211_ATTR_EXTERNAL_AUTH_ACTION: Identify the requested external + * authentication operation (u32 attribute with an + * &enum nl80211_external_auth_action value). This is used with the + * &NL80211_CMD_EXTERNAL_AUTH request event. + * @NL80211_ATTR_EXTERNAL_AUTH_SUPPORT: Flag attribute indicating that the user + * space supports external authentication. This attribute shall be used + * only with %NL80211_CMD_CONNECT request. The driver may offload + * authentication processing to user space if this capability is indicated + * in NL80211_CMD_CONNECT requests from the user space. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2579,6 +2612,9 @@ enum nl80211_attrs { NL80211_ATTR_PMKR0_NAME, NL80211_ATTR_PORT_AUTHORIZED, + NL80211_ATTR_EXTERNAL_AUTH_ACTION, + NL80211_ATTR_EXTERNAL_AUTH_SUPPORT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5495,4 +5531,15 @@ enum nl80211_nan_match_attributes { NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1 }; +/** + * nl80211_external_auth_action - Action to perform with external + * authentication request. Used by NL80211_ATTR_EXTERNAL_AUTH_ACTION. + * @NL80211_EXTERNAL_AUTH_START: Start the authentication. + * @NL80211_EXTERNAL_AUTH_ABORT: Abort the ongoing authentication. + */ +enum nl80211_external_auth_action { + NL80211_EXTERNAL_AUTH_START, + NL80211_EXTERNAL_AUTH_ABORT, +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From 466b9936bf93b7ec3bce1dcd493262ff0a8a4f44 Mon Sep 17 00:00:00 2001 From: "tamizhr@codeaurora.org" Date: Wed, 31 Jan 2018 16:24:49 +0530 Subject: cfg80211: Add support to notify station's opmode change to userspace ht/vht action frames will be sent to AP from station to notify change of its ht/vht opmode(max bandwidth, smps mode or nss) modified values. Currently these valuse used by driver/firmware for rate control algorithm. This patch introduces NL80211_CMD_STA_OPMODE_CHANGED command to notify those modified/current supported values(max bandwidth, smps mode, max nss) to userspace application. This will be useful for the application like steering, which closely monitoring station's capability changes. Since the application has taken these values during station association. Signed-off-by: Tamizh chelvam Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c2342456cf16..ca3d5a613fc0 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1013,6 +1013,11 @@ * user space through the connect result as the user space would have * initiated the connection through the connect request. * + * @NL80211_CMD_STA_OPMODE_CHANGED: An event that notify station's + * ht opmode or vht opmode changes using any of &NL80211_ATTR_SMPS_MODE, + * &NL80211_ATTR_CHANNEL_WIDTH,&NL80211_ATTR_NSS attributes with its + * address(specified in &NL80211_ATTR_MAC). + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1221,6 +1226,8 @@ enum nl80211_commands { NL80211_CMD_EXTERNAL_AUTH, + NL80211_CMD_STA_OPMODE_CHANGED, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2186,6 +2193,9 @@ enum nl80211_commands { * authentication processing to user space if this capability is indicated * in NL80211_CMD_CONNECT requests from the user space. * + * @NL80211_ATTR_NSS: Station's New/updated RX_NSS value notified using this + * u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2615,6 +2625,8 @@ enum nl80211_attrs { NL80211_ATTR_EXTERNAL_AUTH_ACTION, NL80211_ATTR_EXTERNAL_AUTH_SUPPORT, + NL80211_ATTR_NSS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 01883eda72bd3f0a6c81447e4f223de14033fd9d Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 15 Feb 2018 10:49:35 -0800 Subject: rds: support for zcopy completion notification RDS removes a datagram (rds_message) from the retransmit queue when an ACK is received. The ACK indicates that the receiver has queued the RDS datagram, so that the sender can safely forget the datagram. When all references to the rds_message are quiesced, rds_message_purge is called to release resources used by the rds_message If the datagram to be removed had pinned pages set up, add an entry to the rs->rs_znotify_queue so that the notifcation will be sent up via rds_rm_zerocopy_callback() when the rds_message is eventually freed by rds_message_purge. rds_rm_zerocopy_callback() attempts to batch the number of cookies sent with each notification to a max of SO_EE_ORIGIN_MAX_ZCOOKIES. This is achieved by checking the tail skb in the sk_error_queue: if this has room for one more cookie, the cookie from the current notification is added; else a new skb is added to the sk_error_queue. Every invocation of rds_rm_zerocopy_callback() will trigger a ->sk_error_report to notify the application. Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/uapi/linux/errqueue.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index dc64cfaf13da..28812eda4209 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -20,11 +20,13 @@ struct sock_extended_err { #define SO_EE_ORIGIN_ICMP6 3 #define SO_EE_ORIGIN_TXSTATUS 4 #define SO_EE_ORIGIN_ZEROCOPY 5 +#define SO_EE_ORIGIN_ZCOOKIE 6 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS #define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1)) #define SO_EE_CODE_ZEROCOPY_COPIED 1 +#define SO_EE_ORIGIN_MAX_ZCOOKIES 8 /** * struct scm_timestamping - timestamps exposed through cmsg -- cgit v1.2.3 From 0cebaccef3acbdfbc2d85880a2efb765d2f4e2e3 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 15 Feb 2018 10:49:36 -0800 Subject: rds: zerocopy Tx support. If the MSG_ZEROCOPY flag is specified with rds_sendmsg(), and, if the SO_ZEROCOPY socket option has been set on the PF_RDS socket, application pages sent down with rds_sendmsg() are pinned. The pinning uses the accounting infrastructure added by Commit a91dbff551a6 ("sock: ulimit on MSG_ZEROCOPY pages") The payload bytes in the message may not be modified for the duration that the message has been pinned. A multi-threaded application using this infrastructure may thus need to be notified about send-completion so that it can free/reuse the buffers passed to rds_sendmsg(). Notification of send-completion will identify each message-buffer by a cookie that the application must specify as ancillary data to rds_sendmsg(). The ancillary data in this case has cmsg_level == SOL_RDS and cmsg_type == RDS_CMSG_ZCOPY_COOKIE. Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/uapi/linux/rds.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index e71d4491f225..12e3bca32cad 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -103,6 +103,7 @@ #define RDS_CMSG_MASKED_ATOMIC_FADD 8 #define RDS_CMSG_MASKED_ATOMIC_CSWP 9 #define RDS_CMSG_RXPATH_LATENCY 11 +#define RDS_CMSG_ZCOPY_COOKIE 12 #define RDS_INFO_FIRST 10000 #define RDS_INFO_COUNTERS 10000 -- cgit v1.2.3 From 1ec010e705934c8acbe7dbf31afc81e60e3d828b Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 16 Feb 2018 11:03:07 +0100 Subject: tun: export flags, uid, gid, queue information over netlink Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6d9447700e18..11d0c0ea2bfa 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -941,4 +941,22 @@ enum { IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */ }; +/* tun section */ + +enum { + IFLA_TUN_UNSPEC, + IFLA_TUN_OWNER, + IFLA_TUN_GROUP, + IFLA_TUN_TYPE, + IFLA_TUN_PI, + IFLA_TUN_VNET_HDR, + IFLA_TUN_PERSIST, + IFLA_TUN_MULTI_QUEUE, + IFLA_TUN_NUM_QUEUES, + IFLA_TUN_NUM_DISABLED_QUEUES, + __IFLA_TUN_MAX, +}; + +#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ -- cgit v1.2.3 From c4b50cd31d25c3d17886ffc47ca4a9a12c6dc9bf Mon Sep 17 00:00:00 2001 From: Venkateswara Naralasetty Date: Tue, 13 Feb 2018 11:03:06 +0530 Subject: cfg80211: send ack_signal to user in probe client response This patch provides support to get ack signal in probe client response and in station info from user. Signed-off-by: Venkateswara Naralasetty [squash in compilation fixes] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ca3d5a613fc0..c13c84304be3 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2626,6 +2626,7 @@ enum nl80211_attrs { NL80211_ATTR_EXTERNAL_AUTH_SUPPORT, NL80211_ATTR_NSS, + NL80211_ATTR_ACK_SIGNAL, /* add attributes here, update the policy in nl80211.c */ @@ -2947,6 +2948,7 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_RX_DURATION: aggregate PPDU duration for all frames * received from the station (u64, usec) * @NL80211_STA_INFO_PAD: attribute used for padding for 64-bit alignment + * @NL80211_STA_INFO_ACK_SIGNAL: signal strength of the last ACK frame(u8, dBm) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -2985,6 +2987,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_TID_STATS, NL80211_STA_INFO_RX_DURATION, NL80211_STA_INFO_PAD, + NL80211_STA_INFO_ACK_SIGNAL, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, -- cgit v1.2.3 From ccc007e4a746bb592d3e72106f00241f81d51410 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Thu, 15 Feb 2018 19:42:43 +0200 Subject: net: sched: add em_ipt ematch for calling xtables matches The commit a new tc ematch for using netfilter xtable matches. This allows early classification as well as mirroning/redirecting traffic based on logic implemented in netfilter extensions. Current supported use case is classification based on the incoming IPSec state used during decpsulation using the 'policy' iptables extension (xt_policy). The module dynamically fetches the netfilter match module and calls it using a fake xt_action_param structure based on validated userspace provided parameters. As the xt_policy match does not access skb->data, no skb modifications are needed on match. Signed-off-by: Eyal Birger Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 3 ++- include/uapi/linux/tc_ematch/tc_em_ipt.h | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/tc_ematch/tc_em_ipt.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 46c506615f4a..7cafb26df555 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -555,7 +555,8 @@ enum { #define TCF_EM_VLAN 6 #define TCF_EM_CANID 7 #define TCF_EM_IPSET 8 -#define TCF_EM_MAX 8 +#define TCF_EM_IPT 9 +#define TCF_EM_MAX 9 enum { TCF_EM_PROG_TC diff --git a/include/uapi/linux/tc_ematch/tc_em_ipt.h b/include/uapi/linux/tc_ematch/tc_em_ipt.h new file mode 100644 index 000000000000..49a65530992c --- /dev/null +++ b/include/uapi/linux/tc_ematch/tc_em_ipt.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_TC_EM_IPT_H +#define __LINUX_TC_EM_IPT_H + +#include +#include + +enum { + TCA_EM_IPT_UNSPEC, + TCA_EM_IPT_HOOK, + TCA_EM_IPT_MATCH_NAME, + TCA_EM_IPT_MATCH_REVISION, + TCA_EM_IPT_NFPROTO, + TCA_EM_IPT_MATCH_DATA, + __TCA_EM_IPT_MAX +}; + +#define TCA_EM_IPT_MAX (__TCA_EM_IPT_MAX - 1) + +#endif -- cgit v1.2.3 From cac56209a66ea3b0be67aa2966b2c628b944da1e Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Tue, 20 Feb 2018 08:55:58 -0500 Subject: net: Allow a rule to track originating protocol Allow a rule that is being added/deleted/modified or dumped to contain the originating protocol's id. The protocol is handled just like a routes originating protocol is. This is especially useful because there is starting to be a plethora of different user space programs adding rules. Allow the vrf device to specify that the kernel is the originator of the rule created for this device. Signed-off-by: Donald Sharp Signed-off-by: David S. Miller --- include/uapi/linux/fib_rules.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 2b642bf9b5a0..925539172d5b 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -23,8 +23,8 @@ struct fib_rule_hdr { __u8 tos; __u8 table; + __u8 proto; __u8 res1; /* reserved */ - __u8 res2; /* reserved */ __u8 action; __u32 flags; -- cgit v1.2.3 From 4fe0de5b143762d327bfaf1d7be7c5b58041a18c Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Wed, 31 Jan 2018 19:04:15 -0600 Subject: uapi: Add 802.11 Preauthentication to if_ether This adds 0x88c7 protocol type to if_ether. Signed-off-by: Denis Kenzior Signed-off-by: Johannes Berg --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index f8cb5760ea4f..54585906e50a 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -89,6 +89,7 @@ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ +#define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */ #define ETH_P_TIPC 0x88CA /* TIPC */ #define ETH_P_MACSEC 0x88E5 /* 802.1ae MACsec */ #define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */ -- cgit v1.2.3 From 1b71af6053af1bd2f849e9fda4f71c1e3f145dcf Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Fri, 23 Feb 2018 14:01:52 -0500 Subject: net: fib_rules: Add new attribute to set protocol For ages iproute2 has used `struct rtmsg` as the ancillary header for FIB rules and in the process set the protocol value to RTPROT_BOOT. Until ca56209a66 ("net: Allow a rule to track originating protocol") the kernel rules code ignored the protocol value sent from userspace and always returned 0 in notifications. To avoid incompatibility with existing iproute2, send the protocol as a new attribute. Fixes: cac56209a66 ("net: Allow a rule to track originating protocol") Signed-off-by: Donald Sharp Signed-off-by: David S. Miller --- include/uapi/linux/fib_rules.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 925539172d5b..77d90ae38114 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -23,8 +23,8 @@ struct fib_rule_hdr { __u8 tos; __u8 table; - __u8 proto; - __u8 res1; /* reserved */ + __u8 res1; /* reserved */ + __u8 res2; /* reserved */ __u8 action; __u32 flags; @@ -58,6 +58,7 @@ enum { FRA_PAD, FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ FRA_UID_RANGE, /* UID range */ + FRA_PROTOCOL, /* Originator of the rule */ __FRA_MAX }; -- cgit v1.2.3 From 6b1aea8cf2c8618146edaf6b35775ab55f7cafe5 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 1 Jan 2018 00:00:00 +0100 Subject: batman-adv: Update copyright years for 2018 Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batadv_packet.h | 2 +- include/uapi/linux/batman_adv.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index 5cb360be2a11..daefd7283896 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */ -/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index ae00c99cbed0..56ae28934070 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: MIT */ -/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors: +/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors: * * Matthias Schiffer * -- cgit v1.2.3 From a163dc22d515d17844435c8217ff66193d35b3fa Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Wed, 24 Jan 2018 12:21:37 +0100 Subject: batman-adv: always assume 2-byte packet alignment NIC drivers generally try to ensure that the "network header" is aligned to a 4-byte boundary. This is not always possible: When Ethernet frames are encapsulated in other packets with 4-byte aligned headers, the inner Ethernet header will have 4-byte alignment, and in consequence, the inner network header is aligned to 2, but not to 4 bytes. Most parts of batman-adv only care about 2-byte alignment; in particular, no unaligned accesses occur in performance-critical paths that handle actual payload data. This is not true for OGM handling: the seqno and crc fields are accessed as 32-bit values. To avoid these unaligned accesses, this patch reduces the expected packet alignment to 2 bytes for all of batadv's packet types. As no unaligned accesses existed on the performance-critical paths anyways, this chance does have any (positive or negative) effect on performance, but it still makes sense to avoid these accesses to prevent log noise when examining other unaligned accesses in the kernel while batman-adv is active. Signed-off-by: Matthias Schiffer Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batadv_packet.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index daefd7283896..894d8d2f713d 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -196,8 +196,6 @@ struct batadv_bla_claim_dst { __be16 group; /* group id */ }; -#pragma pack() - /** * struct batadv_ogm_packet - ogm (routing protocol) packet * @packet_type: batman-adv packet type, part of the general header @@ -222,9 +220,6 @@ struct batadv_ogm_packet { __u8 reserved; __u8 tq; __be16 tvlv_len; - /* __packed is not needed as the struct size is divisible by 4, - * and the largest data type in this struct has a size of 4. - */ }; #define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet) @@ -249,9 +244,6 @@ struct batadv_ogm2_packet { __u8 orig[ETH_ALEN]; __be16 tvlv_len; __be32 throughput; - /* __packed is not needed as the struct size is divisible by 4, - * and the largest data type in this struct has a size of 4. - */ }; #define BATADV_OGM2_HLEN sizeof(struct batadv_ogm2_packet) @@ -405,7 +397,6 @@ struct batadv_icmp_packet_rr { * misalignment of the payload after the ethernet header. It may also lead to * leakage of information when the padding it not initialized before sending. */ -#pragma pack(2) /** * struct batadv_unicast_packet - unicast packet for network payload @@ -533,8 +524,6 @@ struct batadv_coded_packet { __be16 coded_len; }; -#pragma pack() - /** * struct batadv_unicast_tvlv_packet - generic unicast packet with tvlv payload * @packet_type: batman-adv packet type, part of the general header @@ -641,4 +630,6 @@ struct batadv_tvlv_mcast_data { __u8 reserved[3]; }; +#pragma pack() + #endif /* _UAPI_LINUX_BATADV_PACKET_H_ */ -- cgit v1.2.3 From 401910db4cd425899832a093539222b6174f92a2 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Tue, 27 Feb 2018 09:52:43 -0800 Subject: rds: deliver zerocopy completion notification with data This commit is an optimization over commit 01883eda72bd ("rds: support for zcopy completion notification") for PF_RDS sockets. RDS applications are predominantly request-response transactions, so it is more efficient to reduce the number of system calls and have zerocopy completion notification delivered as ancillary data on the POLLIN channel. Cookies are passed up as ancillary data (at level SOL_RDS) in a struct rds_zcopy_cookies when the returned value of recvmsg() is greater than, or equal to, 0. A max of RDS_MAX_ZCOOKIES may be passed with each message. This commit removes support for zerocopy completion notification on MSG_ERRQUEUE for PF_RDS sockets. Signed-off-by: Sowmini Varadhan Acked-by: Willem de Bruijn Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- include/uapi/linux/errqueue.h | 2 -- include/uapi/linux/rds.h | 7 +++++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index 28812eda4209..dc64cfaf13da 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -20,13 +20,11 @@ struct sock_extended_err { #define SO_EE_ORIGIN_ICMP6 3 #define SO_EE_ORIGIN_TXSTATUS 4 #define SO_EE_ORIGIN_ZEROCOPY 5 -#define SO_EE_ORIGIN_ZCOOKIE 6 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS #define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1)) #define SO_EE_CODE_ZEROCOPY_COPIED 1 -#define SO_EE_ORIGIN_MAX_ZCOOKIES 8 /** * struct scm_timestamping - timestamps exposed through cmsg diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index 12e3bca32cad..a66b213de3d7 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -104,6 +104,7 @@ #define RDS_CMSG_MASKED_ATOMIC_CSWP 9 #define RDS_CMSG_RXPATH_LATENCY 11 #define RDS_CMSG_ZCOPY_COOKIE 12 +#define RDS_CMSG_ZCOPY_COMPLETION 13 #define RDS_INFO_FIRST 10000 #define RDS_INFO_COUNTERS 10000 @@ -317,6 +318,12 @@ struct rds_rdma_notify { #define RDS_RDMA_DROPPED 3 #define RDS_RDMA_OTHER_ERROR 4 +#define RDS_MAX_ZCOOKIES 8 +struct rds_zcopy_cookies { + __u32 num; + __u32 cookies[RDS_MAX_ZCOOKIES]; +}; + /* * Common set of flags for all RDMA related structs */ -- cgit v1.2.3 From bfff4862653bb96001ab57c1edd6d03f48e5f035 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 28 Feb 2018 22:40:16 -0500 Subject: net: fib_rules: support for match on ip_proto, sport and dport uapi for ip_proto, sport and dport range match in fib rules. Signed-off-by: Roopa Prabhu Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/fib_rules.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 77d90ae38114..232df14e1287 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -35,6 +35,11 @@ struct fib_rule_uid_range { __u32 end; }; +struct fib_rule_port_range { + __u16 start; + __u16 end; +}; + enum { FRA_UNSPEC, FRA_DST, /* destination address */ @@ -59,6 +64,9 @@ enum { FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ FRA_UID_RANGE, /* UID range */ FRA_PROTOCOL, /* Originator of the rule */ + FRA_IP_PROTO, /* ip proto */ + FRA_SPORT_RANGE, /* sport */ + FRA_DPORT_RANGE, /* dport */ __FRA_MAX }; -- cgit v1.2.3 From 77a5196a804e34ce5e215ef84d5e1de332e9c529 Mon Sep 17 00:00:00 2001 From: William Tu Date: Thu, 1 Mar 2018 13:49:57 -0800 Subject: gre: add sequence number for collect md mode. Currently GRE sequence number can only be used in native tunnel mode. This patch adds sequence number support for gre collect metadata mode. RFC2890 defines GRE sequence number to be specific to the traffic flow identified by the key. However, this patch does not implement per-key seqno. The sequence number is shared in the same tunnel device. That is, different tunnel keys using the same collect_md tunnel share single sequence number. Signed-off-by: William Tu Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index db6bdc375126..2a66769e5875 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -800,6 +800,7 @@ enum bpf_func_id { /* BPF_FUNC_skb_set_tunnel_key flags. */ #define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_DONT_FRAGMENT (1ULL << 2) +#define BPF_F_SEQ_NUMBER (1ULL << 3) /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. -- cgit v1.2.3 From 87ecc95d81d951b0984f2eb9c5c118cb68d0dce8 Mon Sep 17 00:00:00 2001 From: Priyaranjan Jha Date: Sun, 4 Mar 2018 10:38:35 -0800 Subject: tcp: add send queue size stat in SCM_TIMESTAMPING_OPT_STATS This patch adds TCP_NLA_SENDQ_SIZE stat into SCM_TIMESTAMPING_OPT_STATS. It reports no. of bytes present in send queue, when timestamp is generated. Signed-off-by: Priyaranjan Jha Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index b4a4f64635fa..93bad2128ef6 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -241,6 +241,7 @@ enum { TCP_NLA_MIN_RTT, /* minimum RTT */ TCP_NLA_RECUR_RETRANS, /* Recurring retransmits for the current pkt */ TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */ + TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */ }; -- cgit v1.2.3 From be631892948060f44b1ceee3132be1266932071e Mon Sep 17 00:00:00 2001 From: Priyaranjan Jha Date: Sun, 4 Mar 2018 10:38:36 -0800 Subject: tcp: add ca_state stat in SCM_TIMESTAMPING_OPT_STATS This patch adds TCP_NLA_CA_STATE stat into SCM_TIMESTAMPING_OPT_STATS. It reports ca_state of socket, when timestamp is generated. Signed-off-by: Priyaranjan Jha Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 93bad2128ef6..4c0ae0faf7ca 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -242,6 +242,7 @@ enum { TCP_NLA_RECUR_RETRANS, /* Recurring retransmits for the current pkt */ TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */ TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */ + TCP_NLA_CA_STATE, /* ca_state of socket */ }; -- cgit v1.2.3 From 955dc68cb9b23b42999cafe6df3684309bc686c6 Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Mon, 5 Mar 2018 11:39:05 +1100 Subject: net/ncsi: Add generic netlink family Add a generic netlink family for NCSI. This supports three commands; NCSI_CMD_PKG_INFO which returns information on packages and their associated channels, NCSI_CMD_SET_INTERFACE which allows a specific package or package/channel combination to be set as the preferred choice, and NCSI_CMD_CLEAR_INTERFACE which clears any preferred setting. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- include/uapi/linux/ncsi.h | 115 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 include/uapi/linux/ncsi.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ncsi.h b/include/uapi/linux/ncsi.h new file mode 100644 index 000000000000..4c292ecbb748 --- /dev/null +++ b/include/uapi/linux/ncsi.h @@ -0,0 +1,115 @@ +/* + * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __UAPI_NCSI_NETLINK_H__ +#define __UAPI_NCSI_NETLINK_H__ + +/** + * enum ncsi_nl_commands - supported NCSI commands + * + * @NCSI_CMD_UNSPEC: unspecified command to catch errors + * @NCSI_CMD_PKG_INFO: list package and channel attributes. Requires + * NCSI_ATTR_IFINDEX. If NCSI_ATTR_PACKAGE_ID is specified returns the + * specific package and its channels - otherwise a dump request returns + * all packages and their associated channels. + * @NCSI_CMD_SET_INTERFACE: set preferred package and channel combination. + * Requires NCSI_ATTR_IFINDEX and the preferred NCSI_ATTR_PACKAGE_ID and + * optionally the preferred NCSI_ATTR_CHANNEL_ID. + * @NCSI_CMD_CLEAR_INTERFACE: clear any preferred package/channel combination. + * Requires NCSI_ATTR_IFINDEX. + * @NCSI_CMD_MAX: highest command number + */ +enum ncsi_nl_commands { + NCSI_CMD_UNSPEC, + NCSI_CMD_PKG_INFO, + NCSI_CMD_SET_INTERFACE, + NCSI_CMD_CLEAR_INTERFACE, + + __NCSI_CMD_AFTER_LAST, + NCSI_CMD_MAX = __NCSI_CMD_AFTER_LAST - 1 +}; + +/** + * enum ncsi_nl_attrs - General NCSI netlink attributes + * + * @NCSI_ATTR_UNSPEC: unspecified attributes to catch errors + * @NCSI_ATTR_IFINDEX: ifindex of network device using NCSI + * @NCSI_ATTR_PACKAGE_LIST: nested array of NCSI_PKG_ATTR attributes + * @NCSI_ATTR_PACKAGE_ID: package ID + * @NCSI_ATTR_CHANNEL_ID: channel ID + * @NCSI_ATTR_MAX: highest attribute number + */ +enum ncsi_nl_attrs { + NCSI_ATTR_UNSPEC, + NCSI_ATTR_IFINDEX, + NCSI_ATTR_PACKAGE_LIST, + NCSI_ATTR_PACKAGE_ID, + NCSI_ATTR_CHANNEL_ID, + + __NCSI_ATTR_AFTER_LAST, + NCSI_ATTR_MAX = __NCSI_ATTR_AFTER_LAST - 1 +}; + +/** + * enum ncsi_nl_pkg_attrs - NCSI netlink package-specific attributes + * + * @NCSI_PKG_ATTR_UNSPEC: unspecified attributes to catch errors + * @NCSI_PKG_ATTR: nested array of package attributes + * @NCSI_PKG_ATTR_ID: package ID + * @NCSI_PKG_ATTR_FORCED: flag signifying a package has been set as preferred + * @NCSI_PKG_ATTR_CHANNEL_LIST: nested array of NCSI_CHANNEL_ATTR attributes + * @NCSI_PKG_ATTR_MAX: highest attribute number + */ +enum ncsi_nl_pkg_attrs { + NCSI_PKG_ATTR_UNSPEC, + NCSI_PKG_ATTR, + NCSI_PKG_ATTR_ID, + NCSI_PKG_ATTR_FORCED, + NCSI_PKG_ATTR_CHANNEL_LIST, + + __NCSI_PKG_ATTR_AFTER_LAST, + NCSI_PKG_ATTR_MAX = __NCSI_PKG_ATTR_AFTER_LAST - 1 +}; + +/** + * enum ncsi_nl_channel_attrs - NCSI netlink channel-specific attributes + * + * @NCSI_CHANNEL_ATTR_UNSPEC: unspecified attributes to catch errors + * @NCSI_CHANNEL_ATTR: nested array of channel attributes + * @NCSI_CHANNEL_ATTR_ID: channel ID + * @NCSI_CHANNEL_ATTR_VERSION_MAJOR: channel major version number + * @NCSI_CHANNEL_ATTR_VERSION_MINOR: channel minor version number + * @NCSI_CHANNEL_ATTR_VERSION_STR: channel version string + * @NCSI_CHANNEL_ATTR_LINK_STATE: channel link state flags + * @NCSI_CHANNEL_ATTR_ACTIVE: channels with this flag are in + * NCSI_CHANNEL_ACTIVE state + * @NCSI_CHANNEL_ATTR_FORCED: flag signifying a channel has been set as + * preferred + * @NCSI_CHANNEL_ATTR_VLAN_LIST: nested array of NCSI_CHANNEL_ATTR_VLAN_IDs + * @NCSI_CHANNEL_ATTR_VLAN_ID: VLAN ID being filtered on this channel + * @NCSI_CHANNEL_ATTR_MAX: highest attribute number + */ +enum ncsi_nl_channel_attrs { + NCSI_CHANNEL_ATTR_UNSPEC, + NCSI_CHANNEL_ATTR, + NCSI_CHANNEL_ATTR_ID, + NCSI_CHANNEL_ATTR_VERSION_MAJOR, + NCSI_CHANNEL_ATTR_VERSION_MINOR, + NCSI_CHANNEL_ATTR_VERSION_STR, + NCSI_CHANNEL_ATTR_LINK_STATE, + NCSI_CHANNEL_ATTR_ACTIVE, + NCSI_CHANNEL_ATTR_FORCED, + NCSI_CHANNEL_ATTR_VLAN_LIST, + NCSI_CHANNEL_ATTR_VLAN_ID, + + __NCSI_CHANNEL_ATTR_AFTER_LAST, + NCSI_CHANNEL_ATTR_MAX = __NCSI_CHANNEL_ATTR_AFTER_LAST - 1 +}; + +#endif /* __UAPI_NCSI_NETLINK_H__ */ -- cgit v1.2.3 From ed63afb8a318f6b3558d76afba7809daee4f28e5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 5 Mar 2018 20:44:18 +0800 Subject: sctp: add support for PR-SCTP Information for sendmsg This patch is to add support for PR-SCTP Information for sendmsg, as described in section 5.3.7 of RFC6458. With this option, you can specify pr_policy and pr_value for user data in sendmsg. It's also a necessary send info for sctp_sendv. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 4c4db14786bd..0dd1f82a4fa8 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -260,6 +260,19 @@ struct sctp_nxtinfo { sctp_assoc_t nxt_assoc_id; }; +/* 5.3.7 SCTP PR-SCTP Information Structure (SCTP_PRINFO) + * + * This cmsghdr structure specifies SCTP options for sendmsg(). + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ ------------------- + * IPPROTO_SCTP SCTP_PRINFO struct sctp_prinfo + */ +struct sctp_prinfo { + __u16 pr_policy; + __u32 pr_value; +}; + /* * sinfo_flags: 16 bits (unsigned integer) * @@ -293,6 +306,8 @@ typedef enum sctp_cmsg_type { #define SCTP_RCVINFO SCTP_RCVINFO SCTP_NXTINFO, /* 5.3.6 SCTP Next Receive Information Structure */ #define SCTP_NXTINFO SCTP_NXTINFO + SCTP_PRINFO, /* 5.3.7 SCTP PR-SCTP Information Structure */ +#define SCTP_PRINFO SCTP_PRINFO } sctp_cmsg_t; /* -- cgit v1.2.3 From 2c0dbaa0c43d04d8d6daf52adb724c5789676b15 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 5 Mar 2018 20:44:19 +0800 Subject: sctp: add support for SCTP_DSTADDRV4/6 Information for sendmsg This patch is to add support for Destination IPv4/6 Address options for sendmsg, as described in section 5.3.9/10 of RFC6458. With this option, you can provide more than one destination addrs to sendmsg when creating asoc, like sctp_connectx. It's also a necessary send info for sctp_sendv. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 0dd1f82a4fa8..a1bc35098033 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -308,6 +308,12 @@ typedef enum sctp_cmsg_type { #define SCTP_NXTINFO SCTP_NXTINFO SCTP_PRINFO, /* 5.3.7 SCTP PR-SCTP Information Structure */ #define SCTP_PRINFO SCTP_PRINFO + SCTP_AUTHINFO, /* 5.3.8 SCTP AUTH Information Structure (RESERVED) */ +#define SCTP_AUTHINFO SCTP_AUTHINFO + SCTP_DSTADDRV4, /* 5.3.9 SCTP Destination IPv4 Address Structure */ +#define SCTP_DSTADDRV4 SCTP_DSTADDRV4 + SCTP_DSTADDRV6, /* 5.3.10 SCTP Destination IPv6 Address Structure */ +#define SCTP_DSTADDRV6 SCTP_DSTADDRV6 } sctp_cmsg_t; /* -- cgit v1.2.3 From 4910280503f3af2857d5aa77e35b22d93a8960a8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 5 Mar 2018 20:44:20 +0800 Subject: sctp: add support for snd flag SCTP_SENDALL process in sendmsg This patch is to add support for snd flag SCTP_SENDALL process in sendmsg, as described in section 5.3.4 of RFC6458. With this flag, you can send the same data to all the asocs of this sk once. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index a1bc35098033..e94b6d297ad9 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -284,6 +284,8 @@ enum sctp_sinfo_flags { SCTP_ADDR_OVER = (1 << 1), /* Override the primary destination. */ SCTP_ABORT = (1 << 2), /* Send an ABORT message to the peer. */ SCTP_SACK_IMMEDIATELY = (1 << 3), /* SACK should be sent without delay. */ + /* 2 bits here have been used by SCTP_PR_SCTP_MASK */ + SCTP_SENDALL = (1 << 6), SCTP_NOTIFICATION = MSG_NOTIFICATION, /* Next message is not user msg but notification. */ SCTP_EOF = MSG_FIN, /* Initiate graceful shutdown process. */ }; -- cgit v1.2.3 From 95da0cdb723260362fc126a563285ac66a193da7 Mon Sep 17 00:00:00 2001 From: Teng Qin Date: Tue, 6 Mar 2018 10:55:01 -0800 Subject: bpf: add support to read sample address in bpf program This commit adds new field "addr" to bpf_perf_event_data which could be read and used by bpf programs attached to perf events. The value of the field is copied from bpf_perf_event_data_kern.addr and contains the address value recorded by specifying sample_type with PERF_SAMPLE_ADDR when calling perf_event_open. Signed-off-by: Teng Qin Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf_perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h index 8f95303f9d80..eb1b9d21250c 100644 --- a/include/uapi/linux/bpf_perf_event.h +++ b/include/uapi/linux/bpf_perf_event.h @@ -13,6 +13,7 @@ struct bpf_perf_event_data { bpf_user_pt_regs_t regs; __u64 sample_period; + __u64 addr; }; #endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */ -- cgit v1.2.3 From 459d153d9916ea48b1550bbb6f2959dc03bff011 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Tue, 6 Mar 2018 18:11:14 +0100 Subject: net/sched: cls_flower: Add support to handle first frag as match field Allow setting firstfrag as matching option in tc flower classifier. # tc filter add dev eth0 protocol ip parent ffff: \ flower indev eth0 \ ip_flags firstfrag action mirred egress redirect dev eth1 Signed-off-by: Pieter Jansen van Vuuren Signed-off-by: Simon Horman Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 7cafb26df555..be05e66c167b 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -475,6 +475,7 @@ enum { enum { TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), + TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), }; /* Match-all classifier */ -- cgit v1.2.3 From 84a1d9c4820080bebcbd413a845076dcb62f45fa Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 8 Mar 2018 15:45:03 +0000 Subject: net: ethtool: extend RXNFC API to support RSS spreading of filter matches We use a two-step process to configure a filter with RSS spreading. First, the RSS context is allocated and configured using ETHTOOL_SRSSH; this returns an identifier (rss_context) which can then be passed to subsequent invocations of ETHTOOL_SRXCLSRLINS to specify that the offset from the RSS indirection table lookup should be added to the queue number (ring_cookie) when delivering the packet. Drivers for devices which can only use the indirection table entry directly (not add it to a base queue number) should reject rule insertions combining RSS with a nonzero ring_cookie. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 44a0b675a6bc..20da156aaf64 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -914,12 +914,15 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie) * @flow_type: Type of flow to be affected, e.g. %TCP_V4_FLOW * @data: Command-dependent value * @fs: Flow classification rule + * @rss_context: RSS context to be affected * @rule_cnt: Number of rules to be affected * @rule_locs: Array of used rule locations * * For %ETHTOOL_GRXFH and %ETHTOOL_SRXFH, @data is a bitmask indicating * the fields included in the flow hash, e.g. %RXH_IP_SRC. The following - * structure fields must not be used. + * structure fields must not be used, except that if @flow_type includes + * the %FLOW_RSS flag, then @rss_context determines which RSS context to + * act on. * * For %ETHTOOL_GRXRINGS, @data is set to the number of RX rings/queues * on return. @@ -931,7 +934,9 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie) * set in @data then special location values should not be used. * * For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the location of an - * existing rule on entry and @fs contains the rule on return. + * existing rule on entry and @fs contains the rule on return; if + * @fs.@flow_type includes the %FLOW_RSS flag, then @rss_context is + * filled with the RSS context ID associated with the rule. * * For %ETHTOOL_GRXCLSRLALL, @rule_cnt specifies the array size of the * user buffer for @rule_locs on entry. On return, @data is the size @@ -942,7 +947,11 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie) * For %ETHTOOL_SRXCLSRLINS, @fs specifies the rule to add or update. * @fs.@location either specifies the location to use or is a special * location value with %RX_CLS_LOC_SPECIAL flag set. On return, - * @fs.@location is the actual rule location. + * @fs.@location is the actual rule location. If @fs.@flow_type + * includes the %FLOW_RSS flag, @rss_context is the RSS context ID to + * use for flow spreading traffic which matches this rule. The value + * from the rxfh indirection table will be added to @fs.@ring_cookie + * to choose which ring to deliver to. * * For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the location of an * existing rule on entry. @@ -963,7 +972,10 @@ struct ethtool_rxnfc { __u32 flow_type; __u64 data; struct ethtool_rx_flow_spec fs; - __u32 rule_cnt; + union { + __u32 rule_cnt; + __u32 rss_context; + }; __u32 rule_locs[0]; }; @@ -990,7 +1002,11 @@ struct ethtool_rxfh_indir { /** * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key. * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH - * @rss_context: RSS context identifier. + * @rss_context: RSS context identifier. Context 0 is the default for normal + * traffic; other contexts can be referenced as the destination for RX flow + * classification rules. %ETH_RXFH_CONTEXT_ALLOC is used with command + * %ETHTOOL_SRSSH to allocate a new RSS context; on return this field will + * contain the ID of the newly allocated context. * @indir_size: On entry, the array size of the user buffer for the * indirection table, which may be zero, or (for %ETHTOOL_SRSSH), * %ETH_RXFH_INDIR_NO_CHANGE. On return from %ETHTOOL_GRSSH, @@ -1009,7 +1025,8 @@ struct ethtool_rxfh_indir { * size should be returned. For %ETHTOOL_SRSSH, an @indir_size of * %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested * and a @indir_size of zero means the indir table should be reset to default - * values. An hfunc of zero means that hash function setting is not requested. + * values (if @rss_context == 0) or that the RSS context should be deleted. + * An hfunc of zero means that hash function setting is not requested. */ struct ethtool_rxfh { __u32 cmd; @@ -1021,6 +1038,7 @@ struct ethtool_rxfh { __u32 rsvd32; __u32 rss_config[0]; }; +#define ETH_RXFH_CONTEXT_ALLOC 0xffffffff #define ETH_RXFH_INDIR_NO_CHANGE 0xffffffff /** @@ -1635,6 +1653,8 @@ static inline int ethtool_validate_duplex(__u8 duplex) /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ #define FLOW_EXT 0x80000000 #define FLOW_MAC_EXT 0x40000000 +/* Flag to enable RSS spreading of traffic matching rule (nfc only) */ +#define FLOW_RSS 0x20000000 /* L3-L4 network traffic flow hash options */ #define RXH_L2DA (1 << 1) -- cgit v1.2.3 From 41aeefcc38a2643a62db46818a70a781efb40d99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 13 Mar 2018 11:41:12 +0100 Subject: batman-adv: add DAT cache netlink support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dump the list of DAT cache entries via the netlink socket. Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 56ae28934070..95ab5dbd09fa 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -272,6 +272,21 @@ enum batadv_nl_attrs { */ BATADV_ATTR_BLA_CRC, + /** + * @BATADV_ATTR_DAT_CACHE_IP4ADDRESS: Client IPv4 address + */ + BATADV_ATTR_DAT_CACHE_IP4ADDRESS, + + /** + * @BATADV_ATTR_DAT_CACHE_HWADDRESS: Client MAC address + */ + BATADV_ATTR_DAT_CACHE_HWADDRESS, + + /** + * @BATADV_ATTR_DAT_CACHE_VID: VLAN ID + */ + BATADV_ATTR_DAT_CACHE_VID, + /* add attributes above here, update the policy in netlink.c */ /** @@ -361,6 +376,11 @@ enum batadv_nl_commands { */ BATADV_CMD_GET_BLA_BACKBONE, + /** + * @BATADV_CMD_GET_DAT_CACHE: Query list of DAT cache entries + */ + BATADV_CMD_GET_DAT_CACHE, + /* add new commands above here */ /** -- cgit v1.2.3 From 53dd9a68ba683986ec90497586f94b941bb748a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 13 Mar 2018 11:41:13 +0100 Subject: batman-adv: add multicast flags netlink support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dump the list of multicast flags entries via the netlink socket. Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 62 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 95ab5dbd09fa..324a0e1143e7 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -91,6 +91,53 @@ enum batadv_tt_client_flags { BATADV_TT_CLIENT_TEMP = (1 << 11), }; +/** + * enum batadv_mcast_flags_priv - Private, own multicast flags + * + * These are internal, multicast related flags. Currently they describe certain + * multicast related attributes of the segment this originator bridges into the + * mesh. + * + * Those attributes are used to determine the public multicast flags this + * originator is going to announce via TT. + * + * For netlink, if BATADV_MCAST_FLAGS_BRIDGED is unset then all querier + * related flags are undefined. + */ +enum batadv_mcast_flags_priv { + /** + * @BATADV_MCAST_FLAGS_BRIDGED: There is a bridge on top of the mesh + * interface. + */ + BATADV_MCAST_FLAGS_BRIDGED = (1 << 0), + + /** + * @BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS: Whether an IGMP querier + * exists in the mesh + */ + BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS = (1 << 1), + + /** + * @BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS: Whether an MLD querier + * exists in the mesh + */ + BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS = (1 << 2), + + /** + * @BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING: If an IGMP querier + * exists, whether it is potentially shadowing multicast listeners + * (i.e. querier is behind our own bridge segment) + */ + BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING = (1 << 3), + + /** + * @BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING: If an MLD querier + * exists, whether it is potentially shadowing multicast listeners + * (i.e. querier is behind our own bridge segment) + */ + BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING = (1 << 4), +}; + /** * enum batadv_nl_attrs - batman-adv netlink attributes */ @@ -287,6 +334,16 @@ enum batadv_nl_attrs { */ BATADV_ATTR_DAT_CACHE_VID, + /** + * @BATADV_ATTR_MCAST_FLAGS: Per originator multicast flags + */ + BATADV_ATTR_MCAST_FLAGS, + + /** + * @BATADV_ATTR_MCAST_FLAGS_PRIV: Private, own multicast flags + */ + BATADV_ATTR_MCAST_FLAGS_PRIV, + /* add attributes above here, update the policy in netlink.c */ /** @@ -381,6 +438,11 @@ enum batadv_nl_commands { */ BATADV_CMD_GET_DAT_CACHE, + /** + * @BATADV_CMD_GET_MCAST_FLAGS: Query list of multicast flags + */ + BATADV_CMD_GET_MCAST_FLAGS, + /* add new commands above here */ /** -- cgit v1.2.3 From 3ff547c06a7d75d72d37dae2c064fcf0672e56c0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Mar 2018 19:05:31 +0800 Subject: sctp: add support for SCTP AUTH Information for sendmsg This patch is to add support for SCTP AUTH Information for sendmsg, as described in section 5.3.8 of RFC6458. With this option, you can provide shared key identifier used for sending the user message. It's also a necessary send info for sctp_sendv. Note that it reuses sinfo->sinfo_tsn to indicate if this option is set and sinfo->sinfo_ssn to save the shkey ID which can be 0. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index e94b6d297ad9..47e781ebde05 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -273,6 +273,18 @@ struct sctp_prinfo { __u32 pr_value; }; +/* 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO) + * + * This cmsghdr structure specifies SCTP options for sendmsg(). + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ ------------------- + * IPPROTO_SCTP SCTP_AUTHINFO struct sctp_authinfo + */ +struct sctp_authinfo { + __u16 auth_keynumber; +}; + /* * sinfo_flags: 16 bits (unsigned integer) * @@ -310,7 +322,7 @@ typedef enum sctp_cmsg_type { #define SCTP_NXTINFO SCTP_NXTINFO SCTP_PRINFO, /* 5.3.7 SCTP PR-SCTP Information Structure */ #define SCTP_PRINFO SCTP_PRINFO - SCTP_AUTHINFO, /* 5.3.8 SCTP AUTH Information Structure (RESERVED) */ + SCTP_AUTHINFO, /* 5.3.8 SCTP AUTH Information Structure */ #define SCTP_AUTHINFO SCTP_AUTHINFO SCTP_DSTADDRV4, /* 5.3.9 SCTP Destination IPv4 Address Structure */ #define SCTP_DSTADDRV4 SCTP_DSTADDRV4 -- cgit v1.2.3 From 601590ec155aadf5daa17a6f63a06d1bba5b5ce9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Mar 2018 19:05:32 +0800 Subject: sctp: add sockopt SCTP_AUTH_DEACTIVATE_KEY This patch is to add sockopt SCTP_AUTH_DEACTIVATE_KEY, as described in section 8.3.4 of RFC6458. This set option indicates that the application will no longer send user messages using the indicated key identifier. Note that RFC requires that only deactivated keys that are no longer used by an association can be deleted, but for the backward compatibility, it is not to check deactivated when deleting or replacing one sh_key. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 47e781ebde05..08fc313829f4 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -99,6 +99,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_RECVRCVINFO 32 #define SCTP_RECVNXTINFO 33 #define SCTP_DEFAULT_SNDINFO 34 +#define SCTP_AUTH_DEACTIVATE_KEY 35 /* Internal Socket Options. Some of the sctp library functions are * implemented using these socket options. -- cgit v1.2.3 From ec2e506c680deaa8e1a087986db6d73ba63a04be Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Mar 2018 19:05:33 +0800 Subject: sctp: add SCTP_AUTH_FREE_KEY type for AUTHENTICATION_EVENT This patch is to add SCTP_AUTH_FREE_KEY type for AUTHENTICATION_EVENT, as described in section 6.1.8 of RFC6458. SCTP_AUTH_FREE_KEY: This report indicates that the SCTP implementation will no longer use the key identifier specified in auth_keynumber. After deactivating a key, it would never be used again, which means it's refcnt can't be held/increased by new chunks. But there may be some chunks in out queue still using it. So only when refcnt is 1, which means no chunk in outqueue is using/holding this key either, this EVENT would be sent. When users receive this notification, they could do DEL_KEY sockopt to remove this shkey, and also tell the peer that this key won't be used in any chunk thoroughly from now on, then the peer can remove it as well safely. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 08fc313829f4..18ebbfeee4af 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -518,7 +518,11 @@ struct sctp_authkey_event { sctp_assoc_t auth_assoc_id; }; -enum { SCTP_AUTH_NEWKEY = 0, }; +enum { + SCTP_AUTH_NEW_KEY, +#define SCTP_AUTH_NEWKEY SCTP_AUTH_NEW_KEY /* compatible with before */ + SCTP_AUTH_FREE_KEY, +}; /* * 6.1.9. SCTP_SENDER_DRY_EVENT -- cgit v1.2.3 From 30f6ebf65bc46161c5aaff1db2e6e7c76aa4a06b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Mar 2018 19:05:34 +0800 Subject: sctp: add SCTP_AUTH_NO_AUTH type for AUTHENTICATION_EVENT This patch is to add SCTP_AUTH_NO_AUTH type for AUTHENTICATION_EVENT, as described in section 6.1.8 of RFC6458. SCTP_AUTH_NO_AUTH: This report indicates that the peer does not support SCTP authentication as defined in [RFC4895]. Note that the implementation is quite similar as that of SCTP_ADAPTATION_INDICATION. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 18ebbfeee4af..afd4346386e0 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -522,6 +522,7 @@ enum { SCTP_AUTH_NEW_KEY, #define SCTP_AUTH_NEWKEY SCTP_AUTH_NEW_KEY /* compatible with before */ SCTP_AUTH_FREE_KEY, + SCTP_AUTH_NO_AUTH, }; /* -- cgit v1.2.3 From 615755a77b2461ed78dfafb8a6649456201949c7 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 14 Mar 2018 10:23:21 -0700 Subject: bpf: extend stackmap to save binary_build_id+offset instead of address Currently, bpf stackmap store address for each entry in the call trace. To map these addresses to user space files, it is necessary to maintain the mapping from these virtual address to symbols in the binary. Usually, the user space profiler (such as perf) has to scan /proc/pid/maps at the beginning of profiling, and monitor mmap2() calls afterwards. Given the cost of maintaining the address map, this solution is not practical for system wide profiling that is always on. This patch tries to solve this problem with a variation of stackmap. This variation is enabled by flag BPF_F_STACK_BUILD_ID. Instead of storing addresses, the variation stores ELF file build_id + offset. Build ID is a 20-byte unique identifier for ELF files. The following command shows the Build ID of /bin/bash: [user@]$ readelf -n /bin/bash ... Build ID: XXXXXXXXXX ... With BPF_F_STACK_BUILD_ID, bpf_get_stackid() tries to parse Build ID for each entry in the call trace, and translate it into the following struct: struct bpf_stack_build_id_offset { __s32 status; unsigned char build_id[BPF_BUILD_ID_SIZE]; union { __u64 offset; __u64 ip; }; }; The search of build_id is limited to the first page of the file, and this page should be in page cache. Otherwise, we fallback to store ip for this entry (ip field in struct bpf_stack_build_id_offset). This requires the build_id to be stored in the first page. A quick survey of binary and dynamic library files in a few different systems shows that almost all binary and dynamic library files have build_id in the first page. Build_id is only meaningful for user stack. If a kernel stack is added to a stackmap with BPF_F_STACK_BUILD_ID, it will automatically fallback to only store ip (status == BPF_STACK_BUILD_ID_IP). Similarly, if build_id lookup failed for some reason, it will also fallback to store ip. User space can access struct bpf_stack_build_id_offset with bpf syscall BPF_MAP_LOOKUP_ELEM. It is necessary for user space to maintain mapping from build id to binary files. This mostly static mapping is much easier to maintain than per process address maps. Note: Stackmap with build_id only works in non-nmi context at this time. This is because we need to take mm->mmap_sem for find_vma(). If this changes, we would like to allow build_id lookup in nmi context. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2a66769e5875..1e15d1724d89 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -231,6 +231,28 @@ enum bpf_attach_type { #define BPF_F_RDONLY (1U << 3) #define BPF_F_WRONLY (1U << 4) +/* Flag for stack_map, store build_id+offset instead of pointer */ +#define BPF_F_STACK_BUILD_ID (1U << 5) + +enum bpf_stack_build_id_status { + /* user space need an empty entry to identify end of a trace */ + BPF_STACK_BUILD_ID_EMPTY = 0, + /* with valid build_id and offset */ + BPF_STACK_BUILD_ID_VALID = 1, + /* couldn't get build_id, fallback to ip */ + BPF_STACK_BUILD_ID_IP = 2, +}; + +#define BPF_BUILD_ID_SIZE 20 +struct bpf_stack_build_id { + __s32 status; + unsigned char build_id[BPF_BUILD_ID_SIZE]; + union { + __u64 offset; + __u64 ip; + }; +}; + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ -- cgit v1.2.3 From 7156d194a0772f733865267e7207e0b08f81b02b Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Fri, 16 Mar 2018 10:51:07 -0700 Subject: tcp: add snd_ssthresh stat in SCM_TIMESTAMPING_OPT_STATS This patch adds TCP_NLA_SND_SSTHRESH stat into SCM_TIMESTAMPING_OPT_STATS that reports tcp_sock.snd_ssthresh. Signed-off-by: Yousuk Seung Signed-off-by: Neal Cardwell Signed-off-by: Priyaranjan Jha Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Yuchung Cheng Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 4c0ae0faf7ca..560374c978f9 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -243,6 +243,7 @@ enum { TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */ TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */ TCP_NLA_CA_STATE, /* ca_state of socket */ + TCP_NLA_SND_SSTHRESH, /* Slow start size threshold */ }; -- cgit v1.2.3 From 928df1880e24bcd47d6359ff86df24db3dfba3c3 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Thu, 15 Mar 2018 16:48:51 +0100 Subject: tipc: obsolete TIPC_ZONE_SCOPE Publications for TIPC_CLUSTER_SCOPE and TIPC_ZONE_SCOPE are in all aspects handled the same way, both on the publishing node and on the receiving nodes. Despite previous ambitions to the contrary, this is never going to change, so we take the conseqeunce of this and obsolete TIPC_ZONE_SCOPE and related macros/functions. Whenever a user is doing a bind() or a sendmsg() attempt using ZONE_SCOPE we translate this internally to CLUSTER_SCOPE, while we remain compatible with users and remote nodes still using ZONE_SCOPE. Furthermore, the non-formalized scope value 0 has always been permitted for use during lookup, with the same meaning as ZONE_SCOPE/CLUSTER_SCOPE. We now permit it even as binding scope, but for compatibility reasons we choose to not change the value of TIPC_CLUSTER_SCOPE. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc.h | 102 ++++++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 48 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 14bacc7e6cef..4ac9f1f02b06 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -61,50 +61,6 @@ struct tipc_name_seq { __u32 upper; }; -/* TIPC Address Size, Offset, Mask specification for Z.C.N - */ -#define TIPC_NODE_BITS 12 -#define TIPC_CLUSTER_BITS 12 -#define TIPC_ZONE_BITS 8 - -#define TIPC_NODE_OFFSET 0 -#define TIPC_CLUSTER_OFFSET TIPC_NODE_BITS -#define TIPC_ZONE_OFFSET (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS) - -#define TIPC_NODE_SIZE ((1UL << TIPC_NODE_BITS) - 1) -#define TIPC_CLUSTER_SIZE ((1UL << TIPC_CLUSTER_BITS) - 1) -#define TIPC_ZONE_SIZE ((1UL << TIPC_ZONE_BITS) - 1) - -#define TIPC_NODE_MASK (TIPC_NODE_SIZE << TIPC_NODE_OFFSET) -#define TIPC_CLUSTER_MASK (TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET) -#define TIPC_ZONE_MASK (TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET) - -#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK) - -static inline __u32 tipc_addr(unsigned int zone, - unsigned int cluster, - unsigned int node) -{ - return (zone << TIPC_ZONE_OFFSET) | - (cluster << TIPC_CLUSTER_OFFSET) | - node; -} - -static inline unsigned int tipc_zone(__u32 addr) -{ - return addr >> TIPC_ZONE_OFFSET; -} - -static inline unsigned int tipc_cluster(__u32 addr) -{ - return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET; -} - -static inline unsigned int tipc_node(__u32 addr) -{ - return addr & TIPC_NODE_MASK; -} - /* * Application-accessible port name types */ @@ -117,9 +73,10 @@ static inline unsigned int tipc_node(__u32 addr) /* * Publication scopes when binding port names and port name sequences */ -#define TIPC_ZONE_SCOPE 1 -#define TIPC_CLUSTER_SCOPE 2 -#define TIPC_NODE_SCOPE 3 +enum tipc_scope { + TIPC_CLUSTER_SCOPE = 2, /* 0 can also be used */ + TIPC_NODE_SCOPE = 3 +}; /* * Limiting values for messages @@ -243,7 +200,7 @@ struct sockaddr_tipc { struct tipc_group_req { __u32 type; /* group id */ __u32 instance; /* member id */ - __u32 scope; /* zone/cluster/node */ + __u32 scope; /* cluster/node */ __u32 flags; }; @@ -268,4 +225,53 @@ struct tipc_sioc_ln_req { __u32 bearer_id; char linkname[TIPC_MAX_LINK_NAME]; }; + + +/* The macros and functions below are deprecated: + */ + +#define TIPC_ZONE_SCOPE 1 + +#define TIPC_NODE_BITS 12 +#define TIPC_CLUSTER_BITS 12 +#define TIPC_ZONE_BITS 8 + +#define TIPC_NODE_OFFSET 0 +#define TIPC_CLUSTER_OFFSET TIPC_NODE_BITS +#define TIPC_ZONE_OFFSET (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS) + +#define TIPC_NODE_SIZE ((1UL << TIPC_NODE_BITS) - 1) +#define TIPC_CLUSTER_SIZE ((1UL << TIPC_CLUSTER_BITS) - 1) +#define TIPC_ZONE_SIZE ((1UL << TIPC_ZONE_BITS) - 1) + +#define TIPC_NODE_MASK (TIPC_NODE_SIZE << TIPC_NODE_OFFSET) +#define TIPC_CLUSTER_MASK (TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET) +#define TIPC_ZONE_MASK (TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET) + +#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK) + +static inline __u32 tipc_addr(unsigned int zone, + unsigned int cluster, + unsigned int node) +{ + return (zone << TIPC_ZONE_OFFSET) | + (cluster << TIPC_CLUSTER_OFFSET) | + node; +} + +static inline unsigned int tipc_zone(__u32 addr) +{ + return addr >> TIPC_ZONE_OFFSET; +} + +static inline unsigned int tipc_cluster(__u32 addr) +{ + return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET; +} + +static inline unsigned int tipc_node(__u32 addr) +{ + return addr & TIPC_NODE_MASK; +} + #endif -- cgit v1.2.3 From 4f738adba30a7cfc006f605707e7aee847ffefa0 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 18 Mar 2018 12:57:10 -0700 Subject: bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data This implements a BPF ULP layer to allow policy enforcement and monitoring at the socket layer. In order to support this a new program type BPF_PROG_TYPE_SK_MSG is used to run the policy at the sendmsg/sendpage hook. To attach the policy to sockets a sockmap is used with a new program attach type BPF_SK_MSG_VERDICT. Similar to previous sockmap usages when a sock is added to a sockmap, via a map update, if the map contains a BPF_SK_MSG_VERDICT program type attached then the BPF ULP layer is created on the socket and the attached BPF_PROG_TYPE_SK_MSG program is run for every msg in sendmsg case and page/offset in sendpage case. BPF_PROG_TYPE_SK_MSG Semantics/API: BPF_PROG_TYPE_SK_MSG supports only two return codes SK_PASS and SK_DROP. Returning SK_DROP free's the copied data in the sendmsg case and in the sendpage case leaves the data untouched. Both cases return -EACESS to the user. Returning SK_PASS will allow the msg to be sent. In the sendmsg case data is copied into kernel space buffers before running the BPF program. The kernel space buffers are stored in a scatterlist object where each element is a kernel memory buffer. Some effort is made to coalesce data from the sendmsg call here. For example a sendmsg call with many one byte iov entries will likely be pushed into a single entry. The BPF program is run with data pointers (start/end) pointing to the first sg element. In the sendpage case data is not copied. We opt not to copy the data by default here, because the BPF infrastructure does not know what bytes will be needed nor when they will be needed. So copying all bytes may be wasteful. Because of this the initial start/end data pointers are (0,0). Meaning no data can be read or written. This avoids reading data that may be modified by the user. A new helper is added later in this series if reading and writing the data is needed. The helper call will do a copy by default so that the page is exclusively owned by the BPF call. The verdict from the BPF_PROG_TYPE_SK_MSG applies to the entire msg in the sendmsg() case and the entire page/offset in the sendpage case. This avoids ambiguity on how to handle mixed return codes in the sendmsg case. Again a helper is added later in the series if a verdict needs to apply to multiple system calls and/or only a subpart of the currently being processed message. The helper msg_redirect_map() can be used to select the socket to send the data on. This is used similar to existing redirect use cases. This allows policy to redirect msgs. Pseudo code simple example: The basic logic to attach a program to a socket is as follows, // load the programs bpf_prog_load(SOCKMAP_TCP_MSG_PROG, BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog); // lookup the sockmap bpf_map_msg = bpf_object__find_map_by_name(obj, "my_sock_map"); // get fd for sockmap map_fd_msg = bpf_map__fd(bpf_map_msg); // attach program to sockmap bpf_prog_attach(msg_prog, map_fd_msg, BPF_SK_MSG_VERDICT, 0); Adding sockets to the map is done in the normal way, // Add a socket 'fd' to sockmap at location 'i' bpf_map_update_elem(map_fd_msg, &i, fd, BPF_ANY); After the above any socket attached to "my_sock_map", in this case 'fd', will run the BPF msg verdict program (msg_prog) on every sendmsg and sendpage system call. For a complete example see BPF selftests or sockmap samples. Implementation notes: It seemed the simplest, to me at least, to use a refcnt to ensure psock is not lost across the sendmsg copy into the sg, the bpf program running on the data in sg_data, and the final pass to the TCP stack. Some performance testing may show a better method to do this and avoid the refcnt cost, but for now use the simpler method. Another item that will come after basic support is in place is supporting MSG_MORE flag. At the moment we call sendpages even if the MSG_MORE flag is set. An enhancement would be to collect the pages into a larger scatterlist and pass down the stack. Notice that bpf_tcp_sendmsg() could support this with some additional state saved across sendmsg calls. I built the code to support this without having to do refactoring work. Other features TBD include ZEROCOPY and the TCP_RECV_QUEUE/TCP_NO_QUEUE support. This will follow initial series shortly. Future work could improve size limits on the scatterlist rings used here. Currently, we use MAX_SKB_FRAGS simply because this was being used already in the TLS case. Future work could extend the kernel sk APIs to tune this depending on workload. This is a trade-off between memory usage and throughput performance. Signed-off-by: John Fastabend Acked-by: David S. Miller Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1e15d1724d89..ef529539abde 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -133,6 +133,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SOCK_OPS, BPF_PROG_TYPE_SK_SKB, BPF_PROG_TYPE_CGROUP_DEVICE, + BPF_PROG_TYPE_SK_MSG, }; enum bpf_attach_type { @@ -143,6 +144,7 @@ enum bpf_attach_type { BPF_SK_SKB_STREAM_PARSER, BPF_SK_SKB_STREAM_VERDICT, BPF_CGROUP_DEVICE, + BPF_SK_MSG_VERDICT, __MAX_BPF_ATTACH_TYPE }; @@ -718,6 +720,15 @@ union bpf_attr { * int bpf_override_return(pt_regs, rc) * @pt_regs: pointer to struct pt_regs * @rc: the return value to set + * + * int bpf_msg_redirect_map(map, key, flags) + * Redirect msg to a sock in map using key as a lookup key for the + * sock in map. + * @map: pointer to sockmap + * @key: key to lookup sock in map + * @flags: reserved for future use + * Return: SK_PASS + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -779,7 +790,8 @@ union bpf_attr { FN(perf_prog_read_value), \ FN(getsockopt), \ FN(override_return), \ - FN(sock_ops_cb_flags_set), + FN(sock_ops_cb_flags_set), \ + FN(msg_redirect_map), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -942,6 +954,14 @@ enum sk_action { SK_PASS, }; +/* user accessible metadata for SK_MSG packet hook, new fields must + * be added to the end of this structure + */ +struct sk_msg_md { + void *data; + void *data_end; +}; + #define BPF_TAG_SIZE 8 struct bpf_prog_info { -- cgit v1.2.3 From 2a100317c9ebc204a166f16294884fbf9da074ce Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 18 Mar 2018 12:57:15 -0700 Subject: bpf: sockmap, add bpf_msg_apply_bytes() helper A single sendmsg or sendfile system call can contain multiple logical messages that a BPF program may want to read and apply a verdict. But, without an apply_bytes helper any verdict on the data applies to all bytes in the sendmsg/sendfile. Alternatively, a BPF program may only care to read the first N bytes of a msg. If the payload is large say MB or even GB setting up and calling the BPF program repeatedly for all bytes, even though the verdict is already known, creates unnecessary overhead. To allow BPF programs to control how many bytes a given verdict applies to we implement a bpf_msg_apply_bytes() helper. When called from within a BPF program this sets a counter, internal to the BPF infrastructure, that applies the last verdict to the next N bytes. If the N is smaller than the current data being processed from a sendmsg/sendfile call, the first N bytes will be sent and the BPF program will be re-run with start_data pointing to the N+1 byte. If N is larger than the current data being processed the BPF verdict will be applied to multiple sendmsg/sendfile calls until N bytes are consumed. Note1 if a socket closes with apply_bytes counter non-zero this is not a problem because data is not being buffered for N bytes and is sent as its received. Signed-off-by: John Fastabend Acked-by: David S. Miller Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ef529539abde..a557a2a5d72d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -791,7 +791,8 @@ union bpf_attr { FN(getsockopt), \ FN(override_return), \ FN(sock_ops_cb_flags_set), \ - FN(msg_redirect_map), + FN(msg_redirect_map), \ + FN(msg_apply_bytes), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 91843d540a139eb8070bcff8aa10089164436deb Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 18 Mar 2018 12:57:20 -0700 Subject: bpf: sockmap, add msg_cork_bytes() helper In the case where we need a specific number of bytes before a verdict can be assigned, even if the data spans multiple sendmsg or sendfile calls. The BPF program may use msg_cork_bytes(). The extreme case is a user can call sendmsg repeatedly with 1-byte msg segments. Obviously, this is bad for performance but is still valid. If the BPF program needs N bytes to validate a header it can use msg_cork_bytes to specify N bytes and the BPF program will not be called again until N bytes have been accumulated. The infrastructure will attempt to coalesce data if possible so in many cases (most my use cases at least) the data will be in a single scatterlist element with data pointers pointing to start/end of the element. However, this is dependent on available memory so is not guaranteed. So BPF programs must validate data pointer ranges, but this is the case anyways to convince the verifier the accesses are valid. Signed-off-by: John Fastabend Acked-by: David S. Miller Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a557a2a5d72d..1765cfb16c99 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -792,7 +792,8 @@ union bpf_attr { FN(override_return), \ FN(sock_ops_cb_flags_set), \ FN(msg_redirect_map), \ - FN(msg_apply_bytes), + FN(msg_apply_bytes), \ + FN(msg_cork_bytes), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 015632bb30daaaee64e1bcac07570860e0bf3092 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 18 Mar 2018 12:57:25 -0700 Subject: bpf: sk_msg program helper bpf_sk_msg_pull_data Currently, if a bpf sk msg program is run the program can only parse data that the (start,end) pointers already consumed. For sendmsg hooks this is likely the first scatterlist element. For sendpage this will be the range (0,0) because the data is shared with userspace and by default we want to avoid allowing userspace to modify data while (or after) BPF verdict is being decided. To support pulling in additional bytes for parsing use a new helper bpf_sk_msg_pull(start, end, flags) which works similar to cls tc logic. This helper will attempt to point the data start pointer at 'start' bytes offest into msg and data end pointer at 'end' bytes offset into message. After basic sanity checks to ensure 'start' <= 'end' and 'end' <= msg_length there are a few cases we need to handle. First the sendmsg hook has already copied the data from userspace and has exclusive access to it. Therefor, it is not necessesary to copy the data. However, it may be required. After finding the scatterlist element with 'start' offset byte in it there are two cases. One the range (start,end) is entirely contained in the sg element and is already linear. All that is needed is to update the data pointers, no allocate/copy is needed. The other case is (start, end) crosses sg element boundaries. In this case we allocate a block of size 'end - start' and copy the data to linearize it. Next sendpage hook has not copied any data in initial state so that data pointers are (0,0). In this case we handle it similar to the above sendmsg case except the allocation/copy must always happen. Then when sending the data we have possibly three memory regions that need to be sent, (0, start - 1), (start, end), and (end + 1, msg_length). This is required to ensure any writes by the BPF program are correctly transmitted. Lastly this operation will invalidate any previous data checks so BPF programs will have to revalidate pointers after making this BPF call. Signed-off-by: John Fastabend Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1765cfb16c99..18b7c510c511 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -793,7 +793,8 @@ union bpf_attr { FN(sock_ops_cb_flags_set), \ FN(msg_redirect_map), \ FN(msg_apply_bytes), \ - FN(msg_cork_bytes), + FN(msg_cork_bytes), \ + FN(msg_pull_data), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From d719e3f21cf91d3f82bd827d46199ba41af2f73a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Mar 2018 11:57:20 +0100 Subject: netfilter: nft_ct: add NFT_CT_{SRC,DST}_{IP,IP6} All existing keys, except the NFT_CT_SRC and NFT_CT_DST are assumed to have strict datatypes. This is causing problems with sets and concatenations given the specific length of these keys is not known. Signed-off-by: Pablo Neira Ayuso Acked-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 66dceee0ae30..6a3d653d5b27 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -909,8 +909,8 @@ enum nft_rt_attributes { * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms * @NFT_CT_HELPER: connection tracking helper assigned to conntrack * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol - * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address) - * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address) + * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address, deprecated) + * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address, deprecated) * @NFT_CT_PROTOCOL: conntrack layer 4 protocol * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination @@ -920,6 +920,10 @@ enum nft_rt_attributes { * @NFT_CT_AVGPKT: conntrack average bytes per packet * @NFT_CT_ZONE: conntrack zone * @NFT_CT_EVENTMASK: ctnetlink events to be generated for this conntrack + * @NFT_CT_SRC_IP: conntrack layer 3 protocol source (IPv4 address) + * @NFT_CT_DST_IP: conntrack layer 3 protocol destination (IPv4 address) + * @NFT_CT_SRC_IP6: conntrack layer 3 protocol source (IPv6 address) + * @NFT_CT_DST_IP6: conntrack layer 3 protocol destination (IPv6 address) */ enum nft_ct_keys { NFT_CT_STATE, @@ -941,6 +945,10 @@ enum nft_ct_keys { NFT_CT_AVGPKT, NFT_CT_ZONE, NFT_CT_EVENTMASK, + NFT_CT_SRC_IP, + NFT_CT_DST_IP, + NFT_CT_SRC_IP6, + NFT_CT_DST_IP6, }; /** -- cgit v1.2.3 From 472a73e00757b971d613d796374d2727b2e4954d Mon Sep 17 00:00:00 2001 From: Jack Ma Date: Mon, 19 Mar 2018 09:41:59 +1300 Subject: netfilter: xt_conntrack: Support bit-shifting for CONNMARK & MARK targets. This patch introduces a new feature that allows bitshifting (left and right) operations to co-operate with existing iptables options. Reviewed-by: Florian Westphal Signed-off-by: Jack Ma Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_connmark.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_connmark.h b/include/uapi/linux/netfilter/xt_connmark.h index 408a9654f05c..1aa5c955ee1e 100644 --- a/include/uapi/linux/netfilter/xt_connmark.h +++ b/include/uapi/linux/netfilter/xt_connmark.h @@ -19,11 +19,21 @@ enum { XT_CONNMARK_RESTORE }; +enum { + D_SHIFT_LEFT = 0, + D_SHIFT_RIGHT, +}; + struct xt_connmark_tginfo1 { __u32 ctmark, ctmask, nfmask; __u8 mode; }; +struct xt_connmark_tginfo2 { + __u32 ctmark, ctmask, nfmask; + __u8 shift_dir, shift_bits, mode; +}; + struct xt_connmark_mtinfo1 { __u32 mark, mask; __u8 invert; -- cgit v1.2.3 From 20710b3b81895c89e92bcc32ce85c0bede1171f8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 20 Mar 2018 12:33:51 +0100 Subject: netfilter: ctnetlink: synproxy support This patch exposes synproxy information per-conntrack. Moreover, send sequence adjustment events once server sends us the SYN,ACK packet, so we can synchronize the sequence adjustment too for packets going as reply from the server, as part of the synproxy logic. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 1 + include/uapi/linux/netfilter/nfnetlink_conntrack.h | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 9574bd40870b..c712eb6879f1 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -129,6 +129,7 @@ enum ip_conntrack_events { IPCT_NATSEQADJ = IPCT_SEQADJ, IPCT_SECMARK, /* new security mark has been set */ IPCT_LABEL, /* new connlabel has been set */ + IPCT_SYNPROXY, /* synproxy has been set */ #ifdef __KERNEL__ __IPCT_MAX #endif diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 7397e022ce6e..77987111cab0 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -54,6 +54,7 @@ enum ctattr_type { CTA_MARK_MASK, CTA_LABELS, CTA_LABELS_MASK, + CTA_SYNPROXY, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) @@ -190,6 +191,15 @@ enum ctattr_natseq { }; #define CTA_NAT_SEQ_MAX (__CTA_NAT_SEQ_MAX - 1) +enum ctattr_synproxy { + CTA_SYNPROXY_UNSPEC, + CTA_SYNPROXY_ISN, + CTA_SYNPROXY_ITS, + CTA_SYNPROXY_TSOFF, + __CTA_SYNPROXY_MAX, +}; +#define CTA_SYNPROXY_MAX (__CTA_SYNPROXY_MAX - 1) + enum ctattr_expect { CTA_EXPECT_UNSPEC, CTA_EXPECT_MASTER, -- cgit v1.2.3 From 5adc1668ddc42bb44fd6d006cacad74ed0cbf49d Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 4 Mar 2018 09:28:53 +0100 Subject: netfilter: ebtables: add support for matching ICMP type and code We already have ICMPv6 type/code matches. This adds support for IPv4 ICMP matches in the same way. Signed-off-by: Matthias Schiffer Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_bridge/ebt_ip.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter_bridge/ebt_ip.h b/include/uapi/linux/netfilter_bridge/ebt_ip.h index 8e462fb1983f..4ed7fbb0a482 100644 --- a/include/uapi/linux/netfilter_bridge/ebt_ip.h +++ b/include/uapi/linux/netfilter_bridge/ebt_ip.h @@ -24,8 +24,9 @@ #define EBT_IP_PROTO 0x08 #define EBT_IP_SPORT 0x10 #define EBT_IP_DPORT 0x20 +#define EBT_IP_ICMP 0x40 #define EBT_IP_MASK (EBT_IP_SOURCE | EBT_IP_DEST | EBT_IP_TOS | EBT_IP_PROTO |\ - EBT_IP_SPORT | EBT_IP_DPORT ) + EBT_IP_SPORT | EBT_IP_DPORT | EBT_IP_ICMP) #define EBT_IP_MATCH "ip" /* the same values are used for the invflags */ @@ -38,8 +39,14 @@ struct ebt_ip_info { __u8 protocol; __u8 bitmask; __u8 invflags; - __u16 sport[2]; - __u16 dport[2]; + union { + __u16 sport[2]; + __u8 icmp_type[2]; + }; + union { + __u16 dport[2]; + __u8 icmp_code[2]; + }; }; #endif -- cgit v1.2.3 From 78d9f4d49bbecd101b4e5faf19f8f70719fee2ca Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 4 Mar 2018 09:28:54 +0100 Subject: netfilter: ebtables: add support for matching IGMP type We already have ICMPv6 type/code matches (which can be used to distinguish different types of MLD packets). Add support for IPv4 IGMP matches in the same way. Signed-off-by: Matthias Schiffer Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_bridge/ebt_ip.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter_bridge/ebt_ip.h b/include/uapi/linux/netfilter_bridge/ebt_ip.h index 4ed7fbb0a482..46d6261370b0 100644 --- a/include/uapi/linux/netfilter_bridge/ebt_ip.h +++ b/include/uapi/linux/netfilter_bridge/ebt_ip.h @@ -25,8 +25,9 @@ #define EBT_IP_SPORT 0x10 #define EBT_IP_DPORT 0x20 #define EBT_IP_ICMP 0x40 +#define EBT_IP_IGMP 0x80 #define EBT_IP_MASK (EBT_IP_SOURCE | EBT_IP_DEST | EBT_IP_TOS | EBT_IP_PROTO |\ - EBT_IP_SPORT | EBT_IP_DPORT | EBT_IP_ICMP) + EBT_IP_SPORT | EBT_IP_DPORT | EBT_IP_ICMP | EBT_IP_IGMP) #define EBT_IP_MATCH "ip" /* the same values are used for the invflags */ @@ -42,6 +43,7 @@ struct ebt_ip_info { union { __u16 sport[2]; __u8 icmp_type[2]; + __u8 igmp_type[2]; }; union { __u16 dport[2]; -- cgit v1.2.3 From 2cb021f5de55b1d158fa18b0215a4613c3289a82 Mon Sep 17 00:00:00 2001 From: Dmitry Lebed Date: Thu, 1 Mar 2018 12:39:16 +0300 Subject: cfg80211/nl80211: add CAC_STARTED event CAC_STARTED event is needed for DFS offload feature and should be generated by driver/HW if DFS_OFFLOAD is enabled. Signed-off-by: Dmitry Lebed Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c13c84304be3..b8e989073cbb 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5204,6 +5204,8 @@ enum nl80211_smps_mode { * non-operating channel is expired and no longer valid. New CAC must * be done on this channel before starting the operation. This is not * applicable for ETSI dfs domain where pre-CAC is valid for ever. + * @NL80211_RADAR_CAC_STARTED: Channel Availability Check has been started, + * should be generated by HW if NL80211_EXT_FEATURE_DFS_OFFLOAD is enabled. */ enum nl80211_radar_event { NL80211_RADAR_DETECTED, @@ -5211,6 +5213,7 @@ enum nl80211_radar_event { NL80211_RADAR_CAC_ABORTED, NL80211_RADAR_NOP_FINISHED, NL80211_RADAR_PRE_CAC_EXPIRED, + NL80211_RADAR_CAC_STARTED, }; /** -- cgit v1.2.3 From 13cf6dec93e021ebd297619ea1926aea31b6430b Mon Sep 17 00:00:00 2001 From: Dmitry Lebed Date: Thu, 1 Mar 2018 12:39:15 +0300 Subject: cfg80211/nl80211: add DFS offload flag Add wiphy EXT_FEATURE flag to indicate that HW or driver does all DFS actions by itself. User-space functionality already implemented in hostapd using vendor-specific (QCA) OUI to advertise DFS offload support. Need to introduce generic flag to inform about DFS offload support. For devices with DFS_OFFLOAD flag set user-space will no longer need to issue CAC or do any actions in response to "radar detected" events. HW will do everything by itself and send events to user-space to indicate that CAC was started/finished, etc. Signed-off-by: Dmitrii Lebed Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b8e989073cbb..60fefc5a2ea4 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4999,6 +4999,12 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_LOW_SPAN_SCAN: Driver supports low span scan. * @NL80211_EXT_FEATURE_LOW_POWER_SCAN: Driver supports low power scan. * @NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN: Driver supports high accuracy scan. + * @NL80211_EXT_FEATURE_DFS_OFFLOAD: HW/driver will offload DFS actions. + * Device or driver will do all DFS-related actions by itself, + * informing user-space about CAC progress, radar detection event, + * channel change triggered by radar detection event. + * No need to start CAC from user-space, no need to react to + * "radar detected" event. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -5029,6 +5035,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_LOW_SPAN_SCAN, NL80211_EXT_FEATURE_LOW_POWER_SCAN, NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN, + NL80211_EXT_FEATURE_DFS_OFFLOAD, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From c30b70deb5f4861f590031c33fd3ec6cc63f1df1 Mon Sep 17 00:00:00 2001 From: GhantaKrishnamurthy MohanKrishna Date: Wed, 21 Mar 2018 14:37:44 +0100 Subject: tipc: implement socket diagnostics for AF_TIPC This commit adds socket diagnostics capability for AF_TIPC in netlink family NETLINK_SOCK_DIAG in a new kernel module (diag.ko). The following are key design considerations: - config TIPC_DIAG has default y, like INET_DIAG. - only requests with flag NLM_F_DUMP is supported (dump all). - tipc_sock_diag_req message is introduced to send filter parameters. - the response attributes are of TLV, some nested. To avoid exposing data structures between diag and tipc modules and avoid code duplication, the following additions are required: - export tipc_nl_sk_walk function to reuse socket iterator. - export tipc_sk_fill_sock_diag to fill the tipc diag attributes. - create a sock_diag response message in __tipc_add_sock_diag defined in diag.c and use the above exported tipc_sk_fill_sock_diag to fill response. Acked-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: GhantaKrishnamurthy MohanKrishna Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 18 ++++++++++++++++++ include/uapi/linux/tipc_sockets_diag.h | 17 +++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 include/uapi/linux/tipc_sockets_diag.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 469aa67a5ecb..d7cec0480d70 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -114,6 +114,13 @@ enum { TIPC_NLA_SOCK_REF, /* u32 */ TIPC_NLA_SOCK_CON, /* nest */ TIPC_NLA_SOCK_HAS_PUBL, /* flag */ + TIPC_NLA_SOCK_STAT, /* nest */ + TIPC_NLA_SOCK_TYPE, /* u32 */ + TIPC_NLA_SOCK_INO, /* u32 */ + TIPC_NLA_SOCK_UID, /* u32 */ + TIPC_NLA_SOCK_TIPC_STATE, /* u32 */ + TIPC_NLA_SOCK_COOKIE, /* u64 */ + TIPC_NLA_SOCK_PAD, /* flag */ __TIPC_NLA_SOCK_MAX, TIPC_NLA_SOCK_MAX = __TIPC_NLA_SOCK_MAX - 1 @@ -238,6 +245,17 @@ enum { TIPC_NLA_CON_MAX = __TIPC_NLA_CON_MAX - 1 }; +/* Nest, socket statistics info */ +enum { + TIPC_NLA_SOCK_STAT_RCVQ, /* u32 */ + TIPC_NLA_SOCK_STAT_SENDQ, /* u32 */ + TIPC_NLA_SOCK_STAT_LINK_CONG, /* flag */ + TIPC_NLA_SOCK_STAT_CONN_CONG, /* flag */ + + __TIPC_NLA_SOCK_STAT_MAX, + TIPC_NLA_SOCK_STAT_MAX = __TIPC_NLA_SOCK_STAT_MAX - 1 +}; + /* Nest, link propreties. Valid for link, media and bearer */ enum { TIPC_NLA_PROP_UNSPEC, diff --git a/include/uapi/linux/tipc_sockets_diag.h b/include/uapi/linux/tipc_sockets_diag.h new file mode 100644 index 000000000000..7678cf2f0dcc --- /dev/null +++ b/include/uapi/linux/tipc_sockets_diag.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* AF_TIPC sock_diag interface for querying open sockets */ + +#ifndef _UAPI__TIPC_SOCKETS_DIAG_H__ +#define _UAPI__TIPC_SOCKETS_DIAG_H__ + +#include +#include + +/* Request */ +struct tipc_sock_diag_req { + __u8 sdiag_family; /* must be AF_TIPC */ + __u8 sdiag_protocol; /* must be 0 */ + __u16 pad; /* must be 0 */ + __u32 tidiag_states; /* query*/ +}; +#endif /* _UAPI__TIPC_SOCKETS_DIAG_H__ */ -- cgit v1.2.3 From 872619d8cf810c17279335ef531a2a34f3b4e589 Mon Sep 17 00:00:00 2001 From: GhantaKrishnamurthy MohanKrishna Date: Wed, 21 Mar 2018 14:37:45 +0100 Subject: tipc: step sk->sk_drops when rcv buffer is full Currently when tipc is unable to queue a received message on a socket, the message is rejected back to the sender with error TIPC_ERR_OVERLOAD. However, the application on this socket has no knowledge about these discards. In this commit, we try to step the sk_drops counter when tipc is unable to queue a received message. Export sk_drops using tipc socket diagnostics. Acked-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: GhantaKrishnamurthy MohanKrishna Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index d7cec0480d70..d896ded51bcb 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -251,6 +251,7 @@ enum { TIPC_NLA_SOCK_STAT_SENDQ, /* u32 */ TIPC_NLA_SOCK_STAT_LINK_CONG, /* flag */ TIPC_NLA_SOCK_STAT_CONN_CONG, /* flag */ + TIPC_NLA_SOCK_STAT_DROP, /* u32 */ __TIPC_NLA_SOCK_STAT_MAX, TIPC_NLA_SOCK_STAT_MAX = __TIPC_NLA_SOCK_STAT_MAX - 1 -- cgit v1.2.3 From 14452ca3b5ce304fb2fea96dbc9ca1e4e7978551 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 21 Mar 2018 19:48:14 -0600 Subject: net: qualcomm: rmnet: Export mux_id and flags to netlink Define new netlink attributes for rmnet mux_id and flags. These flags / mux_id were earlier using vlan flags / id respectively. The flag bits are also moved to uapi and are renamed with prefix RMNET_FLAG_*. Also add the rmnet policy to handle the new netlink attributes. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 11d0c0ea2bfa..68699f654118 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -959,4 +959,25 @@ enum { #define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1) +/* rmnet section */ + +#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0) +#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) +#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) +#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) + +enum { + IFLA_RMNET_UNSPEC, + IFLA_RMNET_MUX_ID, + IFLA_RMNET_FLAGS, + __IFLA_RMNET_MAX, +}; + +#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1) + +struct ifla_rmnet_flags { + __u32 flags; + __u32 mask; +}; + #endif /* _UAPI_LINUX_IF_LINK_H */ -- cgit v1.2.3 From c46234ebb4d1eee5e09819f49169e51cfc6eb909 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Thu, 22 Mar 2018 10:10:35 -0700 Subject: tls: RX path for ktls Add rx path for tls software implementation. recvmsg, splice_read, and poll implemented. An additional sockopt TLS_RX is added, with the same interface as TLS_TX. Either TLX_RX or TLX_TX may be provided separately, or together (with two different setsockopt calls with appropriate keys). Control messages are passed via CMSG in a similar way to transmit. If no cmsg buffer is passed, then only application data records will be passed to userspace, and EIO is returned for other types of alerts. EBADMSG is passed for decryption errors, and EMSGSIZE is passed for framing too big, and EBADMSG for framing too small (matching openssl semantics). EINVAL is returned for TLS versions that do not match the original setsockopt call. All are unrecoverable. strparser is used to parse TLS framing. Decryption is done directly in to userspace buffers if they are large enough to support it, otherwise sk_cow_data is called (similar to ipsec), and buffers are decrypted in place and copied. splice_read always decrypts in place, since no buffers are provided to decrypt in to. sk_poll is overridden, and only returns POLLIN if a full TLS message is received. Otherwise we wait for strparser to finish reading a full frame. Actual decryption is only done during recvmsg or splice_read calls. Signed-off-by: Dave Watson Signed-off-by: David S. Miller --- include/uapi/linux/tls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index 293b2cdad88d..c6633e97eca4 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -38,6 +38,7 @@ /* TLS socket options */ #define TLS_TX 1 /* Set transmit parameters */ +#define TLS_RX 2 /* Set receive parameters */ /* Supported versions */ #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) @@ -59,6 +60,7 @@ #define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE 8 #define TLS_SET_RECORD_TYPE 1 +#define TLS_GET_RECORD_TYPE 2 struct tls_crypto_info { __u16 version; -- cgit v1.2.3 From d50ccc2d3909fc1b4d40e4af16b026f05dc68707 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Thu, 22 Mar 2018 20:42:50 +0100 Subject: tipc: add 128-bit node identifier We add a 128-bit node identity, as an alternative to the currently used 32-bit node address. For the sake of compatibility and to minimize message header changes we retain the existing 32-bit address field. When not set explicitly by the user, this field will be filled with a hash value generated from the much longer node identity, and be used as a shorthand value for the latter. We permit either the address or the identity to be set by configuration, but not both, so when the address value is set by a legacy user the corresponding 128-bit node identity is generated based on the that value. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index d896ded51bcb..0affb682e5e3 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -169,6 +169,8 @@ enum { TIPC_NLA_NET_UNSPEC, TIPC_NLA_NET_ID, /* u32 */ TIPC_NLA_NET_ADDR, /* u32 */ + TIPC_NLA_NET_NODEID, /* u64 */ + TIPC_NLA_NET_NODEID_W1, /* u64 */ __TIPC_NLA_NET_MAX, TIPC_NLA_NET_MAX = __TIPC_NLA_NET_MAX - 1 -- cgit v1.2.3 From e1577c1c881b09e9f15a743a4a1907815b74d0f7 Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Mon, 20 Nov 2017 16:14:30 +0200 Subject: ethtool: Add support for configuring PFC stall prevention in ethtool In the event where the device unexpectedly becomes unresponsive for a long period of time, flow control mechanism may propagate pause frames which will cause congestion spreading to the entire network. To prevent this scenario, when the device is stalled for a period longer than a pre-configured timeout, flow control mechanisms are automatically disabled. This patch adds support for the ETHTOOL_PFC_STALL_PREVENTION as a tunable. This API provides support for configuring flow control storm prevention timeout (msec). Signed-off-by: Inbar Karmy Cc: Michal Kubecek Cc: Andrew Lunn Signed-off-by: Saeed Mahameed --- include/uapi/linux/ethtool.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 20da156aaf64..4ca65b56084f 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -217,10 +217,14 @@ struct ethtool_value { __u32 data; }; +#define PFC_STORM_PREVENTION_AUTO 0xffff +#define PFC_STORM_PREVENTION_DISABLE 0 + enum tunable_id { ETHTOOL_ID_UNSPEC, ETHTOOL_RX_COPYBREAK, ETHTOOL_TX_COPYBREAK, + ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */ /* * Add your fresh new tubale attribute above and remember to update * tunable_strings[] in net/core/ethtool.c -- cgit v1.2.3 From c4f6699dfcb8558d138fe838f741b2c10f416cf9 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 28 Mar 2018 12:05:37 -0700 Subject: bpf: introduce BPF_RAW_TRACEPOINT Introduce BPF_PROG_TYPE_RAW_TRACEPOINT bpf program type to access kernel internal arguments of the tracepoints in their raw form. >From bpf program point of view the access to the arguments look like: struct bpf_raw_tracepoint_args { __u64 args[0]; }; int bpf_prog(struct bpf_raw_tracepoint_args *ctx) { // program can read args[N] where N depends on tracepoint // and statically verified at program load+attach time } kprobe+bpf infrastructure allows programs access function arguments. This feature allows programs access raw tracepoint arguments. Similar to proposed 'dynamic ftrace events' there are no abi guarantees to what the tracepoints arguments are and what their meaning is. The program needs to type cast args properly and use bpf_probe_read() helper to access struct fields when argument is a pointer. For every tracepoint __bpf_trace_##call function is prepared. In assembler it looks like: (gdb) disassemble __bpf_trace_xdp_exception Dump of assembler code for function __bpf_trace_xdp_exception: 0xffffffff81132080 <+0>: mov %ecx,%ecx 0xffffffff81132082 <+2>: jmpq 0xffffffff811231f0 where TRACE_EVENT(xdp_exception, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, u32 act), The above assembler snippet is casting 32-bit 'act' field into 'u64' to pass into bpf_trace_run3(), while 'dev' and 'xdp' args are passed as-is. All of ~500 of __bpf_trace_*() functions are only 5-10 byte long and in total this approach adds 7k bytes to .text. This approach gives the lowest possible overhead while calling trace_xdp_exception() from kernel C code and transitioning into bpf land. Since tracepoint+bpf are used at speeds of 1M+ events per second this is valuable optimization. The new BPF_RAW_TRACEPOINT_OPEN sys_bpf command is introduced that returns anon_inode FD of 'bpf-raw-tracepoint' object. The user space looks like: // load bpf prog with BPF_PROG_TYPE_RAW_TRACEPOINT type prog_fd = bpf_prog_load(...); // receive anon_inode fd for given bpf_raw_tracepoint with prog attached raw_tp_fd = bpf_raw_tracepoint_open("xdp_exception", prog_fd); Ctrl-C of tracing daemon or cmdline tool that uses this feature will automatically detach bpf program, unload it and unregister tracepoint probe. On the kernel side the __bpf_raw_tp_map section of pointers to tracepoint definition and to __bpf_trace_*() probe function is used to find a tracepoint with "xdp_exception" name and corresponding __bpf_trace_xdp_exception() probe function which are passed to tracepoint_probe_register() to connect probe with tracepoint. Addition of bpf_raw_tracepoint doesn't interfere with ftrace and perf tracepoint mechanisms. perf_event_open() can be used in parallel on the same tracepoint. Multiple bpf_raw_tracepoint_open("xdp_exception", prog_fd) are permitted. Each with its own bpf program. The kernel will execute all tracepoint probes and all attached bpf programs. In the future bpf_raw_tracepoints can be extended with query/introspection logic. __bpf_raw_tp_map section logic was contributed by Steven Rostedt Signed-off-by: Alexei Starovoitov Signed-off-by: Steven Rostedt (VMware) Acked-by: Steven Rostedt (VMware) Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 18b7c510c511..1878201c2d77 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -94,6 +94,7 @@ enum bpf_cmd { BPF_MAP_GET_FD_BY_ID, BPF_OBJ_GET_INFO_BY_FD, BPF_PROG_QUERY, + BPF_RAW_TRACEPOINT_OPEN, }; enum bpf_map_type { @@ -134,6 +135,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_SKB, BPF_PROG_TYPE_CGROUP_DEVICE, BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_RAW_TRACEPOINT, }; enum bpf_attach_type { @@ -344,6 +346,11 @@ union bpf_attr { __aligned_u64 prog_ids; __u32 prog_cnt; } query; + + struct { + __u64 name; + __u32 prog_fd; + } raw_tracepoint; } __attribute__((aligned(8))); /* BPF helper function descriptions: @@ -1152,4 +1159,8 @@ struct bpf_cgroup_dev_ctx { __u32 minor; }; +struct bpf_raw_tracepoint_args { + __u64 args[0]; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From f8d16d3edb4dbae080df04318423c360de3c594d Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Mon, 26 Mar 2018 12:52:45 -0500 Subject: nl80211: Add SOCKET_OWNER support to JOIN_IBSS Signed-off-by: Denis Kenzior [johannes: fix race with wdev lock/unlock by just acquiring once] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 60fefc5a2ea4..266b43c4f6e1 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1962,6 +1962,8 @@ enum nl80211_commands { * multicast group. * If set during %NL80211_CMD_ASSOCIATE or %NL80211_CMD_CONNECT the * station will deauthenticate when the socket is closed. + * If set during %NL80211_CMD_JOIN_IBSS the IBSS will be automatically + * torn down when the socket is closed. * * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is * the TDLS link initiator. -- cgit v1.2.3 From 188c1b3c04d69e842122daf201f07a34fcfad039 Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Mon, 26 Mar 2018 12:52:46 -0500 Subject: nl80211: Add SOCKET_OWNER support to JOIN_MESH Signed-off-by: Denis Kenzior [johannes: fix race with wdev lock/unlock by just acquiring once] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 266b43c4f6e1..98eb37def37d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1964,6 +1964,8 @@ enum nl80211_commands { * station will deauthenticate when the socket is closed. * If set during %NL80211_CMD_JOIN_IBSS the IBSS will be automatically * torn down when the socket is closed. + * If set during %NL80211_CMD_JOIN_MESH the mesh setup will be + * automatically torn down when the socket is closed. * * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is * the TDLS link initiator. -- cgit v1.2.3 From 466a306142c002b40deaa58da94741af4153d1c4 Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Mon, 26 Mar 2018 12:52:47 -0500 Subject: nl80211: Add SOCKET_OWNER support to START_AP Signed-off-by: Denis Kenzior Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 98eb37def37d..9ea3d6039eca 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1966,6 +1966,8 @@ enum nl80211_commands { * torn down when the socket is closed. * If set during %NL80211_CMD_JOIN_MESH the mesh setup will be * automatically torn down when the socket is closed. + * If set during %NL80211_CMD_START_AP the AP will be automatically + * disabled when the socket is closed. * * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is * the TDLS link initiator. -- cgit v1.2.3 From 6a671a50f8199b3e1fe49fa8afff0fc8335da79c Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Mon, 26 Mar 2018 12:52:41 -0500 Subject: nl80211: Add CMD_CONTROL_PORT_FRAME API This commit also adds cfg80211_rx_control_port function. This is used to generate a CMD_CONTROL_PORT_FRAME event out to userspace. The conn_owner_nlportid is used as the unicast destination. This means that userspace must specify NL80211_ATTR_SOCKET_OWNER flag if control port over nl80211 routing is requested in NL80211_CMD_CONNECT, NL80211_CMD_ASSOCIATE, NL80211_CMD_START_AP or IBSS/mesh join. Signed-off-by: Denis Kenzior [johannes: fix return value of cfg80211_rx_control_port()] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9ea3d6039eca..6a3cc7a635b5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -990,6 +990,17 @@ * &NL80211_CMD_CONNECT or &NL80211_CMD_ROAM. If the 4 way handshake failed * &NL80211_CMD_DISCONNECT should be indicated instead. * + * @NL80211_CMD_CONTROL_PORT_FRAME: Control Port (e.g. PAE) frame TX request + * and RX notification. This command is used both as a request to transmit + * a control port frame and as a notification that a control port frame + * has been received. %NL80211_ATTR_FRAME is used to specify the + * frame contents. The frame is the raw EAPoL data, without ethernet or + * 802.11 headers. + * When used as an event indication %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, + * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT and %NL80211_ATTR_MAC are added + * indicating the protocol type of the received frame; whether the frame + * was received unencrypted and the MAC address of the peer respectively. + * * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded. * * @NL80211_CMD_EXTERNAL_AUTH: This interface is exclusively defined for host @@ -1228,6 +1239,8 @@ enum nl80211_commands { NL80211_CMD_STA_OPMODE_CHANGED, + NL80211_CMD_CONTROL_PORT_FRAME, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ -- cgit v1.2.3 From 2576a9ace47eba28a682d249d1d6402f891808c9 Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Mon, 26 Mar 2018 12:52:42 -0500 Subject: nl80211: Implement TX of control port frames This commit implements the TX side of NL80211_CMD_CONTROL_PORT_FRAME. Userspace provides the raw EAPoL frame using NL80211_ATTR_FRAME. Userspace should also provide the destination address and the protocol type to use when sending the frame. This is used to implement TX of Pre-authentication frames. If CONTROL_PORT_ETHERTYPE_NO_ENCRYPT is specified, then the driver will be asked not to encrypt the outgoing frame. A new EXT_FEATURE flag is introduced so that nl80211 code can check whether a given wiphy has capability to pass EAPoL frames over nl80211. Signed-off-by: Denis Kenzior Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6a3cc7a635b5..3167d6f7fc68 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5024,6 +5024,8 @@ enum nl80211_feature_flags { * channel change triggered by radar detection event. * No need to start CAC from user-space, no need to react to * "radar detected" event. + * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211: Driver supports sending and + * receiving control port frames over nl80211 instead of the netdevice. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -5055,6 +5057,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_LOW_POWER_SCAN, NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN, NL80211_EXT_FEATURE_DFS_OFFLOAD, + NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 64bf3d4bc2b0725b3c5ffadd982a9746bfc738b7 Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Mon, 26 Mar 2018 12:52:43 -0500 Subject: nl80211: Add CONTROL_PORT_OVER_NL80211 attribute Signed-off-by: Denis Kenzior Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 3167d6f7fc68..15daf5e2638d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -542,7 +542,8 @@ * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_USE_MFP, * %NL80211_ATTR_MAC, %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT, * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, - * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, %NL80211_ATTR_MAC_HINT, and + * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, + * %NL80211_ATTR_CONTROL_PORT_OVER_NL80211, %NL80211_ATTR_MAC_HINT, and * %NL80211_ATTR_WIPHY_FREQ_HINT. * If included, %NL80211_ATTR_MAC and %NL80211_ATTR_WIPHY_FREQ are * restrictions on BSS selection, i.e., they effectively prevent roaming @@ -1488,6 +1489,15 @@ enum nl80211_commands { * @NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT: When included along with * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, indicates that the custom * ethertype frames used for key negotiation must not be encrypted. + * @NL80211_ATTR_CONTROL_PORT_OVER_NL80211: A flag indicating whether control + * port frames (e.g. of type given in %NL80211_ATTR_CONTROL_PORT_ETHERTYPE) + * will be sent directly to the network interface or sent via the NL80211 + * socket. If this attribute is missing, then legacy behavior of sending + * control port frames directly to the network interface is used. If the + * flag is included, then control port frames are sent over NL80211 instead + * using %CMD_CONTROL_PORT_FRAME. If control port routing over NL80211 is + * to be used then userspace must also use the %NL80211_ATTR_SOCKET_OWNER + * flag. * * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver. * We recommend using nested, driver-specific attributes within this. @@ -2647,6 +2657,8 @@ enum nl80211_attrs { NL80211_ATTR_NSS, NL80211_ATTR_ACK_SIGNAL, + NL80211_ATTR_CONTROL_PORT_OVER_NL80211, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 39c202d228c3da5a5531be847e9b06cc9b787f31 Mon Sep 17 00:00:00 2001 From: Bernie Harris Date: Wed, 21 Mar 2018 15:42:15 +1300 Subject: netfilter: ebtables: Add support for specifying match revision Currently ebtables assumes that the revision number of all match modules is 0, which is an issue when trying to use existing xtables matches with ebtables. The solution is to modify ebtables to allow extensions to specify a revision number, similar to iptables. This gets passed down to the kernel, which is then able to find the match module correctly. To main binary backwards compatibility, the size of the ebt_entry structures is not changed, only the size of the name field is decreased by 1 byte to make room for the revision field. Signed-off-by: Bernie Harris Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_bridge/ebtables.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h index 9ff57c0a0199..0c7dc8315013 100644 --- a/include/uapi/linux/netfilter_bridge/ebtables.h +++ b/include/uapi/linux/netfilter_bridge/ebtables.h @@ -20,6 +20,7 @@ #define EBT_TABLE_MAXNAMELEN 32 #define EBT_CHAIN_MAXNAMELEN EBT_TABLE_MAXNAMELEN #define EBT_FUNCTION_MAXNAMELEN EBT_TABLE_MAXNAMELEN +#define EBT_EXTENSION_MAXNAMELEN 31 /* verdicts >0 are "branches" */ #define EBT_ACCEPT -1 @@ -120,7 +121,10 @@ struct ebt_entries { struct ebt_entry_match { union { - char name[EBT_FUNCTION_MAXNAMELEN]; + struct { + char name[EBT_EXTENSION_MAXNAMELEN]; + uint8_t revision; + }; struct xt_match *match; } u; /* size of data */ @@ -130,7 +134,10 @@ struct ebt_entry_match { struct ebt_entry_watcher { union { - char name[EBT_FUNCTION_MAXNAMELEN]; + struct { + char name[EBT_EXTENSION_MAXNAMELEN]; + uint8_t revision; + }; struct xt_target *watcher; } u; /* size of data */ @@ -140,7 +147,10 @@ struct ebt_entry_watcher { struct ebt_entry_target { union { - char name[EBT_FUNCTION_MAXNAMELEN]; + struct { + char name[EBT_EXTENSION_MAXNAMELEN]; + uint8_t revision; + }; struct xt_target *target; } u; /* size of data */ -- cgit v1.2.3 From 5e43f899b03a3492ce5fc44e8900becb04dae9c0 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 30 Mar 2018 15:08:00 -0700 Subject: bpf: Check attach type at prog load time == The problem == There are use-cases when a program of some type can be attached to multiple attach points and those attach points must have different permissions to access context or to call helpers. E.g. context structure may have fields for both IPv4 and IPv6 but it doesn't make sense to read from / write to IPv6 field when attach point is somewhere in IPv4 stack. Same applies to BPF-helpers: it may make sense to call some helper from some attach point, but not from other for same prog type. == The solution == Introduce `expected_attach_type` field in in `struct bpf_attr` for `BPF_PROG_LOAD` command. If scenario described in "The problem" section is the case for some prog type, the field will be checked twice: 1) At load time prog type is checked to see if attach type for it must be known to validate program permissions correctly. Prog will be rejected with EINVAL if it's the case and `expected_attach_type` is not specified or has invalid value. 2) At attach time `attach_type` is compared with `expected_attach_type`, if prog type requires to have one, and, if they differ, attach will be rejected with EINVAL. The `expected_attach_type` is now available as part of `struct bpf_prog` in both `bpf_verifier_ops->is_valid_access()` and `bpf_verifier_ops->get_func_proto()` () and can be used to check context accesses and calls to helpers correspondingly. Initially the idea was discussed by Alexei Starovoitov and Daniel Borkmann here: https://marc.info/?l=linux-netdev&m=152107378717201&w=2 Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1878201c2d77..102718624d1e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -296,6 +296,11 @@ union bpf_attr { __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; __u32 prog_ifindex; /* ifindex of netdev to prep for */ + /* For some prog types expected attach type must be known at + * load time to verify attach type specific parts of prog + * (context accesses, allowed helpers, etc). + */ + __u32 expected_attach_type; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit v1.2.3 From 4fbac77d2d092b475dda9eea66da674369665427 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 30 Mar 2018 15:08:02 -0700 Subject: bpf: Hooks for sys_bind == The problem == There is a use-case when all processes inside a cgroup should use one single IP address on a host that has multiple IP configured. Those processes should use the IP for both ingress and egress, for TCP and UDP traffic. So TCP/UDP servers should be bound to that IP to accept incoming connections on it, and TCP/UDP clients should make outgoing connections from that IP. It should not require changing application code since it's often not possible. Currently it's solved by intercepting glibc wrappers around syscalls such as `bind(2)` and `connect(2)`. It's done by a shared library that is preloaded for every process in a cgroup so that whenever TCP/UDP server calls `bind(2)`, the library replaces IP in sockaddr before passing arguments to syscall. When application calls `connect(2)` the library transparently binds the local end of connection to that IP (`bind(2)` with `IP_BIND_ADDRESS_NO_PORT` to avoid performance penalty). Shared library approach is fragile though, e.g.: * some applications clear env vars (incl. `LD_PRELOAD`); * `/etc/ld.so.preload` doesn't help since some applications are linked with option `-z nodefaultlib`; * other applications don't use glibc and there is nothing to intercept. == The solution == The patch provides much more reliable in-kernel solution for the 1st part of the problem: binding TCP/UDP servers on desired IP. It does not depend on application environment and implementation details (whether glibc is used or not). It adds new eBPF program type `BPF_PROG_TYPE_CGROUP_SOCK_ADDR` and attach types `BPF_CGROUP_INET4_BIND` and `BPF_CGROUP_INET6_BIND` (similar to already existing `BPF_CGROUP_INET_SOCK_CREATE`). The new program type is intended to be used with sockets (`struct sock`) in a cgroup and provided by user `struct sockaddr`. Pointers to both of them are parts of the context passed to programs of newly added types. The new attach types provides hooks in `bind(2)` system call for both IPv4 and IPv6 so that one can write a program to override IP addresses and ports user program tries to bind to and apply such a program for whole cgroup. == Implementation notes == [1] Separate attach types for `AF_INET` and `AF_INET6` are added intentionally to prevent reading/writing to offsets that don't make sense for corresponding socket family. E.g. if user passes `sockaddr_in` it doesn't make sense to read from / write to `user_ip6[]` context fields. [2] The write access to `struct bpf_sock_addr_kern` is implemented using special field as an additional "register". There are just two registers in `sock_addr_convert_ctx_access`: `src` with value to write and `dst` with pointer to context that can't be changed not to break later instructions. But the fields, allowed to write to, are not available directly and to access them address of corresponding pointer has to be loaded first. To get additional register the 1st not used by `src` and `dst` one is taken, its content is saved to `bpf_sock_addr_kern.tmp_reg`, then the register is used to load address of pointer field, and finally the register's content is restored from the temporary field after writing `src` value. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 102718624d1e..ce3e69e3c793 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -136,6 +136,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_CGROUP_DEVICE, BPF_PROG_TYPE_SK_MSG, BPF_PROG_TYPE_RAW_TRACEPOINT, + BPF_PROG_TYPE_CGROUP_SOCK_ADDR, }; enum bpf_attach_type { @@ -147,6 +148,8 @@ enum bpf_attach_type { BPF_SK_SKB_STREAM_VERDICT, BPF_CGROUP_DEVICE, BPF_SK_MSG_VERDICT, + BPF_CGROUP_INET4_BIND, + BPF_CGROUP_INET6_BIND, __MAX_BPF_ATTACH_TYPE }; @@ -1010,6 +1013,26 @@ struct bpf_map_info { __u64 netns_ino; } __attribute__((aligned(8))); +/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed + * by user and intended to be used by socket (e.g. to bind to, depends on + * attach attach type). + */ +struct bpf_sock_addr { + __u32 user_family; /* Allows 4-byte read, but no write. */ + __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. + * Stored in network byte order. + */ + __u32 user_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write. + * Stored in network byte order. + */ + __u32 user_port; /* Allows 4-byte read and write. + * Stored in network byte order + */ + __u32 family; /* Allows 4-byte read, but no write */ + __u32 type; /* Allows 4-byte read, but no write */ + __u32 protocol; /* Allows 4-byte read, but no write */ +}; + /* User bpf_sock_ops struct to access socket values and specify request ops * and their replies. * Some of this fields are in network (bigendian) byte order and may need -- cgit v1.2.3 From d74bad4e74ee373787a9ae24197c17b7cdc428d5 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 30 Mar 2018 15:08:05 -0700 Subject: bpf: Hooks for sys_connect == The problem == See description of the problem in the initial patch of this patch set. == The solution == The patch provides much more reliable in-kernel solution for the 2nd part of the problem: making outgoing connecttion from desired IP. It adds new attach types `BPF_CGROUP_INET4_CONNECT` and `BPF_CGROUP_INET6_CONNECT` for program type `BPF_PROG_TYPE_CGROUP_SOCK_ADDR` that can be used to override both source and destination of a connection at connect(2) time. Local end of connection can be bound to desired IP using newly introduced BPF-helper `bpf_bind()`. It allows to bind to only IP though, and doesn't support binding to port, i.e. leverages `IP_BIND_ADDRESS_NO_PORT` socket option. There are two reasons for this: * looking for a free port is expensive and can affect performance significantly; * there is no use-case for port. As for remote end (`struct sockaddr *` passed by user), both parts of it can be overridden, remote IP and remote port. It's useful if an application inside cgroup wants to connect to another application inside same cgroup or to itself, but knows nothing about IP assigned to the cgroup. Support is added for IPv4 and IPv6, for TCP and UDP. IPv4 and IPv6 have separate attach types for same reason as sys_bind hooks, i.e. to prevent reading from / writing to e.g. user_ip6 fields when user passes sockaddr_in since it'd be out-of-bound. == Implementation notes == The patch introduces new field in `struct proto`: `pre_connect` that is a pointer to a function with same signature as `connect` but is called before it. The reason is in some cases BPF hooks should be called way before control is passed to `sk->sk_prot->connect`. Specifically `inet_dgram_connect` autobinds socket before calling `sk->sk_prot->connect` and there is no way to call `bpf_bind()` from hooks from e.g. `ip4_datagram_connect` or `ip6_datagram_connect` since it'd cause double-bind. On the other hand `proto.pre_connect` provides a flexible way to add BPF hooks for connect only for necessary `proto` and call them at desired time before `connect`. Since `bpf_bind()` is allowed to bind only to IP and autobind in `inet_dgram_connect` binds only port there is no chance of double-bind. bpf_bind() sets `force_bind_address_no_port` to bind to only IP despite of value of `bind_address_no_port` socket field. bpf_bind() sets `with_lock` to `false` when calling to __inet_bind() and __inet6_bind() since all call-sites, where bpf_bind() is called, already hold socket lock. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ce3e69e3c793..77afaf1ba556 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -150,6 +150,8 @@ enum bpf_attach_type { BPF_SK_MSG_VERDICT, BPF_CGROUP_INET4_BIND, BPF_CGROUP_INET6_BIND, + BPF_CGROUP_INET4_CONNECT, + BPF_CGROUP_INET6_CONNECT, __MAX_BPF_ATTACH_TYPE }; @@ -744,6 +746,13 @@ union bpf_attr { * @flags: reserved for future use * Return: SK_PASS * + * int bpf_bind(ctx, addr, addr_len) + * Bind socket to address. Only binding to IP is supported, no port can be + * set in addr. + * @ctx: pointer to context of type bpf_sock_addr + * @addr: pointer to struct sockaddr to bind socket to + * @addr_len: length of sockaddr structure + * Return: 0 on success or negative error code */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -809,7 +818,8 @@ union bpf_attr { FN(msg_redirect_map), \ FN(msg_apply_bytes), \ FN(msg_cork_bytes), \ - FN(msg_pull_data), + FN(msg_pull_data), \ + FN(bind), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From aac3fc320d9404f2665a8b1249dc3170d5fa3caf Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 30 Mar 2018 15:08:07 -0700 Subject: bpf: Post-hooks for sys_bind "Post-hooks" are hooks that are called right before returning from sys_bind. At this time IP and port are already allocated and no further changes to `struct sock` can happen before returning from sys_bind but BPF program has a chance to inspect the socket and change sys_bind result. Specifically it can e.g. inspect what port was allocated and if it doesn't satisfy some policy, BPF program can force sys_bind to fail and return EPERM to user. Another example of usage is recording the IP:port pair to some map to use it in later calls to sys_connect. E.g. if some TCP server inside cgroup was bound to some IP:port_n, it can be recorded to a map. And later when some TCP client inside same cgroup is trying to connect to 127.0.0.1:port_n, BPF hook for sys_connect can override the destination and connect application to IP:port_n instead of 127.0.0.1:port_n. That helps forcing all applications inside a cgroup to use desired IP and not break those applications if they e.g. use localhost to communicate between each other. == Implementation details == Post-hooks are implemented as two new attach types `BPF_CGROUP_INET4_POST_BIND` and `BPF_CGROUP_INET6_POST_BIND` for existing prog type `BPF_PROG_TYPE_CGROUP_SOCK`. Separate attach types for IPv4 and IPv6 are introduced to avoid access to IPv6 field in `struct sock` from `inet_bind()` and to IPv4 field from `inet6_bind()` since those fields might not make sense in such cases. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 77afaf1ba556..c5ec89732a8d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -152,6 +152,8 @@ enum bpf_attach_type { BPF_CGROUP_INET6_BIND, BPF_CGROUP_INET4_CONNECT, BPF_CGROUP_INET6_CONNECT, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, __MAX_BPF_ATTACH_TYPE }; @@ -948,6 +950,15 @@ struct bpf_sock { __u32 protocol; __u32 mark; __u32 priority; + __u32 src_ip4; /* Allows 1,2,4-byte read. + * Stored in network byte order. + */ + __u32 src_ip6[4]; /* Allows 1,2,4-byte read. + * Stored in network byte order. + */ + __u32 src_port; /* Allows 4-byte read. + * Stored in host byte order + */ }; #define XDP_PACKET_HEADROOM 256 -- cgit v1.2.3 From 7a74d39cc2927302bc236397c1fdb1fe5be209ce Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Thu, 29 Mar 2018 23:20:44 +0200 Subject: tipc: tipc: rename address types in user api The three address type structs in the user API have names that in reality reflect the specific, non-Linux environment where they were originally created. We now give them more intuitive names, in accordance with how TIPC is described in the current documentation. struct tipc_portid -> struct tipc_socket_addr struct tipc_name -> struct tipc_service_addr struct tipc_name_seq -> struct tipc_service_range To avoid confusion, we also update some commmets and macro names to match the new terminology. For compatibility, we add macros that map all old names to the new ones. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc.h | 57 +++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 24 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 4ac9f1f02b06..156224ac3d74 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -45,33 +45,33 @@ * TIPC addressing primitives */ -struct tipc_portid { +struct tipc_socket_addr { __u32 ref; __u32 node; }; -struct tipc_name { +struct tipc_service_addr { __u32 type; __u32 instance; }; -struct tipc_name_seq { +struct tipc_service_range { __u32 type; __u32 lower; __u32 upper; }; /* - * Application-accessible port name types + * Application-accessible service types */ -#define TIPC_CFG_SRV 0 /* configuration service name type */ -#define TIPC_TOP_SRV 1 /* topology service name type */ -#define TIPC_LINK_STATE 2 /* link state name type */ -#define TIPC_RESERVED_TYPES 64 /* lowest user-publishable name type */ +#define TIPC_NODE_STATE 0 /* node state service type */ +#define TIPC_TOP_SRV 1 /* topology server service type */ +#define TIPC_LINK_STATE 2 /* link state service type */ +#define TIPC_RESERVED_TYPES 64 /* lowest user-allowed service type */ /* - * Publication scopes when binding port names and port name sequences + * Publication scopes when binding service / service range */ enum tipc_scope { TIPC_CLUSTER_SCOPE = 2, /* 0 can also be used */ @@ -108,28 +108,28 @@ enum tipc_scope { * TIPC topology subscription service definitions */ -#define TIPC_SUB_PORTS 0x01 /* filter for port availability */ -#define TIPC_SUB_SERVICE 0x02 /* filter for service availability */ -#define TIPC_SUB_CANCEL 0x04 /* cancel a subscription */ +#define TIPC_SUB_PORTS 0x01 /* filter: evt at each match */ +#define TIPC_SUB_SERVICE 0x02 /* filter: evt at first up/last down */ +#define TIPC_SUB_CANCEL 0x04 /* filter: cancel a subscription */ #define TIPC_WAIT_FOREVER (~0) /* timeout for permanent subscription */ struct tipc_subscr { - struct tipc_name_seq seq; /* name sequence of interest */ + struct tipc_service_range seq; /* range of interest */ __u32 timeout; /* subscription duration (in ms) */ __u32 filter; /* bitmask of filter options */ char usr_handle[8]; /* available for subscriber use */ }; #define TIPC_PUBLISHED 1 /* publication event */ -#define TIPC_WITHDRAWN 2 /* withdraw event */ +#define TIPC_WITHDRAWN 2 /* withdrawal event */ #define TIPC_SUBSCR_TIMEOUT 3 /* subscription timeout event */ struct tipc_event { __u32 event; /* event type */ - __u32 found_lower; /* matching name seq instances */ - __u32 found_upper; /* " " " " */ - struct tipc_portid port; /* associated port */ + __u32 found_lower; /* matching range */ + __u32 found_upper; /* " " */ + struct tipc_socket_addr port; /* associated socket */ struct tipc_subscr s; /* associated subscription */ }; @@ -149,20 +149,20 @@ struct tipc_event { #define SOL_TIPC 271 #endif -#define TIPC_ADDR_NAMESEQ 1 -#define TIPC_ADDR_MCAST 1 -#define TIPC_ADDR_NAME 2 -#define TIPC_ADDR_ID 3 +#define TIPC_ADDR_MCAST 1 +#define TIPC_SERVICE_RANGE 1 +#define TIPC_SERVICE_ADDR 2 +#define TIPC_SOCKET_ADDR 3 struct sockaddr_tipc { unsigned short family; unsigned char addrtype; signed char scope; union { - struct tipc_portid id; - struct tipc_name_seq nameseq; + struct tipc_socket_addr id; + struct tipc_service_range nameseq; struct { - struct tipc_name name; + struct tipc_service_addr name; __u32 domain; } name; } addr; @@ -230,8 +230,13 @@ struct tipc_sioc_ln_req { /* The macros and functions below are deprecated: */ +#define TIPC_CFG_SRV 0 #define TIPC_ZONE_SCOPE 1 +#define TIPC_ADDR_NAMESEQ 1 +#define TIPC_ADDR_NAME 2 +#define TIPC_ADDR_ID 3 + #define TIPC_NODE_BITS 12 #define TIPC_CLUSTER_BITS 12 #define TIPC_ZONE_BITS 8 @@ -250,6 +255,10 @@ struct tipc_sioc_ln_req { #define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK) +#define tipc_portid tipc_socket_addr +#define tipc_name tipc_service_addr +#define tipc_name_seq tipc_service_range + static inline __u32 tipc_addr(unsigned int zone, unsigned int cluster, unsigned int node) -- cgit v1.2.3 From 7494cfa6d36d1556f17baa012dd93833620783db Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Thu, 29 Mar 2018 23:20:45 +0200 Subject: tipc: avoid possible string overflow gcc points out that the combined length of the fixed-length inputs to l->name is larger than the destination buffer size: net/tipc/link.c: In function 'tipc_link_create': net/tipc/link.c:465:26: error: '%s' directive writing up to 32 bytes into a region of size between 26 and 58 [-Werror=format-overflow=] sprintf(l->name, "%s:%s-%s:unknown", self_str, if_name, peer_str); net/tipc/link.c:465:2: note: 'sprintf' output 11 or more bytes (assuming 75) into a destination of size 60 sprintf(l->name, "%s:%s-%s:unknown", self_str, if_name, peer_str); A detailed analysis reveals that the theoretical maximum length of a link name is: max self_str + 1 + max if_name + 1 + max peer_str + 1 + max if_name = 16 + 1 + 15 + 1 + 16 + 1 + 15 = 65 Since we also need space for a trailing zero we now set MAX_LINK_NAME to 68. Just to be on the safe side we also replace the sprintf() call with snprintf(). Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash values") Reported-by: Arnd Bergmann Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 156224ac3d74..bf6d28677cfe 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -216,7 +216,7 @@ struct tipc_group_req { #define TIPC_MAX_MEDIA_NAME 16 #define TIPC_MAX_IF_NAME 16 #define TIPC_MAX_BEARER_NAME 32 -#define TIPC_MAX_LINK_NAME 60 +#define TIPC_MAX_LINK_NAME 68 #define SIOCGETLINKNAME SIOCPROTOPRIVATE -- cgit v1.2.3