From 23c42a403a9cfdbad6004a556c927be7dd61a8ee Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 27 Oct 2018 15:07:40 +0200 Subject: netfilter: ipset: Introduction of new commands and protocol version 7 Two new commands (IPSET_CMD_GET_BYNAME, IPSET_CMD_GET_BYINDEX) are introduced. The new commands makes possible to eliminate the getsockopt operation (in iptables set/SET match/target) and thus use only netlink communication between userspace and kernel for ipset. With the new protocol version, userspace can exactly know which functionality is supported by the running kernel. Both the kernel and userspace is fully backward compatible. Signed-off-by: Jozsef Kadlecsik --- include/uapi/linux/netfilter/ipset/ip_set.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 60236f694143..ea69ca21ff23 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -13,8 +13,9 @@ #include -/* The protocol version */ -#define IPSET_PROTOCOL 6 +/* The protocol versions */ +#define IPSET_PROTOCOL 7 +#define IPSET_PROTOCOL_MIN 6 /* The max length of strings including NUL: set and type identifiers */ #define IPSET_MAXNAMELEN 32 @@ -38,17 +39,19 @@ enum ipset_cmd { IPSET_CMD_TEST, /* 11: Test an element in a set */ IPSET_CMD_HEADER, /* 12: Get set header data only */ IPSET_CMD_TYPE, /* 13: Get set type */ + IPSET_CMD_GET_BYNAME, /* 14: Get set index by name */ + IPSET_CMD_GET_BYINDEX, /* 15: Get set name by index */ IPSET_MSG_MAX, /* Netlink message commands */ /* Commands in userspace: */ - IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 14: Enter restore mode */ - IPSET_CMD_HELP, /* 15: Get help */ - IPSET_CMD_VERSION, /* 16: Get program version */ - IPSET_CMD_QUIT, /* 17: Quit from interactive mode */ + IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 16: Enter restore mode */ + IPSET_CMD_HELP, /* 17: Get help */ + IPSET_CMD_VERSION, /* 18: Get program version */ + IPSET_CMD_QUIT, /* 19: Quit from interactive mode */ IPSET_CMD_MAX, - IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 18: Commit buffered commands */ + IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 20: Commit buffered commands */ }; /* Attributes at command level */ @@ -66,6 +69,7 @@ enum { IPSET_ATTR_LINENO, /* 9: Restore lineno */ IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */ IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */ + IPSET_ATTR_INDEX, /* 11: Kernel index of set */ __IPSET_ATTR_CMD_MAX, }; #define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1) @@ -223,6 +227,7 @@ enum ipset_adt { /* Sets are identified by an index in kernel space. Tweak with ip_set_id_t * and IPSET_INVALID_ID if you want to increase the max number of sets. + * Also, IPSET_ATTR_INDEX must be changed. */ typedef __u16 ip_set_id_t; -- cgit v1.2.3 From e20cf8d3f1f763ad28a9cb3b41305b8a8a42653e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:29 +0100 Subject: udp: implement GRO for plain UDP sockets. This is the RX counterpart of commit bec1f6f69736 ("udp: generate gso with UDP_SEGMENT"). When UDP_GRO is enabled, such socket is also eligible for GRO in the rx path: UDP segments directed to such socket are assembled into a larger GSO_UDP_L4 packet. The core UDP GRO support is enabled with setsockopt(UDP_GRO). Initial benchmark numbers: Before: udp rx: 1079 MB/s 769065 calls/s After: udp rx: 1466 MB/s 24877 calls/s This change introduces a side effect in respect to UDP tunnels: after a UDP tunnel creation, now the kernel performs a lookup per ingress UDP packet, while before such lookup happened only if the ingress packet carried a valid internal header csum. rfc v2 -> rfc v3: - fixed typos in macro name and comments - really enforce UDP_GRO_CNT_MAX, instead of UDP_GRO_CNT_MAX + 1 - acquire socket lock in UDP_GRO setsockopt rfc v1 -> rfc v2: - use a new option to enable UDP GRO - use static keys to protect the UDP GRO socket lookup Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/uapi/linux/udp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index 09502de447f5..30baccb6c9c4 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -33,6 +33,7 @@ struct udphdr { #define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */ #define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */ #define UDP_SEGMENT 103 /* Set GSO segmentation size */ +#define UDP_GRO 104 /* This socket can receive UDP GRO packets */ /* UDP encapsulation types */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ -- cgit v1.2.3 From b4d3069783bccf0c965468da7db141d359d796fc Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 8 Nov 2018 12:19:16 +0100 Subject: vxlan: Allow configuration of DF behaviour Allow users to set the IPv4 DF bit in outgoing packets, or to inherit its value from the IPv4 inner header. If the encapsulated protocol is IPv6 and DF is configured to be inherited, always set it. For IPv4, inheriting DF from the inner header was probably intended from the very beginning judging by the comment to vxlan_xmit(), but it wasn't actually implemented -- also because it would have done more harm than good, without handling for ICMP Fragmentation Needed messages. According to RFC 7348, "Path MTU discovery MAY be used". An expired RFC draft, draft-saum-nvo3-pmtud-over-vxlan-05, whose purpose was to describe PMTUD implementation, says that "is a MUST that Vxlan gateways [...] SHOULD set the DF-bit [...]", whatever that means. Given this background, the only sane option is probably to let the user decide, and keep the current behaviour as default. This only applies to non-lwt tunnels: if an external control plane is used, tunnel key will still control the DF flag. v2: - DF behaviour configuration only applies for non-lwt tunnels, move DF setting to if (!info) block in vxlan_xmit_one() (Stephen Hemminger) Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 1debfa42cba1..efc588949431 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -533,6 +533,7 @@ enum { IFLA_VXLAN_LABEL, IFLA_VXLAN_GPE, IFLA_VXLAN_TTL_INHERIT, + IFLA_VXLAN_DF, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) @@ -542,6 +543,14 @@ struct ifla_vxlan_port_range { __be16 high; }; +enum ifla_vxlan_df { + VXLAN_DF_UNSET = 0, + VXLAN_DF_SET, + VXLAN_DF_INHERIT, + __VXLAN_DF_END, + VXLAN_DF_MAX = __VXLAN_DF_END - 1, +}; + /* GENEVE section */ enum { IFLA_GENEVE_UNSPEC, -- cgit v1.2.3 From a025fb5f49ad38cf749753b16fcd031d0d678f2b Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 8 Nov 2018 12:19:19 +0100 Subject: geneve: Allow configuration of DF behaviour draft-ietf-nvo3-geneve-08 says: It is strongly RECOMMENDED that Path MTU Discovery ([RFC1191], [RFC1981]) be used by setting the DF bit in the IP header when Geneve packets are transmitted over IPv4 (this is the default with IPv6). Now that ICMP error handling is working for GENEVE, we can comply with this recommendation. Make this configurable, though, to avoid breaking existing setups. By default, DF won't be set. It can be set or inherited from inner IPv4 packets. If it's configured to be inherited and we are encapsulating IPv6, it will be set. This only applies to non-lwt tunnels: if an external control plane is used, tunnel key will still control the DF flag. v2: - DF behaviour configuration only applies for non-lwt tunnels, apply DF setting only if (!geneve->collect_md) in geneve_xmit_skb() (Stephen Hemminger) Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index efc588949431..f42c069d81db 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -566,10 +566,19 @@ enum { IFLA_GENEVE_UDP_ZERO_CSUM6_RX, IFLA_GENEVE_LABEL, IFLA_GENEVE_TTL_INHERIT, + IFLA_GENEVE_DF, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) +enum ifla_geneve_df { + GENEVE_DF_UNSET = 0, + GENEVE_DF_SET, + GENEVE_DF_INHERIT, + __GENEVE_DF_END, + GENEVE_DF_MAX = __GENEVE_DF_END - 1, +}; + /* PPP section */ enum { IFLA_PPP_UNSPEC, -- cgit v1.2.3 From c8123ead13a5c92dc5fd15c0fdfe88eef41e6ac1 Mon Sep 17 00:00:00 2001 From: Nitin Hande Date: Sun, 28 Oct 2018 21:02:45 -0700 Subject: bpf: Extend the sk_lookup() helper to XDP hookpoint. This patch proposes to extend the sk_lookup() BPF API to the XDP hookpoint. The sk_lookup() helper supports a lookup on incoming packet to find the corresponding socket that will receive this packet. Current support for this BPF API is at the tc hookpoint. This patch will extend this API at XDP hookpoint. A XDP program can map the incoming packet to the 5-tuple parameter and invoke the API to find the corresponding socket structure. Signed-off-by: Nitin Hande Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 852dc17ab47a..47d606d744cc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2201,6 +2201,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, *struct bpf_sock* + * return is from reuse->socks[] using hash of the packet. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) * Description @@ -2233,6 +2235,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, *struct bpf_sock* + * return is from reuse->socks[] using hash of the packet. * * int bpf_sk_release(struct bpf_sock *sk) * Description -- cgit v1.2.3 From 9bb7e0f24e7e7d00daa1219b14539e2e602649b2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 10 Sep 2018 13:29:12 +0200 Subject: cfg80211: add peer measurement with FTM initiator API Add a new "peer measurement" API, that can be used to measure certain things related to a peer. Right now, only implement FTM (flight time measurement) over it, but the idea is that it'll be extensible to also support measuring the necessary things to calculate e.g. angle-of-arrival for WiGig. The API is structured to have a generic list of peers and channels to measure with/on, and then for each of those a set of measurements (again, only FTM right now) to perform. Results are sent to the requesting socket, including a final complete message. Closing the controlling netlink socket will abort a running measurement. v3: - add a bit to report "final" for partial results - remove list keeping etc. and just unicast out the results to the requester (big code reduction ...) - also send complete message unicast, and as a result remove the multicast group - separate out struct cfg80211_pmsr_ftm_request_peer from struct cfg80211_pmsr_request_peer - document timeout == 0 if no timeout - disallow setting timeout nl80211 attribute to 0, must not include attribute for no timeout - make MAC address randomization optional - change num bursts exponent default to 0 (1 burst, rather rather than the old default of 15==don't care) v4: - clarify NL80211_ATTR_TIMEOUT documentation v5: - remove unnecessary nl80211 multicast/family changes - remove partial results bit/flag, final is sufficient - add max_bursts_exponent, max_ftms_per_burst to capability - rename "frames per burst" -> "FTMs per burst" v6: - rename cfg80211_pmsr_free_wdev() to cfg80211_pmsr_wdev_down() and call it in leave, so the device can't go down with any pending measurements v7: - wording fixes (Lior) - fix ftm.max_bursts_exponent to allow having the limit of 0 (Lior) v8: - copyright statements - minor coding style fixes - fix error path leak Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 418 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 418 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6d610bae30a9..e45b88925783 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1036,6 +1036,30 @@ * @NL80211_CMD_GET_FTM_RESPONDER_STATS: Retrieve FTM responder statistics, in * the %NL80211_ATTR_FTM_RESPONDER_STATS attribute. * + * @NL80211_CMD_PEER_MEASUREMENT_START: start a (set of) peer measurement(s) + * with the given parameters, which are encapsulated in the nested + * %NL80211_ATTR_PEER_MEASUREMENTS attribute. Optionally, MAC address + * randomization may be enabled and configured by specifying the + * %NL80211_ATTR_MAC and %NL80211_ATTR_MAC_MASK attributes. + * If a timeout is requested, use the %NL80211_ATTR_TIMEOUT attribute. + * A u64 cookie for further %NL80211_ATTR_COOKIE use is is returned in + * the netlink extended ack message. + * + * To cancel a measurement, close the socket that requested it. + * + * Measurement results are reported to the socket that requested the + * measurement using @NL80211_CMD_PEER_MEASUREMENT_RESULT when they + * become available, so applications must ensure a large enough socket + * buffer size. + * + * Depending on driver support it may or may not be possible to start + * multiple concurrent measurements. + * @NL80211_CMD_PEER_MEASUREMENT_RESULT: This command number is used for the + * result notification from the driver to the requesting socket. + * @NL80211_CMD_PEER_MEASUREMENT_COMPLETE: Notification only, indicating that + * the measurement completed, using the measurement cookie + * (%NL80211_ATTR_COOKIE). + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1250,6 +1274,10 @@ enum nl80211_commands { NL80211_CMD_GET_FTM_RESPONDER_STATS, + NL80211_CMD_PEER_MEASUREMENT_START, + NL80211_CMD_PEER_MEASUREMENT_RESULT, + NL80211_CMD_PEER_MEASUREMENT_COMPLETE, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2254,6 +2282,16 @@ enum nl80211_commands { * @NL80211_ATTR_FTM_RESPONDER_STATS: Nested attribute with FTM responder * statistics, see &enum nl80211_ftm_responder_stats. * + * @NL80211_ATTR_TIMEOUT: Timeout for the given operation in milliseconds (u32), + * if the attribute is not given no timeout is requested. Note that 0 is an + * invalid value. + * + * @NL80211_ATTR_PEER_MEASUREMENTS: peer measurements request (and result) + * data, uses nested attributes specified in + * &enum nl80211_peer_measurement_attrs. + * This is also used for capability advertisement in the wiphy information, + * with the appropriate sub-attributes. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2699,6 +2737,10 @@ enum nl80211_attrs { NL80211_ATTR_FTM_RESPONDER_STATS, + NL80211_ATTR_TIMEOUT, + + NL80211_ATTR_PEER_MEASUREMENTS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5906,4 +5948,380 @@ enum nl80211_ftm_responder_stats { NL80211_FTM_STATS_MAX = __NL80211_FTM_STATS_AFTER_LAST - 1 }; +/** + * enum nl80211_preamble - frame preamble types + * @NL80211_PREAMBLE_LEGACY: legacy (HR/DSSS, OFDM, ERP PHY) preamble + * @NL80211_PREAMBLE_HT: HT preamble + * @NL80211_PREAMBLE_VHT: VHT preamble + * @NL80211_PREAMBLE_DMG: DMG preamble + */ +enum nl80211_preamble { + NL80211_PREAMBLE_LEGACY, + NL80211_PREAMBLE_HT, + NL80211_PREAMBLE_VHT, + NL80211_PREAMBLE_DMG, +}; + +/** + * enum nl80211_peer_measurement_type - peer measurement types + * @NL80211_PMSR_TYPE_INVALID: invalid/unused, needed as we use + * these numbers also for attributes + * + * @NL80211_PMSR_TYPE_FTM: flight time measurement + * + * @NUM_NL80211_PMSR_TYPES: internal + * @NL80211_PMSR_TYPE_MAX: highest type number + */ +enum nl80211_peer_measurement_type { + NL80211_PMSR_TYPE_INVALID, + + NL80211_PMSR_TYPE_FTM, + + NUM_NL80211_PMSR_TYPES, + NL80211_PMSR_TYPE_MAX = NUM_NL80211_PMSR_TYPES - 1 +}; + +/** + * enum nl80211_peer_measurement_status - peer measurement status + * @NL80211_PMSR_STATUS_SUCCESS: measurement completed successfully + * @NL80211_PMSR_STATUS_REFUSED: measurement was locally refused + * @NL80211_PMSR_STATUS_TIMEOUT: measurement timed out + * @NL80211_PMSR_STATUS_FAILURE: measurement failed, a type-dependent + * reason may be available in the response data + */ +enum nl80211_peer_measurement_status { + NL80211_PMSR_STATUS_SUCCESS, + NL80211_PMSR_STATUS_REFUSED, + NL80211_PMSR_STATUS_TIMEOUT, + NL80211_PMSR_STATUS_FAILURE, +}; + +/** + * enum nl80211_peer_measurement_req - peer measurement request attributes + * @__NL80211_PMSR_REQ_ATTR_INVALID: invalid + * + * @NL80211_PMSR_REQ_ATTR_DATA: This is a nested attribute with measurement + * type-specific request data inside. The attributes used are from the + * enums named nl80211_peer_measurement__req. + * @NL80211_PMSR_REQ_ATTR_GET_AP_TSF: include AP TSF timestamp, if supported + * (flag attribute) + * + * @NUM_NL80211_PMSR_REQ_ATTRS: internal + * @NL80211_PMSR_REQ_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_req { + __NL80211_PMSR_REQ_ATTR_INVALID, + + NL80211_PMSR_REQ_ATTR_DATA, + NL80211_PMSR_REQ_ATTR_GET_AP_TSF, + + /* keep last */ + NUM_NL80211_PMSR_REQ_ATTRS, + NL80211_PMSR_REQ_ATTR_MAX = NUM_NL80211_PMSR_REQ_ATTRS - 1 +}; + +/** + * enum nl80211_peer_measurement_resp - peer measurement response attributes + * @__NL80211_PMSR_RESP_ATTR_INVALID: invalid + * + * @NL80211_PMSR_RESP_ATTR_DATA: This is a nested attribute with measurement + * type-specific results inside. The attributes used are from the enums + * named nl80211_peer_measurement__resp. + * @NL80211_PMSR_RESP_ATTR_STATUS: u32 value with the measurement status + * (using values from &enum nl80211_peer_measurement_status.) + * @NL80211_PMSR_RESP_ATTR_HOST_TIME: host time (%CLOCK_BOOTTIME) when the + * result was measured; this value is not expected to be accurate to + * more than 20ms. (u64, nanoseconds) + * @NL80211_PMSR_RESP_ATTR_AP_TSF: TSF of the AP that the interface + * doing the measurement is connected to when the result was measured. + * This shall be accurately reported if supported and requested + * (u64, usec) + * @NL80211_PMSR_RESP_ATTR_FINAL: If results are sent to the host partially + * (*e.g. with FTM per-burst data) this flag will be cleared on all but + * the last result; if all results are combined it's set on the single + * result. + * @NL80211_PMSR_RESP_ATTR_PAD: padding for 64-bit attributes, ignore + * + * @NUM_NL80211_PMSR_RESP_ATTRS: internal + * @NL80211_PMSR_RESP_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_resp { + __NL80211_PMSR_RESP_ATTR_INVALID, + + NL80211_PMSR_RESP_ATTR_DATA, + NL80211_PMSR_RESP_ATTR_STATUS, + NL80211_PMSR_RESP_ATTR_HOST_TIME, + NL80211_PMSR_RESP_ATTR_AP_TSF, + NL80211_PMSR_RESP_ATTR_FINAL, + NL80211_PMSR_RESP_ATTR_PAD, + + /* keep last */ + NUM_NL80211_PMSR_RESP_ATTRS, + NL80211_PMSR_RESP_ATTR_MAX = NUM_NL80211_PMSR_RESP_ATTRS - 1 +}; + +/** + * enum nl80211_peer_measurement_peer_attrs - peer attributes for measurement + * @__NL80211_PMSR_PEER_ATTR_INVALID: invalid + * + * @NL80211_PMSR_PEER_ATTR_ADDR: peer's MAC address + * @NL80211_PMSR_PEER_ATTR_CHAN: channel definition, nested, using top-level + * attributes like %NL80211_ATTR_WIPHY_FREQ etc. + * @NL80211_PMSR_PEER_ATTR_REQ: This is a nested attribute indexed by + * measurement type, with attributes from the + * &enum nl80211_peer_measurement_req inside. + * @NL80211_PMSR_PEER_ATTR_RESP: This is a nested attribute indexed by + * measurement type, with attributes from the + * &enum nl80211_peer_measurement_resp inside. + * + * @NUM_NL80211_PMSR_PEER_ATTRS: internal + * @NL80211_PMSR_PEER_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_peer_attrs { + __NL80211_PMSR_PEER_ATTR_INVALID, + + NL80211_PMSR_PEER_ATTR_ADDR, + NL80211_PMSR_PEER_ATTR_CHAN, + NL80211_PMSR_PEER_ATTR_REQ, + NL80211_PMSR_PEER_ATTR_RESP, + + /* keep last */ + NUM_NL80211_PMSR_PEER_ATTRS, + NL80211_PMSR_PEER_ATTR_MAX = NUM_NL80211_PMSR_PEER_ATTRS - 1, +}; + +/** + * enum nl80211_peer_measurement_attrs - peer measurement attributes + * @__NL80211_PMSR_ATTR_INVALID: invalid + * + * @NL80211_PMSR_ATTR_MAX_PEERS: u32 attribute used for capability + * advertisement only, indicates the maximum number of peers + * measurements can be done with in a single request + * @NL80211_PMSR_ATTR_REPORT_AP_TSF: flag attribute in capability + * indicating that the connected AP's TSF can be reported in + * measurement results + * @NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR: flag attribute in capability + * indicating that MAC address randomization is supported. + * @NL80211_PMSR_ATTR_TYPE_CAPA: capabilities reported by the device, + * this contains a nesting indexed by measurement type, and + * type-specific capabilities inside, which are from the enums + * named nl80211_peer_measurement__capa. + * @NL80211_PMSR_ATTR_PEERS: nested attribute, the nesting index is + * meaningless, just a list of peers to measure with, with the + * sub-attributes taken from + * &enum nl80211_peer_measurement_peer_attrs. + * + * @NUM_NL80211_PMSR_ATTR: internal + * @NL80211_PMSR_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_attrs { + __NL80211_PMSR_ATTR_INVALID, + + NL80211_PMSR_ATTR_MAX_PEERS, + NL80211_PMSR_ATTR_REPORT_AP_TSF, + NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR, + NL80211_PMSR_ATTR_TYPE_CAPA, + NL80211_PMSR_ATTR_PEERS, + + /* keep last */ + NUM_NL80211_PMSR_ATTR, + NL80211_PMSR_ATTR_MAX = NUM_NL80211_PMSR_ATTR - 1 +}; + +/** + * enum nl80211_peer_measurement_ftm_capa - FTM capabilities + * @__NL80211_PMSR_FTM_CAPA_ATTR_INVALID: invalid + * + * @NL80211_PMSR_FTM_CAPA_ATTR_ASAP: flag attribute indicating ASAP mode + * is supported + * @NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP: flag attribute indicating non-ASAP + * mode is supported + * @NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI: flag attribute indicating if LCI + * data can be requested during the measurement + * @NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC: flag attribute indicating if civic + * location data can be requested during the measurement + * @NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES: u32 bitmap attribute of bits + * from &enum nl80211_preamble. + * @NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS: bitmap of values from + * &enum nl80211_chan_width indicating the supported channel + * bandwidths for FTM. Note that a higher channel bandwidth may be + * configured to allow for other measurements types with different + * bandwidth requirement in the same measurement. + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT: u32 attribute indicating + * the maximum bursts exponent that can be used (if not present anything + * is valid) + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST: u32 attribute indicating + * the maximum FTMs per burst (if not present anything is valid) + * + * @NUM_NL80211_PMSR_FTM_CAPA_ATTR: internal + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_ftm_capa { + __NL80211_PMSR_FTM_CAPA_ATTR_INVALID, + + NL80211_PMSR_FTM_CAPA_ATTR_ASAP, + NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP, + NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI, + NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC, + NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES, + NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST, + + /* keep last */ + NUM_NL80211_PMSR_FTM_CAPA_ATTR, + NL80211_PMSR_FTM_CAPA_ATTR_MAX = NUM_NL80211_PMSR_FTM_CAPA_ATTR - 1 +}; + +/** + * enum nl80211_peer_measurement_ftm_req - FTM request attributes + * @__NL80211_PMSR_FTM_REQ_ATTR_INVALID: invalid + * + * @NL80211_PMSR_FTM_REQ_ATTR_ASAP: ASAP mode requested (flag) + * @NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE: preamble type (see + * &enum nl80211_preamble), optional for DMG (u32) + * @NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP: number of bursts exponent as in + * 802.11-2016 9.4.2.168 "Fine Timing Measurement Parameters element" + * (u8, 0-15, optional with default 15 i.e. "no preference") + * @NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD: interval between bursts in units + * of 100ms (u16, optional with default 0) + * @NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION: burst duration, as in 802.11-2016 + * Table 9-257 "Burst Duration field encoding" (u8, 0-15, optional with + * default 15 i.e. "no preference") + * @NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST: number of successful FTM frames + * requested per burst + * (u8, 0-31, optional with default 0 i.e. "no preference") + * @NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES: number of FTMR frame retries + * (u8, default 3) + * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI: request LCI data (flag) + * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC: request civic location data + * (flag) + * + * @NUM_NL80211_PMSR_FTM_REQ_ATTR: internal + * @NL80211_PMSR_FTM_REQ_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_ftm_req { + __NL80211_PMSR_FTM_REQ_ATTR_INVALID, + + NL80211_PMSR_FTM_REQ_ATTR_ASAP, + NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE, + NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP, + NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD, + NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION, + NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST, + NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES, + NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI, + NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC, + + /* keep last */ + NUM_NL80211_PMSR_FTM_REQ_ATTR, + NL80211_PMSR_FTM_REQ_ATTR_MAX = NUM_NL80211_PMSR_FTM_REQ_ATTR - 1 +}; + +/** + * enum nl80211_peer_measurement_ftm_failure_reasons - FTM failure reasons + * @NL80211_PMSR_FTM_FAILURE_UNSPECIFIED: unspecified failure, not used + * @NL80211_PMSR_FTM_FAILURE_NO_RESPONSE: no response from the FTM responder + * @NL80211_PMSR_FTM_FAILURE_REJECTED: FTM responder rejected measurement + * @NL80211_PMSR_FTM_FAILURE_WRONG_CHANNEL: we already know the peer is + * on a different channel, so can't measure (if we didn't know, we'd + * try and get no response) + * @NL80211_PMSR_FTM_FAILURE_PEER_NOT_CAPABLE: peer can't actually do FTM + * @NL80211_PMSR_FTM_FAILURE_INVALID_TIMESTAMP: invalid T1/T4 timestamps + * received + * @NL80211_PMSR_FTM_FAILURE_PEER_BUSY: peer reports busy, you may retry + * later (see %NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME) + * @NL80211_PMSR_FTM_FAILURE_BAD_CHANGED_PARAMS: parameters were changed + * by the peer and are no longer supported + */ +enum nl80211_peer_measurement_ftm_failure_reasons { + NL80211_PMSR_FTM_FAILURE_UNSPECIFIED, + NL80211_PMSR_FTM_FAILURE_NO_RESPONSE, + NL80211_PMSR_FTM_FAILURE_REJECTED, + NL80211_PMSR_FTM_FAILURE_WRONG_CHANNEL, + NL80211_PMSR_FTM_FAILURE_PEER_NOT_CAPABLE, + NL80211_PMSR_FTM_FAILURE_INVALID_TIMESTAMP, + NL80211_PMSR_FTM_FAILURE_PEER_BUSY, + NL80211_PMSR_FTM_FAILURE_BAD_CHANGED_PARAMS, +}; + +/** + * enum nl80211_peer_measurement_ftm_resp - FTM response attributes + * @__NL80211_PMSR_FTM_RESP_ATTR_INVALID: invalid + * + * @NL80211_PMSR_FTM_RESP_ATTR_FAIL_REASON: FTM-specific failure reason + * (u32, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_BURST_INDEX: optional, if bursts are reported + * as separate results then it will be the burst index 0...(N-1) and + * the top level will indicate partial results (u32) + * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_ATTEMPTS: number of FTM Request frames + * transmitted (u32, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_SUCCESSES: number of FTM Request frames + * that were acknowleged (u32, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME: retry time received from the + * busy peer (u32, seconds) + * @NL80211_PMSR_FTM_RESP_ATTR_NUM_BURSTS_EXP: actual number of bursts exponent + * used by the responder (similar to request, u8) + * @NL80211_PMSR_FTM_RESP_ATTR_BURST_DURATION: actual burst duration used by + * the responder (similar to request, u8) + * @NL80211_PMSR_FTM_RESP_ATTR_FTMS_PER_BURST: actual FTMs per burst used + * by the responder (similar to request, u8) + * @NL80211_PMSR_FTM_RESP_ATTR_RSSI_AVG: average RSSI across all FTM action + * frames (optional, s32, 1/2 dBm) + * @NL80211_PMSR_FTM_RESP_ATTR_RSSI_SPREAD: RSSI spread across all FTM action + * frames (optional, s32, 1/2 dBm) + * @NL80211_PMSR_FTM_RESP_ATTR_TX_RATE: bitrate we used for the response to the + * FTM action frame (optional, nested, using &enum nl80211_rate_info + * attributes) + * @NL80211_PMSR_FTM_RESP_ATTR_RX_RATE: bitrate the responder used for the FTM + * action frame (optional, nested, using &enum nl80211_rate_info attrs) + * @NL80211_PMSR_FTM_RESP_ATTR_RTT_AVG: average RTT (s64, picoseconds, optional + * but one of RTT/DIST must be present) + * @NL80211_PMSR_FTM_RESP_ATTR_RTT_VARIANCE: RTT variance (u64, ps^2, note that + * standard deviation is the square root of variance, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_RTT_SPREAD: RTT spread (u64, picoseconds, + * optional) + * @NL80211_PMSR_FTM_RESP_ATTR_DIST_AVG: average distance (s64, mm, optional + * but one of RTT/DIST must be present) + * @NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE: distance variance (u64, mm^2, note + * that standard deviation is the square root of variance, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD: distance spread (u64, mm, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_LCI: LCI data from peer (binary, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC: civic location data from peer + * (binary, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_PAD: ignore, for u64/s64 padding only + * + * @NUM_NL80211_PMSR_FTM_RESP_ATTR: internal + * @NL80211_PMSR_FTM_RESP_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_ftm_resp { + __NL80211_PMSR_FTM_RESP_ATTR_INVALID, + + NL80211_PMSR_FTM_RESP_ATTR_FAIL_REASON, + NL80211_PMSR_FTM_RESP_ATTR_BURST_INDEX, + NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_ATTEMPTS, + NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_SUCCESSES, + NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME, + NL80211_PMSR_FTM_RESP_ATTR_NUM_BURSTS_EXP, + NL80211_PMSR_FTM_RESP_ATTR_BURST_DURATION, + NL80211_PMSR_FTM_RESP_ATTR_FTMS_PER_BURST, + NL80211_PMSR_FTM_RESP_ATTR_RSSI_AVG, + NL80211_PMSR_FTM_RESP_ATTR_RSSI_SPREAD, + NL80211_PMSR_FTM_RESP_ATTR_TX_RATE, + NL80211_PMSR_FTM_RESP_ATTR_RX_RATE, + NL80211_PMSR_FTM_RESP_ATTR_RTT_AVG, + NL80211_PMSR_FTM_RESP_ATTR_RTT_VARIANCE, + NL80211_PMSR_FTM_RESP_ATTR_RTT_SPREAD, + NL80211_PMSR_FTM_RESP_ATTR_DIST_AVG, + NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE, + NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD, + NL80211_PMSR_FTM_RESP_ATTR_LCI, + NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC, + NL80211_PMSR_FTM_RESP_ATTR_PAD, + + /* keep last */ + NUM_NL80211_PMSR_FTM_RESP_ATTR, + NL80211_PMSR_FTM_RESP_ATTR_MAX = NUM_NL80211_PMSR_FTM_RESP_ATTR - 1 +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From dbdaee7aa6e61f56aac61b71a7807e76f92cc895 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 25 Oct 2018 15:48:53 -0400 Subject: {nl,mac}80211: report gate connectivity in station info Capture the current state of gate connectivity from the mesh formation field in mesh config whenever we receive a beacon, and report that via GET_STATION. This allows applications doing mesh peering in userspace to make peering decisions based on peers' current upstream connectivity. Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e45b88925783..ff6005edf32f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3116,6 +3116,8 @@ enum nl80211_sta_bss_param { * with an FCS error (u32, from this station). This count may not include * some packets with an FCS error due to TA corruption. Hence this counter * might not be fully accurate. + * @NL80211_STA_INFO_CONNECTED_TO_GATE: set to true if STA has a path to a + * mesh gate * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -3158,6 +3160,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_ACK_SIGNAL_AVG, NL80211_STA_INFO_RX_MPDUS, NL80211_STA_INFO_FCS_ERROR_COUNT, + NL80211_STA_INFO_CONNECTED_TO_GATE, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, -- cgit v1.2.3 From 01d66fbd5b18ac9f01a6a2ae1278189d19208ad5 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 25 Oct 2018 17:36:34 -0400 Subject: {nl,mac}80211: add dot11MeshConnectedToMeshGate to meshconf When userspace is controlling mesh routing, it may have better knowledge about whether a mesh STA is connected to a mesh gate than the kernel mpath table. Add dot11MeshConnectedToMeshGate to the mesh config so that such applications can explicitly signal that a mesh STA is connected to a gate, which will then be advertised in the beacon. Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ff6005edf32f..51bd85b7d839 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3117,7 +3117,7 @@ enum nl80211_sta_bss_param { * some packets with an FCS error due to TA corruption. Hence this counter * might not be fully accurate. * @NL80211_STA_INFO_CONNECTED_TO_GATE: set to true if STA has a path to a - * mesh gate + * mesh gate (u8, 0 or 1) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -3940,6 +3940,11 @@ enum nl80211_mesh_power_mode { * remove it from the STA's list of peers. You may set this to 0 to disable * the removal of the STA. Default is 30 minutes. * + * @NL80211_MESHCONF_CONNECTED_TO_GATE: If set to true then this mesh STA + * will advertise that it is connected to a gate in the mesh formation + * field. If left unset then the mesh formation field will only + * advertise such if there is an active root mesh path. + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -3972,6 +3977,7 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_POWER_MODE, NL80211_MESHCONF_AWAKE_WINDOW, NL80211_MESHCONF_PLINK_TIMEOUT, + NL80211_MESHCONF_CONNECTED_TO_GATE, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, -- cgit v1.2.3 From 361800876f80da3915c46e388fc682532228b2c3 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 9 Nov 2018 11:14:44 +0100 Subject: ptp: add PTP_SYS_OFFSET_EXTENDED ioctl The PTP_SYS_OFFSET ioctl, which can be used to measure the offset between a PHC and the system clock, includes the total time that the driver needs to read the PHC timestamp. This typically involves reading of multiple PCI registers (sometimes in multiple iterations) and the register that contains the lowest bits of the timestamp is not read in the middle between the two readings of the system clock. This asymmetry causes the measured offset to have a significant error. Introduce a new ioctl, driver function, and helper functions, which allow the reading of the lowest register to be isolated from the other readings in order to reduce the asymmetry. The ioctl returns three timestamps for each measurement: - system time right before reading the lowest bits of the PHC timestamp - PHC time - system time immediately after reading the lowest bits of the PHC timestamp Cc: Richard Cochran Cc: Jacob Keller Cc: Marcelo Tosatti Signed-off-by: Miroslav Lichvar Signed-off-by: David S. Miller --- include/uapi/linux/ptp_clock.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 3039bf6a742e..d73d83950265 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -84,6 +84,16 @@ struct ptp_sys_offset { struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1]; }; +struct ptp_sys_offset_extended { + unsigned int n_samples; /* Desired number of measurements. */ + unsigned int rsv[3]; /* Reserved for future use. */ + /* + * Array of [system, phc, system] time stamps. The kernel will provide + * 3*n_samples time stamps. + */ + struct ptp_clock_time ts[PTP_MAX_SAMPLES][3]; +}; + struct ptp_sys_offset_precise { struct ptp_clock_time device; struct ptp_clock_time sys_realtime; @@ -136,6 +146,8 @@ struct ptp_pin_desc { #define PTP_PIN_SETFUNC _IOW(PTP_CLK_MAGIC, 7, struct ptp_pin_desc) #define PTP_SYS_OFFSET_PRECISE \ _IOWR(PTP_CLK_MAGIC, 8, struct ptp_sys_offset_precise) +#define PTP_SYS_OFFSET_EXTENDED \ + _IOW(PTP_CLK_MAGIC, 9, struct ptp_sys_offset_extended) struct ptp_extts_event { struct ptp_clock_time t; /* Time event occured. */ -- cgit v1.2.3 From 48872c11b77271ef9b070bdc50afe6655c4eb9aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 11 Nov 2018 09:11:31 -0800 Subject: net_sched: sch_fq: add dctcp-like marking Similar to 80ba92fa1a92 ("codel: add ce_threshold attribute") After EDT adoption, it became easier to implement DCTCP-like CE marking. In many cases, queues are not building in the network fabric but on the hosts themselves. If packets leaving fq missed their Earliest Departure Time by XXX usec, we mark them with ECN CE. This gives a feedback (after one RTT) to the sender to slow down and find better operating mode. Example : tc qd replace dev eth0 root fq ce_threshold 2.5ms Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 89ee47c2f17d..ee017bc057a3 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -864,6 +864,8 @@ enum { TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */ + TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */ + __TCA_FQ_MAX }; @@ -882,6 +884,7 @@ struct tc_fq_qd_stats { __u32 inactive_flows; __u32 throttled_flows; __u32 unthrottle_latency_ns; + __u64 ce_mark; /* packets above ce_threshold */ }; /* Heavy-Hitter Filter */ -- cgit v1.2.3 From 9b076f1c0f4869b838a1b7aa0edb5664d47ec8aa Mon Sep 17 00:00:00 2001 From: Matthew Bobrowski Date: Thu, 8 Nov 2018 14:07:14 +1100 Subject: fanotify: introduce new event mask FAN_OPEN_EXEC A new event mask FAN_OPEN_EXEC has been defined so that users have the ability to receive events specifically when a file has been opened with the intent to be executed. Events of FAN_OPEN_EXEC type will be generated when a file has been opened using either execve(), execveat() or uselib() system calls. The feature is implemented within fsnotify_open() by generating the FAN_OPEN_EXEC event type if __FMODE_EXEC is set within file->f_flags. Signed-off-by: Matthew Bobrowski Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index b86740d1c50a..d9664fbc905b 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -10,6 +10,7 @@ #define FAN_CLOSE_WRITE 0x00000008 /* Writtable file closed */ #define FAN_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ #define FAN_OPEN 0x00000020 /* File was opened */ +#define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ -- cgit v1.2.3 From 66917a3130f218dcef9eeab4fd11a71cd00cd7c9 Mon Sep 17 00:00:00 2001 From: Matthew Bobrowski Date: Thu, 8 Nov 2018 14:12:44 +1100 Subject: fanotify: introduce new event mask FAN_OPEN_EXEC_PERM A new event mask FAN_OPEN_EXEC_PERM has been defined. This allows users to receive events and grant access to files that are intending to be opened for execution. Events of FAN_OPEN_EXEC_PERM type will be generated when a file has been opened by using either execve(), execveat() or uselib() system calls. This acts in the same manner as previous permission event mask, meaning that an access response is required from the user application in order to permit any further operations on the file. Signed-off-by: Matthew Bobrowski Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index d9664fbc905b..909c98fcace2 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -16,6 +16,7 @@ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ +#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ #define FAN_ONDIR 0x40000000 /* event occurred against dir */ -- cgit v1.2.3 From 5c72299fba9df407c2f2994e194edebf878996ee Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Mon, 12 Nov 2018 16:15:55 -0800 Subject: net: sched: cls_flower: Classify packets using port ranges Added support in tc flower for filtering based on port ranges. Example: 1. Match on a port range: ------------------------- $ tc filter add dev enp4s0 protocol ip parent ffff:\ prio 1 flower ip_proto tcp dst_port range 20-30 skip_hw\ action drop $ tc -s filter show dev enp4s0 parent ffff: filter protocol ip pref 1 flower chain 0 filter protocol ip pref 1 flower chain 0 handle 0x1 eth_type ipv4 ip_proto tcp dst_port range 20-30 skip_hw not_in_hw action order 1: gact action drop random type none pass val 0 index 1 ref 1 bind 1 installed 85 sec used 3 sec Action statistics: Sent 460 bytes 10 pkt (dropped 10, overlimits 0 requeues 0) backlog 0b 0p requeues 0 2. Match on IP address and port range: -------------------------------------- $ tc filter add dev enp4s0 protocol ip parent ffff:\ prio 1 flower dst_ip 192.168.1.1 ip_proto tcp dst_port range 100-200\ skip_hw action drop $ tc -s filter show dev enp4s0 parent ffff: filter protocol ip pref 1 flower chain 0 handle 0x2 eth_type ipv4 ip_proto tcp dst_ip 192.168.1.1 dst_port range 100-200 skip_hw not_in_hw action order 1: gact action drop random type none pass val 0 index 2 ref 1 bind 1 installed 58 sec used 2 sec Action statistics: Sent 920 bytes 20 pkt (dropped 20, overlimits 0 requeues 0) backlog 0b 0p requeues 0 v4: 1. Added condition before setting port key. 2. Organized setting and dumping port range keys into functions and added validation of input range. v3: 1. Moved new fields in UAPI enum to the end of enum. 2. Removed couple of empty lines. v2: Addressed Jiri's comments: 1. Added separate functions for dst and src comparisons. 2. Removed endpoint enum. 3. Added new bit TCA_FLOWER_FLAGS_RANGE to decide normal/range lookup. 4. Cleaned up fl_lookup function. Signed-off-by: Amritha Nambiar Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 401d0c1e612d..95d0db2a8350 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -485,6 +485,11 @@ enum { TCA_FLOWER_IN_HW_COUNT, + TCA_FLOWER_KEY_PORT_SRC_MIN, /* be16 */ + TCA_FLOWER_KEY_PORT_SRC_MAX, /* be16 */ + TCA_FLOWER_KEY_PORT_DST_MIN, /* be16 */ + TCA_FLOWER_KEY_PORT_DST_MAX, /* be16 */ + __TCA_FLOWER_MAX, }; @@ -518,6 +523,8 @@ enum { TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), }; +#define TCA_FLOWER_MASK_FLAGS_RANGE (1 << 0) /* Range-based match */ + /* Match-all classifier */ enum { -- cgit v1.2.3 From dfdda82e3b84c13601be09f8351ec4f15a4fbe03 Mon Sep 17 00:00:00 2001 From: Vitaly Chikunov Date: Wed, 7 Nov 2018 00:00:02 +0300 Subject: crypto: streebog - register Streebog in hash info for IMA Register Streebog hash function in Hash Info arrays to let IMA use it for its purposes. Cc: linux-integrity@vger.kernel.org Signed-off-by: Vitaly Chikunov Reviewed-by: Mimi Zohar Signed-off-by: Herbert Xu --- include/uapi/linux/hash_info.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/hash_info.h b/include/uapi/linux/hash_info.h index eea5d02c58de..74a8609fcb4d 100644 --- a/include/uapi/linux/hash_info.h +++ b/include/uapi/linux/hash_info.h @@ -33,6 +33,8 @@ enum hash_algo { HASH_ALGO_TGR_160, HASH_ALGO_TGR_192, HASH_ALGO_SM3_256, + HASH_ALGO_STREEBOG_256, + HASH_ALGO_STREEBOG_512, HASH_ALGO__LAST }; -- cgit v1.2.3 From 80e22e961dfd15530215f6f6dcd94cd8f65ba1ea Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 14 Nov 2018 22:23:49 -0800 Subject: net: sched: gred: provide a better structured dump and expose stats Currently all GRED's virtual queue data is dumped in a single array in a single attribute. This makes it pretty much impossible to add new fields. In order to expose more detailed stats add a new set of attributes. We can now expose the 64 bit value of bytesin and all the mark stats which were not part of the original design. Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index ee017bc057a3..c8f717346b60 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -291,11 +291,37 @@ enum { TCA_GRED_DPS, TCA_GRED_MAX_P, TCA_GRED_LIMIT, + TCA_GRED_VQ_LIST, /* nested TCA_GRED_VQ_ENTRY */ __TCA_GRED_MAX, }; #define TCA_GRED_MAX (__TCA_GRED_MAX - 1) +enum { + TCA_GRED_VQ_ENTRY_UNSPEC, + TCA_GRED_VQ_ENTRY, /* nested TCA_GRED_VQ_* */ + __TCA_GRED_VQ_ENTRY_MAX, +}; +#define TCA_GRED_VQ_ENTRY_MAX (__TCA_GRED_VQ_ENTRY_MAX - 1) + +enum { + TCA_GRED_VQ_UNSPEC, + TCA_GRED_VQ_PAD, + TCA_GRED_VQ_DP, /* u32 */ + TCA_GRED_VQ_STAT_BYTES, /* u64 */ + TCA_GRED_VQ_STAT_PACKETS, /* u32 */ + TCA_GRED_VQ_STAT_BACKLOG, /* u32 */ + TCA_GRED_VQ_STAT_PROB_DROP, /* u32 */ + TCA_GRED_VQ_STAT_PROB_MARK, /* u32 */ + TCA_GRED_VQ_STAT_FORCED_DROP, /* u32 */ + TCA_GRED_VQ_STAT_FORCED_MARK, /* u32 */ + TCA_GRED_VQ_STAT_PDROP, /* u32 */ + TCA_GRED_VQ_STAT_OTHER, /* u32 */ + __TCA_GRED_VQ_MAX +}; + +#define TCA_GRED_VQ_MAX (__TCA_GRED_VQ_MAX - 1) + struct tc_gred_qopt { __u32 limit; /* HARD maximal queue length (bytes) */ __u32 qth_min; /* Min average length threshold (bytes) */ -- cgit v1.2.3 From 72111015024f4eddb5aac400ddbe38a4f8f0279a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 14 Nov 2018 22:23:51 -0800 Subject: net: sched: gred: allow manipulating per-DP RED flags Allow users to set and dump RED flags (ECN enabled and harddrop) on per-virtual queue basis. Validation of attributes is split from changes to make sure we won't have to undo previous operations when we find out configuration is invalid. The objective is to allow changing per-Qdisc parameters without overwriting the per-vq configured flags. Old user space will not pass the TCA_GRED_VQ_FLAGS attribute and per-Qdisc flags will always get propagated to the virtual queues. New user space which wants to make use of per-vq flags should set per-Qdisc flags to 0 and then configure per-vq flags as it sees fit. Once per-vq flags are set per-Qdisc flags can't be changed to non-zero. Vice versa - if the per-Qdisc flags are non-zero the TCA_GRED_VQ_FLAGS attribute has to either be omitted or set to the same value as per-Qdisc flags. Update per-Qdisc parameters: per-Qdisc | per-VQ | result 0 | 0 | all vq flags updated 0 | non-0 | error (vq flags in use) non-0 | 0 | -- impossible -- non-0 | non-0 | all vq flags updated Update per-VQ state (flags parameter not specified): no change to flags Update per-VQ state (flags parameter set): per-Qdisc | per-VQ | result 0 | any | per-vq flags updated non-0 | 0 | -- impossible -- non-0 | non-0 | error (per-Qdisc flags in use) Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index c8f717346b60..0d18b1d1fbbc 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -317,6 +317,7 @@ enum { TCA_GRED_VQ_STAT_FORCED_MARK, /* u32 */ TCA_GRED_VQ_STAT_PDROP, /* u32 */ TCA_GRED_VQ_STAT_OTHER, /* u32 */ + TCA_GRED_VQ_FLAGS, /* u32 */ __TCA_GRED_VQ_MAX }; -- cgit v1.2.3 From 54e8cb786130949a4d37792383cb528176771e5d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 15 Nov 2018 15:26:51 -0800 Subject: uapi/ethtool: fix spelling errors Trivial spelling errors found by codespell. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index c8f8e2455bf3..17be76aeb468 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -882,7 +882,7 @@ struct ethtool_rx_flow_spec { __u32 location; }; -/* How rings are layed out when accessing virtual functions or +/* How rings are laid out when accessing virtual functions or * offloaded queues is device specific. To allow users to do flow * steering and specify these queues the ring cookie is partitioned * into a 32bit queue index with an 8 bit virtual function id. @@ -891,7 +891,7 @@ struct ethtool_rx_flow_spec { * devices start supporting PCIe w/ARI. However at the moment I * do not know of any devices that support this so I do not reserve * space for this at this time. If a future patch consumes the next - * byte it should be aware of this possiblity. + * byte it should be aware of this possibility. */ #define ETHTOOL_RX_FLOW_SPEC_RING 0x00000000FFFFFFFFLL #define ETHTOOL_RX_FLOW_SPEC_RING_VF 0x000000FF00000000LL -- cgit v1.2.3 From e8bd8fca6773ef49390269bd467bf940a0841ccf Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Thu, 15 Nov 2018 16:44:12 -0800 Subject: tcp: add SRTT to SCM_TIMESTAMPING_OPT_STATS Add TCP_NLA_SRTT to SCM_TIMESTAMPING_OPT_STATS that reports the smoothed round trip time in microseconds (tcp_sock.srtt_us >> 3). Signed-off-by: Yousuk Seung Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index e02d31986ff9..8bb6cc5f3235 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -266,6 +266,7 @@ enum { TCP_NLA_BYTES_RETRANS, /* Data bytes retransmitted */ TCP_NLA_DSACK_DUPS, /* DSACK blocks received */ TCP_NLA_REORD_SEEN, /* reordering events seen */ + TCP_NLA_SRTT, /* smoothed RTT in usecs */ }; /* for TCP_MD5SIG socket option */ -- cgit v1.2.3 From 8d951a75d022d94a05f5fa74217670a981e8302d Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Fri, 16 Nov 2018 15:51:59 +1100 Subject: net/ncsi: Configure multi-package, multi-channel modes with failover This patch extends the ncsi-netlink interface with two new commands and three new attributes to configure multiple packages and/or channels at once, and configure specific failover modes. NCSI_CMD_SET_PACKAGE mask and NCSI_CMD_SET_CHANNEL_MASK set a whitelist of packages or channels allowed to be configured with the NCSI_ATTR_PACKAGE_MASK and NCSI_ATTR_CHANNEL_MASK attributes respectively. If one of these whitelists is set only packages or channels matching the whitelist are considered for the channel queue in ncsi_choose_active_channel(). These commands may also use the NCSI_ATTR_MULTI_FLAG to signal that multiple packages or channels may be configured simultaneously. NCSI hardware arbitration (HWA) must be available in order to enable multi-package mode. Multi-channel mode is always available. If the NCSI_ATTR_CHANNEL_ID attribute is present in the NCSI_CMD_SET_CHANNEL_MASK command the it sets the preferred channel as with the NCSI_CMD_SET_INTERFACE command. The combination of preferred channel and channel whitelist defines a primary channel and the allowed failover channels. If the NCSI_ATTR_MULTI_FLAG attribute is also present then the preferred channel is configured for Tx/Rx and the other channels are enabled only for Rx. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- include/uapi/linux/ncsi.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ncsi.h b/include/uapi/linux/ncsi.h index 0a26a5576645..a3f87c54fdb3 100644 --- a/include/uapi/linux/ncsi.h +++ b/include/uapi/linux/ncsi.h @@ -26,6 +26,12 @@ * @NCSI_CMD_SEND_CMD: send NC-SI command to network card. * Requires NCSI_ATTR_IFINDEX, NCSI_ATTR_PACKAGE_ID * and NCSI_ATTR_CHANNEL_ID. + * @NCSI_CMD_SET_PACKAGE_MASK: set a whitelist of allowed packages. + * Requires NCSI_ATTR_IFINDEX and NCSI_ATTR_PACKAGE_MASK. + * @NCSI_CMD_SET_CHANNEL_MASK: set a whitelist of allowed channels. + * Requires NCSI_ATTR_IFINDEX, NCSI_ATTR_PACKAGE_ID, and + * NCSI_ATTR_CHANNEL_MASK. If NCSI_ATTR_CHANNEL_ID is present it sets + * the primary channel. * @NCSI_CMD_MAX: highest command number */ enum ncsi_nl_commands { @@ -34,6 +40,8 @@ enum ncsi_nl_commands { NCSI_CMD_SET_INTERFACE, NCSI_CMD_CLEAR_INTERFACE, NCSI_CMD_SEND_CMD, + NCSI_CMD_SET_PACKAGE_MASK, + NCSI_CMD_SET_CHANNEL_MASK, __NCSI_CMD_AFTER_LAST, NCSI_CMD_MAX = __NCSI_CMD_AFTER_LAST - 1 @@ -48,6 +56,10 @@ enum ncsi_nl_commands { * @NCSI_ATTR_PACKAGE_ID: package ID * @NCSI_ATTR_CHANNEL_ID: channel ID * @NCSI_ATTR_DATA: command payload + * @NCSI_ATTR_MULTI_FLAG: flag to signal that multi-mode should be enabled with + * NCSI_CMD_SET_PACKAGE_MASK or NCSI_CMD_SET_CHANNEL_MASK. + * @NCSI_ATTR_PACKAGE_MASK: 32-bit mask of allowed packages. + * @NCSI_ATTR_CHANNEL_MASK: 32-bit mask of allowed channels. * @NCSI_ATTR_MAX: highest attribute number */ enum ncsi_nl_attrs { @@ -57,6 +69,9 @@ enum ncsi_nl_attrs { NCSI_ATTR_PACKAGE_ID, NCSI_ATTR_CHANNEL_ID, NCSI_ATTR_DATA, + NCSI_ATTR_MULTI_FLAG, + NCSI_ATTR_PACKAGE_MASK, + NCSI_ATTR_CHANNEL_MASK, __NCSI_ATTR_AFTER_LAST, NCSI_ATTR_MAX = __NCSI_ATTR_AFTER_LAST - 1 -- cgit v1.2.3 From 2cc0eeb67636e0339ad7b6cdfa305f63983642af Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 18 Nov 2018 16:08:51 +0800 Subject: sctp: define subscribe in sctp_sock as __u16 The member subscribe in sctp_sock is used to indicate to which of the events it is subscribed, more like a group of flags. So it's better to be defined as __u16 (2 bytpes), instead of struct sctp_event_subscribe (13 bytes). Note that sctp_event_subscribe is an UAPI struct, used on sockopt calls, and thus it will not be removed. This patch only changes the internal storage of the flags. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index c81feb373d3e..66afa5b4ab6b 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -632,7 +632,9 @@ union sctp_notification { */ enum sctp_sn_type { - SCTP_SN_TYPE_BASE = (1<<15), + SCTP_SN_TYPE_BASE = (1<<15), + SCTP_DATA_IO_EVENT = SCTP_SN_TYPE_BASE, +#define SCTP_DATA_IO_EVENT SCTP_DATA_IO_EVENT SCTP_ASSOC_CHANGE, #define SCTP_ASSOC_CHANGE SCTP_ASSOC_CHANGE SCTP_PEER_ADDR_CHANGE, @@ -657,6 +659,8 @@ enum sctp_sn_type { #define SCTP_ASSOC_RESET_EVENT SCTP_ASSOC_RESET_EVENT SCTP_STREAM_CHANGE_EVENT, #define SCTP_STREAM_CHANGE_EVENT SCTP_STREAM_CHANGE_EVENT + SCTP_SN_TYPE_MAX = SCTP_STREAM_CHANGE_EVENT, +#define SCTP_SN_TYPE_MAX SCTP_SN_TYPE_MAX }; /* Notification error codes used to fill up the error fields in some -- cgit v1.2.3 From 480ba9c18a27ff77b02a2012e50dfd3e20ee9f7a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 18 Nov 2018 16:08:54 +0800 Subject: sctp: add sockopt SCTP_EVENT This patch adds sockopt SCTP_EVENT described in rfc6525#section-6.2. With this sockopt users can subscribe to an event from a specified asoc. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 66afa5b4ab6b..d584073532b8 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -129,6 +129,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_STREAM_SCHEDULER_VALUE 124 #define SCTP_INTERLEAVING_SUPPORTED 125 #define SCTP_SENDMSG_CONNECT 126 +#define SCTP_EVENT 127 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -1154,6 +1155,12 @@ struct sctp_add_streams { uint16_t sas_outstrms; }; +struct sctp_event { + sctp_assoc_t se_assoc_id; + uint16_t se_type; + uint8_t se_on; +}; + /* SCTP Stream schedulers */ enum sctp_sched_type { SCTP_SS_FCFS, -- cgit v1.2.3 From 96b3b6c9091d23289721350e32c63cc8749686be Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 16 Nov 2018 11:41:08 +0000 Subject: bpf: allow zero-initializing hash map seed Add a new flag BPF_F_ZERO_SEED, which forces a hash map to initialize the seed to zero. This is useful when doing performance analysis both on individual BPF programs, as well as the kernel's hash table implementation. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 47d606d744cc..8c01b89a4cb4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -269,6 +269,9 @@ enum bpf_attach_type { /* Flag for stack_map, store build_id+offset instead of pointer */ #define BPF_F_STACK_BUILD_ID (1U << 5) +/* Zero-initialize hash function seed. This should only be used for testing. */ +#define BPF_F_ZERO_SEED (1U << 6) + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, -- cgit v1.2.3 From 2f1833607aed6a9c1e1729bf0e2588c341ceb409 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 16 Nov 2018 11:41:09 +0000 Subject: bpf: move BPF_F_QUERY_EFFECTIVE after map flags BPF_F_QUERY_EFFECTIVE is in the middle of the flags valid for BPF_MAP_CREATE. Move it to its own section to reduce confusion. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8c01b89a4cb4..05d95290b848 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -257,9 +257,6 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) -/* flags for BPF_PROG_QUERY */ -#define BPF_F_QUERY_EFFECTIVE (1U << 0) - #define BPF_OBJ_NAME_LEN 16U /* Flags for accessing BPF object */ @@ -272,6 +269,9 @@ enum bpf_attach_type { /* Zero-initialize hash function seed. This should only be used for testing. */ #define BPF_F_ZERO_SEED (1U << 6) +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, -- cgit v1.2.3 From 23464f8c3407b83106463999b64fe10dc66ff6a3 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 20 Nov 2018 10:52:33 +0900 Subject: aio: Comment use of IOCB_FLAG_IOPRIO aio flag Comment the use of the IOCB_FLAG_IOPRIO aio flag similarly to the IOCB_FLAG_RESFD flag. Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- include/uapi/linux/aio_abi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index ce43d340f010..8387e0af0f76 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -50,6 +50,8 @@ enum { * * IOCB_FLAG_RESFD - Set if the "aio_resfd" member of the "struct iocb" * is valid. + * IOCB_FLAG_IOPRIO - Set if the "aio_reqprio" member of the "struct iocb" + * is valid. */ #define IOCB_FLAG_RESFD (1 << 0) #define IOCB_FLAG_IOPRIO (1 << 1) -- cgit v1.2.3 From 2667a2626f4da370409c2830552f6e8c8b8c41e2 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 19 Nov 2018 15:29:08 -0800 Subject: bpf: btf: Add BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO This patch adds BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO to support the function debug info. BTF_KIND_FUNC_PROTO must not have a name (i.e. !t->name_off) and it is followed by >= 0 'struct bpf_param' objects to describe the function arguments. The BTF_KIND_FUNC must have a valid name and it must refer back to a BTF_KIND_FUNC_PROTO. The above is the conclusion after the discussion between Edward Cree, Alexei, Daniel, Yonghong and Martin. By combining BTF_KIND_FUNC and BTF_LIND_FUNC_PROTO, a complete function signature can be obtained. It will be used in the later patches to learn the function signature of a running bpf program. Signed-off-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/uapi/linux/btf.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 972265f32871..14f66948fc95 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -40,7 +40,8 @@ struct btf_type { /* "size" is used by INT, ENUM, STRUCT and UNION. * "size" tells the size of the type it is describing. * - * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT. + * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, + * FUNC and FUNC_PROTO. * "type" is a type_id referring to another type. */ union { @@ -64,8 +65,10 @@ struct btf_type { #define BTF_KIND_VOLATILE 9 /* Volatile */ #define BTF_KIND_CONST 10 /* Const */ #define BTF_KIND_RESTRICT 11 /* Restrict */ -#define BTF_KIND_MAX 11 -#define NR_BTF_KINDS 12 +#define BTF_KIND_FUNC 12 /* Function */ +#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +#define BTF_KIND_MAX 13 +#define NR_BTF_KINDS 14 /* For some specific BTF_KIND, "struct btf_type" is immediately * followed by extra data. @@ -110,4 +113,13 @@ struct btf_member { __u32 offset; /* offset in bits */ }; +/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". + * The exact number of btf_param is stored in the vlen (of the + * info in "struct btf_type"). + */ +struct btf_param { + __u32 name_off; + __u32 type; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ -- cgit v1.2.3 From 838e96904ff3fc6c30e5ebbc611474669856e3c0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 19 Nov 2018 15:29:11 -0800 Subject: bpf: Introduce bpf_func_info This patch added interface to load a program with the following additional information: . prog_btf_fd . func_info, func_info_rec_size and func_info_cnt where func_info will provide function range and type_id corresponding to each function. The func_info_rec_size is introduced in the UAPI to specify struct bpf_func_info size passed from user space. This intends to make bpf_func_info structure growable in the future. If the kernel gets a different bpf_func_info size from userspace, it will try to handle user request with part of bpf_func_info it can understand. In this patch, kernel can understand struct bpf_func_info { __u32 insn_offset; __u32 type_id; }; If user passed a bpf func_info record size of 16 bytes, the kernel can still handle part of records with the above definition. If verifier agrees with function range provided by the user, the bpf_prog ksym for each function will use the func name provided in the type_id, which is supposed to provide better encoding as it is not limited by 16 bytes program name limitation and this is better for bpf program which contains multiple subprograms. The bpf_prog_info interface is also extended to return btf_id, func_info, func_info_rec_size and func_info_cnt to userspace, so userspace can print out the function prototype for each xlated function. The insn_offset in the returned func_info corresponds to the insn offset for xlated functions. With other jit related fields in bpf_prog_info, userspace can also print out function prototypes for each jited function. Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 05d95290b848..c1554aa07465 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -338,6 +338,10 @@ union bpf_attr { * (context accesses, allowed helpers, etc). */ __u32 expected_attach_type; + __u32 prog_btf_fd; /* fd pointing to BTF type data */ + __u32 func_info_rec_size; /* userspace bpf_func_info size */ + __aligned_u64 func_info; /* func info */ + __u32 func_info_cnt; /* number of bpf_func_info records */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -2638,6 +2642,10 @@ struct bpf_prog_info { __u32 nr_jited_func_lens; __aligned_u64 jited_ksyms; __aligned_u64 jited_func_lens; + __u32 btf_id; + __u32 func_info_rec_size; + __aligned_u64 func_info; + __u32 func_info_cnt; } __attribute__((aligned(8))); struct bpf_map_info { @@ -2949,4 +2957,9 @@ struct bpf_flow_keys { }; }; +struct bpf_func_info { + __u32 insn_offset; + __u32 type_id; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From f11216b24219ab26d8d159fbfa12dff886b16e32 Mon Sep 17 00:00:00 2001 From: Vlad Dumitrescu Date: Thu, 22 Nov 2018 14:39:16 -0500 Subject: bpf: add skb->tstamp r/w access from tc clsact and cg skb progs This could be used to rate limit egress traffic in concert with a qdisc which supports Earliest Departure Time, such as FQ. Write access from cg skb progs only with CAP_SYS_ADMIN, since the value will be used by downstream qdiscs. It might make sense to relax this. Changes v1 -> v2: - allow access from cg skb, write only with CAP_SYS_ADMIN Signed-off-by: Vlad Dumitrescu Acked-by: Eric Dumazet Acked-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c1554aa07465..23e2031a43d4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2468,6 +2468,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; + __u64 tstamp; }; struct bpf_tunnel_key { -- cgit v1.2.3 From 89a9157e1253bb3384a7596cb51bf1ceeac063ed Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 21 Nov 2018 18:03:18 +0800 Subject: virtio: add packed ring types and macros Add types and macros for packed ring. Signed-off-by: Tiwei Bie Signed-off-by: David S. Miller --- include/uapi/linux/virtio_config.h | 3 +++ include/uapi/linux/virtio_ring.h | 52 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index 449132c76b1c..1196e1c1d4f6 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -75,6 +75,9 @@ */ #define VIRTIO_F_IOMMU_PLATFORM 33 +/* This feature indicates support for the packed virtqueue layout. */ +#define VIRTIO_F_RING_PACKED 34 + /* * Does the device support Single Root I/O Virtualization? */ diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 6d5d5faa989b..2414f8af26b3 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -44,6 +44,13 @@ /* This means the buffer contains a list of buffer descriptors. */ #define VRING_DESC_F_INDIRECT 4 +/* + * Mark a descriptor as available or used in packed ring. + * Notice: they are defined as shifts instead of shifted values. + */ +#define VRING_PACKED_DESC_F_AVAIL 7 +#define VRING_PACKED_DESC_F_USED 15 + /* The Host uses this in used->flags to advise the Guest: don't kick me when * you add a buffer. It's unreliable, so it's simply an optimization. Guest * will still kick if it's out of buffers. */ @@ -53,6 +60,23 @@ * optimization. */ #define VRING_AVAIL_F_NO_INTERRUPT 1 +/* Enable events in packed ring. */ +#define VRING_PACKED_EVENT_FLAG_ENABLE 0x0 +/* Disable events in packed ring. */ +#define VRING_PACKED_EVENT_FLAG_DISABLE 0x1 +/* + * Enable events for a specific descriptor in packed ring. + * (as specified by Descriptor Ring Change Event Offset/Wrap Counter). + * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated. + */ +#define VRING_PACKED_EVENT_FLAG_DESC 0x2 + +/* + * Wrap counter bit shift in event suppression structure + * of packed ring. + */ +#define VRING_PACKED_EVENT_F_WRAP_CTR 15 + /* We support indirect buffer descriptors */ #define VIRTIO_RING_F_INDIRECT_DESC 28 @@ -171,4 +195,32 @@ static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); } +struct vring_packed_desc_event { + /* Descriptor Ring Change Event Offset/Wrap Counter. */ + __le16 off_wrap; + /* Descriptor Ring Change Event Flags. */ + __le16 flags; +}; + +struct vring_packed_desc { + /* Buffer Address. */ + __le64 addr; + /* Buffer Length. */ + __le32 len; + /* Buffer ID. */ + __le16 id; + /* The flags depending on descriptor type. */ + __le16 flags; +}; + +struct vring_packed { + unsigned int num; + + struct vring_packed_desc *desc; + + struct vring_packed_desc_event *driver; + + struct vring_packed_desc_event *device; +}; + #endif /* _UAPI_LINUX_VIRTIO_RING_H */ -- cgit v1.2.3 From a428afe82f98d2ffb31c981671630df1fa25906f Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 24 Nov 2018 04:34:20 +0200 Subject: net: bridge: add support for user-controlled bool options We have been adding many new bridge options, a big number of which are boolean but still take up netlink attribute ids and waste space in the skb. Recently we discussed learning from link-local packets[1] and decided yet another new boolean option will be needed, thus introducing this API to save some bridge nl space. The API supports changing the value of multiple boolean options at once via the br_boolopt_multi struct which has an optmask (which options to set, bit per opt) and optval (options' new values). Future boolean options will only be added to the br_boolopt_id enum and then will have to be handled in br_boolopt_toggle/get. The API will automatically add the ability to change and export them via netlink, sysfs can use the single boolopt function versions to do the same. The behaviour with failing/succeeding is the same as with normal netlink option changing. If an option requires mapping to internal kernel flag or needs special configuration to be enabled then it should be handled in br_boolopt_toggle. It should also be able to retrieve an option's current state via br_boolopt_get. v2: WARN_ON() on unsupported option as that shouldn't be possible and also will help catch people who add new options without handling them for both set and get. Pass down extack so if an option desires it could set it on error and be more user-friendly. [1] https://www.spinics.net/lists/netdev/msg532698.html Signed-off-by: Nikolay Aleksandrov Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 18 ++++++++++++++++++ include/uapi/linux/if_link.h | 1 + 2 files changed, 19 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index e41eda3c71f1..6dc02c03bdf8 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -292,4 +292,22 @@ struct br_mcast_stats { __u64 mcast_bytes[BR_MCAST_DIR_SIZE]; __u64 mcast_packets[BR_MCAST_DIR_SIZE]; }; + +/* bridge boolean options + * IMPORTANT: if adding a new option do not forget to handle + * it in br_boolopt_toggle/get and bridge sysfs + */ +enum br_boolopt_id { + BR_BOOLOPT_MAX +}; + +/* struct br_boolopt_multi - change multiple bridge boolean options + * + * @optval: new option values (bit per option) + * @optmask: options to change (bit per option) + */ +struct br_boolopt_multi { + __u32 optval; + __u32 optmask; +}; #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index f42c069d81db..d6533828123a 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -288,6 +288,7 @@ enum { IFLA_BR_MCAST_IGMP_VERSION, IFLA_BR_MCAST_MLD_VERSION, IFLA_BR_VLAN_STATS_PER_PORT, + IFLA_BR_MULTI_BOOLOPT, __IFLA_BR_MAX, }; -- cgit v1.2.3 From 70e4272b4c81828e7d942209bae83b9d92752cfe Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 24 Nov 2018 04:34:21 +0200 Subject: net: bridge: add no_linklocal_learn bool option Use the new boolopt API to add an option which disables learning from link-local packets. The default is kept as before and learning is enabled. This is a simple map from a boolopt bit to a bridge private flag that is tested before learning. v2: pass NULL for extack via sysfs Signed-off-by: Nikolay Aleksandrov Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 6dc02c03bdf8..773e476a8e54 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -294,10 +294,13 @@ struct br_mcast_stats { }; /* bridge boolean options + * BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets + * * IMPORTANT: if adding a new option do not forget to handle * it in br_boolopt_toggle/get and bridge sysfs */ enum br_boolopt_id { + BR_BOOLOPT_NO_LL_LEARN, BR_BOOLOPT_MAX }; -- cgit v1.2.3 From cff478b9d9ccaee0de0e02700c63addf007b5d3c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 26 Nov 2018 15:42:04 +0100 Subject: netns: add support of NETNSA_TARGET_NSID Like it was done for link and address, add the ability to perform get/dump in another netns by specifying a target nsid attribute. Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/net_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_namespace.h b/include/uapi/linux/net_namespace.h index 0187c74d8889..0ed9dd61d32a 100644 --- a/include/uapi/linux/net_namespace.h +++ b/include/uapi/linux/net_namespace.h @@ -16,6 +16,7 @@ enum { NETNSA_NSID, NETNSA_PID, NETNSA_FD, + NETNSA_TARGET_NSID, __NETNSA_MAX, }; -- cgit v1.2.3 From 288f06a001eb6265122c620295b68a0dd53d1482 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 26 Nov 2018 15:42:06 +0100 Subject: netns: enable to dump full nsid translation table Like the previous patch, the goal is to ease to convert nsids from one netns to another netns. A new attribute (NETNSA_CURRENT_NSID) is added to the kernel answer when NETNSA_TARGET_NSID is provided, thus the user can easily convert nsids. Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/net_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_namespace.h b/include/uapi/linux/net_namespace.h index 0ed9dd61d32a..9f9956809565 100644 --- a/include/uapi/linux/net_namespace.h +++ b/include/uapi/linux/net_namespace.h @@ -17,6 +17,7 @@ enum { NETNSA_PID, NETNSA_FD, NETNSA_TARGET_NSID, + NETNSA_CURRENT_NSID, __NETNSA_MAX, }; -- cgit v1.2.3 From 7246d8ed4dcce23f7509949a77be15fa9f0e3d28 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 26 Nov 2018 14:16:17 -0800 Subject: bpf: helper to pop data from messages This adds a BPF SK_MSG program helper so that we can pop data from a msg. We use this to pop metadata from a previous push data call. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 23e2031a43d4..597afdbc1ab9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2268,6 +2268,19 @@ union bpf_attr { * * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) + * Description + * Will remove *pop* bytes from a *msg* starting at byte *start*. + * This may result in **ENOMEM** errors under certain situations if + * an allocation and copy are required due to a full ring buffer. + * However, the helper will try to avoid doing the allocation + * if possible. Other errors can occur if input parameters are + * invalid either due to *start* byte not being valid part of msg + * payload and/or *pop* value being to large. + * + * Return + * 0 on success, or a negative erro in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2360,7 +2373,8 @@ union bpf_attr { FN(map_push_elem), \ FN(map_pop_elem), \ FN(map_peek_elem), \ - FN(msg_push_data), + FN(msg_push_data), \ + FN(msg_pop_data), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 4f693b55c3d2d2239b8a0094b518a1e533cf75d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Nov 2018 14:42:03 -0800 Subject: tcp: implement coalescing on backlog queue In case GRO is not as efficient as it should be or disabled, we might have a user thread trapped in __release_sock() while softirq handler flood packets up to the point we have to drop. This patch balances work done from user thread and softirq, to give more chances to __release_sock() to complete its work before new packets are added the the backlog. This also helps if we receive many ACK packets, since GRO does not aggregate them. This patch brings ~60% throughput increase on a receiver without GRO, but the spectacular gain is really on 1000x release_sock() latency reduction I have measured. Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index f80135e5feaa..86dc24a96c90 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -243,6 +243,7 @@ enum LINUX_MIB_TCPREQQFULLDROP, /* TCPReqQFullDrop */ LINUX_MIB_TCPRETRANSFAIL, /* TCPRetransFail */ LINUX_MIB_TCPRCVCOALESCE, /* TCPRcvCoalesce */ + LINUX_MIB_TCPBACKLOGCOALESCE, /* TCPBacklogCoalesce */ LINUX_MIB_TCPOFOQUEUE, /* TCPOFOQueue */ LINUX_MIB_TCPOFODROP, /* TCPOFODrop */ LINUX_MIB_TCPOFOMERGE, /* TCPOFOMerge */ -- cgit v1.2.3 From 26d31925cd5ea4b5b168ed538b0326d63ccbb384 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 28 Nov 2018 19:12:56 +0100 Subject: tun: implement carrier change The userspace may need to control the carrier state. Signed-off-by: Nicolas Dichtel Signed-off-by: Didier Pallard Signed-off-by: David S. Miller --- include/uapi/linux/if_tun.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index ee432cd3018c..23a6753b37df 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -59,6 +59,7 @@ #define TUNGETVNETBE _IOR('T', 223, int) #define TUNSETSTEERINGEBPF _IOR('T', 224, int) #define TUNSETFILTEREBPF _IOR('T', 225, int) +#define TUNSETCARRIER _IOW('T', 226, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 -- cgit v1.2.3 From e9ee9efc0d176512cdce9d27ff8549d7ffa2bfcd Mon Sep 17 00:00:00 2001 From: David Miller Date: Fri, 30 Nov 2018 21:08:14 -0800 Subject: bpf: Add BPF_F_ANY_ALIGNMENT. Often we want to write tests cases that check things like bad context offset accesses. And one way to do this is to use an odd offset on, for example, a 32-bit load. This unfortunately triggers the alignment checks first on platforms that do not set CONFIG_EFFICIENT_UNALIGNED_ACCESS. So the test case see the alignment failure rather than what it was testing for. It is often not completely possible to respect the original intention of the test, or even test the same exact thing, while solving the alignment issue. Another option could have been to check the alignment after the context and other validations are performed by the verifier, but that is a non-trivial change to the verifier. Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 597afdbc1ab9..8050caea7495 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -232,6 +232,20 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the + * verifier will allow any alignment whatsoever. On platforms + * with strict alignment requirements for loads ands stores (such + * as sparc and mips) the verifier validates that all loads and + * stores provably follow this requirement. This flag turns that + * checking and enforcement off. + * + * It is mostly used for testing when we want to validate the + * context and memory access aspects of the verifier, but because + * of an unaligned access the alignment check would trigger before + * the one we are interested in. + */ +#define BPF_F_ANY_ALIGNMENT (1U << 1) + /* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 -- cgit v1.2.3 From c3e9305983597a61083482581e83f0bd77ba306a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 12 Nov 2018 16:26:44 +0100 Subject: netfilter: remove NFC_* cache bits These are very very (for long time unused) caching infrastructure definition, remove then. They have nothing to do with the NFC subsystem. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter.h | 4 ---- include/uapi/linux/netfilter_decnet.h | 10 ---------- include/uapi/linux/netfilter_ipv4.h | 28 ---------------------------- include/uapi/linux/netfilter_ipv6.h | 29 ----------------------------- 4 files changed, 71 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index cca10e767cd8..ca9e63d6e0e4 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -34,10 +34,6 @@ /* only for userspace compatibility */ #ifndef __KERNEL__ -/* Generic cache responses from hook functions. - <= 0x2000 is used for protocol-flags. */ -#define NFC_UNKNOWN 0x4000 -#define NFC_ALTERED 0x8000 /* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */ #define NF_VERDICT_BITS 16 diff --git a/include/uapi/linux/netfilter_decnet.h b/include/uapi/linux/netfilter_decnet.h index 61f1c7dfd033..3c77f54560f2 100644 --- a/include/uapi/linux/netfilter_decnet.h +++ b/include/uapi/linux/netfilter_decnet.h @@ -15,16 +15,6 @@ #include /* for INT_MIN, INT_MAX */ -/* IP Cache bits. */ -/* Src IP address. */ -#define NFC_DN_SRC 0x0001 -/* Dest IP address. */ -#define NFC_DN_DST 0x0002 -/* Input device. */ -#define NFC_DN_IF_IN 0x0004 -/* Output device. */ -#define NFC_DN_IF_OUT 0x0008 - /* kernel define is in netfilter_defs.h */ #define NF_DN_NUMHOOKS 7 #endif /* ! __KERNEL__ */ diff --git a/include/uapi/linux/netfilter_ipv4.h b/include/uapi/linux/netfilter_ipv4.h index c3b060775e13..155e77d6a42d 100644 --- a/include/uapi/linux/netfilter_ipv4.h +++ b/include/uapi/linux/netfilter_ipv4.h @@ -13,34 +13,6 @@ #include /* for INT_MIN, INT_MAX */ -/* IP Cache bits. */ -/* Src IP address. */ -#define NFC_IP_SRC 0x0001 -/* Dest IP address. */ -#define NFC_IP_DST 0x0002 -/* Input device. */ -#define NFC_IP_IF_IN 0x0004 -/* Output device. */ -#define NFC_IP_IF_OUT 0x0008 -/* TOS. */ -#define NFC_IP_TOS 0x0010 -/* Protocol. */ -#define NFC_IP_PROTO 0x0020 -/* IP options. */ -#define NFC_IP_OPTIONS 0x0040 -/* Frag & flags. */ -#define NFC_IP_FRAG 0x0080 - -/* Per-protocol information: only matters if proto match. */ -/* TCP flags. */ -#define NFC_IP_TCPFLAGS 0x0100 -/* Source port. */ -#define NFC_IP_SRC_PT 0x0200 -/* Dest port. */ -#define NFC_IP_DST_PT 0x0400 -/* Something else about the proto */ -#define NFC_IP_PROTO_UNKNOWN 0x2000 - /* IP Hooks */ /* After promisc drops, checksum checks. */ #define NF_IP_PRE_ROUTING 0 diff --git a/include/uapi/linux/netfilter_ipv6.h b/include/uapi/linux/netfilter_ipv6.h index dc624fd24d25..80aa9b0799af 100644 --- a/include/uapi/linux/netfilter_ipv6.h +++ b/include/uapi/linux/netfilter_ipv6.h @@ -16,35 +16,6 @@ #include /* for INT_MIN, INT_MAX */ -/* IP Cache bits. */ -/* Src IP address. */ -#define NFC_IP6_SRC 0x0001 -/* Dest IP address. */ -#define NFC_IP6_DST 0x0002 -/* Input device. */ -#define NFC_IP6_IF_IN 0x0004 -/* Output device. */ -#define NFC_IP6_IF_OUT 0x0008 -/* TOS. */ -#define NFC_IP6_TOS 0x0010 -/* Protocol. */ -#define NFC_IP6_PROTO 0x0020 -/* IP options. */ -#define NFC_IP6_OPTIONS 0x0040 -/* Frag & flags. */ -#define NFC_IP6_FRAG 0x0080 - - -/* Per-protocol information: only matters if proto match. */ -/* TCP flags. */ -#define NFC_IP6_TCPFLAGS 0x0100 -/* Source port. */ -#define NFC_IP6_SRC_PT 0x0200 -/* Dest port. */ -#define NFC_IP6_DST_PT 0x0400 -/* Something else about the proto */ -#define NFC_IP6_PROTO_UNKNOWN 0x2000 - /* IP6 Hooks */ /* After promisc drops, checksum checks. */ #define NF_IP6_PRE_ROUTING 0 -- cgit v1.2.3 From e3da08d057002f9d0831949d51666c3e15dc6b29 Mon Sep 17 00:00:00 2001 From: Petar Penkov Date: Sun, 2 Dec 2018 20:18:19 -0500 Subject: bpf: allow BPF read access to qdisc pkt_len The pkt_len field in qdisc_skb_cb stores the skb length as it will appear on the wire after segmentation. For byte accounting, this value is more accurate than skb->len. It is computed on entry to the TC layer, so only valid there. Allow read access to this field from BPF tc classifier and action programs. The implementation is analogous to tc_classid, aside from restricting to read access. To distinguish it from skb->len and self-describe export as wire_len. Changes v1->v2 - Rename pkt_len to wire_len Signed-off-by: Petar Penkov Signed-off-by: Vlad Dumitrescu Signed-off-by: Willem de Bruijn Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8050caea7495..0183b8e70a9e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2497,6 +2497,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; __u64 tstamp; + __u32 wire_len; }; struct bpf_tunnel_key { -- cgit v1.2.3 From 90b1023f68c78b9b85e364250155776c8e421176 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 3 Dec 2018 12:13:35 +0000 Subject: bpf: fix documentation for eBPF helpers The missing indentation on the "Return" sections for bpf_map_pop_elem() and bpf_map_peek_elem() helpers break RST and man pages generation. This patch fixes them, and moves the description of those two helpers towards the end of the list (even though they are somehow related to the three first helpers for maps, the man page explicitly states that the helpers are sorted in chronological order). While at it, bring other minor formatting edits for eBPF helpers documentation: mostly blank lines removal, RST formatting, or other small nits for consistency. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 90 ++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 45 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0183b8e70a9e..572eb2d42768 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -496,18 +496,6 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_pop_elem(struct bpf_map *map, void *value) - * Description - * Pop an element from *map*. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_map_peek_elem(struct bpf_map *map, void *value) - * Description - * Get an element from *map* without removing it. - * Return - * 0 on success, or a negative error in case of failure. - * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1931,9 +1919,9 @@ union bpf_attr { * is set to metric from route (IPv4/IPv6 only), and ifindex * is set to the device index of the nexthop from the FIB lookup. * - * *plen* argument is the size of the passed in struct. - * *flags* argument can be a combination of one or more of the - * following values: + * *plen* argument is the size of the passed in struct. + * *flags* argument can be a combination of one or more of the + * following values: * * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB @@ -1942,9 +1930,9 @@ union bpf_attr { * Perform lookup from an egress perspective (default is * ingress). * - * *ctx* is either **struct xdp_md** for XDP programs or - * **struct sk_buff** tc cls_act programs. - * Return + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. + * Return * * < 0 if any input argument is invalid * * 0 on success (packet is forwarded, nexthop neighbor exists) * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the @@ -2089,8 +2077,8 @@ union bpf_attr { * translated to a keycode using the rc keymap, and reported as * an input key down event. After a period a key up event is * generated. This period can be extended by calling either - * **bpf_rc_keydown** () again with the same values, or calling - * **bpf_rc_repeat** (). + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). * * Some protocols include a toggle bit, in case the button was * released and pressed again between consecutive scancodes. @@ -2173,21 +2161,22 @@ union bpf_attr { * The *flags* meaning is specific for each map type, * and has to be 0 for cgroup local storage. * - * Depending on the bpf program type, a local storage area - * can be shared between multiple instances of the bpf program, + * Depending on the BPF program type, a local storage area + * can be shared between multiple instances of the BPF program, * running simultaneously. * * A user should care about the synchronization by himself. - * For example, by using the BPF_STX_XADD instruction to alter + * For example, by using the **BPF_STX_XADD** instruction to alter * the shared data. * Return - * Pointer to the local storage area. + * A pointer to the local storage area. * * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description - * Select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY map - * It checks the selected sk is matching the incoming - * request in the skb. + * Select a **SO_REUSEPORT** socket from a + * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * It checks the selected socket is matching the incoming + * request in the socket buffer. * Return * 0 on success, or a negative error in case of failure. * @@ -2195,7 +2184,7 @@ union bpf_attr { * Description * Look for TCP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, - * and if non-NULL, released via **bpf_sk_release**\ (). + * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used @@ -2221,15 +2210,15 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, *struct bpf_sock* - * return is from reuse->socks[] using hash of the packet. + * A pointer to *struct bpf_sock*, or **NULL** in case of failure. + * For sockets with reuseport option, **struct bpf_sock** + * return is from **reuse->socks**\ [] using hash of the packet. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) * Description * Look for UDP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, - * and if non-NULL, released via **bpf_sk_release**\ (). + * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used @@ -2255,46 +2244,57 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, *struct bpf_sock* - * return is from reuse->socks[] using hash of the packet. + * A pointer to **struct bpf_sock**, or **NULL** in case of + * failure. For sockets with reuseport option, **struct bpf_sock** + * return is from **reuse->socks**\ [] using hash of the packet. * - * int bpf_sk_release(struct bpf_sock *sk) + * int bpf_sk_release(struct bpf_sock *sock) * Description - * Release the reference held by *sock*. *sock* must be a non-NULL - * pointer that was returned from bpf_sk_lookup_xxx\ (). + * Release the reference held by *sock*. *sock* must be a + * non-**NULL** pointer that was returned from + * **bpf_sk_lookup_xxx**\ (). * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) * Description - * For socket policies, insert *len* bytes into msg at offset + * For socket policies, insert *len* bytes into *msg* at offset * *start*. * * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a - * *msg* it may want to insert metadata or options into the msg. + * *msg* it may want to insert metadata or options into the *msg*. * This can later be read and used by any of the lower layer BPF * hooks. * * This helper may fail if under memory pressure (a malloc * fails) in these cases BPF programs will get an appropriate * error and BPF programs will need to handle them. - * * Return * 0 on success, or a negative error in case of failure. * * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) - * Description + * Description * Will remove *pop* bytes from a *msg* starting at byte *start*. * This may result in **ENOMEM** errors under certain situations if * an allocation and copy are required due to a full ring buffer. * However, the helper will try to avoid doing the allocation * if possible. Other errors can occur if input parameters are - * invalid either due to *start* byte not being valid part of msg + * invalid either due to *start* byte not being valid part of *msg* * payload and/or *pop* value being to large. - * * Return - * 0 on success, or a negative erro in case of failure. + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ -- cgit v1.2.3 From 846e980a87fc30075517d6d979548294d5461bdb Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Mon, 3 Dec 2018 07:58:59 +0000 Subject: devlink: Add 'fw_load_policy' generic parameter Many drivers load the device's firmware image during the initialization flow either from the flash or from the disk. Currently this option is not controlled by the user and the driver decides from where to load the firmware image. 'fw_load_policy' gives the ability to control this option which allows the user to choose between different loading policies supported by the driver. This parameter can be useful while testing and/or debugging the device. For example, testing a firmware bug fix. Signed-off-by: Shalom Toledo Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 79407bbd296d..6e52d3660654 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -163,6 +163,11 @@ enum devlink_param_cmode { DEVLINK_PARAM_CMODE_MAX = __DEVLINK_PARAM_CMODE_MAX - 1 }; +enum devlink_param_fw_load_policy_value { + DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER, + DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH, +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, -- cgit v1.2.3 From b5a36b1e1b138285ea0df34bf96c759e1e30fafd Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 3 Dec 2018 11:31:23 +0000 Subject: bpf: respect size hint to BPF_PROG_TEST_RUN if present Use data_size_out as a size hint when copying test output to user space. ENOSPC is returned if the output buffer is too small. Callers which so far did not set data_size_out are not affected. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 572eb2d42768..c8e1eeee2c5f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -374,8 +374,11 @@ union bpf_attr { struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ __u32 prog_fd; __u32 retval; - __u32 data_size_in; - __u32 data_size_out; + __u32 data_size_in; /* input: len of data_in */ + __u32 data_size_out; /* input/output: len of data_out + * returns ENOSPC if data_out + * is too small. + */ __aligned_u64 data_in; __aligned_u64 data_out; __u32 repeat; -- cgit v1.2.3 From cc1068eb6ad21a6cf54aa5f9ae25bf50fd5c9d4b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 15 Nov 2018 15:25:04 -0800 Subject: uapi/nl80211: fix spelling errors Spelling errors found by codespell Signed-off-by: Stephen Hemminger Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 51bd85b7d839..2b53c0e949c7 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1734,7 +1734,7 @@ enum nl80211_commands { * the values passed in @NL80211_ATTR_SCAN_SSIDS (eg. if an SSID * is included in the probe request, but the match attributes * will never let it go through), -EINVAL may be returned. - * If ommited, no filtering is done. + * If omitted, no filtering is done. * * @NL80211_ATTR_INTERFACE_COMBINATIONS: Nested attribute listing the supported * interface combinations. In each nested item, it contains attributes @@ -1839,7 +1839,7 @@ enum nl80211_commands { * * @NL80211_ATTR_INACTIVITY_TIMEOUT: timeout value in seconds, this can be * used by the drivers which has MLME in firmware and does not have support - * to report per station tx/rx activity to free up the staion entry from + * to report per station tx/rx activity to free up the station entry from * the list. This needs to be used when the driver advertises the * capability to timeout the stations. * @@ -2200,7 +2200,7 @@ enum nl80211_commands { * * @NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST: When present the RSSI level for BSSs in * the specified band is to be adjusted before doing - * %NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI based comparision to figure out + * %NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI based comparison to figure out * better BSSs. The attribute value is a packed structure * value as specified by &struct nl80211_bss_select_rssi_adjust. * @@ -4910,7 +4910,7 @@ enum nl80211_iface_limit_attrs { * numbers = [ #{STA} <= 1, #{P2P-client,P2P-GO} <= 3 ], max = 4 * => allows a STA plus three P2P interfaces * - * The list of these four possiblities could completely be contained + * The list of these four possibilities could completely be contained * within the %NL80211_ATTR_INTERFACE_COMBINATIONS attribute to indicate * that any of these groups must match. * @@ -4940,7 +4940,7 @@ enum nl80211_if_combination_attrs { * enum nl80211_plink_state - state of a mesh peer link finite state machine * * @NL80211_PLINK_LISTEN: initial state, considered the implicit - * state of non existant mesh peer links + * state of non existent mesh peer links * @NL80211_PLINK_OPN_SNT: mesh plink open frame has been sent to * this mesh peer * @NL80211_PLINK_OPN_RCVD: mesh plink open frame has been received @@ -5432,7 +5432,7 @@ enum nl80211_timeout_reason { * request parameters IE in the probe request * @NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP: accept broadcast probe responses * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE: send probe request frames at - * rate of at least 5.5M. In case non OCE AP is dicovered in the channel, + * rate of at least 5.5M. In case non OCE AP is discovered in the channel, * only the first probe req in the channel will be sent in high rate. * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: allow probe request * tx deferral (dot11FILSProbeDelay shall be set to 15ms) -- cgit v1.2.3 From d30d42e08c76cb9323ec6121190eb026b07f773b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 5 Dec 2018 17:35:44 -0800 Subject: bpf: Change insn_offset to insn_off in bpf_func_info The later patch will introduce "struct bpf_line_info" which has member "line_off" and "file_off" referring back to the string section in btf. The line_"off" and file_"off" are more consistent to the naming convention in btf.h that means "offset" (e.g. name_off in "struct btf_type"). The to-be-added "struct bpf_line_info" also has another member, "insn_off" which is the same as the "insn_offset" in "struct bpf_func_info". Hence, this patch renames "insn_offset" to "insn_off" for "struct bpf_func_info". Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c8e1eeee2c5f..a84fd232d934 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2991,7 +2991,7 @@ struct bpf_flow_keys { }; struct bpf_func_info { - __u32 insn_offset; + __u32 insn_off; __u32 type_id; }; -- cgit v1.2.3 From 6e8e72cd206e2ba68801e4f2490f639d41808c8d Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 29 Nov 2018 14:42:18 +0000 Subject: crypto: user - convert all stats from u32 to u64 All the 32-bit fields need to be 64-bit. In some cases, UINT32_MAX crypto operations can be done in seconds. Reported-by: Eric Biggers Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- include/uapi/linux/cryptouser.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 6dafbc3e4414..9f8187077ce4 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -79,11 +79,11 @@ struct crypto_user_alg { struct crypto_stat { char type[CRYPTO_MAX_NAME]; union { - __u32 stat_encrypt_cnt; - __u32 stat_compress_cnt; - __u32 stat_generate_cnt; - __u32 stat_hash_cnt; - __u32 stat_setsecret_cnt; + __u64 stat_encrypt_cnt; + __u64 stat_compress_cnt; + __u64 stat_generate_cnt; + __u64 stat_hash_cnt; + __u64 stat_setsecret_cnt; }; union { __u64 stat_encrypt_tlen; @@ -92,29 +92,29 @@ struct crypto_stat { __u64 stat_hash_tlen; }; union { - __u32 stat_akcipher_err_cnt; - __u32 stat_cipher_err_cnt; - __u32 stat_compress_err_cnt; - __u32 stat_aead_err_cnt; - __u32 stat_hash_err_cnt; - __u32 stat_rng_err_cnt; - __u32 stat_kpp_err_cnt; + __u64 stat_akcipher_err_cnt; + __u64 stat_cipher_err_cnt; + __u64 stat_compress_err_cnt; + __u64 stat_aead_err_cnt; + __u64 stat_hash_err_cnt; + __u64 stat_rng_err_cnt; + __u64 stat_kpp_err_cnt; }; union { - __u32 stat_decrypt_cnt; - __u32 stat_decompress_cnt; - __u32 stat_seed_cnt; - __u32 stat_generate_public_key_cnt; + __u64 stat_decrypt_cnt; + __u64 stat_decompress_cnt; + __u64 stat_seed_cnt; + __u64 stat_generate_public_key_cnt; }; union { __u64 stat_decrypt_tlen; __u64 stat_decompress_tlen; }; union { - __u32 stat_verify_cnt; - __u32 stat_compute_shared_secret_cnt; + __u64 stat_verify_cnt; + __u64 stat_compute_shared_secret_cnt; }; - __u32 stat_sign_cnt; + __u64 stat_sign_cnt; }; struct crypto_report_larval { -- cgit v1.2.3 From 7f0a9d5c9d1ba8ab3e5b144e52553744dc0d7471 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 29 Nov 2018 14:42:19 +0000 Subject: crypto: user - split user space crypto stat structures It is cleaner to have each stat in their own structures. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- include/uapi/linux/cryptouser.h | 100 +++++++++++++++++++++++++--------------- 1 file changed, 62 insertions(+), 38 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 9f8187077ce4..3a70f025e27d 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -76,45 +76,69 @@ struct crypto_user_alg { __u32 cru_flags; }; -struct crypto_stat { - char type[CRYPTO_MAX_NAME]; - union { - __u64 stat_encrypt_cnt; - __u64 stat_compress_cnt; - __u64 stat_generate_cnt; - __u64 stat_hash_cnt; - __u64 stat_setsecret_cnt; - }; - union { - __u64 stat_encrypt_tlen; - __u64 stat_compress_tlen; - __u64 stat_generate_tlen; - __u64 stat_hash_tlen; - }; - union { - __u64 stat_akcipher_err_cnt; - __u64 stat_cipher_err_cnt; - __u64 stat_compress_err_cnt; - __u64 stat_aead_err_cnt; - __u64 stat_hash_err_cnt; - __u64 stat_rng_err_cnt; - __u64 stat_kpp_err_cnt; - }; - union { - __u64 stat_decrypt_cnt; - __u64 stat_decompress_cnt; - __u64 stat_seed_cnt; - __u64 stat_generate_public_key_cnt; - }; - union { - __u64 stat_decrypt_tlen; - __u64 stat_decompress_tlen; - }; - union { - __u64 stat_verify_cnt; - __u64 stat_compute_shared_secret_cnt; - }; +struct crypto_stat_aead { + char type[CRYPTO_MAX_NAME]; + __u64 stat_encrypt_cnt; + __u64 stat_encrypt_tlen; + __u64 stat_decrypt_cnt; + __u64 stat_decrypt_tlen; + __u64 stat_aead_err_cnt; +}; + +struct crypto_stat_akcipher { + char type[CRYPTO_MAX_NAME]; + __u64 stat_encrypt_cnt; + __u64 stat_encrypt_tlen; + __u64 stat_decrypt_cnt; + __u64 stat_decrypt_tlen; + __u64 stat_verify_cnt; __u64 stat_sign_cnt; + __u64 stat_akcipher_err_cnt; +}; + +struct crypto_stat_cipher { + char type[CRYPTO_MAX_NAME]; + __u64 stat_encrypt_cnt; + __u64 stat_encrypt_tlen; + __u64 stat_decrypt_cnt; + __u64 stat_decrypt_tlen; + __u64 stat_cipher_err_cnt; +}; + +struct crypto_stat_compress { + char type[CRYPTO_MAX_NAME]; + __u64 stat_compress_cnt; + __u64 stat_compress_tlen; + __u64 stat_decompress_cnt; + __u64 stat_decompress_tlen; + __u64 stat_compress_err_cnt; +}; + +struct crypto_stat_hash { + char type[CRYPTO_MAX_NAME]; + __u64 stat_hash_cnt; + __u64 stat_hash_tlen; + __u64 stat_hash_err_cnt; +}; + +struct crypto_stat_kpp { + char type[CRYPTO_MAX_NAME]; + __u64 stat_setsecret_cnt; + __u64 stat_generate_public_key_cnt; + __u64 stat_compute_shared_secret_cnt; + __u64 stat_kpp_err_cnt; +}; + +struct crypto_stat_rng { + char type[CRYPTO_MAX_NAME]; + __u64 stat_generate_cnt; + __u64 stat_generate_tlen; + __u64 stat_seed_cnt; + __u64 stat_rng_err_cnt; +}; + +struct crypto_stat_larval { + char type[CRYPTO_MAX_NAME]; }; struct crypto_report_larval { -- cgit v1.2.3 From 44f13133cb03ec32fc88a533673248ef5c0617e3 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 29 Nov 2018 14:42:25 +0000 Subject: crypto: user - rename err_cnt parameter Since now all crypto stats are on their own structures, it is now useless to have the algorithm name in the err_cnt member. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- include/uapi/linux/cryptouser.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 3a70f025e27d..4dc1603919ce 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -82,7 +82,7 @@ struct crypto_stat_aead { __u64 stat_encrypt_tlen; __u64 stat_decrypt_cnt; __u64 stat_decrypt_tlen; - __u64 stat_aead_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_akcipher { @@ -93,7 +93,7 @@ struct crypto_stat_akcipher { __u64 stat_decrypt_tlen; __u64 stat_verify_cnt; __u64 stat_sign_cnt; - __u64 stat_akcipher_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_cipher { @@ -102,7 +102,7 @@ struct crypto_stat_cipher { __u64 stat_encrypt_tlen; __u64 stat_decrypt_cnt; __u64 stat_decrypt_tlen; - __u64 stat_cipher_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_compress { @@ -111,14 +111,14 @@ struct crypto_stat_compress { __u64 stat_compress_tlen; __u64 stat_decompress_cnt; __u64 stat_decompress_tlen; - __u64 stat_compress_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_hash { char type[CRYPTO_MAX_NAME]; __u64 stat_hash_cnt; __u64 stat_hash_tlen; - __u64 stat_hash_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_kpp { @@ -126,7 +126,7 @@ struct crypto_stat_kpp { __u64 stat_setsecret_cnt; __u64 stat_generate_public_key_cnt; __u64 stat_compute_shared_secret_cnt; - __u64 stat_kpp_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_rng { @@ -134,7 +134,7 @@ struct crypto_stat_rng { __u64 stat_generate_cnt; __u64 stat_generate_tlen; __u64 stat_seed_cnt; - __u64 stat_rng_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_larval { -- cgit v1.2.3 From c454a46b5efd8eff8880e88ece2976e60a26bf35 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 7 Dec 2018 16:42:25 -0800 Subject: bpf: Add bpf_line_info support This patch adds bpf_line_info support. It accepts an array of bpf_line_info objects during BPF_PROG_LOAD. The "line_info", "line_info_cnt" and "line_info_rec_size" are added to the "union bpf_attr". The "line_info_rec_size" makes bpf_line_info extensible in the future. The new "check_btf_line()" ensures the userspace line_info is valid for the kernel to use. When the verifier is translating/patching the bpf_prog (through "bpf_patch_insn_single()"), the line_infos' insn_off is also adjusted by the newly added "bpf_adj_linfo()". If the bpf_prog is jited, this patch also provides the jited addrs (in aux->jited_linfo) for the corresponding line_info.insn_off. "bpf_prog_fill_jited_linfo()" is added to fill the aux->jited_linfo. It is currently called by the x86 jit. Other jits can also use "bpf_prog_fill_jited_linfo()" and it will be done in the followup patches. In the future, if it deemed necessary, a particular jit could also provide its own "bpf_prog_fill_jited_linfo()" implementation. A few "*line_info*" fields are added to the bpf_prog_info such that the user can get the xlated line_info back (i.e. the line_info with its insn_off reflecting the translated prog). The jited_line_info is available if the prog is jited. It is an array of __u64. If the prog is not jited, jited_line_info_cnt is 0. The verifier's verbose log with line_info will be done in a follow up patch. Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a84fd232d934..7a66db8d15d5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -356,6 +356,9 @@ union bpf_attr { __u32 func_info_rec_size; /* userspace bpf_func_info size */ __aligned_u64 func_info; /* func info */ __u32 func_info_cnt; /* number of bpf_func_info records */ + __u32 line_info_rec_size; /* userspace bpf_line_info size */ + __aligned_u64 line_info; /* line info */ + __u32 line_info_cnt; /* number of bpf_line_info records */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -2679,6 +2682,12 @@ struct bpf_prog_info { __u32 func_info_rec_size; __aligned_u64 func_info; __u32 func_info_cnt; + __u32 line_info_cnt; + __aligned_u64 line_info; + __aligned_u64 jited_line_info; + __u32 jited_line_info_cnt; + __u32 line_info_rec_size; + __u32 jited_line_info_rec_size; } __attribute__((aligned(8))); struct bpf_map_info { @@ -2995,4 +3004,14 @@ struct bpf_func_info { __u32 type_id; }; +#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10) +#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) + +struct bpf_line_info { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 01d3240a04f4c09392e13c77b54d4423ebce2d72 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 6 Dec 2018 13:01:03 +0000 Subject: media: bpf: add bpf function to report mouse movement Some IR remotes have a directional pad or other pointer-like thing that can be used as a mouse. Make it possible to decode these types of IR protocols in BPF. Cc: netdev@vger.kernel.org Signed-off-by: Sean Young Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7a66db8d15d5..1bee1135866a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2301,6 +2301,20 @@ union bpf_attr { * payload and/or *pop* value being to large. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded pointer movement. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * Return + * 0 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2394,7 +2408,8 @@ union bpf_attr { FN(map_pop_elem), \ FN(map_peek_elem), \ FN(msg_push_data), \ - FN(msg_pop_data), + FN(msg_pop_data), \ + FN(rc_pointer_rel), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 11d8b82d2222cade12caad2c125f23023777dcbc Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 10 Dec 2018 14:14:08 -0800 Subject: bpf: rename *_info_cnt to nr_*_info in bpf_prog_info In uapi bpf.h, currently we have the following fields in the struct bpf_prog_info: __u32 func_info_cnt; __u32 line_info_cnt; __u32 jited_line_info_cnt; The above field names "func_info_cnt" and "line_info_cnt" also appear in union bpf_attr for program loading. The original intention is to keep the names the same between bpf_prog_info and bpf_attr so it will imply what we returned to user space will be the same as what the user space passed to the kernel. Such a naming convention in bpf_prog_info is not consistent with other fields like: __u32 nr_jited_ksyms; __u32 nr_jited_func_lens; This patch made this adjustment so in bpf_prog_info newly introduced *_info_cnt becomes nr_*_info. Acked-by: Song Liu Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1bee1135866a..f943ed803309 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2696,11 +2696,11 @@ struct bpf_prog_info { __u32 btf_id; __u32 func_info_rec_size; __aligned_u64 func_info; - __u32 func_info_cnt; - __u32 line_info_cnt; + __u32 nr_func_info; + __u32 nr_line_info; __aligned_u64 line_info; __aligned_u64 jited_line_info; - __u32 jited_line_info_cnt; + __u32 nr_jited_line_info; __u32 line_info_rec_size; __u32 jited_line_info_rec_size; } __attribute__((aligned(8))); -- cgit v1.2.3 From 0bd72117fba2dd51a65eaa7b480adc0eea9a4409 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 11 Dec 2018 10:26:33 +0100 Subject: bpf: fix up uapi helper description and sync bpf header with tools Minor markup fixup from bpf-next into net-next merge in the BPF helper description of bpf_sk_lookup_tcp() and bpf_sk_lookup_udp(). Also sync up the copy of bpf.h from tooling infrastructure. Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 92e962ba0c47..aa582cd5bfcf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2218,9 +2218,9 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, the *struct bpf_sock* - * result is from reuse->socks[] using the hash of the tuple. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description @@ -2254,9 +2254,9 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, the *struct bpf_sock* - * result is from reuse->socks[] using the hash of the tuple. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * * int bpf_sk_release(struct bpf_sock *sock) * Description -- cgit v1.2.3 From 6a21cc50f0c7f87dae5259f6cfefe024412313f6 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Sun, 9 Dec 2018 11:24:13 -0700 Subject: seccomp: add a return code to trap to userspace This patch introduces a means for syscalls matched in seccomp to notify some other task that a particular filter has been triggered. The motivation for this is primarily for use with containers. For example, if a container does an init_module(), we obviously don't want to load this untrusted code, which may be compiled for the wrong version of the kernel anyway. Instead, we could parse the module image, figure out which module the container is trying to load and load it on the host. As another example, containers cannot mount() in general since various filesystems assume a trusted image. However, if an orchestrator knows that e.g. a particular block device has not been exposed to a container for writing, it want to allow the container to mount that block device (that is, handle the mount for it). This patch adds functionality that is already possible via at least two other means that I know about, both of which involve ptrace(): first, one could ptrace attach, and then iterate through syscalls via PTRACE_SYSCALL. Unfortunately this is slow, so a faster version would be to install a filter that does SECCOMP_RET_TRACE, which triggers a PTRACE_EVENT_SECCOMP. Since ptrace allows only one tracer, if the container runtime is that tracer, users inside the container (or outside) trying to debug it will not be able to use ptrace, which is annoying. It also means that older distributions based on Upstart cannot boot inside containers using ptrace, since upstart itself uses ptrace to monitor services while starting. The actual implementation of this is fairly small, although getting the synchronization right was/is slightly complex. Finally, it's worth noting that the classic seccomp TOCTOU of reading memory data from the task still applies here, but can be avoided with careful design of the userspace handler: if the userspace handler reads all of the task memory that is necessary before applying its security policy, the tracee's subsequent memory edits will not be read by the tracer. Signed-off-by: Tycho Andersen CC: Kees Cook CC: Andy Lutomirski CC: Oleg Nesterov CC: Eric W. Biederman CC: "Serge E. Hallyn" Acked-by: Serge Hallyn CC: Christian Brauner CC: Tyler Hicks CC: Akihiro Suda Signed-off-by: Kees Cook --- include/uapi/linux/seccomp.h | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 9efc0e73d50b..90734aa5aa36 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -15,11 +15,13 @@ #define SECCOMP_SET_MODE_STRICT 0 #define SECCOMP_SET_MODE_FILTER 1 #define SECCOMP_GET_ACTION_AVAIL 2 +#define SECCOMP_GET_NOTIF_SIZES 3 /* Valid flags for SECCOMP_SET_MODE_FILTER */ -#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) -#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) -#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) +#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) /* * All BPF programs must return a 32-bit value. @@ -35,6 +37,7 @@ #define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_USER_NOTIF 0x7fc00000U /* notifies userspace */ #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ #define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ @@ -60,4 +63,35 @@ struct seccomp_data { __u64 args[6]; }; +struct seccomp_notif_sizes { + __u16 seccomp_notif; + __u16 seccomp_notif_resp; + __u16 seccomp_data; +}; + +struct seccomp_notif { + __u64 id; + __u32 pid; + __u32 flags; + struct seccomp_data data; +}; + +struct seccomp_notif_resp { + __u64 id; + __s64 val; + __s32 error; + __u32 flags; +}; + +#define SECCOMP_IOC_MAGIC '!' +#define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) +#define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) + +/* Flags for seccomp notification fd ioctl. */ +#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) +#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ + struct seccomp_notif_resp) +#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64) #endif /* _UAPI_LINUX_SECCOMP_H */ -- cgit v1.2.3 From c872bdb38febb4c31ece3599c52cf1f833b89f4e Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 12 Dec 2018 09:37:46 -0800 Subject: bpf: include sub program tags in bpf_prog_info Changes v2 -> v3: 1. remove check for bpf_dump_raw_ok(). Changes v1 -> v2: 1. Fix error path as Martin suggested. This patch adds nr_prog_tags and prog_tags to bpf_prog_info. This is a reliable way for user space to get tags of all sub programs. Before this patch, user space need to find sub program tags via kallsyms. This feature will be used in BPF introspection, where user space queries information about BPF programs via sys_bpf. Signed-off-by: Song Liu Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index aa582cd5bfcf..e7d57e89f25f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2717,6 +2717,8 @@ struct bpf_prog_info { __u32 nr_jited_line_info; __u32 line_info_rec_size; __u32 jited_line_info_rec_size; + __u32 nr_prog_tags; + __aligned_u64 prog_tags; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3 From df9b0e30d44c901ac27c0f38cd54511b3f130c6d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 15 Dec 2018 14:09:06 -0800 Subject: neighbor: Add protocol attribute Similar to routes and rules, add protocol attribute to neighbor entries for easier tracking of how each was created. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 998155444e0d..cd144e3099a3 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -28,6 +28,7 @@ enum { NDA_MASTER, NDA_LINK_NETNSID, NDA_SRC_VNI, + NDA_PROTOCOL, /* Originator of entry */ __NDA_MAX }; -- cgit v1.2.3 From 20427e5db3f96fc054c6a6ad95606906b834deb1 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 3 Dec 2018 20:56:47 +0100 Subject: mmc: document 'Reliable Write' bit in uapi header If we use it this way, people should know about it. Also, replace true/false with nonzero/zero because the flag is not strictly a bool anymore. Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Signed-off-by: Ulf Hansson --- include/uapi/linux/mmc/ioctl.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mmc/ioctl.h b/include/uapi/linux/mmc/ioctl.h index 45f369dc0a42..00c08120f3ba 100644 --- a/include/uapi/linux/mmc/ioctl.h +++ b/include/uapi/linux/mmc/ioctl.h @@ -5,7 +5,10 @@ #include struct mmc_ioc_cmd { - /* Implies direction of data. true = write, false = read */ + /* + * Direction of data: nonzero = write, zero = read. + * Bit 31 selects 'Reliable Write' for RPMB. + */ int write_flag; /* Application-specific command. true = precede with CMD55 */ -- cgit v1.2.3 From 7239ff4b2be8ec0c3160da7fdd1475785fdb4cb9 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 30 Oct 2018 16:43:23 +0200 Subject: btrfs: Introduce support for FSID change without metadata rewrite This field is going to be used when the user wants to change the UUID of the filesystem without having to rewrite all metadata blocks. This field adds another level of indirection such that when the FSID is changed what really happens is the current UUID (the one with which the fs was created) is copied to the 'metadata_uuid' field in the superblock as well as a new incompat flag is set METADATA_UUID. When the kernel detects this flag is set it knows that the superblock in fact has 2 UUIDs: 1. Is the UUID which is user-visible, currently known as FSID. 2. Metadata UUID - this is the UUID which is stamped into all on-disk datastructures belonging to this file system. When the new incompat flag is present device scanning checks whether both fsid/metadata_uuid of the scanned device match any of the registered filesystems. When the flag is not set then both UUIDs are equal and only the FSID is retained on disk, metadata_uuid is set only in-memory during mount. Additionally a new metadata_uuid field is also added to the fs_info struct. It's initialised either with the FSID in case METADATA_UUID incompat flag is not set or with the metdata_uuid of the superblock otherwise. This commit introduces the new fields as well as the new incompat flag and switches all users of the fsid to the new logic. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba [ minor updates in comments ] Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 1 + include/uapi/linux/btrfs_tree.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 5ca1d21fc4a7..e0763bc4158e 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -269,6 +269,7 @@ struct btrfs_ioctl_fs_info_args { #define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7) #define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8) #define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9) +#define BTRFS_FEATURE_INCOMPAT_METADATA_UUID (1ULL << 10) struct btrfs_ioctl_feature_flags { __u64 compat_flags; diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index aff1356c2bb8..e974f4bb5378 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -458,6 +458,7 @@ struct btrfs_free_space_header { #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) #define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34) #define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35) +#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36) /* -- cgit v1.2.3 From 6c4fc209fcf9d27efbaa48368773e4d2bfbd59aa Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 16 Dec 2018 00:49:47 +0100 Subject: bpf: remove useless version check for prog load Existing libraries and tracing frameworks work around this kernel version check by automatically deriving the kernel version from uname(3) or similar such that the user does not need to do it manually; these workarounds also make the version check useless at the same time. Moreover, most other BPF tracing types enabling bpf_probe_read()-like functionality have /not/ adapted this check, and in general these days it is well understood anyway that all the tracing programs are not stable with regards to future kernels as kernel internal data structures are subject to change from release to release. Back at last netconf we discussed [0] and agreed to remove this check from bpf_prog_load() and instead document it here in the uapi header that there is no such guarantee for stable API for these programs. [0] http://vger.kernel.org/netconf2018_files/DanielBorkmann_netconf2018.pdf Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e7d57e89f25f..1d324c2cbca2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -133,6 +133,14 @@ enum bpf_map_type { BPF_MAP_TYPE_STACK, }; +/* Note that tracing related programs such as + * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT} + * are not subject to a stable API since kernel internal data + * structures can change from release to release and may + * therefore break existing tracing BPF programs. Tracing BPF + * programs correspond to /a/ specific kernel which is to be + * analyzed, and not /a/ specific kernel /and/ all future ones. + */ enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, BPF_PROG_TYPE_SOCKET_FILTER, @@ -343,7 +351,7 @@ union bpf_attr { __u32 log_level; /* verbosity level of verifier */ __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ - __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 kern_version; /* not used */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; __u32 prog_ifindex; /* ifindex of netdev to prep for */ -- cgit v1.2.3 From b61c41c28eb09ae1bb02479a8f65171c037124c6 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Thu, 13 Dec 2018 20:23:26 +0300 Subject: Move EM_XTENSA to uapi/linux/elf-em.h This should never have been defined in the arch tree to begin with, and now uapi/linux/audit.h header is going to use EM_XTENSA in order to define AUDIT_ARCH_XTENSA which is needed to implement syscall_get_arch() which in turn is required to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Cc: Max Filippov Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Chris Zankel Cc: linux-xtensa@linux-xtensa.org Signed-off-by: Dmitry V. Levin Signed-off-by: Max Filippov --- include/uapi/linux/elf-em.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index 93722e60204c..d2fb964432f3 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -34,6 +34,7 @@ #define EM_M32R 88 /* Renesas M32R */ #define EM_MN10300 89 /* Panasonic/MEI MN10300, AM33 */ #define EM_OPENRISC 92 /* OpenRISC 32-bit embedded processor */ +#define EM_XTENSA 94 /* Tensilica Xtensa Architecture */ #define EM_BLACKFIN 106 /* ADI Blackfin Processor */ #define EM_ALTERA_NIOS2 113 /* Altera Nios II soft-core processor */ #define EM_TI_C6000 140 /* TI C6X DSPs */ -- cgit v1.2.3 From 98c3115a4ec56f03056efd9295e0fcb4c5c57a85 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 20 Nov 2018 03:17:01 +0300 Subject: xtensa: define syscall_get_arch() syscall_get_arch() is required to be implemented on all architectures in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Signed-off-by: Dmitry V. Levin Acked-by: Max Filippov Signed-off-by: Max Filippov --- include/uapi/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 818ae690ab79..9e67fd359d58 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -411,6 +411,7 @@ enum { #define AUDIT_ARCH_TILEGX32 (EM_TILEGX|__AUDIT_ARCH_LE) #define AUDIT_ARCH_TILEPRO (EM_TILEPRO|__AUDIT_ARCH_LE) #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_XTENSA (EM_XTENSA) #define AUDIT_PERM_EXEC 1 #define AUDIT_PERM_WRITE 2 -- cgit v1.2.3 From 9d5f9f701b1891466fb3dbb1806ad97716f95cc3 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 15 Dec 2018 22:13:51 -0800 Subject: bpf: btf: fix struct/union/fwd types with kind_flag This patch fixed two issues with BTF. One is related to struct/union bitfield encoding and the other is related to forward type. Issue #1 and solution: ====================== Current btf encoding of bitfield follows what pahole generates. For each bitfield, pahole will duplicate the type chain and put the bitfield size at the final int or enum type. Since the BTF enum type cannot encode bit size, pahole workarounds the issue by generating an int type whenever the enum bit size is not 32. For example, -bash-4.4$ cat t.c typedef int ___int; enum A { A1, A2, A3 }; struct t { int a[5]; ___int b:4; volatile enum A c:4; } g; -bash-4.4$ gcc -c -O2 -g t.c The current kernel supports the following BTF encoding: $ pahole -JV t.o [1] TYPEDEF ___int type_id=2 [2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED [3] ENUM A size=4 vlen=3 A1 val=0 A2 val=1 A3 val=2 [4] STRUCT t size=24 vlen=3 a type_id=5 bits_offset=0 b type_id=9 bits_offset=160 c type_id=11 bits_offset=164 [5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5 [6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none) [7] VOLATILE (anon) type_id=3 [8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none) [9] TYPEDEF ___int type_id=8 [10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED [11] VOLATILE (anon) type_id=10 Two issues are in the above: . by changing enum type to int, we lost the original type information and this will not be ideal later when we try to convert BTF to a header file. . the type duplication for bitfields will cause BTF bloat. Duplicated types cannot be deduplicated later if the bitfield size is different. To fix this issue, this patch implemented a compatible change for BTF struct type encoding: . the bit 31 of struct_type->info, previously reserved, now is used to indicate whether bitfield_size is encoded in btf_member or not. . if bit 31 of struct_type->info is set, btf_member->offset will encode like: bit 0 - 23: bit offset bit 24 - 31: bitfield size if bit 31 is not set, the old behavior is preserved: bit 0 - 31: bit offset So if the struct contains a bit field, the maximum bit offset will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum bitfield size will be 256 which is enough for today as maximum bitfield in compiler can be 128 where int128 type is supported. This kernel patch intends to support the new BTF encoding: $ pahole -JV t.o [1] TYPEDEF ___int type_id=2 [2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED [3] ENUM A size=4 vlen=3 A1 val=0 A2 val=1 A3 val=2 [4] STRUCT t kind_flag=1 size=24 vlen=3 a type_id=5 bitfield_size=0 bits_offset=0 b type_id=1 bitfield_size=4 bits_offset=160 c type_id=7 bitfield_size=4 bits_offset=164 [5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5 [6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none) [7] VOLATILE (anon) type_id=3 Issue #2 and solution: ====================== Current forward type in BTF does not specify whether the original type is struct or union. This will not work for type pretty print and BTF-to-header-file conversion as struct/union must be specified. $ cat tt.c struct t; union u; int foo(struct t *t, union u *u) { return 0; } $ gcc -c -g -O2 tt.c $ pahole -JV tt.o [1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED [2] FWD t type_id=0 [3] PTR (anon) type_id=2 [4] FWD u type_id=0 [5] PTR (anon) type_id=4 To fix this issue, similar to issue #1, type->info bit 31 is used. If the bit is set, it is union type. Otherwise, it is a struct type. $ pahole -JV tt.o [1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED [2] FWD t kind_flag=0 type_id=0 [3] PTR (anon) kind_flag=0 type_id=2 [4] FWD u kind_flag=1 type_id=0 [5] PTR (anon) kind_flag=0 type_id=4 Pahole/LLVM change: =================== The new kind_flag functionality has been implemented in pahole and llvm: https://github.com/yonghong-song/pahole/tree/bitfield https://github.com/yonghong-song/llvm/tree/bitfield Note that pahole hasn't implemented func/func_proto kind and .BTF.ext. So to print function signature with bpftool, the llvm compiler should be used. Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)") Acked-by: Martin KaFai Lau Signed-off-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- include/uapi/linux/btf.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 14f66948fc95..7b7475ef2f17 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -34,7 +34,9 @@ struct btf_type { * bits 0-15: vlen (e.g. # of struct's members) * bits 16-23: unused * bits 24-27: kind (e.g. int, ptr, array...etc) - * bits 28-31: unused + * bits 28-30: unused + * bit 31: kind_flag, currently used by + * struct, union and fwd */ __u32 info; /* "size" is used by INT, ENUM, STRUCT and UNION. @@ -52,6 +54,7 @@ struct btf_type { #define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) #define BTF_INFO_VLEN(info) ((info) & 0xffff) +#define BTF_INFO_KFLAG(info) ((info) >> 31) #define BTF_KIND_UNKN 0 /* Unknown */ #define BTF_KIND_INT 1 /* Integer */ @@ -110,9 +113,22 @@ struct btf_array { struct btf_member { __u32 name_off; __u32 type; - __u32 offset; /* offset in bits */ + /* If the type info kind_flag is set, the btf_member offset + * contains both member bitfield size and bit offset. The + * bitfield size is set for bitfield members. If the type + * info kind_flag is not set, the offset contains only bit + * offset. + */ + __u32 offset; }; +/* If the struct/union type info kind_flag is set, the + * following two macros are used to access bitfield_size + * and bit_offset from btf_member.offset. + */ +#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24) +#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff) + /* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". * The exact number of btf_param is stored in the vlen (of the * info in "struct btf_type"). -- cgit v1.2.3 From 30db641ef4f68054db9b191b6c0200fb1a96d458 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 15 Dec 2018 11:03:23 +0200 Subject: cfg80211: clarify LCI/civic location documentation The older code and current userspace assumed that this data is the content of the Measurement Report element, starting with the Measurement Token. Clarify this in the documentation. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 2b53c0e949c7..4625a8624ba2 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5893,9 +5893,11 @@ enum nl80211_external_auth_action { * @__NL80211_FTM_RESP_ATTR_INVALID: Invalid * @NL80211_FTM_RESP_ATTR_ENABLED: FTM responder is enabled * @NL80211_FTM_RESP_ATTR_LCI: The content of Measurement Report Element - * (9.4.2.22 in 802.11-2016) with type 8 - LCI (9.4.2.22.10) + * (9.4.2.22 in 802.11-2016) with type 8 - LCI (9.4.2.22.10), + * i.e. starting with the measurement token * @NL80211_FTM_RESP_ATTR_CIVIC: The content of Measurement Report Element - * (9.4.2.22 in 802.11-2016) with type 11 - Civic (Section 9.4.2.22.13) + * (9.4.2.22 in 802.11-2016) with type 11 - Civic (Section 9.4.2.22.13), + * i.e. starting with the measurement token * @__NL80211_FTM_RESP_ATTR_LAST: Internal * @NL80211_FTM_RESP_ATTR_MAX: highest FTM responder attribute. */ @@ -6295,9 +6297,15 @@ enum nl80211_peer_measurement_ftm_failure_reasons { * @NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE: distance variance (u64, mm^2, note * that standard deviation is the square root of variance, optional) * @NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD: distance spread (u64, mm, optional) - * @NL80211_PMSR_FTM_RESP_ATTR_LCI: LCI data from peer (binary, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_LCI: LCI data from peer (binary, optional); + * this is the contents of the Measurement Report Element (802.11-2016 + * 9.4.2.22.1) starting with the Measurement Token, with Measurement + * Type 8. * @NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC: civic location data from peer - * (binary, optional) + * (binary, optional); + * this is the contents of the Measurement Report Element (802.11-2016 + * 9.4.2.22.1) starting with the Measurement Token, with Measurement + * Type 11. * @NL80211_PMSR_FTM_RESP_ATTR_PAD: ignore, for u64/s64 padding only * * @NUM_NL80211_PMSR_FTM_RESP_ATTR: internal -- cgit v1.2.3 From 30c63115e20b70f89b7cfb66b35e2a0ef4b0ef07 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Tue, 4 Dec 2018 17:46:52 +0530 Subject: nl80211: Add support to notify radar event info received from STA Currently radar detection and corresponding channel switch is handled at the AP device. STA ignores these detected radar events since the radar signal can be seen mostly by the AP as well. But in scenarios where a radar signal is seen only at STA, notifying this event to the AP which can trigger a channel switch can be useful. Stations can report such radar events autonomously through Spectrum management (Measurement Report) action frame to its AP. The userspace on processing the report can notify the kernel with the use of the added NL80211_CMD_NOTIFY_RADAR to indicate the detected event and inturn adding the reported channel to NOL. Signed-off-by: Sriram R Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4625a8624ba2..31ae5c7f10e3 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1060,6 +1060,11 @@ * the measurement completed, using the measurement cookie * (%NL80211_ATTR_COOKIE). * + * @NL80211_CMD_NOTIFY_RADAR: Notify the kernel that a radar signal was + * detected and reported by a neighboring device on the channel + * indicated by %NL80211_ATTR_WIPHY_FREQ and other attributes + * determining the width and type. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1278,6 +1283,8 @@ enum nl80211_commands { NL80211_CMD_PEER_MEASUREMENT_RESULT, NL80211_CMD_PEER_MEASUREMENT_COMPLETE, + NL80211_CMD_NOTIFY_RADAR, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ -- cgit v1.2.3 From 3bdbd0228e7555ec745e08469b98e5a0966409d6 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 16 Dec 2018 15:47:04 -0800 Subject: bpf: sockmap, metadata support for reporting size of msg This adds metadata to sk_msg_md for BPF programs to read the sk_msg size. When the SK_MSG program is running under an application that is using sendfile the data is not copied into sk_msg buffers by default. Rather the BPF program uses sk_msg_pull_data to read the bytes in. This avoids doing the costly memcopy instructions when they are not in fact needed. However, if we don't know the size of the sk_msg we have to guess if needed bytes are available by doing a pull request which may fail. By including the size of the sk_msg BPF programs can check the size before issuing sk_msg_pull_data requests. Additionally, the same applies for sendmsg calls when the application provides multiple iovs. Here the BPF program needs to pull in data to update data pointers but its not clear where the data ends without a size parameter. In many cases "guessing" is not easy to do and results in multiple calls to pull and without bounded loops everything gets fairly tricky. Clean this up by including a u32 size field. Note, all writes into sk_msg_md are rejected already from sk_msg_is_valid_access so nothing additional is needed there. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1d324c2cbca2..91c43884f295 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2665,6 +2665,7 @@ struct sk_msg_md { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + __u32 size; /* Total size of sk_msg */ }; struct sk_reuseport_md { -- cgit v1.2.3 From 3ad20fe393b31025bebfc2d76964561f65df48aa Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 14 Dec 2018 13:11:14 +0100 Subject: binder: implement binderfs As discussed at Linux Plumbers Conference 2018 in Vancouver [1] this is the implementation of binderfs. /* Abstract */ binderfs is a backwards-compatible filesystem for Android's binder ipc mechanism. Each ipc namespace will mount a new binderfs instance. Mounting binderfs multiple times at different locations in the same ipc namespace will not cause a new super block to be allocated and hence it will be the same filesystem instance. Each new binderfs mount will have its own set of binder devices only visible in the ipc namespace it has been mounted in. All devices in a new binderfs mount will follow the scheme binder%d and numbering will always start at 0. /* Backwards compatibility */ Devices requested in the Kconfig via CONFIG_ANDROID_BINDER_DEVICES for the initial ipc namespace will work as before. They will be registered via misc_register() and appear in the devtmpfs mount. Specifically, the standard devices binder, hwbinder, and vndbinder will all appear in their standard locations in /dev. Mounting or unmounting the binderfs mount in the initial ipc namespace will have no effect on these devices, i.e. they will neither show up in the binderfs mount nor will they disappear when the binderfs mount is gone. /* binder-control */ Each new binderfs instance comes with a binder-control device. No other devices will be present at first. The binder-control device can be used to dynamically allocate binder devices. All requests operate on the binderfs mount the binder-control device resides in. Assuming a new instance of binderfs has been mounted at /dev/binderfs via mount -t binderfs binderfs /dev/binderfs. Then a request to create a new binder device can be made as illustrated in [2]. Binderfs devices can simply be removed via unlink(). /* Implementation details */ - dynamic major number allocation: When binderfs is registered as a new filesystem it will dynamically allocate a new major number. The allocated major number will be returned in struct binderfs_device when a new binder device is allocated. - global minor number tracking: Minor are tracked in a global idr struct that is capped at BINDERFS_MAX_MINOR. The minor number tracker is protected by a global mutex. This is the only point of contention between binderfs mounts. - struct binderfs_info: Each binderfs super block has its own struct binderfs_info that tracks specific details about a binderfs instance: - ipc namespace - dentry of the binder-control device - root uid and root gid of the user namespace the binderfs instance was mounted in - mountable by user namespace root: binderfs can be mounted by user namespace root in a non-initial user namespace. The devices will be owned by user namespace root. - binderfs binder devices without misc infrastructure: New binder devices associated with a binderfs mount do not use the full misc_register() infrastructure. The misc_register() infrastructure can only create new devices in the host's devtmpfs mount. binderfs does however only make devices appear under its own mountpoint and thus allocates new character device nodes from the inode of the root dentry of the super block. This will have the side-effect that binderfs specific device nodes do not appear in sysfs. This behavior is similar to devpts allocated pts devices and has no effect on the functionality of the ipc mechanism itself. [1]: https://goo.gl/JL2tfX [2]: program to allocate a new binderfs binder device: #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include int main(int argc, char *argv[]) { int fd, ret, saved_errno; size_t len; struct binderfs_device device = { 0 }; if (argc < 2) exit(EXIT_FAILURE); len = strlen(argv[1]); if (len > BINDERFS_MAX_NAME) exit(EXIT_FAILURE); memcpy(device.name, argv[1], len); fd = open("/dev/binderfs/binder-control", O_RDONLY | O_CLOEXEC); if (fd < 0) { printf("%s - Failed to open binder-control device\n", strerror(errno)); exit(EXIT_FAILURE); } ret = ioctl(fd, BINDER_CTL_ADD, &device); saved_errno = errno; close(fd); errno = saved_errno; if (ret < 0) { printf("%s - Failed to allocate new binder device\n", strerror(errno)); exit(EXIT_FAILURE); } printf("Allocated new binder device with major %d, minor %d, and " "name %s\n", device.major, device.minor, device.name); exit(EXIT_SUCCESS); } Cc: Martijn Coenen Cc: Greg Kroah-Hartman Signed-off-by: Christian Brauner Acked-by: Todd Kjos Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder_ctl.h | 35 +++++++++++++++++++++++++++++++++ include/uapi/linux/magic.h | 1 + 2 files changed, 36 insertions(+) create mode 100644 include/uapi/linux/android/binder_ctl.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/android/binder_ctl.h b/include/uapi/linux/android/binder_ctl.h new file mode 100644 index 000000000000..65b2efd1a0a5 --- /dev/null +++ b/include/uapi/linux/android/binder_ctl.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2018 Canonical Ltd. + * + */ + +#ifndef _UAPI_LINUX_BINDER_CTL_H +#define _UAPI_LINUX_BINDER_CTL_H + +#include +#include +#include + +#define BINDERFS_MAX_NAME 255 + +/** + * struct binderfs_device - retrieve information about a new binder device + * @name: the name to use for the new binderfs binder device + * @major: major number allocated for binderfs binder devices + * @minor: minor number allocated for the new binderfs binder device + * + */ +struct binderfs_device { + char name[BINDERFS_MAX_NAME + 1]; + __u8 major; + __u8 minor; +}; + +/** + * Allocate a new binder device. + */ +#define BINDER_CTL_ADD _IOWR('b', 1, struct binderfs_device) + +#endif /* _UAPI_LINUX_BINDER_CTL_H */ + diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 96c24478d8ce..f8c00045d537 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -73,6 +73,7 @@ #define DAXFS_MAGIC 0x64646178 #define BINFMTFS_MAGIC 0x42494e4d #define DEVPTS_SUPER_MAGIC 0x1cd1 +#define BINDERFS_SUPER_MAGIC 0x6c6f6f70 #define FUTEXFS_SUPER_MAGIC 0xBAD1DEA #define PIPEFS_MAGIC 0x50495045 #define PROC_SUPER_MAGIC 0x9fa0 -- cgit v1.2.3 From 1f23816b8eb8fdc39990abe166c10a18c16f6b21 Mon Sep 17 00:00:00 2001 From: Changpeng Liu Date: Thu, 1 Nov 2018 15:40:35 -0700 Subject: virtio_blk: add discard and write zeroes support In commit 88c85538, "virtio-blk: add discard and write zeroes features to specification" (https://github.com/oasis-tcs/virtio-spec), the virtio block specification has been extended to add VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES commands. This patch enables support for discard and write zeroes in the virtio-blk driver when the device advertises the corresponding features, VIRTIO_BLK_F_DISCARD and VIRTIO_BLK_F_WRITE_ZEROES. Signed-off-by: Changpeng Liu Signed-off-by: Daniel Verkamp Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- include/uapi/linux/virtio_blk.h | 54 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index 9ebe4d968dd5..0f99d7b49ede 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -38,6 +38,8 @@ #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ #define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */ #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ +#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ +#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ /* Legacy feature bits */ #ifndef VIRTIO_BLK_NO_LEGACY @@ -86,6 +88,39 @@ struct virtio_blk_config { /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */ __u16 num_queues; + + /* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */ + /* + * The maximum discard sectors (in 512-byte sectors) for + * one segment. + */ + __u32 max_discard_sectors; + /* + * The maximum number of discard segments in a + * discard command. + */ + __u32 max_discard_seg; + /* Discard commands must be aligned to this number of sectors. */ + __u32 discard_sector_alignment; + + /* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */ + /* + * The maximum number of write zeroes sectors (in 512-byte sectors) in + * one segment. + */ + __u32 max_write_zeroes_sectors; + /* + * The maximum number of segments in a write zeroes + * command. + */ + __u32 max_write_zeroes_seg; + /* + * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the + * deallocation of one or more of the sectors. + */ + __u8 write_zeroes_may_unmap; + + __u8 unused1[3]; } __attribute__((packed)); /* @@ -114,6 +149,12 @@ struct virtio_blk_config { /* Get device ID command */ #define VIRTIO_BLK_T_GET_ID 8 +/* Discard command */ +#define VIRTIO_BLK_T_DISCARD 11 + +/* Write zeroes command */ +#define VIRTIO_BLK_T_WRITE_ZEROES 13 + #ifndef VIRTIO_BLK_NO_LEGACY /* Barrier before this op. */ #define VIRTIO_BLK_T_BARRIER 0x80000000 @@ -133,6 +174,19 @@ struct virtio_blk_outhdr { __virtio64 sector; }; +/* Unmap this range (only valid for write zeroes command) */ +#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP 0x00000001 + +/* Discard/write zeroes range for each request. */ +struct virtio_blk_discard_write_zeroes { + /* discard/write zeroes start sector */ + __le64 sector; + /* number of discard/write zeroes sectors */ + __le32 num_sectors; + /* flags for this range */ + __le32 flags; +}; + #ifndef VIRTIO_BLK_NO_LEGACY struct virtio_scsi_inhdr { __virtio32 errors; -- cgit v1.2.3 From 4b86713236e4bd6ea6c881a97711ae039fc4069b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 17 Dec 2018 18:35:09 +0100 Subject: vhost: split structs into a separate header file vhost structs are shared by vhost-kernel and vhost-user. Split them into a separate file to ease copying them into programs that implement either the server or the client side of vhost-user. Signed-off-by: Paolo Bonzini Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vhost.h | 113 +--------------------------------- include/uapi/linux/vhost_types.h | 128 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 111 deletions(-) create mode 100644 include/uapi/linux/vhost_types.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 84c3de89696a..40d028eed645 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -11,94 +11,9 @@ * device configuration. */ +#include #include -#include #include -#include -#include - -struct vhost_vring_state { - unsigned int index; - unsigned int num; -}; - -struct vhost_vring_file { - unsigned int index; - int fd; /* Pass -1 to unbind from file. */ - -}; - -struct vhost_vring_addr { - unsigned int index; - /* Option flags. */ - unsigned int flags; - /* Flag values: */ - /* Whether log address is valid. If set enables logging. */ -#define VHOST_VRING_F_LOG 0 - - /* Start of array of descriptors (virtually contiguous) */ - __u64 desc_user_addr; - /* Used structure address. Must be 32 bit aligned */ - __u64 used_user_addr; - /* Available structure address. Must be 16 bit aligned */ - __u64 avail_user_addr; - /* Logging support. */ - /* Log writes to used structure, at offset calculated from specified - * address. Address must be 32 bit aligned. */ - __u64 log_guest_addr; -}; - -/* no alignment requirement */ -struct vhost_iotlb_msg { - __u64 iova; - __u64 size; - __u64 uaddr; -#define VHOST_ACCESS_RO 0x1 -#define VHOST_ACCESS_WO 0x2 -#define VHOST_ACCESS_RW 0x3 - __u8 perm; -#define VHOST_IOTLB_MISS 1 -#define VHOST_IOTLB_UPDATE 2 -#define VHOST_IOTLB_INVALIDATE 3 -#define VHOST_IOTLB_ACCESS_FAIL 4 - __u8 type; -}; - -#define VHOST_IOTLB_MSG 0x1 -#define VHOST_IOTLB_MSG_V2 0x2 - -struct vhost_msg { - int type; - union { - struct vhost_iotlb_msg iotlb; - __u8 padding[64]; - }; -}; - -struct vhost_msg_v2 { - __u32 type; - __u32 reserved; - union { - struct vhost_iotlb_msg iotlb; - __u8 padding[64]; - }; -}; - -struct vhost_memory_region { - __u64 guest_phys_addr; - __u64 memory_size; /* bytes */ - __u64 userspace_addr; - __u64 flags_padding; /* No flags are currently specified. */ -}; - -/* All region addresses and sizes must be 4K aligned. */ -#define VHOST_PAGE_SIZE 0x1000 - -struct vhost_memory { - __u32 nregions; - __u32 padding; - struct vhost_memory_region regions[0]; -}; /* ioctls */ @@ -186,31 +101,7 @@ struct vhost_memory { * device. This can be used to stop the ring (e.g. for migration). */ #define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) -/* Feature bits */ -/* Log all write descriptors. Can be changed while device is active. */ -#define VHOST_F_LOG_ALL 26 -/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ -#define VHOST_NET_F_VIRTIO_NET_HDR 27 - -/* VHOST_SCSI specific definitions */ - -/* - * Used by QEMU userspace to ensure a consistent vhost-scsi ABI. - * - * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate + - * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage - * ABI Rev 1: January 2013. Ignore vhost_tpgt filed in struct vhost_scsi_target. - * All the targets under vhost_wwpn can be seen and used by guset. - */ - -#define VHOST_SCSI_ABI_VERSION 1 - -struct vhost_scsi_target { - int abi_version; - char vhost_wwpn[224]; /* TRANSPORT_IQN_LEN */ - unsigned short vhost_tpgt; - unsigned short reserved; -}; +/* VHOST_SCSI specific defines */ #define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target) #define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target) diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h new file mode 100644 index 000000000000..c907290ff065 --- /dev/null +++ b/include/uapi/linux/vhost_types.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_VHOST_TYPES_H +#define _LINUX_VHOST_TYPES_H +/* Userspace interface for in-kernel virtio accelerators. */ + +/* vhost is used to reduce the number of system calls involved in virtio. + * + * Existing virtio net code is used in the guest without modification. + * + * This header includes interface used by userspace hypervisor for + * device configuration. + */ + +#include +#include +#include +#include + +struct vhost_vring_state { + unsigned int index; + unsigned int num; +}; + +struct vhost_vring_file { + unsigned int index; + int fd; /* Pass -1 to unbind from file. */ + +}; + +struct vhost_vring_addr { + unsigned int index; + /* Option flags. */ + unsigned int flags; + /* Flag values: */ + /* Whether log address is valid. If set enables logging. */ +#define VHOST_VRING_F_LOG 0 + + /* Start of array of descriptors (virtually contiguous) */ + __u64 desc_user_addr; + /* Used structure address. Must be 32 bit aligned */ + __u64 used_user_addr; + /* Available structure address. Must be 16 bit aligned */ + __u64 avail_user_addr; + /* Logging support. */ + /* Log writes to used structure, at offset calculated from specified + * address. Address must be 32 bit aligned. */ + __u64 log_guest_addr; +}; + +/* no alignment requirement */ +struct vhost_iotlb_msg { + __u64 iova; + __u64 size; + __u64 uaddr; +#define VHOST_ACCESS_RO 0x1 +#define VHOST_ACCESS_WO 0x2 +#define VHOST_ACCESS_RW 0x3 + __u8 perm; +#define VHOST_IOTLB_MISS 1 +#define VHOST_IOTLB_UPDATE 2 +#define VHOST_IOTLB_INVALIDATE 3 +#define VHOST_IOTLB_ACCESS_FAIL 4 + __u8 type; +}; + +#define VHOST_IOTLB_MSG 0x1 +#define VHOST_IOTLB_MSG_V2 0x2 + +struct vhost_msg { + int type; + union { + struct vhost_iotlb_msg iotlb; + __u8 padding[64]; + }; +}; + +struct vhost_msg_v2 { + __u32 type; + __u32 reserved; + union { + struct vhost_iotlb_msg iotlb; + __u8 padding[64]; + }; +}; + +struct vhost_memory_region { + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ + __u64 userspace_addr; + __u64 flags_padding; /* No flags are currently specified. */ +}; + +/* All region addresses and sizes must be 4K aligned. */ +#define VHOST_PAGE_SIZE 0x1000 + +struct vhost_memory { + __u32 nregions; + __u32 padding; + struct vhost_memory_region regions[0]; +}; + +/* VHOST_SCSI specific definitions */ + +/* + * Used by QEMU userspace to ensure a consistent vhost-scsi ABI. + * + * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate + + * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage + * ABI Rev 1: January 2013. Ignore vhost_tpgt field in struct vhost_scsi_target. + * All the targets under vhost_wwpn can be seen and used by guset. + */ + +#define VHOST_SCSI_ABI_VERSION 1 + +struct vhost_scsi_target { + int abi_version; + char vhost_wwpn[224]; /* TRANSPORT_IQN_LEN */ + unsigned short vhost_tpgt; + unsigned short reserved; +}; + +/* Feature bits */ +/* Log all write descriptors. Can be changed while device is active. */ +#define VHOST_F_LOG_ALL 26 +/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ +#define VHOST_NET_F_VIRTIO_NET_HDR 27 + +#endif -- cgit v1.2.3